Wednesday, October 9, 2024
Google search engine
HomeData Modelling & AIFind length of repeating Substring

Find length of repeating Substring

Given a string s and a positive integer K, the task is to find the length of the longest contiguous substring that appears at least K times in s.

Examples:

Input: s = “abababcdabcd”, K = 2
Output: 4
Explanation: “abcd” is the longest repeating substring at least K times.

Input: s = “aacd”, K = 4
Output: 0

Approach: This can be solved with the following idea:

We will use the RabinKarp algorithm to solve this problem, which is a rolling hash-based algorithm to find the longest common substring that repeats at least k times.

Below are the steps: 

  • Initialize the parameters: base, modulus, and power.
  • Define a function for computing the rolling hash.
  • Define a function for binary search to find the largest substring length.
  • For each possible substring length, use the Rabin-Karp algorithm to check if it repeats at least K times.
  • Return the longest substring length that meets the requirement.

Below is the implementation of the above code:

C++




// C++ code for the above approach:
#include <algorithm>
#include <iostream>
#include <string>
#include <unordered_map>
#include <vector>
 
using namespace std;
 
// Define constants for the base and
// modulus of the hash function
const int BASE = 53;
const int MODULUS = 1e9 + 7;
 
// Define a maximum length for the
// input string
const int MAX_LENGTH = 100000;
 
// Function to compute the rolling hash
// for all substrings of length len in
// the input string s
long long rolling_hash(const string& s, int len,
                       vector<long long>& power,
                       vector<long long>& hashes)
{
 
    // Initialize the hash value for the
    // first substring
    long long hash = 0;
    for (int i = 0; i < len; ++i) {
        hash = (hash * BASE + s[i]) % MODULUS;
    }
 
    // Store the hash value for the first
    // substring in the hashes vector
    hashes[1] = hash;
 
    // Compute the hash value for all
    // subsequent substrings using the
    // rolling hash technique
    for (int i = len; i < s.size(); ++i) {
 
        hash = ((hash - power[len - 1] * s[i - len])
                    % MODULUS
                + MODULUS)
               % MODULUS;
        hash = (hash * BASE + s[i]) % MODULUS;
 
        // Store the hash value in the
        // hashes vector
        hashes[i - len + 2] = hash;
    }
 
    // Return the hash value for the last
    // substring
    return hash;
}
 
// Binary search to find the length of
// the longest substring that repeats
// at least k times
int binary_search(const string& s, int k)
{
    // Initialize the range of possible
    // substring lengths to check
    int low = 0, high = s.size();
    int ans = 0;
 
    // Initialize vectors to store the
    // powers of the base and the hash
    // values for all substrings
    vector<long long> power(MAX_LENGTH), hashes(MAX_LENGTH);
    power[0] = 1;
    for (int i = 1; i < MAX_LENGTH; ++i) {
        power[i] = (power[i - 1] * BASE) % MODULUS;
    }
 
    // Perform binary search on the range
    // of possible substring lengths
    while (low <= high) {
 
        // Compute the midpoint of
        // the range
        int mid = (low + high) / 2;
 
        // Compute the rolling hash for
        // all substrings of length mid
        rolling_hash(s, mid, power, hashes);
 
        // Count the frequency of
        // each hash value
        unordered_map<long long, int> count;
        for (int i = 1; i <= s.size() - mid + 1; ++i) {
            count[hashes[i]]++;
        }
 
        // Check if any hash value
        // appears at least k times
        bool found = false;
        for (const auto& p : count) {
            if (p.second >= k) {
                found = true;
                break;
            }
        }
 
        // If a repeating substring of
        // length mid appears at least k
        // times, update the answer and
        // search the upper half
        // of the range
        if (found) {
            ans = mid;
            low = mid + 1;
        }
        else {
 
            // Otherwise, search the
            // lower half of the range
            high = mid - 1;
        }
    }
 
    // Return the length of the
    // longest substring
    return ans;
}
 
// Driver code
int main()
{
 
    string s = "abababcdabcd";
    int k = 2;
 
    // Function call
    int ans = binary_search(s, k);
 
    cout << ans << endl;
 
    return 0;
}


Java




import java.util.HashMap;
import java.util.Map;
 
public class GFG {
    public static void main(String[] args) {
        String s = "abababcdabcd";
        int k = 2;
       
       // Function call
        int ans = binarySearch(s, k);
        System.out.println(ans);
    }
 
      // Binary search to find the length of
    // the longest substring that repeats
    // at least k times
    public static int binarySearch(String s, int k) {
   
          // Initialize the range of possible
        // substring lengths to check
        int low = 0;
        int high = s.length();
        int ans = 0;
       
          // Perform binary search on the range
        // of possible substring lengths
        while (low <= high) {
           
              // Compute the midpoint of
            // the range
            int mid = (low + high) / 2;
           
              // Compute the rolling hash for
            // all substrings of length mid
            int[] hashes = rollingHash(s, mid);
           
              // Count the frequency of
            // each hash value
            Map<Integer, Integer> count = new HashMap<>();
            for (int hash : hashes) {
                count.put(hash, count.getOrDefault(hash, 0) + 1);
            }
           
              // Check if any hash value
            // appears at least k times
            boolean found = false;
            for (int value : count.values()) {
                if (value >= k) {
                    found = true;
                    break;
                }
            }
           
         // If a repeating substring of
            // length mid appears at least k
            // times, update the answer and
         // search the upper half
            // of the range
            if (found) {
                ans = mid;
                low = mid + 1;
            }
          // Otherwise, search the
          // lower half of the range
          else {
                high = mid - 1;
            }
        }
       
          // Return the length of the
        // longest substring
        return ans;
    }
 
    public static int[] rollingHash(String s, int length) {
       
          // Define constants for the base and
        // modulus of the hash function
        int base = 53;
        int modulus = (int) (Math.pow(10, 9) + 7);
         
       
          int power = 1;
        int hashValue = 0;
        int[] hashes = new int[s.length() - length + 1];
       
          // Initialize the hash value for the
        // first substring
        for (int i = 0; i < length; i++) {
            hashValue = (hashValue * base + (int) s.charAt(i)) % modulus;
            power = (power * base) % modulus;
        }
        hashes[0] = hashValue;
       
          // Compute the hash value for all
        // subsequent substrings using the
        // rolling hash technique
        for (int i = length; i < s.length(); i++) {
            hashValue = (hashValue * base -
            (int) s.charAt(i - length) * power + (int) s.charAt(i)) % modulus;
           
            // Store the hash value in the
            // hashes vector
              hashes[i - length + 1] = hashValue;
        }
       
          // Return the hash value for the last
        // substring
        return hashes;
    }
}


Python3




# Python code for the above approach
 
import collections
 
# Function to compute the rolling hash
# for all substrings of length len in
# the input string s
def rolling_hash(s, length):
    base = 53
    modulus = 10**9 + 7
    power = 1
    hash_value = 0
    hashes = []
 
    for i in range(length):
        hash_value = (hash_value * base + ord(s[i])) % modulus
        power = (power * base) % modulus
 
    hashes.append(hash_value)
 
    for i in range(length, len(s)):
        hash_value = (hash_value * base - ord(s[i - length]) * power + ord(s[i])) % modulus
        hashes.append(hash_value)
 
    return hashes
 
# Binary search to find the length of
# the longest substring that repeats
# at least k times
def binary_search(s, k):
    low = 0
    high = len(s)
    ans = 0
 
    while low <= high:
        mid = (low + high) // 2
 
        hashes = rolling_hash(s, mid)
 
        count = collections.Counter(hashes)
 
        found = False
        for value in count.values():
            if value >= k:
                found = True
                break
 
        if found:
            ans = mid
            low = mid + 1
        else:
            high = mid - 1
 
    return ans
 
# Driver code
if __name__ == "__main__":
    s = "abababcdabcd"
    k = 2
 
    ans = binary_search(s, k)
 
    print(ans)


C#




using System;
using System.Collections.Generic;
 
public class GFG
{
    public static void Main(string[] args)
    {
        string s = "abababcdabcd";
        int k = 2;
 
        // Function call
        int ans = BinarySearch(s, k);
        Console.WriteLine(ans);
    }
 
    // Binary search to find the length of
    // the longest substring that repeats
    // at least k times
    public static int BinarySearch(string s, int k)
    {
        // Initialize the range of possible
        // substring lengths to check
        int low = 0;
        int high = s.Length;
        int ans = 0;
 
        // Perform binary search on the range
        // of possible substring lengths
        while (low <= high)
        {
            // Compute the midpoint of
            // the range
            int mid = (low + high) / 2;
 
            // Compute the rolling hash for
            // all substrings of length mid
            int[] hashes = RollingHash(s, mid);
 
            // Count the frequency of
            // each hash value
            Dictionary<int, int> count = new Dictionary<int, int>();
            foreach (int hash in hashes)
            {
                if (count.ContainsKey(hash))
                    count[hash]++;
                else
                    count[hash] = 1;
            }
 
            // Check if any hash value
            // appears at least k times
            bool found = false;
            foreach (int value in count.Values)
            {
                if (value >= k)
                {
                    found = true;
                    break;
                }
            }
 
            // If a repeating substring of
            // length mid appears at least k
            // times, update the answer and
            // search the upper half
            // of the range
            if (found)
            {
                ans = mid;
                low = mid + 1;
            }
            // Otherwise, search the
            // lower half of the range
            else
            {
                high = mid - 1;
            }
        }
 
        // Return the length of the
        // longest substring
        return ans;
    }
 
    public static int[] RollingHash(string s, int length)
    {
        // Define constants for the base and
        // modulus of the hash function
        int @base = 53;
        int modulus = (int)(Math.Pow(10, 9) + 7);
 
        int power = 1;
        int hashValue = 0;
        int[] hashes = new int[s.Length - length + 1];
 
        // Initialize the hash value for the
        // first substring
        for (int i = 0; i < length; i++)
        {
            hashValue = (int)(((long)hashValue * @base + (int)s[i]) % modulus);
            power = (int)(((long)power * @base) % modulus);
        }
        hashes[0] = hashValue;
 
        // Compute the hash value for all
        // subsequent substrings using the
        // rolling hash technique
        for (int i = length; i < s.Length; i++)
        {
            hashValue = (int)((((long)hashValue * @base - (int)s[i - length] * power + (int)s[i]) % modulus + modulus) % modulus);
 
            // Store the hash value in the
            // hashes vector
            hashes[i - length + 1] = hashValue;
        }
 
        // Return the hash value for the last
        // substring
        return hashes;
    }
}


Javascript




function rolling_hash(s, length) {
     
    // Define constants for the base and
    // modulus of the hash function
    const base = 53;
    const modulus = 10**9 + 7;
    let power = 1;
     
    // Initialize the hash value for the
    // first substring
    let hash_value = 0;
    let hashes = [];
    for (let i = 0; i < length; i++) {
        hash_value = (hash_value * base + s.charCodeAt(i)) % modulus;
        power = (power * base) % modulus;
    }
     
    // Store the hash value for the first
    // substring in the hashes vector
    hashes.push(hash_value);
     
    // Compute the hash value for all
    // subsequent substrings using the
    // rolling hash technique
    for (let i = length; i < s.length; i++) {
        hash_value = (hash_value * base - s.charCodeAt(i - length) * power + s.charCodeAt(i)) % modulus;
         
        // Store the hash value in the
        // hashes vector
        hashes.push(hash_value);
    }
     
    // Return the hash value for the last
    // substring
    return hashes;
}
 
// Binary search to find the length of
// the longest substring that repeats
// at least k times
function binary_search(s, k) {
     
    // Initialize the range of possible
    // substring lengths to check
    let low = 0;
    let high = s.length;
    let ans = 0;
     
    // Perform binary search on the range
    // of possible substring lengths
    while (low <= high) {
         
        // Compute the midpoint of
        // the range
        let mid = Math.floor((low + high) / 2);
         
        // Compute the rolling hash for
        // all substrings of length mid
        let hashes = rolling_hash(s, mid);
         
        // Count the frequency of
        // each hash value
        let count = new Map();
        for (let i = 0; i < hashes.length; i++) {
            if (count.has(hashes[i])) {
                count.set(hashes[i], count.get(hashes[i]) + 1);
            } else {
                count.set(hashes[i], 1);
            }
        }
         
        // Check if any hash value
        // appears at least k times
        let found = false;
        for (let value of count.values()) {
            if (value >= k) {
                found = true;
                break;
            }
        }
         
        // If a repeating substring of
        // length mid appears at least k
        // times, update the answer and
        // search the upper half
        // of the range
        if (found) {
            ans = mid;
            low = mid + 1;
        }
        // Otherwise, search the
        // lower half of the range
        else {
            high = mid - 1;
        }
    }
     
    // Return the length of the
    // longest substring
    return ans;
}
 
 
// Function call
let s = "abababcdabcd";
let k = 2;
let ans = binary_search(s, k);
console.log(ans);


Output

4






Time Complexity: O(N*log(N))
Auxiliary Space: O(N)

Feeling lost in the world of random DSA topics, wasting time without progress? It’s time for a change! Join our DSA course, where we’ll guide you on an exciting journey to master DSA efficiently and on schedule.
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 neveropen!

Commit to GfG’s Three-90 Challenge! Purchase a course, complete 90% in 90 days, and save 90% cost click here to explore.

Last Updated :
14 Aug, 2023
Like Article
Save Article


Previous

<!–

8 Min Read | Java

–>


Next


<!–

8 Min Read | Java

–>

Share your thoughts in the comments

RELATED ARTICLES

Most Popular

Recent Comments