Monday, January 13, 2025
Google search engine
HomeData Modelling & AICount number of distinct substrings of a given length

Count number of distinct substrings of a given length

Given a string S of length N consisting of lower-case English alphabets and an integer ‘l’, find the number of distinct substrings of length ‘l’ of the given string. 

Examples: 

Input : s = “abcbab”, l = 2 
Output :
All distinct sub-strings of length 2 
will be {“ab”, “bc”, “cb”, “ba”} 
Thus, answer equals 4. 
Input : s = “ababa”, l = 2 
Output :
 

Naive Approach : 
A simple approach will be to find all the possible substrings, find their hash values and find the number of distinct substrings. 
Time Complexity: O(l*N)

Efficient approach : 
We will solve this problem using Rolling hash algorithm.

  • Find the hash value of first sub-string of length ‘l’. 
    It can be evaluated as (s[0]-97)*x^(l-1) + (s[1]-97)*x^(l-2) … (s[l-1]-97). Let’s call this ‘H₁’.
  • Using this hash value, we will generate the next hash as : 
    H2 = (H1-(s[0]-97)*x^(l-1)*x + (s[l]-97). Generate hashes of all substrings in this way 
    and push them in an unordered set.
  • Count number of distinct values in the set.

Below is the implementation of the above approach :  

C++




// C++ implementation of above approach
#include <bits/stdc++.h>
#define x 26
#define mod 3001
using namespace std;
 
// Function to find the required count
int CntSubstr(string s, int l)
{
    // Variable to the hash
    int hash = 0;
 
    // Finding hash of substring
    // (0, l-1) using random number x
    for (int i = 0; i < l; i++) {
        hash = (hash * x + (s[i] - 97)) % mod;
    }
 
    // Computing x^(l-1)
    int pow_l = 1;
    for (int i = 0; i < l - 1; i++)
        pow_l = (pow_l * x) % mod;
 
    // Unordered set to add hash values
    unordered_set<int> result;
    result.insert(hash);
 
    // Generating all possible hash values
    for (int i = l; i < s.size(); i++) {
        hash = ((hash - pow_l * (s[i - l] - 97)
              + 2 * mod) * x + (s[i] - 97)) % mod;
        result.insert(hash);
    }
 
    // Print the result
    cout << result.size() << endl;
}
 
// Driver Code
int main()
{
    string s = "abcba";
 
    int l = 2;
 
    CntSubstr(s, l);
 
    return 0;
}


Java




// Java implementation of above approach
import java.util.*;
 
class GFG
{
 
    static int x = 26;
    static int mod = 3001;
 
    // Function to find the required count
    static void CntSubstr(char[] s, int l)
    {
        // Variable to the hash
        int hash = 0;
 
        // Finding hash of substring
        // (0, l-1) using random number x
        for (int i = 0; i < l; i++)
        {
            hash = (hash * x + (s[i] - 97)) % mod;
        }
 
        // Computing x^(l-1)
        int pow_l = 1;
        for (int i = 0; i < l - 1; i++)
        {
            pow_l = (pow_l * x) % mod;
        }
 
        // Unordered set to add hash values
        HashSet<Integer> result = new HashSet<Integer>();
        result.add(hash);
 
        // Generating all possible hash values
        for (int i = l; i < s.length; i++)
        {
            hash = ((hash - pow_l * (s[i - l] - 97)
                    + 2 * mod) * x + (s[i] - 97)) % mod;
            result.add(hash);
        }
 
        // Print the result
        System.out.println(result.size());
    }
 
    // Driver Code
    public static void main(String[] args)
    {
        String s = "abcba";
 
        int l = 2;
 
        CntSubstr(s.toCharArray(), l);
    }
}
 
// This code contributed by Rajput-Ji


C#




// C# implementation of the approach
using System;
using System.Collections.Generic;
 
class GFG
{
 
    static int x = 26;
    static int mod = 3001;
 
    // Function to find the required count
    static void CntSubstr(char[] s, int l)
    {
        // Variable to the hash
        int hash = 0;
 
        // Finding hash of substring
        // (0, l-1) using random number x
        for (int i = 0; i < l; i++)
        {
            hash = (hash * x + (s[i] - 97)) % mod;
        }
 
        // Computing x^(l-1)
        int pow_l = 1;
        for (int i = 0; i < l - 1; i++)
        {
            pow_l = (pow_l * x) % mod;
        }
 
        // Unordered set to add hash values
        HashSet<int> result = new HashSet<int>();
        result.Add(hash);
 
        // Generating all possible hash values
        for (int i = l; i < s.Length; i++)
        {
            hash = ((hash - pow_l * (s[i - l] - 97)
                    + 2 * mod) * x + (s[i] - 97)) % mod;
            result.Add(hash);
        }
 
        // Print the result
        Console.WriteLine(result.Count);
    }
 
    // Driver Code
    public static void Main(String[] args)
    {
        String s = "abcba";
 
        int l = 2;
 
        CntSubstr(s.ToCharArray(), l);
    }
}
 
/* This code contributed by PrinciRaj1992 */


Python3




# Python3 implementation of above approach
x = 26
mod = 3001
 
# Function to find the required count
def CntSubstr(s, l) :
 
    # Variable to the hash
    hash = 0;
 
    # Finding hash of substring
    # (0, l-1) using random number x
    for i in range(l) :
        hash = (hash * x + (ord(s[i]) - 97)) % mod;
 
    # Computing x^(l-1)
    pow_l = 1;
    for i in range(l-1) :
        pow_l = (pow_l * x) % mod;
 
    # Unordered set to add hash values
    result = set();
    result.add(hash);
 
    # Generating all possible hash values
    for i in range(l,len(s)) :
        hash = ((hash - pow_l * (ord(s[i - l]) - 97)
            + 2 * mod) * x + (ord(s[i]) - 97)) % mod;
         
        result.add(hash);
 
    # Print the result
    print(len(result)) ;
 
 
# Driver Code
if __name__ == "__main__" :
 
    s = "abcba";
 
    l = 2;
 
    CntSubstr(s, l);
     
# This code is contributed by AnkitRai01


Javascript




<script>
 
// Javascript implementation of above approach
var x = 26;
var mod = 3001;
 
// Function to find the required count
function CntSubstr(s, l)
{
    // Variable to the hash
    var hash = 0;
 
    // Finding hash of substring
    // (0, l-1) using random number x
    for (var i = 0; i < l; i++) {
        hash = (hash * x + (s[i].charCodeAt(0) - 97)) % mod;
    }
 
    // Computing x^(l-1)
    var pow_l = 1;
    for (var i = 0; i < l - 1; i++)
        pow_l = (pow_l * x) % mod;
 
    // Unordered set to add hash values
    var result = new Set();
    result.add(hash);
 
    // Generating all possible hash values
    for (var i = l; i < s.length; i++) {
        hash = ((hash - pow_l * (s[i - l].charCodeAt(0) - 97)
              + 2 * mod) * x + (s[i].charCodeAt(0) - 97)) % mod;
        result.add(hash);
    }
 
    // Print the result
    document.write( result.size );
}
 
// Driver Code
var s = "abcba";
var l = 2;
CntSubstr(s, l);
 
</script>


Output: 

4

 

Time Complexity : O(N)

Auxiliary Space: O(|s|)
 

Feeling lost in the world of random DSA topics, wasting time without progress? It’s time for a change! Join our DSA course, where we’ll guide you on an exciting journey to master DSA efficiently and on schedule.
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 neveropen!

RELATED ARTICLES

Most Popular

Recent Comments