Saturday, December 28, 2024
Google search engine
Home › Data Modelling & AI › Auto-complete feature using Trie

Auto-complete feature using Trie

We are given a Trie with a set of strings stored in it. When the user types a prefix of their search query, we need to show all recommendations that auto-complete the query, based on the strings stored in the Trie. We assume that the Trie stores past searches made by users.
For example, if the Trie stores {“abc”, “abcd”, “aa”, “abbbaba”} and the user types in “ab”, then they must be shown {“abc”, “abcd”, “abbbaba”}.
Prerequisite Trie Search and Insert.

Given a query prefix, we search for all stored words that begin with this prefix.

  1. Search for the given query using the standard Trie search algorithm.
  2. If the query prefix itself is not present, return 0 to indicate that no stored string has this prefix.
  3. If the query is present and is the end of a word in the Trie, print the query. This can quickly be checked by seeing whether the last matching node has its isWordEnd flag set. We use this flag in the Trie to mark end-of-word nodes for the purpose of searching.
  4. If the last matching node of the query has no children, return -1 to indicate that no other string has this prefix.
  5. Else recursively print all nodes under a subtree of last matching node.

Below are a few implementations of the above steps.

C++




// C++ program to demonstrate auto-complete feature
// using Trie data structure.
#include <bits/stdc++.h>
using namespace std;
 
// Alphabet size (# of symbols): one child slot per lowercase letter.
#define ALPHABET_SIZE (26)
 
// Converts a character into its child-slot index ('a' -> 0 ... 'z' -> 25).
// The argument is parenthesised so that a compound expression (for
// example a conditional) is converted as a whole rather than only its
// first operand. The result is a valid index only for 'a' through 'z';
// callers must range-check the result for any other character.
#define CHAR_TO_INDEX(c) (static_cast<int>(c) - static_cast<int>('a'))
 
// trie node
struct TrieNode {
    struct TrieNode* children[ALPHABET_SIZE];
 
    // isWordEnd is true if the node represents
    // end of a word
    bool isWordEnd;
};
 
// Returns new trie node (initialized to NULLs)
struct TrieNode* getNode(void)
{
    struct TrieNode* pNode = new TrieNode;
    pNode->isWordEnd = false;
 
    for (int i = 0; i < ALPHABET_SIZE; i++)
        pNode->children[i] = NULL;
 
    return pNode;
}
 
// If not present, inserts key into trie.  If the
// key is prefix of trie node, just marks leaf node
void insert(struct TrieNode* root, const string key)
{
    struct TrieNode* pCrawl = root;
 
    for (int level = 0; level < key.length(); level++) {
        int index = CHAR_TO_INDEX(key[level]);
        if (!pCrawl->children[index])
            pCrawl->children[index] = getNode();
 
        pCrawl = pCrawl->children[index];
    }
 
    // mark last node as leaf
    pCrawl->isWordEnd = true;
}
 
// Returns 0 if current node has a child
// If all children are NULL, return 1.
bool isLastNode(struct TrieNode* root)
{
    for (int i = 0; i < ALPHABET_SIZE; i++)
        if (root->children[i])
            return 0;
    return 1;
}
 
// Recursive function to print auto-suggestions for given
// node.
void suggestionsRec(struct TrieNode* root,
                    string currPrefix)
{
    // found a string in Trie with the given prefix
    if (root->isWordEnd)
        cout << currPrefix << endl;
 
    for (int i = 0; i < ALPHABET_SIZE; i++)
        if (root->children[i]) {
            // child node character value
            char child = 'a' + i;
            suggestionsRec(root->children[i],
                           currPrefix + child);
        }
}
 
// print suggestions for given query prefix.
int printAutoSuggestions(TrieNode* root, const string query)
{
    struct TrieNode* pCrawl = root;
    for (char c : query) {
        int ind = CHAR_TO_INDEX(c);
 
        // no string in the Trie has this prefix
        if (!pCrawl->children[ind])
            return 0;
 
        pCrawl = pCrawl->children[ind];
    }
    // If prefix is present as a word, but
    // there is no subtree below the last
    // matching node.
    if (isLastNode(pCrawl)) {
        cout << query << endl;
        return -1;
    }
    suggestionsRec(pCrawl, query);
    return 1;
}
 
// Driver Code
int main()
{
    struct TrieNode* root = getNode();
    insert(root, "hello");
    insert(root, "dog");
    insert(root, "hell");
    insert(root, "cat");
    insert(root, "a");
    insert(root, "hel");
    insert(root, "help");
    insert(root, "helps");
    insert(root, "helping");
    int comp = printAutoSuggestions(root, "hel");
 
    if (comp == -1)
        cout << "No other strings found with this prefix\n";
 
    else if (comp == 0)
        cout << "No string found with this prefix\n";
 
    return 0;
}


Python3




# Python3 program to demonstrate auto-complete
# feature using Trie data structure.
# Note: This is a basic implementation of Trie
# and not the most optimized one.
 
 
class TrieNode:
    """One node of the trie: its outgoing edges plus an end-of-key marker."""

    def __init__(self):
        # Maps a single character to the child node reached through it.
        self.children = dict()
        # True when the path from the root to this node spells a stored key.
        self.last = False
 
 
class Trie():
    """Trie of strings that can print auto-complete suggestions for a prefix."""

    def __init__(self):
        # Initialising the trie structure with an empty root node.
        self.root = TrieNode()

    def formTrie(self, keys):
        """Insert every string of the iterable ``keys`` into the trie."""
        for key in keys:
            self.insert(key)  # inserting one key to the trie.

    def insert(self, key):
        """Insert ``key``, creating the nodes missing along its path.

        If the whole path already exists, only the final node is marked
        as the end of a key.
        """
        node = self.root

        for a in key:
            if not node.children.get(a):
                node.children[a] = TrieNode()

            node = node.children[a]

        node.last = True

    def suggestionsRec(self, node, word):
        """Print every stored key in the subtree rooted at ``node``.

        ``word`` is the string spelled by the path leading to ``node``;
        it is extended by one character for each child visited.
        """
        if node.last:
            print(word)

        for a, n in node.children.items():
            self.suggestionsRec(n, word + a)

    def printAutoSuggestions(self, key):
        """Print all stored keys that start with ``key``.

        Returns:
            0  if no stored key has this prefix (nothing is printed),
            -1 if ``key`` itself is stored and nothing longer starts
               with it (only ``key`` is printed),
            1  otherwise (all matching keys are printed).
        """
        node = self.root

        for a in key:
            # no string in the Trie has this prefix
            if not node.children.get(a):
                return 0
            node = node.children[a]

        # There is no subtree below the last matching node.
        if not node.children:
            # A childless node that is not marked as a key end holds no
            # key at all (e.g. the root of an empty trie).
            if not node.last:
                return 0
            # The prefix is itself a stored key: it is the only match,
            # so it still has to be shown.
            print(key)
            return -1

        self.suggestionsRec(node, key)
        return 1
 
 
# Driver Code
# keys to form the trie structure (the list literal must be closed
# before the trailing comment, otherwise the script does not parse).
keys = ["hello", "dog", "hell", "cat", "a",
        "hel", "help", "helps", "helping"]
key = "h"  # key for autocomplete suggestions.

# creating trie object
t = Trie()

# creating the trie structure with the
# given set of strings.
t.formTrie(keys)

# autocompleting the given key using
# our trie structure.
comp = t.printAutoSuggestions(key)

# -1: the key matched nothing beyond itself; 0: the key matched nothing.
if comp == -1:
    print("No other strings found with this prefix\n")
elif comp == 0:
    print("No string found with this prefix\n")
 
# This code is contributed by amurdia and muhammedrijnas


Java




import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
 
/**
 * Trie of words offering prefix lookup and auto-complete suggestions.
 */
public class Trie {

    /** A single trie node; remembers the character on the edge leading into it. */
    public class TrieNode {
        Map<Character, TrieNode> children;
        char c;
        boolean isWord;

        /** Creates a node reached through the character {@code c}. */
        public TrieNode(char c) {
            this();
            this.c = c;
        }

        /** Creates a node with no children (used for the root). */
        public TrieNode() {
            children = new HashMap<>();
        }

        /**
         * Inserts {@code word} below this node, creating missing nodes and
         * marking the node of the last character as a word. A null or empty
         * word is ignored.
         */
        public void insert(String word) {
            if (word == null || word.isEmpty())
                return;

            // Walk down one character at a time instead of recursing on
            // successively shorter substrings.
            TrieNode cursor = this;
            for (int pos = 0; pos < word.length(); pos++) {
                char letter = word.charAt(pos);
                TrieNode next = cursor.children.get(letter);
                if (next == null) {
                    next = new TrieNode(letter);
                    cursor.children.put(letter, next);
                }
                cursor = next;
            }
            cursor.isWord = true;
        }

    }

    TrieNode root;

    /** Builds a trie containing every word of {@code words}. */
    public Trie(List<String> words) {
        root = new TrieNode();
        for (String word : words) {
            root.insert(word);
        }
    }

    /**
     * Looks up {@code prefix} in the trie.
     *
     * @param prefix characters to follow from the root
     * @param exact  when true, the path must also end on a stored word
     * @return false if the path does not exist; otherwise true, or the
     *         word flag of the final node when {@code exact} is set
     */
    public boolean find(String prefix, boolean exact) {
        TrieNode cursor = root;
        for (int pos = 0; pos < prefix.length(); pos++) {
            cursor = cursor.children.get(prefix.charAt(pos));
            if (cursor == null) {
                return false;
            }
        }
        if (exact) {
            return cursor.isWord;
        }
        return true;
    }

    /** Returns whether any path in the trie spells {@code prefix}. */
    public boolean find(String prefix) {
        return find(prefix, false);
    }

    /**
     * Appends to {@code list} every word stored in the subtree of
     * {@code root}. {@code curr} holds the characters on the path to
     * {@code root}; it is extended while descending and restored to its
     * original length before returning.
     */
    public void suggestHelper(TrieNode root, List<String> list, StringBuffer curr) {
        if (root.isWord) {
            list.add(curr.toString());
        }

        Map<Character, TrieNode> branches = root.children;
        if (branches == null || branches.isEmpty()) {
            return;
        }

        for (TrieNode child : branches.values()) {
            curr.append(child.c);
            suggestHelper(child, list, curr);
            // Drop the character added for this child again.
            curr.setLength(curr.length() - 1);
        }
    }

    /**
     * Returns all stored words that start with {@code prefix}; the list is
     * empty when no path in the trie spells the prefix.
     */
    public List<String> suggest(String prefix) {
        List<String> matches = new ArrayList<>();
        StringBuffer typed = new StringBuffer();
        TrieNode cursor = root;
        for (int pos = 0; pos < prefix.length(); pos++) {
            char letter = prefix.charAt(pos);
            cursor = cursor.children.get(letter);
            if (cursor == null) {
                return matches;
            }
            typed.append(letter);
        }
        suggestHelper(cursor, matches, typed);
        return matches;
    }

    /** Driver: prints the suggestions for the prefix "hel". */
    public static void main(String[] args) {
        List<String> words = List.of("hello", "dog", "hell", "cat", "a", "hel","help","helps","helping");
        Trie trie = new Trie(words);

        System.out.println(trie.suggest("hel"));
    }

}


C#




// C# program to demonstrate auto-complete feature
// using Trie data structure.
using System;
using System.Collections.Generic;
 
// A single trie node: 26 child slots plus an end-of-word marker.
class TrieNode {
    // One slot per letter; a slot stays null until a child is attached.
    public TrieNode[] children = new TrieNode[26];

    // True when this node represents the end of a word.
    public bool isWordEnd = false;
}
 
// Trie over the lowercase letters 'a'..'z' that can print
// auto-complete suggestions for a query prefix.
class Trie {
    // Number of child slots per node: one for each of 'a'..'z'.
    const int AlphabetSize = 26;

    TrieNode root;
    public Trie() {
        root = new TrieNode();
    }

    // If not present, inserts key into the trie, creating the missing
    // nodes along its path. If the path already exists, it just marks the
    // final node as a word end.
    // The index is computed as (character - 'a') without a range check,
    // so the key must consist of 'a'..'z' only; any other character
    // indexes outside the children array.
    public void Insert(string key) {
        TrieNode pCrawl = root;
        for (int i = 0; i < key.Length; i++) {
            int index = key[i] - 'a';
            if (pCrawl.children[index] == null) {
                pCrawl.children[index] = new TrieNode();
            }
            pCrawl = pCrawl.children[index];
        }
        // mark last node as the end of a word
        pCrawl.isWordEnd = true;
    }

    // Returns false if the node has at least one child,
    // true if all of its child slots are null.
    bool IsLastNode(TrieNode root) {
        foreach (TrieNode child in root.children) {
            if (child != null) {
                return false;
            }
        }
        return true;
    }

    // Recursive function to print auto-suggestions for given node:
    // writes every word stored in the subtree of root, where currPrefix
    // is the string spelled by the path leading to root.
    void suggestionsRec(TrieNode root, string currPrefix) {
        // found a string in Trie with the given prefix
        if (root.isWordEnd) {
            Console.WriteLine(currPrefix);
        }
        for (int i = 0; i < AlphabetSize; i++) {
            if (root.children[i] != null) {
                // child node character value
                char c = (char)(i + 'a');
                suggestionsRec(root.children[i], currPrefix + c);
            }
        }
    }

    // Prints every stored word that starts with query, one per line.
    // Returns:
    //    0  if no stored word has this prefix (nothing is printed),
    //   -1  if query itself is a stored word and nothing longer starts
    //       with it (only query is printed),
    //    1  otherwise (all matching words are printed).
    public int printAutoSuggestions(string query) {
        TrieNode pCrawl = root;
        for (int i = 0; i < query.Length; i++) {
            int index = query[i] - 'a';
            // A character outside 'a'..'z' would index outside the
            // children array; treat it like any other prefix that no
            // stored word starts with.
            if (index < 0 || index >= AlphabetSize
                || pCrawl.children[index] == null) {
                return 0;
            }
            pCrawl = pCrawl.children[index];
        }
        // There is no subtree below the last matching node.
        if (IsLastNode(pCrawl)) {
            // A childless node that is not a word end holds no word at
            // all (e.g. the root of a trie nothing was inserted into).
            if (!pCrawl.isWordEnd) {
                return 0;
            }
            Console.WriteLine(query);
            return -1;
        }
        suggestionsRec(pCrawl, query);
        return 1;
    }
}
 
// Driver: fills a trie with sample words, prints the completions of
// "hel" and reports when the prefix matched nothing (beyond itself).
class Program {
    static void Main(string[] args) {
        Trie trie = new Trie();

        // Inserted in this exact order.
        string[] words = { "hello", "dog", "hell", "cat", "a",
                           "hel", "help", "helps", "helping" };
        foreach (string word in words) {
            trie.Insert(word);
        }

        int comp = trie.printAutoSuggestions("hel");

        switch (comp) {
            case -1:
                Console.WriteLine("No other strings found with this prefix");
                break;
            case 0:
                Console.WriteLine("No string found with this prefix");
                break;
        }
    }
}
 
// This code is contributed by Aman Kumar


Javascript




/** One node of the trie: its outgoing edges plus an end-of-word marker. */
class TrieNode {
  constructor() {
    // Plain object used as a map from a character to the child node.
    this.children = {};
    // Set to true when the path to this node spells an inserted word.
    this.isWord = false;
  }
}
 
/** Trie of words that can list every stored word starting with a prefix. */
class Trie {
  constructor() {
    this.root = new TrieNode();
  }

  /** Inserts `word`, creating the nodes missing along its path. */
  insert(word) {
    let cursor = this.root;
    for (let pos = 0; pos < word.length; pos += 1) {
      const letter = word[pos];
      if (!cursor.children[letter]) {
        cursor.children[letter] = new TrieNode();
      }
      cursor = cursor.children[letter];
    }
    cursor.isWord = true;
  }

  /**
   * Pushes onto `list` every word stored in the subtree of `root`.
   * `curr` is the string spelled by the path leading to `root`.
   */
  suggestHelper(root, list, curr) {
    if (root.isWord) {
      list.push(curr);
    }
    const branches = root.children;
    if (Object.keys(branches).length === 0) {
      return;
    }
    for (const letter in branches) {
      this.suggestHelper(branches[letter], list, curr + letter);
    }
  }

  /**
   * Returns an array of all stored words that start with `prefix`;
   * the array is empty when no path in the trie spells the prefix.
   */
  suggest(prefix) {
    let cursor = this.root;
    let typed = "";
    for (let pos = 0; pos < prefix.length; pos += 1) {
      const next = cursor.children[prefix[pos]];
      if (!next) {
        return [];
      }
      cursor = next;
      typed += prefix[pos];
    }
    const matches = [];
    this.suggestHelper(cursor, matches, typed);
    return matches;
  }
}
 
// Driver: build the trie from the sample words and log the completions
// of the prefix "hel".
let words = ["hello", "dog", "hell", "cat", "a", "hel", "help", "helps", "helping"];
let trie = new Trie();
for (const word of words) {
  trie.insert(word);
}
console.log(trie.suggest("hel"));
 
// This code is contributed by Shivam Tiwari


Output

hel
hell
hello
help
helping
helps

Time Complexity: O(N*L) where N is the number of words in the trie and L is the length of the longest word in the trie.
Auxiliary Space: O(N*L+N * ALPHABET_SIZE)

How can we improve this? 
The number of matches might just be too large so we have to be selective while displaying them. We can restrict ourselves to display only the relevant results. By relevant, we can consider the past search history and show only the most searched matching strings as relevant results. 
Store another value in each end-of-word node (a node whose isWordEnd flag is set): the number of hits for that search query. For example, if “hat” is searched 10 times, then we store 10 at the last node for “hat”. Then, when we want to show the recommendations, we display the top k matches with the highest hit counts. Try to implement this on your own.
This article is contributed by Hemang Sarkar. If you like neveropen and would like to contribute, you can also write an article using write.geeksforgeeks.org or mail your article to review-team@geeksforgeeks.org. See your article appearing on the neveropen main page and help other Geeks.
Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.
 

Feeling lost in the world of random DSA topics, wasting time without progress? It’s time for a change! Join our DSA course, where we’ll guide you on an exciting journey to master DSA efficiently and on schedule.
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 neveropen!

RELATED ARTICLES

Most Popular

Recent Comments