Given a sorted dictionary (array of words) of an alien language, find the order of characters in the language.
Examples:
Input: words[] = {“baa”, “abcd”, “abca”, “cab”, “cad”}
Output: Order of characters is ‘b’, ‘d’, ‘a’, ‘c’
Explanation: Note that words are sorted and in the given language “baa” comes before “abcd”, therefore ‘b’ is before ‘a’ in output. Similarly we can find other orders.Input: words[] = {“caa”, “aaa”, “aab”}
Output: Order of characters is ‘c’, ‘a’, ‘b’
Alien Dictionary using Topological Sorting:
The idea is to create a directed graph where each character represents a node,and edges between nodes indicate the relative order of characters. If the graph created contains a cycle then a valid order of charcters is not possible else the topological sorting algorithm is applied to the graph to determine the character order.
Following are the detailed steps.
- Create a directed graph g with number of vertices equal to the number of unique characters in alien language, where each character represents a node and edges between nodes indicate the relative order of characters.
- Do following for every pair of adjacent words in given sorted array.
- Let the current pair of words be word1 and word2. One by one compare characters of both words and find the first mismatching characters.
- Create an edge in g from mismatching character of word1 to that of word2.
- If the graph created is DAG (Directed Acyclic Grapgh) then print topological sorting of the above created graph else if the graph contains a cycle then input data is inconsistent and valid order of characters does not exist.
Below is the implementation of above approach:
C++
#include <bits/stdc++.h> using namespace std; // Function to add a directed edge between characters u and // v void addEdge(vector< int > adj[], char u, char v) { adj[u - 'a' ].push_back(v - 'a' ); } // Depth-First Search (DFS) function to check for cycles in // the graph void dfs(vector< int > adj[], vector< int >& col, int curr, bool & isCyclic) { // Mark the current node as visited and in the current // path col[curr] = 1; for ( int i = 0; i < adj[curr].size(); i++) { int x = adj[curr][i]; if (col[x] == 1) { // If the node is already in the current path, a // cycle is detected isCyclic = true ; return ; } else if (col[x] == 0) { // Recursively visit adjacent nodes dfs(adj, col, x, isCyclic); } } // Mark the current node as visited and not in the // current path col[curr] = 2; } // Function to check if a cycle exists in the graph using // DFS bool checkCycle(vector< int > adj[], vector< int > col, int k) { bool isCyclic = false ; for ( int i = 0; i < k; i++) { if (col[i] == 0) { dfs(adj, col, i, isCyclic); } } return isCyclic; } // DFS-based topological sorting utility function void topologicalSortUtil(vector< int > adj[], int u, bool visited[], stack< int >& st) { visited[u] = true ; for ( int i = 0; i < adj[u].size(); i++) { int v = adj[u][i]; if (!visited[v]) { topologicalSortUtil(adj, v, visited, st); } } st.push(u); } // Function to perform topological sorting void topologicalSort(vector< int > adj[], int V) { bool visited[V]; stack< int > st; for ( int i = 0; i < V; i++) { visited[i] = false ; } for ( int i = 0; i < V; i++) { if (!visited[i]) { topologicalSortUtil(adj, i, visited, st); } } // Print the characters in topological order while (!st.empty()) { cout << char (st.top() + 'a' ) << " " ; st.pop(); } } // Function to process the words and find the character // order void printOrder(string words[], int n) { // To track the frequency of characters vector< int > frq(26, 0); // Count of unique characters int k = 0; // Count unique characters and their frequencies for ( int i = 0; i < n; i++) { string s = words[i]; for ( int j = 0; j < s.size(); j++) { frq[s[j] - 97]++; if (frq[s[j] - 97] == 1) k++; } } // Create adjacency list for the graph vector< int > adj[k]; // Build the graph by iterating through adjacent words for ( int i = 0; i < n - 1; i++) { string word1 = words[i]; string word2 = words[i + 1]; int j = 0; while (j < word1.length() && j < word2.length()) { if (word1[j] != word2[j]) { // Add edges based on character order addEdge(adj, word1[j], word2[j]); break ; } j++; } } // Color array for cycle detection vector< int > col(k, 0); if (checkCycle(adj, col, k)) { // Detect and handle cycles cout << "Valid Order is not possible\n" ; return ; } // Perform topological sorting and print character order topologicalSort(adj, k); } // Driver Code int main() { string words[] = { "baa" , "abcd" , "abca" , "cab" , "cad" }; int n = sizeof (words) / sizeof (words[0]); printOrder(words, n); return 0; } |
Python3
# Function to add a directed edge between characters u and v def addEdge(adj, u, v): adj[ ord (u) - ord ( 'a' )].append( ord (v) - ord ( 'a' )) # Depth-First Search (DFS) function to check for cycles in the graph def dfs(adj, col, curr, isCyclic): # Mark the current node as visited and in the current path col[curr] = 1 for x in adj[curr]: if col[x] = = 1 : # If the node is already in the current path, a cycle is detected isCyclic[ 0 ] = True return elif col[x] = = 0 : # Recursively visit adjacent nodes dfs(adj, col, x, isCyclic) # Mark the current node as visited and not in the current path col[curr] = 2 # Function to check if a cycle exists in the graph using DFS def checkCycle(adj, col, k): isCyclic = [ False ] for i in range (k): if col[i] = = 0 : dfs(adj, col, i, isCyclic) return isCyclic[ 0 ] # DFS-based topological sorting utility function def topologicalSortUtil(adj, u, visited, st): visited[u] = True for v in adj[u]: if not visited[v]: topologicalSortUtil(adj, v, visited, st) st.append(u) # Function to perform topological sorting def topologicalSort(adj, V): visited = [ False ] * V st = [] for i in range (V): if not visited[i]: topologicalSortUtil(adj, i, visited, st) # Print the characters in topological order while st: print ( chr (st.pop() + ord ( 'a' )), end = " " ) # Function to process the words and find the character order def printOrder(words): # To track the frequency of characters frq = [ 0 ] * 26 # Count of unique characters k = 0 # Count unique characters and their frequencies for word in words: for char in word: frq[ ord (char) - ord ( 'a' )] + = 1 if frq[ ord (char) - ord ( 'a' )] = = 1 : k + = 1 # Create adjacency list for the graph adj = [[] for _ in range (k)] # Build the graph by iterating through adjacent words for i in range ( len (words) - 1 ): word1, word2 = words[i], words[i + 1 ] j = 0 while j < len (word1) and j < len (word2): if word1[j] ! = word2[j]: # Add edges based on character order addEdge(adj, word1[j], word2[j]) break j + = 1 # Color array for cycle detection col = [ 0 ] * k isCyclic = [ False ] if checkCycle(adj, col, k): # Detect and handle cycles print ( "Valid Order is not possible" ) return # Perform topological sorting and print character order topologicalSort(adj, k) # Driver Code if __name__ = = "__main__" : words = [ "baa" , "abcd" , "abca" , "cab" , "cad" ] printOrder(words) # This code is contributed by Dwaipayan Bandyopadhyay |
b d a c
Time Complexity: O(N+K) , where N is number of given words and Kis number of unique characters in given alphabet.
Auxiliary Space: O(N+K) ,
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 neveropen!