Friday, January 3, 2025
Google search engine
HomeData Modelling & AIAlien Dictionary

Alien Dictionary

Given a sorted dictionary (array of words) of an alien language, find the order of characters in the language.

Examples:  

Input: words[] = {“baa”, “abcd”, “abca”, “cab”, “cad”}
Output: Order of characters is ‘b’, ‘d’, ‘a’, ‘c’
Explanation: Note that words are sorted and in the given language “baa” comes before “abcd”, therefore ‘b’ is before ‘a’ in output. Similarly we can find other orders.

Input: words[] = {“caa”, “aaa”, “aab”}
Output: Order of characters is ‘c’, ‘a’, ‘b’

Recommended Practice

Alien Dictionary using Topological Sorting:

The idea is to create a directed graph where each character represents a node,and edges between nodes indicate the relative order of characters. If the graph created contains a cycle then a valid order of charcters is not possible else the topological sorting algorithm is applied to the graph to determine the character order.

Following are the detailed steps.

  • Create a directed graph g with number of vertices equal to the number of unique characters in alien language, where each character represents a node and edges between nodes indicate the relative order of characters.
  • Do following for every pair of adjacent words in given sorted array.
    • Let the current pair of words be word1 and word2. One by one compare characters of both words and find the first mismatching characters. 
    • Create an edge in g from mismatching character of word1 to that of word2.
  • If the graph created is DAG (Directed Acyclic Grapgh) then print topological sorting of the above created graph else if the graph contains a cycle then input data is inconsistent and valid order of characters does not exist.

Below is the implementation of above approach:

C++




#include <bits/stdc++.h>
using namespace std;
 
// Function to add a directed edge between characters u and
// v
void addEdge(vector<int> adj[], char u, char v)
{
    adj[u - 'a'].push_back(v - 'a');
}
 
// Depth-First Search (DFS) function to check for cycles in
// the graph
void dfs(vector<int> adj[], vector<int>& col, int curr,
         bool& isCyclic)
{
    // Mark the current node as visited and in the current
    // path
    col[curr] = 1;
   
    for (int i = 0; i < adj[curr].size(); i++) {
        int x = adj[curr][i];
        if (col[x] == 1) {
 
            // If the node is already in the current path, a
            // cycle is detected
            isCyclic = true;
           
            return;
        }
        else if (col[x] == 0) {
 
            // Recursively visit adjacent nodes
            dfs(adj, col, x, isCyclic);
        }
    }
 
    // Mark the current node as visited and not in the
    // current path
    col[curr] = 2;
}
 
// Function to check if a cycle exists in the graph using
// DFS
bool checkCycle(vector<int> adj[], vector<int> col, int k)
{
    bool isCyclic = false;
    for (int i = 0; i < k; i++) {
        if (col[i] == 0) {
            dfs(adj, col, i, isCyclic);
        }
    }
    return isCyclic;
}
 
// DFS-based topological sorting utility function
void topologicalSortUtil(vector<int> adj[], int u,
                         bool visited[], stack<int>& st)
{
    visited[u] = true;
    for (int i = 0; i < adj[u].size(); i++) {
        int v = adj[u][i];
        if (!visited[v]) {
            topologicalSortUtil(adj, v, visited, st);
        }
    }
    st.push(u);
}
 
// Function to perform topological sorting
void topologicalSort(vector<int> adj[], int V)
{
    bool visited[V];
    stack<int> st;
 
    for (int i = 0; i < V; i++) {
        visited[i] = false;
    }
 
    for (int i = 0; i < V; i++) {
        if (!visited[i]) {
            topologicalSortUtil(adj, i, visited, st);
        }
    }
 
    // Print the characters in topological order
    while (!st.empty()) {
        cout << char(st.top() + 'a') << " ";
        st.pop();
    }
}
 
// Function to process the words and find the character
// order
void printOrder(string words[], int n)
{
    // To track the frequency of characters
    vector<int> frq(26, 0);
 
    // Count of unique characters
    int k = 0;
 
    // Count unique characters and their frequencies
    for (int i = 0; i < n; i++) {
        string s = words[i];
        for (int j = 0; j < s.size(); j++) {
            frq[s[j] - 97]++;
            if (frq[s[j] - 97] == 1)
                k++;
        }
    }
 
    // Create adjacency list for the graph
    vector<int> adj[k];
 
    // Build the graph by iterating through adjacent words
    for (int i = 0; i < n - 1; i++) {
        string word1 = words[i];
        string word2 = words[i + 1];
 
        int j = 0;
        while (j < word1.length() && j < word2.length()) {
            if (word1[j] != word2[j]) {
 
                // Add edges based on character order
                addEdge(adj, word1[j], word2[j]);
 
                break;
            }
            j++;
        }
    }
 
    // Color array for cycle detection
    vector<int> col(k, 0);
    if (checkCycle(adj, col, k)) {
        // Detect and handle cycles
        cout << "Valid Order is not possible\n";
        return;
    }
 
    // Perform topological sorting and print character order
    topologicalSort(adj, k);
}
 
// Driver Code
int main()
{
    string words[]
        = { "baa", "abcd", "abca", "cab", "cad" };
    int n = sizeof(words) / sizeof(words[0]);
 
    printOrder(words, n);
 
    return 0;
}


Python3




# Function to add a directed edge between characters u and v
def addEdge(adj, u, v):
    adj[ord(u) - ord('a')].append(ord(v) - ord('a'))
 
# Depth-First Search (DFS) function to check for cycles in the graph
def dfs(adj, col, curr, isCyclic):
    # Mark the current node as visited and in the current path
    col[curr] = 1
 
    for x in adj[curr]:
        if col[x] == 1:
            # If the node is already in the current path, a cycle is detected
            isCyclic[0] = True
            return
        elif col[x] == 0:
            # Recursively visit adjacent nodes
            dfs(adj, col, x, isCyclic)
 
    # Mark the current node as visited and not in the current path
    col[curr] = 2
 
# Function to check if a cycle exists in the graph using DFS
def checkCycle(adj, col, k):
    isCyclic = [False]
    for i in range(k):
        if col[i] == 0:
            dfs(adj, col, i, isCyclic)
    return isCyclic[0]
 
# DFS-based topological sorting utility function
def topologicalSortUtil(adj, u, visited, st):
    visited[u] = True
    for v in adj[u]:
        if not visited[v]:
            topologicalSortUtil(adj, v, visited, st)
    st.append(u)
 
# Function to perform topological sorting
def topologicalSort(adj, V):
    visited = [False] * V
    st = []
 
    for i in range(V):
        if not visited[i]:
            topologicalSortUtil(adj, i, visited, st)
 
    # Print the characters in topological order
    while st:
        print(chr(st.pop() + ord('a')), end=" ")
 
# Function to process the words and find the character order
def printOrder(words):
    # To track the frequency of characters
    frq = [0] * 26
 
    # Count of unique characters
    k = 0
 
    # Count unique characters and their frequencies
    for word in words:
        for char in word:
            frq[ord(char) - ord('a')] += 1
            if frq[ord(char) - ord('a')] == 1:
                k += 1
 
    # Create adjacency list for the graph
    adj = [[] for _ in range(k)]
 
    # Build the graph by iterating through adjacent words
    for i in range(len(words) - 1):
        word1, word2 = words[i], words[i + 1]
        j = 0
        while j < len(word1) and j < len(word2):
            if word1[j] != word2[j]:
                # Add edges based on character order
                addEdge(adj, word1[j], word2[j])
                break
            j += 1
 
    # Color array for cycle detection
    col = [0] * k
    isCyclic = [False]
    if checkCycle(adj, col, k):
        # Detect and handle cycles
        print("Valid Order is not possible")
        return
 
    # Perform topological sorting and print character order
    topologicalSort(adj, k)
 
# Driver Code
if __name__ == "__main__":
    words = ["baa", "abcd", "abca", "cab", "cad"]
    printOrder(words)
# This code is contributed by Dwaipayan Bandyopadhyay


Output

b d a c 


Time Complexity: O(N+K) , where N is number of given words and Kis number of unique characters in given alphabet.
Auxiliary Space: O(N+K) ,

Feeling lost in the world of random DSA topics, wasting time without progress? It’s time for a change! Join our DSA course, where we’ll guide you on an exciting journey to master DSA efficiently and on schedule.
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 neveropen!

RELATED ARTICLES

Most Popular

Recent Comments