Given a very long list of URLs, find out last unique URL. Only one traversal of all URLs is allowed. Examples:
Input: https://www.geeksforgeeks.org https://www.geeksforgeeks.org/quiz-corner-gq/ http://qa.geeksforgeeks.org https://practice.geeksforgeeks.org https://ide.geeksforgeeks.org http://write.geeksforgeeks.org https://www.geeksforgeeks.org/quiz-corner-gq/ https://practice.geeksforgeeks.org https://ide.geeksforgeeks.org https://www.geeksforgeeks.org/quiz-corner-gq/ http://qa.geeksforgeeks.org https://practice.geeksforgeeks.org Output: http://write.geeksforgeeks.org
We can solve this problem in one traversal by using Trie with a Doubly Linked List (We can insert and delete in O(1) time). The idea is to insert all URLs into the Trie one by one and check if it is duplicate or not. To know if we have previously encountered the URL, we need to mark the last node of every URL as leaf node. If we encounter a URL for the first time, we insert it into the doubly Linked list and maintain a pointer to that node in linked list in leaf node of the trie. If we encounter a URL that is already in the trie and has pointer to the url in the linked list, we delete the node from the linked list and set its pointer in the trie to null. After all URLs are processed, linked list will only contain the URLs that are distinct and the node at the beginning of the linked list will be last unique URL.
CPP
// C++ program to print distinct URLs using Trie // and Doubly Linked List #include <bits/stdc++.h> using namespace std; // Alphabet size (# of symbols) const int ALPHABET_SIZE = 256; // A linked list node struct DLLNode { string data; DLLNode *next, *prev; }; // trie node struct TrieNode { TrieNode* children[ALPHABET_SIZE]; // isLeaf is true if the node represents // end of a word bool isLeaf; DLLNode* LLptr; }; /* Given a reference (pointer to pointer) to the head of a list and an int, inserts a new node on the front of the list. */ void push(DLLNode*& head_ref, string new_data) { DLLNode* new_node = new DLLNode; // put in the data new_node->data = new_data; // Make next of new node as head and previous // as NULL new_node->next = (head_ref); new_node->prev = NULL; // change prev of head node to new node if (head_ref != NULL) head_ref->prev = new_node; // move the head to point to the new node head_ref = new_node; } /* Function to delete a node in a Doubly Linked List. head_ref --> pointer to head node pointer. del --> pointer to node to be deleted. */ void deleteNode(DLLNode*& head_ref, DLLNode* del) { // base case if (head_ref == NULL || del == NULL) return ; // If node to be deleted is head node if (head_ref == del) head_ref = del->next; // Change next only if node to be deleted is // NOT the last node if (del->next != NULL) del->next->prev = del->prev; // Change prev only if node to be deleted is // NOT the first node if (del->prev != NULL) del->prev->next = del->next; // Finally, free the memory occupied by del delete (del); return ; } // Returns new trie node (initialized to NULLs) TrieNode* getNewTrieNode( void ) { TrieNode* pNode = new TrieNode; if (pNode) { pNode->isLeaf = false ; for ( int i = 0; i < ALPHABET_SIZE; i++) pNode->children[i] = NULL; pNode->LLptr = NULL; } return pNode; } // If not present, inserts key into trie // If the key is prefix of trie node, just marks leaf node void insert(TrieNode* root, string key, DLLNode*& head) { int index; TrieNode* pCrawl = root; for ( int level = 0; level < key.length(); level++) { index = int (key[level]); if (!pCrawl->children[index]) pCrawl->children[index] = getNewTrieNode(); pCrawl = pCrawl->children[index]; } if (pCrawl->isLeaf) { // cout << "Duplicate Found " << key << endl; // delete from linked list if (pCrawl->LLptr) deleteNode(head, pCrawl->LLptr); pCrawl->LLptr = NULL; } else { // mark last node as leaf pCrawl->isLeaf = true ; // insert to linked list push(head, key); pCrawl->LLptr = head; } } // Driver function int main() { string urls[] TrieNode* root = getNewTrieNode(); // Start with the empty list DLLNode* head = NULL; int n = sizeof (urls) / sizeof (urls[0]); // Construct Trie from given URLs for ( int i = 0; i < n; i++) insert(root, urls[i], head); // head of linked list will point to last // distinct URL cout << head->data << endl; return 0; } |
Python3
# Python program to print distinct URLs using Trie # and Doubly Linked List # Alphabet size (# of symbols) ALPHABET_SIZE = 256 # A linked list node class DLLNode: def __init__( self , data): self .data = data self . next = None self .prev = None # trie node class TrieNode: def __init__( self ): self .children = [ None ] * ALPHABET_SIZE # isLeaf is true if the node represents # end of a word self .isLeaf = False self .LLptr = None # Given a reference (pointer to pointer) to the # head of a list and an int, inserts a new node # on the front of the list. def push(head_ref, new_data): # put in the data new_node = DLLNode(new_data) # Make next of new node as head and previous # as NULL new_node. next = head_ref # change prev of head node to new node if head_ref is not None : head_ref.prev = new_node # move the head to point to the new node head_ref = new_node return head_ref # Function to delete a node in a Doubly Linked List. # head_ref --> pointer to head node pointer. # del --> pointer to node to be deleted. def deleteNode(head_ref, del_node): # base case if head_ref is None or del_node is None : return # If node to be deleted is head node if head_ref = = del_node: head_ref = del_node. next # Change next only if node to be deleted is # NOT the last node if del_node. next is not None : del_node. next .prev = del_node.prev # Change prev only if node to be deleted is # NOT the first node if del_node.prev is not None : del_node.prev. next = del_node. next # Finally, free the memory occupied by del del del_node return head_ref # Returns new trie node (initialized to NULLs) def getNewTrieNode(): pNode = TrieNode() pNode.isLeaf = False pNode.LLptr = None return pNode # If not present, inserts key into trie # If the key is prefix of trie node, just marks leaf node def insert(root, key, head): pCrawl = root for level in range ( len (key)): index = ord (key[level]) if pCrawl.children[index] is None : pCrawl.children[index] = getNewTrieNode() pCrawl = pCrawl.children[index] if pCrawl.isLeaf: # cout << "Duplicate Found " << key << endl; # delete from linked list if pCrawl.LLptr is not None : head = deleteNode(head, pCrawl.LLptr) pCrawl.LLptr = None else : # mark last node as leaf pCrawl.isLeaf = True # insert to linked list head = push(head, key) pCrawl.LLptr = head return head # Driver function urls = [ ] root = getNewTrieNode() # Start with the empty list head = None # Construct Trie from given URLs for url in urls: head = insert(root, url, head) # head of linked list will point to last # distinct URL print (head.data) # This code is contributed by Aman Kumar |
Javascript
const ALPHABET_SIZE = 256; // Alphabet size (# of symbols) //DLLNode class is a linked list node, which stores data and the next and previous node class DLLNode { constructor(data) { //constructor for DLLNode, which takes data as an argument this .data = data; this .next = null ; this .prev = null ; } } //TrieNode class is a trie node, which stores an array of children for each node, a boolean for isLeaf, and a pointer to a doubly linked list node class TrieNode { constructor() { this .children = new Array(ALPHABET_SIZE).fill( null ); //array of size ALPHABET_SIZE filled with null this .isLeaf = false ; this .LLptr = null ; //pointer to doubly linked list node } } //push function which takes a reference to the head of a list and an int, inserts a new node on the front of the list function push(head_ref, new_data) { const new_node = new DLLNode(new_data); //creates a new node of type DLLNode with data as the new_data argument new_node.next = head_ref; //sets the next node of the new node to the head node passed in //change the prev of the head node to the new node if the head node is not null if (head_ref !== null ) { head_ref.prev = new_node; } head_ref = new_node; //move the head of the list to point to the new node return head_ref; } //deleteNode function which takes a reference to the head of a list and a node to be deleted, deletes the node from the list function deleteNode(head_ref, del_node) { //base case if head or node to be deleted is null if (head_ref === null || del_node === null ) { return ; } //if node to be deleted is the head node if (head_ref === del_node) { head_ref = del_node.next; } //change next if node to be deleted is not the last node if (del_node.next !== null ) { del_node.next.prev = del_node.prev; } //change prev if node to be deleted is not the first node if (del_node.prev !== null ) { del_node.prev.next = del_node.next; } return head_ref; } //getNewTrieNode function which returns a new trie node initialized to nulls function getNewTrieNode() { const pNode = new TrieNode(); pNode.isLeaf = false ; pNode.LLptr = null ; return pNode; } //insert function which takes a root node, a key, and a head node, and inserts the key into the trie function insert(root, key, head) { let pCrawl = root; //creates a pointer to the root node //loops through the length of the key for (let level = 0; level < key.length; level++) { const index = key.charCodeAt(level); //gets the ascii value of the character at the current level //if the node at the current level is null, create a new trie node if (pCrawl.children[index] === null ) { pCrawl.children[index] = getNewTrieNode(); } pCrawl = pCrawl.children[index]; //moves the pointer to the next node in the trie } //if the node is already a leaf node if (pCrawl.isLeaf) { //if the LLptr is not null, delete the node from the linked list if (pCrawl.LLptr !== null ) { head = deleteNode(head, pCrawl.LLptr); } pCrawl.LLptr = null ; } else { //mark last node as leaf pCrawl.isLeaf = true ; //insert to linked list head = push(head, key); pCrawl.LLptr = head; } return head; } //list of URLs const urls = [ ]; const root = getNewTrieNode(); //create a root node let head = null ; //head of linked list //loop through the URLs and insert them into the trie for (const url of urls) { head = insert(root, url, head); } //head of linked list will point to the last distinct URL console.log(head.data); |
C#
// C# program to print distinct URLs using Trie // and Doubly Linked List using System; using System.Collections.Generic; // trie node class TrieNode { // Alphabet size (# of symbols) const int ALPHABET_SIZE = 256; public TrieNode[] children = new TrieNode[ALPHABET_SIZE]; // isLeaf is true if the node represents // end of a word public bool isLeaf; public DLLNode LLptr; public TrieNode() { isLeaf = false ; LLptr = null ; for ( int i = 0; i < ALPHABET_SIZE; i++) children[i] = null ; } } // A linked list node class DLLNode { public string data; public DLLNode next, prev; } class Program { // If not present, inserts key into trie // If the key is prefix of trie node, just marks leaf // node static void Insert(TrieNode root, string key, ref DLLNode head) { int index; TrieNode pCrawl = root; for ( int level = 0; level < key.Length; level++) { index = ( int )key[level]; if (pCrawl.children[index] == null ) pCrawl.children[index] = new TrieNode(); pCrawl = pCrawl.children[index]; } if (pCrawl.isLeaf) { // delete from linked list if (pCrawl.LLptr != null ) DeleteNode( ref head, pCrawl.LLptr); pCrawl.LLptr = null ; } else { // mark last node as leaf pCrawl.isLeaf = true ; // insert to linked list Push( ref head, key); pCrawl.LLptr = head; } } /* Given a reference (pointer to pointer) to the head of a list and an int, inserts a new node on the front of the list. */ static void Push( ref DLLNode head_ref, string new_data) { DLLNode new_node = new DLLNode(); // put in the data new_node.data = new_data; // Make next of new node as head and previous // as NULL new_node.next = head_ref; new_node.prev = null ; // change prev of head node to new node if (head_ref != null ) head_ref.prev = new_node; // move the head to point to the new node head_ref = new_node; } /* Function to delete a node in a Doubly Linked List. head_ref --> pointer to head node pointer. del --> pointer to node to be deleted. */ static void DeleteNode( ref DLLNode head_ref, DLLNode del) { // base case if (head_ref == null || del == null ) return ; // If node to be deleted is head node if (head_ref == del) head_ref = del.next; // Change next only if node to be deleted is // NOT the last node if (del.next != null ) del.next.prev = del.prev; // Change prev only if node to be deleted is // NOT the first node if (del.prev != null ) del.prev.next = del.next; // Finally, free the memory occupied by del del = null ; } // Driver function static void Main() { string [] urls TrieNode root = new TrieNode(); DLLNode head = null ; int n = urls.Length; for ( int i = 0; i < n; i++) Insert(root, urls[i], ref head); Console.WriteLine(head.data); } } |
Java
import java.util.*; class Trie { private static final int ALPHABET_SIZE = 256 ; // Alphabet size (# of symbols) // DLLNode class is a linked list node, which stores // data and the next and previous node private static class DLLNode { String data; // data stored in the node DLLNode prev; // pointer to the previous node DLLNode next; // pointer to the next node DLLNode(String data) { // constructor for DLLNode, which takes data as an // argument this .data = data; this .prev = null ; this .next = null ; } } // TrieNode class is a trie node, which stores an array // of children for each node, a boolean for isLeaf, and // a pointer to a doubly linked list node private static class TrieNode { TrieNode[] children; // array of size ALPHABET_SIZE // filled with null boolean isLeaf; // boolean to mark if the node is a // leaf node DLLNode LLptr; // pointer to doubly linked list node TrieNode() { children = new TrieNode[ALPHABET_SIZE]; isLeaf = false ; LLptr = null ; } } // push function which takes a reference to the head of // a list and a string, inserts a new node at the front // of the list private static DLLNode push(DLLNode headRef, String newData) { DLLNode newNode = new DLLNode( newData); // creates a new node of type DLLNode // with data as the new_data argument newNode.next = headRef; // sets the next node of the new node // to the head node passed in // change the prev of the head node to the new node // if the head node is not null if (headRef != null ) { headRef.prev = newNode; } headRef = newNode; // move the head of the list to // point to the new node return headRef; } // deleteNode function which takes a reference to the // head of a list and a node to be deleted, deletes the // node from the list private static DLLNode deleteNode(DLLNode headRef, DLLNode delNode) { // base case if head or node to be deleted is null if (headRef == null || delNode == null ) { return headRef; } // if node to be deleted is the head node if (headRef == delNode) { headRef = delNode.next; } // change next if node to be deleted is not the last // node if (delNode.next != null ) { delNode.next.prev = delNode.prev; } // change prev if node to be deleted is not the // first node if (delNode.prev != null ) { delNode.prev.next = delNode.next; } return headRef; } // getNewTrieNode function which returns a new trie node // initialized to nulls private static TrieNode getNewTrieNode() { TrieNode pNode = new TrieNode(); pNode.isLeaf = false ; pNode.LLptr = null ; return pNode; } // insert function which takes a root node, a key, and a // head node, and inserts the key into the trie static DLLNode insert(TrieNode root, String key, DLLNode head) { TrieNode pCrawl = root; // create pointer to root node // loop through the levels for ( int level = 0 ; level < key.length(); level++) { int index = ( int )key.charAt(level); if (pCrawl.children[index] == null ) { pCrawl.children[index] = getNewTrieNode(); } // move pointer to next node pCrawl = pCrawl.children[index]; } // if the node is already a leaf node if (pCrawl.isLeaf) { if (pCrawl.LLptr != null ) { // if the LLptr is not null, delete the node // from the linked list head = deleteNode(head, pCrawl.LLptr); } pCrawl.LLptr = null ; } else { // mark last node as leaf pCrawl.isLeaf = true ; head = push(head, key); pCrawl.LLptr = head; } return head; } // Driver code public static void main(String[] args) { String[] urls = { }; TrieNode root = getNewTrieNode(); DLLNode head = null ; // Function calls for (String url : urls) { head = insert(root, url, head); } System.out.println(head.data); } } |
http://write.geeksforgeeks.org
Time Complexity: O(n*m) where m is the maximum length of a URL, and n is the number of URLs.
Auxiliary Space: O(ALPHABET_SIZE * m * n)
This article is contributed by Aditya Goel. If you like neveropen and would like to contribute, you can also write an article using write.geeksforgeeks.org or mail your article to review-team@geeksforgeeks.org. See your article appearing on the neveropen main page and help other Geeks. Please write comments if you find anything incorrect, or you want to share more information about the topic discussed above.
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 neveropen!