Given a string str which represents a sentence, the task is to remove the consecutive duplicate words from the sentence using a regular expression. Implementations in C++, Java and Python are given below.
Examples:
Input: str = “Good bye bye world world”
Output: Good bye world
Explanation:
We remove the second occurrence of bye and world from Good bye bye world world
Input: str = “Ram went went to to to his home”
Output: Ram went to his home
Explanation:
We remove the second occurrence of went and the second and third occurrences of to from Ram went went to to to his home.
Input: str = “Hello hello world world”
Output: Hello world
Explanation:
We remove the second occurrence of hello and world from Hello hello world world.
Approach
- Get the sentence.
- Form a regular expression to remove duplicate words from sentences.
regex = "\\b(\\w+)(?:\\W+\\1\\b)+";
- The details of the above regular expression can be understood as:
- “\\b”: A word boundary. Boundaries are needed for special cases. For example, in “My thesis is great”, the “is” at the end of “thesis” is not a whole word, so it won't be treated as a duplicate of the following word “is”.
- “\\w+”: One or more word characters: [a-zA-Z_0-9]
- “\\W+”: One or more non-word characters: [^\w]
- “\\1”: Matches whatever was matched in the 1st group of parentheses, which in this case is the (\w+)
- “+”: Match whatever it’s placed after 1 or more times
- Match the sentence with the regex and replace every match with the word captured by the first group. In Java, the matching can be done using Pattern.matcher().
- return the modified sentence.
Below is the implementation of the above approach:
C++
// C++ program to remove duplicate words // using Regular Expression or ReGex. #include <iostream> #include <regex> using namespace std; // Function to validate the sentence // and remove the duplicate words string removeDuplicateWords(string s) { // Regex to matching repeated words. const regex pattern( "\\b(\\w+)(?:\\W+\\1\\b)+" , regex_constants::icase); string answer = s; for ( auto it = sregex_iterator(s.begin(), s.end(), pattern); it != sregex_iterator(); it++) { // flag type for determining the matching behavior // here it is for matches on 'string' objects smatch match; match = *it; answer.replace(answer.find(match.str(0)), match.str(0).length(), match.str(1)); } return answer; } // Driver Code int main() { // Test Case: 1 string str1 = "Good bye bye world world" ; cout << removeDuplicateWords(str1) << endl; // Test Case: 2 string str2 = "Ram went went to to his home" ; cout << removeDuplicateWords(str2) << endl; // Test Case: 3 string str3 = "Hello hello world world" ; cout << removeDuplicateWords(str3) << endl; return 0; } // This code is contributed by yuvraj_chandra |
Java
// Java program to remove duplicate words
// using Regular Expression or ReGex.
import java.util.regex.Matcher;
import java.util.regex.Pattern;

class GFG {

    // Regex matching a word followed by one or more repeats of itself,
    // compared case-insensitively. Compiled once and reused.
    private static final Pattern REPEATED_WORDS = Pattern.compile(
        "\\b(\\w+)(?:\\W+\\1\\b)+", Pattern.CASE_INSENSITIVE);

    /**
     * Collapses every run of consecutively repeated words into one word.
     *
     * A run is a word followed one or more times by non-word characters
     * and the same word again (ignoring case). The whole run is replaced
     * by its first word.
     *
     * @param input sentence to process
     * @return the sentence with each run of repeated words reduced to
     *         the first word of the run
     */
    public static String removeDuplicateWords(String input)
    {
        Matcher m = REPEATED_WORDS.matcher(input);

        // Substitute each match with capture group 1 at the matched
        // position. The matched text is never re-interpreted as a regex,
        // so separators such as "(" or "." are handled safely.
        return m.replaceAll("$1");
    }

    // Driver code
    public static void main(String args[])
    {
        // Test Case: 1
        String str1 = "Good bye bye world world";
        System.out.println(removeDuplicateWords(str1));

        // Test Case: 2
        String str2 = "Ram went went to to his home";
        System.out.println(removeDuplicateWords(str2));

        // Test Case: 3
        String str3 = "Hello hello world world";
        System.out.println(removeDuplicateWords(str3));
    }
}
Python3
# Python program to remove duplicate words
# using Regular Expression or ReGex.
import re


def removeDuplicateWords(input):
    """Collapse every run of consecutively repeated words into one word.

    A run is a word followed one or more times by non-word characters and
    the same word again (ignoring case). The whole run is replaced by its
    first word.

    Args:
        input: Sentence to process.

    Returns:
        The sentence with each run of repeated words reduced to the first
        word of the run.
    """
    # Regex matching a word followed by one or more repeats of itself
    regex = r'\b(\w+)(?:\W+\1\b)+'

    # Replace each match with capture group 1 (the first word of the run)
    return re.sub(regex, r'\1', input, flags=re.IGNORECASE)


# Driver Code
if __name__ == "__main__":
    # Test Case: 1
    str1 = "Good bye bye world world"
    print(removeDuplicateWords(str1))

    # Test Case: 2
    str2 = "Ram went went to to his home"
    print(removeDuplicateWords(str2))

    # Test Case: 3
    str3 = "Hello hello world world"
    print(removeDuplicateWords(str3))

# This code is contributed by yuvraj_chandra
Output:
Good bye world
Ram went to his home
Hello world
Time Complexity : O(n), where n is length of string
Auxiliary Space : O(1)