Given a string str which has various HTML Entities in it, the task is to replace these entities with their corresponding special character.
An HTML entity parser takes HTML code as input and replaces every entity with the special character it represents. For example, for the quotation mark the entity is `&quot;` and the special character is `"`.
The HTML entities and their corresponding special characters are shown in the table below:
Name/ Description | HTML Entity | Special Character |
---|---|---|
Space | `&nbsp;` | (space) |
Ampersand | `&amp;` | & |
Greater than | `&gt;` | > |
Less than | `&lt;` | < |
Single Quotation Mark | `&apos;` | ' |
Double Quotation Mark | `&quot;` | " |
Registered Trademark | `&reg;` | ® |
Copyright mark | `&copy;` | © |
Forward Slash | `&frasl;` | / |
Examples:
Input: str = "17 &gt; 25 and 25 &lt; 17"
Output: 17 > 25 and 25 < 17
Explanation: In the above example `&gt;` is replaced by the corresponding special character > and `&lt;` is replaced by <.
Input: str = "&copy; is symbol of copyright"
Output: © is symbol of copyright
Explanation: In the above example `&copy;` is replaced by the corresponding special character ©.
Method 1 – using unordered_map: Below are the steps:
- Store the HTML Entity with their character in a Map.
- Traverse the given string and if any character ‘&’ is encountered then find which HTML Entity is present after this ampersand.
- Add the corresponding character with the Entity in the output string.
- Print the output string as the result.
Below is the implementation of the above approach:
C++
// C++ program for the above approach #include <iostream> #include <unordered_map> using namespace std; class GfG { public : unordered_map<string, string> m; public : // Associating html entity with // special character void initializeMap() { m[ """ ] = "\"" ; m[ "'" ] = "'" ; m[ "&" ] = "&" ; m[ ">" ] = ">" ; m[ "<" ] = "<" ; m[ "⁄" ] = "/" ; m[ " " ] = " " ; m[ "®" ] = "®" ; m[ "©" ] = "©" ; } public : // Function that convert the given // HTML Entity to its parsed String string parseInputString(string input) { // Output string string output = "" ; // Traverse the string for ( int i = 0; i < input.size(); i++) { // If any ampersand is occurred if (input[i] == '&' ) { string buffer; while (i < input.size()) { buffer = buffer + input[i]; // If any ampersand is occurred if (input[i] == ';' && m.find(buffer) != m.end()) { // Append the parsed // character output = output + m[buffer]; // Clear the buffer buffer = "" ; i++; break ; } else { i++; } } if (i >= input.size()) { output = output + buffer; break ; } i--; } else { output = output + input[i]; } } // Return the parsed string return output; } }; // Driver Code int main() { // Given String string input = "17 > 25 and 25 < 17" ; GfG g; // Initialised parsed string g.initializeMap(); // Function Call cout << g.parseInputString(input); return 0; } |
Java
import java.util.HashMap;
import java.util.Map;

// Java program for the above approach
public class HtmlEntityParser {

    // Lookup table: entity text (leading '&' and trailing ';' included)
    // -> the character(s) it stands for.
    public static Map<String, String> map = new HashMap<>();

    static {
        // Associating html entity with
        // special character
        map.put("&quot;", "\"");
        map.put("&apos;", "'");
        map.put("&amp;", "&");
        map.put("&gt;", ">");
        map.put("&lt;", "<");
        map.put("&frasl;", "/");
        map.put("&nbsp;", " ");
        map.put("&reg;", "®");
        map.put("&copy;", "©");
    }

    /**
     * Converts the HTML entities in the given text to their characters.
     *
     * A bare '&' or an unknown "&...;" sequence is copied through
     * unchanged. Replaced text is never re-scanned, so "&amp;gt;"
     * becomes "&gt;", not ">".
     *
     * @param input text that may contain entities stored in {@code map}
     * @return a copy of {@code input} with every known entity replaced
     */
    public static String parseInputString(String input) {
        StringBuilder output = new StringBuilder(input.length());

        int i = 0;
        while (i < input.length()) {
            char current = input.charAt(i);

            if (current == '&') {
                // A candidate entity ends at the next ';'. Stopping at
                // the next '&' as well keeps a stray ampersand from
                // swallowing a valid entity that follows it.
                int end = i + 1;
                while (end < input.length()
                       && input.charAt(end) != '&'
                       && input.charAt(end) != ';') {
                    end++;
                }

                if (end < input.length() && input.charAt(end) == ';') {
                    String replacement = map.get(input.substring(i, end + 1));
                    if (replacement != null) {
                        output.append(replacement);
                        i = end + 1;
                        continue;
                    }
                }
            }

            // Ordinary character, or an '&' that starts no known entity.
            output.append(current);
            i++;
        }

        // convert the string builder into string
        // return the answer.
        return output.toString();
    }

    public static void main(String[] args) {
        // Initialize the parse string
        String input = "17 &gt; 25 and 25 &lt; 17";

        // Function call
        System.out.println(parseInputString(input));
    }
}
Python3
# Python program for the above approach
class GfG:
    def __init__(self):
        # Lookup table: entity text (leading '&' and trailing ';' included)
        # -> the character(s) it stands for. Filled by initializeMap().
        self.m = {}

    # Associating html entity with special character
    def initializeMap(self):
        self.m["&quot;"] = "\""
        self.m["&apos;"] = "'"
        self.m["&amp;"] = "&"
        self.m["&gt;"] = ">"
        self.m["&lt;"] = "<"
        self.m["&frasl;"] = "/"
        self.m["&nbsp;"] = " "
        self.m["&reg;"] = "®"
        self.m["&copy;"] = "©"

    # Function that convert the given
    # HTML Entity to its parsed String
    def parseInputString(self, input):
        """Return `input` with every entity stored in self.m replaced.

        A bare '&' or an unknown "&...;" sequence is copied through
        unchanged. Replaced text is never re-scanned, so "&amp;gt;"
        becomes "&gt;", not ">".
        """
        # Collect pieces and join once instead of repeated string concatenation.
        pieces = []
        n = len(input)
        i = 0
        while i < n:
            if input[i] == '&':
                # A candidate entity ends at the next ';'. Stopping at the
                # next '&' as well keeps a stray ampersand from swallowing
                # a valid entity that follows it.
                end = i + 1
                while end < n and input[end] not in '&;':
                    end += 1
                if end < n and input[end] == ';':
                    replacement = self.m.get(input[i:end + 1])
                    if replacement is not None:
                        pieces.append(replacement)
                        i = end + 1
                        continue
            # Ordinary character, or an '&' that starts no known entity.
            pieces.append(input[i])
            i += 1

        # Return the parsed string
        return "".join(pieces)


# Driver Code
if __name__ == '__main__':
    # Given String
    input_str = "17 &gt; 25 and 25 &lt; 17"

    g = GfG()

    # Initialise the entity table
    g.initializeMap()

    # Function Call
    print(g.parseInputString(input_str))

# Contributed by adityasha4x71
Javascript
// JavaScript program for the above approach
class GfG {
  constructor() {
    // Lookup table: entity text (leading '&' and trailing ';' included)
    // -> the character(s) it stands for. Filled by initializeMap().
    this.m = {};
  }

  // Associating html entity with special character
  initializeMap() {
    this.m["&quot;"] = "\"";
    this.m["&apos;"] = "'";
    this.m["&amp;"] = "&";
    this.m["&gt;"] = ">";
    this.m["&lt;"] = "<";
    this.m["&frasl;"] = "/";
    this.m["&nbsp;"] = " ";
    this.m["&reg;"] = "®";
    this.m["&copy;"] = "©";
  }

  /**
   * Converts the HTML entities in the given text to their characters.
   *
   * A bare '&' or an unknown "&...;" sequence is copied through unchanged.
   * Replaced text is never re-scanned, so "&amp;gt;" becomes "&gt;".
   *
   * @param {string} input text that may contain entities stored in this.m
   * @returns {string} a copy of input with every known entity replaced
   */
  parseInputString(input) {
    // Output string
    let output = "";

    let i = 0;
    while (i < input.length) {
      if (input[i] === '&') {
        // A candidate entity ends at the next ';'. Stopping at the next
        // '&' as well keeps a stray ampersand from swallowing a valid
        // entity that follows it.
        let end = i + 1;
        while (end < input.length && input[end] !== '&' && input[end] !== ';') {
          end++;
        }

        if (end < input.length && input[end] === ';') {
          const entity = input.slice(i, end + 1);
          // Own-property check so inherited object keys never count.
          if (Object.prototype.hasOwnProperty.call(this.m, entity)) {
            output += this.m[entity];
            i = end + 1;
            continue;
          }
        }
      }

      // Ordinary character, or an '&' that starts no known entity.
      output += input[i];
      i++;
    }

    // Return the parsed string
    return output;
  }
}

// Driver Code
// Given String
const input_str = "17 &gt; 25 and 25 &lt; 17";

const g = new GfG();

// Initialise the entity table
g.initializeMap();

// Function Call
console.log(g.parseInputString(input_str));
C#
// C# program for the above approach
using System;
using System.Collections.Generic;

public class HtmlEntityParser
{
    // Lookup table: entity text (leading '&' and trailing ';' included)
    // -> the character(s) it stands for.
    public static Dictionary<string, string> map = new Dictionary<string, string>()
    {
        // Associating html entity with
        // special character
        { "&quot;", "\"" },
        { "&apos;", "'" },
        { "&amp;", "&" },
        { "&gt;", ">" },
        { "&lt;", "<" },
        { "&frasl;", "/" },
        { "&nbsp;", " " },
        { "&reg;", "®" },
        { "&copy;", "©" }
    };

    /// <summary>
    /// Converts the HTML entities in the given text to their characters.
    /// A bare '&amp;' or an unknown entity-like sequence is copied through
    /// unchanged, and replaced text is never re-scanned.
    /// </summary>
    /// <param name="input">Text that may contain entities stored in <c>map</c>.</param>
    /// <returns>A copy of the input with every known entity replaced.</returns>
    public static string ParseInputString(string input)
    {
        // Output string
        var output = new System.Text.StringBuilder(input.Length);

        int i = 0;
        while (i < input.Length)
        {
            if (input[i] == '&')
            {
                // A candidate entity ends at the next ';'. Stopping at the
                // next '&' as well keeps a stray ampersand from swallowing
                // a valid entity that follows it.
                int end = i + 1;
                while (end < input.Length && input[end] != '&' && input[end] != ';')
                {
                    end++;
                }

                if (end < input.Length && input[end] == ';')
                {
                    string replacement;
                    if (map.TryGetValue(input.Substring(i, end - i + 1), out replacement))
                    {
                        output.Append(replacement);
                        i = end + 1;
                        continue;
                    }
                }
            }

            // Ordinary character, or an '&' that starts no known entity.
            output.Append(input[i]);
            i++;
        }

        // convert the string builder into string
        // return the answer.
        return output.ToString();
    }

    public static void Main(string[] args)
    {
        // Initialize the parse string
        string input = "17 &gt; 25 and 25 &lt; 17";

        // Function call
        Console.WriteLine(ParseInputString(input));
    }
}

// Contributed by adityasharmadev01
17 > 25 and 25 < 17
Time Complexity: O(N)
Auxiliary Space: O(N)
Method 2 – using Pattern Matching: Below are the steps:
- Traverse the given string str.
- While traversing, if any character ‘&’ is encountered then find which HTML Entity is present after this ampersand.
- Append the special character that the table above lists for the matched entity to the output string.
- Print the output string as the result after traversing the above string.
Below is the implementation of the above approach:
C++
// C++ program to Parse the HTML Entities #include <iostream> using namespace std; class GfG { public : string parseInputString(string input) { // To store parsed string string output = "" ; for ( int i = 0; i < input.size(); i++) { // Matching pattern of html // entity if (input[i] == '&' ) { string buffer; while (i < input.size()) { buffer = buffer + input[i]; // Check match for (\) if (input[i] == ';' && buffer == """ ) { output = output + "\"" ; buffer = "" ; i++; break ; } // Check match for (') else if (input[i] == ';' && buffer == "'" ) { output = output + "'" ; buffer = "" ; i++; break ; } // Check match for (&) else if (input[i] == ';' && buffer == "&" ) { output = output + "&" ; buffer = "" ; i++; break ; } // Check match for (>) else if (input[i] == ';' && buffer == ">" ) { output = output + ">" ; buffer = "" ; i++; break ; } // Check match for (<) else if (input[i] == ';' && buffer == "<" ) { output = output + "<" ; buffer = "" ; i++; break ; } // Check match for (/) else if (input[i] == ';' && buffer == "⁄" ) { output = output + "/" ; buffer = "" ; i++; break ; } // Check match for (" ") else if (input[i] == ';' && buffer == " " ) { output = output + " " ; buffer = "" ; i++; break ; } // Check match for (®) else if (input[i] == ';' && buffer == "®" ) { output = output + "®" ; buffer = "" ; i++; break ; } // Check match for (©) else if (input[i] == ';' && buffer == "©" ) { output = output + "©" ; buffer = "" ; i++; break ; } else { i++; } } if (i >= input.size()) { output = output + buffer; break ; } i--; } else { output = output + input[i]; } } // Return the parsed string return output; } }; // Driver Code int main() { // Given String string input = "17 > 25 and 25 < 17" ; GfG g; // Initialised parsed string g.initializeMap(); // Function Call cout << g.parseInputString(input); return 0; } |
17 > 25 and 25 < 17
Time Complexity: O(N)
Auxiliary Space: O(N)
Method 3 – using Regular Expression: Below are the steps:
- Store every expression together with its mapped value in a Map M.
- For each key in the map, create a regular expression using:
regex e(key);
- Now replace the above regular expression formed with it’s mapped value in the Map M as:
regex_replace(str, e, value); where str is the input string, e is the expression formed in the above step, and value is the value mapped to expression e in the Map M
- Repeat the above steps until all expressions have been replaced.
Below is the implementation of the above approach:
C++
// C++ program for the above approach #include <iostream> #include <regex> #include <unordered_map> using namespace std; // Given Expression with mapped value const unordered_map<string, string> m; m = { { """ , "\" }, { "'" , "'" }, { "&" , "&" }, { ">" , ">" }, { "<" , "<" }, { "⁄" , "/" } }; // Function that converts the given // HTML Entity to its parsed String string parseInputString(string input) { for ( auto & it : m) { // Create ReGex Expression regex e(it.first); // Replace the above expression // with mapped value using // regex_replace() input = regex_replace(input, e, it.second); } // Return the parsed string return input; } // Driver Code int main() { // Given String string input = "17 > 25 and 25 < 17" ; // Function Call cout << parseInputString(input); return 0; } |
17 > 25 and 25 < 17
Time Complexity: O(N)
Auxiliary Space: O(N)
Ready to dive in? Explore our Free Demo Content and join our DSA course, trusted by over 100,000 neveropen!