string_extensions.rb in Hoodow-stringex-1.0.1

- old
+ new

@@ -40,11 +40,16 @@
     end
     
     # Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
     # below to see which methods are run.
     def remove_formatting
-      strip_html_tags.convert_accented_entities.convert_misc_entities.convert_misc_characters.to_ascii.collapse
+      strip_html_tags.
+      convert_german_umlauts. # NOTE(review): presumably placed ahead of to_ascii so umlauts become ae/oe/ue first -- confirm
+      convert_accented_entities.
+      convert_misc_entities.
+      convert_misc_characters.
+      to_ascii.collapse # result of the final call in the chain is the method's return value
     end
 
     # Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
     # in Typo[http://typosphere.org].
     def strip_html_tags(leave_whitespace = false)
@@ -66,9 +71,23 @@
     # 
     # Note: This does not do any conversion of Unicode/Ascii accented-characters. For that
     # functionality please use <tt>to_ascii</tt>.
     def convert_accented_entities
       gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1') # keep only the captured base letter, e.g. "&eacute;" -> "e"
+    end
+
+    # Converts German umlauts (Ä/ä, Ö/ö, Ü/ü) and ß to the lowercase ASCII digraphs ae, oe, ue and ss.
+    def convert_german_umlauts
+      map = { # literal character => ASCII replacement
+        "Ä" => "ae", # NOTE(review): uppercase umlauts map to lowercase digraphs ("ae", not "Ae") -- confirm this is intended
+        "Ö" => "oe",
+        "Ü" => "ue",
+        "ä" => "ae",
+        "ö" => "oe",
+        "ü" => "ue",
+        "ß" => "ss"
+      }
+      gsub(/#{map.keys.join('|')}/) { |match| map[match] } # one alternation over all keys; the block looks up each match's replacement
     end
 
     # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
     # 
     # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely