lib/lucky_sneaks/string_extensions.rb in Hoodow-stringex-1.0.0 vs lib/lucky_sneaks/string_extensions.rb in Hoodow-stringex-1.0.1
- old
+ new
@@ -40,11 +40,16 @@
end
# Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
# below to see which methods are run.
# Runs the receiver through the whole clean-up chain and returns the value of
# the last call in that chain (+collapse+).
#
# The line marked "-" is the 1.0.0 body and the lines marked "+" are the 1.0.1
# body. 1.0.1 inserts +convert_german_umlauts+ directly after +strip_html_tags+;
# the remaining steps (+convert_accented_entities+, +convert_misc_entities+,
# +convert_misc_characters+, +to_ascii+, +collapse+) keep their 1.0.0 order.
#
# NOTE(review): presumably the umlaut step runs this early so that ä/ö/ü are
# expanded to digraphs before +to_ascii+ sees them - confirm against +to_ascii+.
def remove_formatting
- strip_html_tags.convert_accented_entities.convert_misc_entities.convert_misc_characters.to_ascii.collapse
+ strip_html_tags.
+ convert_german_umlauts.
+ convert_accented_entities.
+ convert_misc_entities.
+ convert_misc_characters.
+ to_ascii.collapse
end
# Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
# in Typo[http://typosphere.org].
def strip_html_tags(leave_whitespace = false)
@@ -66,9 +71,23 @@
#
# Note: This does not convert accented Unicode/ASCII characters themselves. For that
# functionality please use <tt>to_ascii</tt>.
# The pattern matches "&", exactly one ASCII letter, one of the accent suffixes
# listed in the alternation (grave, acute, circ, tilde, uml, ring, cedil, slash)
# and a closing ";". Each match is replaced by that single letter (capture
# group 1), so for example "&eacute;" becomes "e". Returns the string produced
# by +gsub+.
def convert_accented_entities
gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1')
+ end
+
+ # Converts German umlauts and ß to their ASCII digraph transliterations (ä to ae, ö to oe, ü to ue, ß to ss).
# Replaces every German umlaut and ß in the receiver with its ASCII digraph
# and returns the result as a new string; the receiver itself is not modified.
#
# Capital umlauts keep a capital first letter ("Ärger" gives "Aerger"), so the
# capitalisation of the input survives the transliteration. Fully upper-case
# words are not special-cased: "ÜBER" gives "UeBER".
#
#   "Müller".convert_german_umlauts  # => "Mueller"
#   "Straße".convert_german_umlauts  # => "Strasse"
#   "Österreich".convert_german_umlauts  # => "Oesterreich"
def convert_german_umlauts
  digraphs = {
    "Ä" => "Ae",
    "Ö" => "Oe",
    "Ü" => "Ue",
    "ä" => "ae",
    "ö" => "oe",
    "ü" => "ue",
    "ß" => "ss"
  }
  # A literal alternation is built once when the file is parsed, instead of
  # being re-assembled from the hash keys by interpolation on every call.
  gsub(/Ä|Ö|Ü|ä|ö|ü|ß/) { |umlaut| digraphs[umlaut] }
end
# Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
#
# Note: This isn't an attempt at complete conversion of HTML entities, just those most likely