string_extensions.rb in stringex-1.2.0

- old
+ new

@@ -1,17 +1,17 @@
-# coding: utf-8
+# encoding: UTF-8
 
 module LuckySneaks
   # These methods are all added to the String class.
   module StringExtensions
     def self.included(base) # :nodoc:
       # Whatever includes this module also receives the class-level helpers.
       class_level_api = ClassMethods
       base.extend(class_level_api)
     end
-    
+
     # Returns the string converted (via Textile/RedCloth) to HTML format
     # or self [with a friendly warning] if RedCloth is not available.
-    # 
+    #
     # Using <tt>:lite</tt> argument will cause RedCloth to not wrap the HTML in a container
     # P element, which is useful behavior for generating header element text, etc.
     # This is roughly equivalent to ActionView's <tt>textilize_without_paragraph</tt>
     # except that it makes RedCloth do all the work instead of just gsubbing the return
     # from RedCloth.
@@ -29,27 +29,22 @@
       else
         warn "String#to_html was called without RedCloth being successfully required"
         self
       end
     end
-    
-    # Create a URI-friendly representation of the string. This is used internally by 
+
+    # Create a URI-friendly representation of the string. This is used internally by
     # acts_as_url[link:classes/LuckySneaks/ActsAsUrl/ClassMethods.html#M000012]
     # but can be called manually in order to generate a URI-friendly version of any string.
     def to_url
       # Normalise the text first, then turn every whitespace run into a dash
       # and tidy up leading, trailing and repeated dashes.
       plain = remove_formatting.downcase
       dashed = plain.replace_whitespace("-")
       dashed.collapse("-")
     end
-    
+
     # Performs multiple text manipulations in one call: strips HTML tags, converts entities and
     # miscellaneous characters, then runs to_ascii and collapse. Returns the result of that chain.
     def remove_formatting
-      strip_html_tags.
-      convert_german_umlauts.
-      convert_accented_entities.
-      convert_misc_entities.
-      convert_misc_characters.
-      to_ascii.collapse
+      strip_html_tags.convert_accented_entities.convert_misc_entities.convert_misc_characters.to_ascii.collapse
     end
 
     # Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
     # in Typo[http://typosphere.org].
     def strip_html_tags(leave_whitespace = false)
@@ -59,40 +54,26 @@
       rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
       (leave_whitespace) ?  gsub(rx, "").strip : gsub(rx, "").gsub(/\s+/, " ").strip
     end
 
     # Converts HTML entities into the respective non-accented letters. Examples:
-    # 
+    #
     #   "&aacute;".convert_accented_entities # => "a"
     #   "&ccedil;".convert_accented_entities # => "c"
     #   "&egrave;".convert_accented_entities # => "e"
     #   "&icirc;".convert_accented_entities # => "i"
     #   "&oslash;".convert_accented_entities # => "o"
     #   "&uuml;".convert_accented_entities # => "u"
-    # 
+    #
     # Note: This does not do any conversion of Unicode/Ascii accented-characters. For that
     # functionality please use <tt>to_ascii</tt>.
     def convert_accented_entities
       # Group 1 is the base letter; group 2 is the accent name that gets dropped.
       accented_entity = /&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/
       gsub(accented_entity) { Regexp.last_match(1) }
     end
 
-    # Converts German umlauts and ß to ASCII transliterations; note upper-case umlauts map to lower-case pairs.
-    def convert_german_umlauts
-      map = {
-        "Ä" => "ae",
-        "Ö" => "oe",
-        "Ü" => "ue",
-        "ä" => "ae",
-        "ö" => "oe",
-        "ü" => "ue",
-        "ß" => "ss"
-      }
-      gsub(/#{map.keys.join('|')}/) { |match| map[match] } # alternation of all keys; each match is looked up in map
-    end
-
     # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
-    # 
+    #
     # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
     # to be generated by Textile.
     def convert_misc_entities
       dummy = dup
       {
@@ -119,22 +100,37 @@
         dummy.gsub!(/&#{textiled};/, normal)
       end
       dummy.gsub(/&[^;]+;/, "")
     end
 
+    # Converts MS Word 'smart punctuation' to ASCII, returning a modified copy of the string.
+    # Curly double quotes become ", curly single quotes become ' and the ellipsis character becomes "...".
+    def convert_smart_punctuation
+      dummy = dup # work on a copy so gsub! below does not mutate the receiver
+      {
+
+        "(“|”|\302\223|\302\224)" => '"', # \302\22x pairs: presumably mis-decoded Windows-1252 quotes - TODO confirm
+        "(‘|’|\302\221|\302\222)" => "'",
+        "…" => "...",
+      }.each do |smart, normal|
+        dummy.gsub!(/#{smart}/, normal) # each key is interpolated as a regexp source
+      end
+      dummy
+    end
+
     # Converts various common plaintext characters to a more URI-friendly representation.
     # Examples:
-    #   
+    #
     #   "foo & bar".convert_misc_characters # => "foo and bar"
     #   "Chanel #9".convert_misc_characters # => "Chanel number nine"
     #   "user@host".convert_misc_characters # => "user at host"
     #   "google.com".convert_misc_characters # => "google dot com"
     #   "$10".convert_misc_characters # => "10 dollars"
     #   "*69".convert_misc_characters # => "star 69"
     #   "100%".convert_misc_characters # => "100 percent"
     #   "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
-    #   
+    #
     # Note: Because this method will convert any & symbols to the string "and",
     # you should run any methods which convert HTML entities (convert_html_entities and convert_misc_entities)
     # before running this method.
     def convert_misc_characters
       dummy = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule!
@@ -156,33 +152,34 @@
         /(\s|^)£(\d*)(\s|$)/u => '\2 pounds',
         /(\s|^)¥(\d*)(\s|$)/u => '\2 yen',
         /\s*\*\s*/ => "star",
         /\s*%\s*/ => "percent",
         /\s*(\\|\/)\s*/ => "slash",
+        /(\s*=\s*)/ => " equals "
       }.each do |found, replaced|
         replaced = " #{replaced} " unless replaced =~ /\\1/
         dummy.gsub!(found, replaced)
       end
       dummy = dummy.gsub(/(^|\w)'(\w|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'"_]/, " ")
     end
 
     # Replace runs of whitespace in string. Defaults to a single space but any replacement
     # string may be specified as an argument. Examples:
-    # 
+    #
     #   "Foo       bar".replace_whitespace # => "Foo bar"
     #   "Foo       bar".replace_whitespace("-") # => "Foo-bar"
     def replace_whitespace(replace = " ")
       # One or more consecutive whitespace characters count as a single run.
       whitespace_run = /\s+/
       gsub(whitespace_run, replace)
     end
 
     # Removes specified character from the beginning and/or end of the string and then performs
     # <tt>String#squeeze(character)</tt>, condensing runs of the character within the string.
-    # 
+    #
     # Note: This method has been superseded by ActiveSupport's squish method.
     #
     # The character is matched literally: regexp metacharacters such as "." or "*" are escaped
     # before being used in the patterns. Returns a new string; the receiver is left untouched.
     def collapse(character = " ")
       # Escape so that e.g. collapse(".") strips dots instead of matching any character.
       literal = Regexp.escape(character)
       # NOTE(review): "$" matches at a line end, so for multi-line input it is the first
       # line's trailing run that is removed; kept as-is to preserve existing behaviour.
       sub(/^#{literal}*/, "").sub(/#{literal}*$/, "").squeeze(character)
     end
-    
+
     module ClassMethods
       # Returns string of random characters with a length matching the specified limit. Excludes 0
       # to avoid confusion between 0 and O.
       def random(limit)
         strong_alphanumerics = %w{