lib/lucky_sneaks/string_extensions.rb in stringex-1.1.0 vs lib/lucky_sneaks/string_extensions.rb in stringex-1.2.0

- old
+ new

@@ -1,17 +1,17 @@ -# coding: utf-8 +# encoding: UTF-8 module LuckySneaks # These methods are all added on String class. module StringExtensions def self.included(base) # :nodoc: base.extend(ClassMethods) end - + # Returns the string converted (via Textile/RedCloth) to HTML format # or self [with a friendly warning] if Redcloth is not available. - # + # # Using <tt>:lite</tt> argument will cause RedCloth to not wrap the HTML in a container # P element, which is useful behavior for generating header element text, etc. # This is roughly equivalent to ActionView's <tt>textilize_without_paragraph</tt> # except that it makes RedCloth do all the work instead of just gsubbing the return # from RedCloth. @@ -29,27 +29,22 @@ else warn "String#to_html was called without RedCloth being successfully required" self end end - - # Create a URI-friendly representation of the string. This is used internally by + + # Create a URI-friendly representation of the string. This is used internally by # acts_as_url[link:classes/LuckySneaks/ActsAsUrl/ClassMethods.html#M000012] # but can be called manually in order to generate an URI-friendly version of any string. def to_url remove_formatting.downcase.replace_whitespace("-").collapse("-") end - + # Performs multiple text manipulations. Essentially a shortcut for typing them all. View source # below to see which methods are run. def remove_formatting - strip_html_tags. - convert_german_umlauts. - convert_accented_entities. - convert_misc_entities. - convert_misc_characters. - to_ascii.collapse + strip_html_tags.convert_accented_entities.convert_misc_entities.convert_misc_characters.to_ascii.collapse end # Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression # in Typo[http://typosphere.org]. def strip_html_tags(leave_whitespace = false) @@ -59,40 +54,26 @@ rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/ (leave_whitespace) ? 
gsub(rx, "").strip : gsub(rx, "").gsub(/\s+/, " ").strip end # Converts HTML entities into the respective non-accented letters. Examples: - # + # # "&aacute;".convert_accented_entities # => "a" # "&ccedil;".convert_accented_entities # => "c" # "&egrave;".convert_accented_entities # => "e" # "&icirc;".convert_accented_entities # => "i" # "&oslash;".convert_accented_entities # => "o" # "&uuml;".convert_accented_entities # => "u" - # + # # Note: This does not do any conversion of Unicode/Ascii accented-characters. For that # functionality please use <tt>to_ascii</tt>. def convert_accented_entities gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1') end - # Converts German Umlauts to their transliteration according to German conventions. - def convert_german_umlauts - map = { - "Ä" => "ae", - "Ö" => "oe", - "Ü" => "ue", - "ä" => "ae", - "ö" => "oe", - "ü" => "ue", - "ß" => "ss" - } - gsub(/#{map.keys.join('|')}/) { |match| map[match] } - end - # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats. - # + # # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely # to be generated by Textile. def convert_misc_entities dummy = dup { @@ -119,22 +100,37 @@ dummy.gsub!(/&#{textiled};/, normal) end dummy.gsub(/&[^;]+;/, "") end + # Converts MS Word 'smart punctuation' to ASCII + # + def convert_smart_punctuation + dummy = dup + { + + "(“|”|\302\223|\302\224)" => '"', + "(‘|’|\302\221|\302\222)" => "'", + "…" => "...", + }.each do |smart, normal| + dummy.gsub!(/#{smart}/, normal) + end + dummy + end + # Converts various common plaintext characters to a more URI-friendly representation. 
# Examples: - # + # # "foo & bar".convert_misc_characters # => "foo and bar" # "Chanel #9".convert_misc_characters # => "Chanel number nine" # "user@host".convert_misc_characters # => "user at host" # "google.com".convert_misc_characters # => "google dot com" # "$10".convert_misc_characters # => "10 dollars" # "*69".convert_misc_characters # => "star 69" # "100%".convert_misc_characters # => "100 percent" # "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux" - # + # # Note: Because this method will convert any & symbols to the string "and", # you should run any methods which convert HTML entities (convert_html_entities and convert_misc_entities) # before running this method. def convert_misc_characters dummy = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule! @@ -156,33 +152,34 @@ /(\s|^)£(\d*)(\s|$)/u => '\2 pounds', /(\s|^)¥(\d*)(\s|$)/u => '\2 yen', /\s*\*\s*/ => "star", /\s*%\s*/ => "percent", /\s*(\\|\/)\s*/ => "slash", + /(\s*=\s*)/ => " equals " }.each do |found, replaced| replaced = " #{replaced} " unless replaced =~ /\\1/ dummy.gsub!(found, replaced) end dummy = dummy.gsub(/(^|\w)'(\w|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'"_]/, " ") end # Replace runs of whitespace in string. Defaults to a single space but any replacement # string may be specified as an argument. Examples: - # + # # "Foo bar".replace_whitespace # => "Foo bar" # "Foo bar".replace_whitespace("-") # => "Foo-bar" def replace_whitespace(replace = " ") gsub(/\s+/, replace) end # Removes specified character from the beginning and/or end of the string and then performs # <tt>String#squeeze(character)</tt>, condensing runs of the character within the string. - # + # # Note: This method has been superseded by ActiveSupport's squish method. 
def collapse(character = " ") sub(/^#{character}*/, "").sub(/#{character}*$/, "").squeeze(character) end - + module ClassMethods # Returns string of random characters with a length matching the specified limit. Excludes 0 # to avoid confusion between 0 and O. def random(limit) strong_alphanumerics = %w{