# encoding: UTF-8

module Stringex
  # These methods are all added on String class.
  module StringExtensions
    def self.included(base) # :nodoc:
      base.extend(ClassMethods)
    end

    # Returns the string converted (via Textile/RedCloth) to HTML format
    # or self [with a friendly warning] if RedCloth is not available.
    #
    # Using a truthy +lite_mode+ argument will cause RedCloth to not wrap the HTML
    # in a container P element, which is useful behavior for generating header
    # element text, etc. This is roughly equivalent to ActionView's
    # textilize_without_paragraph except that it makes RedCloth do all the work
    # instead of just gsubbing the return from RedCloth.
    def to_html(lite_mode = false)
      unless defined?(RedCloth)
        warn "String#to_html was called without RedCloth being successfully required"
        return self
      end
      return RedCloth.new(self, [:lite_mode]).to_html if lite_mode

      html = RedCloth.new(self).to_html.tr("\t", "")
      # Blank lines are only stripped when the text has no <pre> block,
      # so preformatted content keeps its own line structure.
      self =~ /<pre>/ ? html : html.gsub(/\n\n/, "")
    end

    # Create a URI-friendly representation of the string. This is used internally by
    # acts_as_url[link:classes/Stringex/ActsAsUrl/ClassMethods.html#M000012]
    # but can be called manually in order to generate an URI-friendly version of any string.
    #
    # Recognized options:
    #
    # * <tt>:exclude</tt> - a collection; when it includes this string, the string
    #   is returned untouched.
    # * <tt>:limit</tt> - maximum length of the result (see #limit).
    #
    # The options hash is also handed on to #remove_formatting.
    def to_url(options = {})
      excluded = options[:exclude]
      return self if excluded && excluded.include?(self)

      remove_formatting(options).
        downcase.
        replace_whitespace("-").
        collapse("-").
        limit(options[:limit])
    end

    # Returns the string limited in size to the value of limit.
    # A nil limit (the default) returns the string itself.
    def limit(limit = nil)
      return self if limit.nil?

      self[0...limit]
    end

    # Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
    # below to see which methods are run.
    def remove_formatting(options = {})
      strip_html_tags.
        convert_smart_punctuation.
        convert_accented_entities.
        convert_vulgar_fractions.
        convert_misc_entities.
        convert_misc_characters(options).
        to_ascii.
        # NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already transliterated
        # so we need to do it again just to be safe
        convert_misc_characters(options).
        collapse
    end

    # Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
    # in Typo[http://typosphere.org].
    #
    # Unless +leave_whitespace+ is truthy, runs of whitespace in the result are
    # reduced to a single space. The result is stripped in both cases.
    def strip_html_tags(leave_whitespace = false)
      name = /[\w:_-]+/
      value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
      attr = /(#{name}(\s*=\s*#{value})?)/
      rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/

      without_tags = gsub(rx, "")
      without_tags = without_tags.gsub(/\s+/, " ") unless leave_whitespace
      without_tags.strip
    end

    # Converts HTML entities into the respective non-accented letters. Examples:
    #
    #   "&aacute;".convert_accented_entities # => "a"
    #   "&ccedil;".convert_accented_entities # => "c"
    #   "&egrave;".convert_accented_entities # => "e"
    #   "&icirc;".convert_accented_entities # => "i"
    #   "&oslash;".convert_accented_entities # => "o"
    #   "&uuml;".convert_accented_entities # => "u"
    #
    # Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
    # functionality please use <tt>to_ascii</tt>.
    def convert_accented_entities
      gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1').strip
    end

    # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
    # Any entity that is not in the table below is removed from the result.
    #
    # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
    # to be generated by Textile.
    def convert_misc_entities
      dummy = dup
      {
        "#822[01]" => "\"",
        "#821[67]" => "'",
        "#8230" => "...",
        "#8211" => "-",
        "#8212" => "--",
        "#215" => "x",
        "gt" => ">",
        "lt" => "<",
        "(#8482|trade)" => "(tm)",
        "(#174|reg)" => "(r)",
        "(#169|copy)" => "(c)",
        "(#38|amp)" => "and",
        "nbsp" => " ",
        "(#162|cent)" => " cent",
        "(#163|pound)" => " pound",
        "(#188|frac14)" => "one fourth",
        "(#189|frac12)" => "half",
        "(#190|frac34)" => "three fourths",
        "(#247|divide)" => "divide",
        "(#176|deg)" => " degrees "
      }.each do |textiled, normal|
        # Each key is a regexp fragment for the entity body; it is wrapped in
        # the literal "&" and ";" delimiters here.
        dummy.gsub!(/&#{textiled};/, normal)
      end
      # Whatever entities remain have no plain text equivalent above: drop them.
      dummy.gsub(/&[^;]+;/, "").strip
    end

    # Converts vulgar fractions from supported html entities and unicode to
    # plain text formats.
    def convert_vulgar_fractions
      dummy = dup
      {
        "(&#188;|&frac14;|¼)" => "one fourth",
        "(&#189;|&frac12;|½)" => "half",
        "(&#190;|&frac34;|¾)" => "three fourths",
        "(&#8531;|⅓)" => "one third",
        "(&#8532;|⅔)" => "two thirds",
        "(&#8533;|⅕)" => "one fifth",
        "(&#8534;|⅖)" => "two fifths",
        "(&#8535;|⅗)" => "three fifths",
        "(&#8536;|⅘)" => "four fifths",
        "(&#8537;|⅙)" => "one sixth",
        "(&#8538;|⅚)" => "five sixths",
        "(&#8539;|⅛)" => "one eighth",
        "(&#8540;|⅜)" => "three eighths",
        "(&#8541;|⅝)" => "five eighths",
        "(&#8542;|⅞)" => "seven eighths"
      }.each do |textiled, normal|
        dummy.gsub!(/#{textiled}/, normal)
      end
      dummy
    end

    # Converts MS Word 'smart punctuation' to ASCII
    #
    # Curly double quotes become <tt>"</tt>, curly single quotes become <tt>'</tt>
    # and the single-character ellipsis becomes three dots. The octal escapes in
    # the patterns are additional byte sequences treated the same way as the
    # literal curly quotes next to them.
    def convert_smart_punctuation
      dummy = dup
      {
        "(“|”|\302\223|\302\224|\303\222|\303\223)" => '"',
        "(‘|’|\302\221|\302\222|\303\225)" => "'",
        "…" => "...",
      }.each do |smart, normal|
        dummy.gsub!(/#{smart}/, normal)
      end
      dummy.strip
    end

    # Converts various common plaintext characters to a more URI-friendly representation.
    # Examples:
    #
    #   "foo & bar".convert_misc_characters # => "foo and bar"
    #   "Chanel #9".convert_misc_characters # => "Chanel number 9"
    #   "user@host".convert_misc_characters # => "user at host"
    #   "google.com".convert_misc_characters # => "google dot com"
    #   "$10".convert_misc_characters # => "10 dollars"
    #   "*69".convert_misc_characters # => "star 69"
    #   "100%".convert_misc_characters # => "100 percent"
    #   "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
    #
    # Passing <tt>:allow_slash => true</tt> in +options+ skips the rule that spells
    # slashes out as the word "slash".
    #
    # Note: Because this method will convert any & symbols to the string "and",
    # you should run any methods which convert HTML entities (convert_accented_entities,
    # convert_vulgar_fractions and convert_misc_entities) before running this method.
    def convert_misc_characters(options = {})
      dummy = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule!

      # Special rules for money
      {
        /(\s|^)\$(\d+)\.(\d+)(\s|$)/ => '\2 dollars \3 cents',
        /(\s|^)£(\d+)\.(\d+)(\s|$)/u => '\2 pounds \3 pence',
      }.each do |found, replaced|
        # Replacements that do not re-insert the first capture get padded with spaces.
        dummy.gsub!(found, replaced =~ /\\1/ ? replaced : " #{replaced} ")
      end

      # Special rules for abbreviations: drop the dots so that they are not
      # spelled out as "dot" by the rules below.
      dummy.gsub!(/(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/) do |abbreviation|
        abbreviation.gsub(".", "")
      end

      # Back to normal rules
      misc_characters = {
        /\s*&\s*/ => "and",
        /\s*#/ => "number",
        /\s*@\s*/ => "at",
        /(\S|^)\.(\S)/ => '\1 dot \2',
        /(\s|^)\$(\d*)(\s|$)/ => '\2 dollars',
        /(\s|^)£(\d*)(\s|$)/u => '\2 pounds',
        /(\s|^)¥(\d*)(\s|$)/u => '\2 yen',
        /\s*\*\s*/ => "star",
        /\s*%\s*/ => "percent",
        /(\s*=\s*)/ => " equals ",
        /\s*\+\s*/ => "plus",
        /\s*÷\s*/ => "divide",
        /\s*°\s*/ => "degrees"
      }
      # Backslash, slash and fullwidth solidus (U+FF0F). Written with %r{} so the
      # plain slash does not terminate the regexp literal.
      misc_characters[%r{\s*(\\|/|\uFF0F)\s*}] = "slash" unless options[:allow_slash]

      misc_characters.each do |found, replaced|
        dummy.gsub!(found, replaced =~ /\\1/ ? replaced : " #{replaced} ")
      end

      # Remove apostrophes/backticks attached to letters, then turn the remaining
      # punctuation into spaces.
      dummy.gsub(/(^|[[:alpha:]])'|`([[:alpha:]]|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'ʼ"_\|]/, " ").strip
    end

    # Replace runs of whitespace in string. Defaults to a single space but any replacement
    # string may be specified as an argument. Examples:
    #
    #   "Foo       bar".replace_whitespace # => "Foo bar"
    #   "Foo       bar".replace_whitespace("-") # => "Foo-bar"
    def replace_whitespace(replace = " ")
      gsub(/\s+/, replace)
    end

    # Removes specified character from the beginning and/or end of the string and then performs
    # <tt>String#squeeze(character)</tt>, condensing runs of the character within the string.
    #
    # The character is matched literally, so regexp metacharacters such as "."
    # are safe to pass.
    #
    # Note: This method has been superseded by ActiveSupport's squish method.
    def collapse(character = " ")
      run = "(?:#{Regexp.escape(character)})*"
      sub(/^#{run}/, "").sub(/#{run}$/, "").squeeze(character)
    end

    module ClassMethods
      # Returns string of random characters with a length matching the specified limit. Excludes 0
      # to avoid confusion between 0 and O.
      def random(limit)
        strong_alphanumerics = %w{
          a b c d e f g h i j k l m n o p q r s t u v w x y z
          A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
          1 2 3 4 5 6 7 8 9
        }
        # Index by the list's own length rather than a hard-coded count.
        Array.new(limit) { strong_alphanumerics[rand(strong_alphanumerics.length)] }.join
      end
    end
  end
end