# encoding: UTF-8

module Stringex
  # These methods are all added on String class.
  #
  # Including this module also extends the including class with ClassMethods
  # (see +included+ below).
  module StringExtensions
    def self.included(base) # :nodoc:
      base.extend(ClassMethods)
    end

    # Returns the string converted (via Textile/RedCloth) to HTML format
    # or self [with a friendly warning] if RedCloth is not available.
    #
    # Using :lite argument will cause RedCloth to not wrap the HTML in a container
    # P element, which is useful behavior for generating header element text, etc.
    # This is roughly equivalent to ActionView's textilize_without_paragraph
    # except that it makes RedCloth do all the work instead of just gsubbing the return
    # from RedCloth.
    #
    # In the non-lite mode tabs are always removed from the generated HTML, and
    # blank lines ("\n\n") are removed as well unless the receiver matches the
    # pattern checked below.
    def to_html(lite_mode = false)
      unless defined?(RedCloth)
        warn "String#to_html was called without RedCloth being successfully required"
        return self
      end
      return RedCloth.new(self, [:lite_mode]).to_html if lite_mode

      html = RedCloth.new(self).to_html.tr("\t", "")
      # NOTE(review): this pattern only contains a newline, which looks like it lost
      # its original content (possibly a literal HTML tag). Behavior is kept exactly
      # as found; confirm the intended pattern against the upstream source.
      self =~ /\n/ ? html : html.gsub(/\n\n/, "")
    end

    # Create a URI-friendly representation of the string. This is used internally by
    # acts_as_url[link:classes/Stringex/ActsAsUrl/ClassMethods.html#M000012]
    # but can be called manually in order to generate an URI-friendly version of any string.
    #
    # Recognized options:
    # * <tt>:exclude</tt> - collection of strings that are returned untouched
    # * <tt>:limit</tt> - maximum length of the result (see +limit+)
    # * any option understood by +convert_misc_characters+ (e.g. <tt>:allow_slash</tt>)
    def to_url(options = {})
      excluded = options[:exclude]
      return self if excluded && excluded.include?(self)

      remove_formatting(options).downcase.replace_whitespace("-").collapse("-").limit(options[:limit])
    end

    # Returns the string limited in size to the value of limit.
    # A nil limit (the default) returns the receiver unchanged.
    def limit(limit = nil)
      limit.nil? ? self : self[0...limit]
    end

    # Performs multiple text manipulations. Essentially a shortcut for typing them all. View source
    # below to see which methods are run.
    #
    # Relies on String#to_ascii, which is not defined in this module and must be
    # provided elsewhere.
    #
    # NOTE: String#to_ascii may convert some Unicode characters to ascii we'd already
    # transliterated so convert_misc_characters is run a second time just to be safe.
    def remove_formatting(options = {})
      strip_html_tags.
        convert_smart_punctuation.
        convert_accented_entities.
        convert_vulgar_fractions.
        convert_misc_entities.
        convert_misc_characters(options).
        to_ascii.
        convert_misc_characters(options).
        collapse
    end

    # Removes HTML tags from text. This code is simplified from Tobias Luettke's regular expression
    # in Typo[http://typosphere.org].
    #
    # Unless +leave_whitespace+ is true, runs of whitespace are also reduced to a
    # single space. The result is always stripped.
    def strip_html_tags(leave_whitespace = false)
      name = /[\w:_-]+/
      value = /([A-Za-z0-9]+|('[^']*?'|"[^"]*?"))/
      attr = /(#{name}(\s*=\s*#{value})?)/
      rx = /<[!\/?\[]?(#{name}|--)(\s+(#{attr}(\s+#{attr})*))?\s*([!\/?\]]+|--)?>/
      stripped = gsub(rx, "")
      stripped = stripped.gsub(/\s+/, " ") unless leave_whitespace
      stripped.strip
    end

    # Converts HTML entities into the respective non-accented letters. Examples:
    #
    #   "&aacute;".convert_accented_entities # => "a"
    #   "&ccedil;".convert_accented_entities # => "c"
    #   "&egrave;".convert_accented_entities # => "e"
    #   "&icirc;".convert_accented_entities # => "i"
    #   "&oslash;".convert_accented_entities # => "o"
    #   "&uuml;".convert_accented_entities # => "u"
    #
    # Note: This does not do any conversion of Unicode/ASCII accented-characters. For that
    # functionality please use to_ascii.
    def convert_accented_entities
      gsub(/&([A-Za-z])(grave|acute|circ|tilde|uml|ring|cedil|slash);/, '\1').strip
    end

    # Converts HTML entities (taken from common Textile/RedCloth formattings) into plain text formats.
    # Any entity left over after the known ones are replaced is removed entirely.
    #
    # Note: This isn't an attempt at complete conversion of HTML entities, just those most likely
    # to be generated by Textile.
    def convert_misc_entities
      # Keys are regexp fragments matched between "&" and ";".
      entities = {
        "#822[01]" => "\"",
        "#821[67]" => "'",
        "#8230" => "...",
        "#8211" => "-",
        "#8212" => "--",
        "#215" => "x",
        "gt" => ">",
        "lt" => "<",
        "(#8482|trade)" => "(tm)",
        "(#174|reg)" => "(r)",
        "(#169|copy)" => "(c)",
        "(#38|amp)" => "and",
        "nbsp" => " ",
        "(#162|cent)" => " cent",
        "(#163|pound)" => " pound",
        "(#188|frac14)" => "one fourth",
        "(#189|frac12)" => "half",
        "(#190|frac34)" => "three fourths",
        "(#247|divide)" => "divide",
        "(#176|deg)" => " degrees "
      }
      text = dup
      entities.each do |textiled, normal|
        text.gsub!(/&#{textiled};/, normal)
      end
      text.gsub(/&[^;]+;/, "").strip
    end

    # Converts vulgar fractions from supported html entities and unicode to
    # plain text formats.
    #
    # Each key lists the numeric entity, the named entity (where HTML defines one)
    # and the literal Unicode character for the same fraction.
    def convert_vulgar_fractions
      fractions = {
        "(&#188;|&frac14;|¼)" => "one fourth",
        "(&#189;|&frac12;|½)" => "half",
        "(&#190;|&frac34;|¾)" => "three fourths",
        "(&#8531;|⅓)" => "one third",
        "(&#8532;|⅔)" => "two thirds",
        "(&#8533;|⅕)" => "one fifth",
        "(&#8534;|⅖)" => "two fifths",
        "(&#8535;|⅗)" => "three fifths",
        "(&#8536;|⅘)" => "four fifths",
        "(&#8537;|⅙)" => "one sixth",
        "(&#8538;|⅚)" => "five sixths",
        "(&#8539;|⅛)" => "one eighth",
        "(&#8540;|⅜)" => "three eighths",
        "(&#8541;|⅝)" => "five eighths",
        "(&#8542;|⅞)" => "seven eighths"
      }
      text = dup
      fractions.each do |textiled, normal|
        text.gsub!(/#{textiled}/, normal)
      end
      text
    end

    # Converts MS Word 'smart punctuation' to ASCII
    #
    # Besides the typographic quote and ellipsis characters themselves, a few
    # octal-escaped byte sequences are mapped to quotes as well.
    def convert_smart_punctuation
      punctuation = {
        "(“|”|\302\223|\302\224|\303\222|\303\223)" => '"',
        "(‘|’|\302\221|\302\222|\303\225)" => "'",
        "…" => "..."
      }
      text = dup
      punctuation.each do |smart, normal|
        text.gsub!(/#{smart}/, normal)
      end
      text.strip
    end

    # Converts various common plaintext characters to a more URI-friendly representation.
    # Examples:
    #
    #   "foo & bar".convert_misc_characters # => "foo and bar"
    #   "Chanel #9".convert_misc_characters # => "Chanel number 9"
    #   "user@host".convert_misc_characters # => "user at host"
    #   "google.com".convert_misc_characters # => "google dot com"
    #   "$10".convert_misc_characters # => "10 dollars"
    #   "*69".convert_misc_characters # => "star 69"
    #   "100%".convert_misc_characters # => "100 percent"
    #   "windows/mac/linux".convert_misc_characters # => "windows slash mac slash linux"
    #
    # Pass <tt>:allow_slash => true</tt> to skip the "slash" substitution; slashes
    # are then turned into spaces by the final punctuation clean-up.
    #
    # Note: Because this method will convert any & symbols to the string "and",
    # you should run any methods which convert HTML entities (convert_accented_entities
    # and convert_misc_entities) before running this method.
    def convert_misc_characters(options = {})
      text = dup.gsub(/\.{3,}/, " dot dot dot ") # Catch ellipses before single dot rule!

      # Replacements that do not reuse the leading capture (\1) are padded with
      # spaces so the inserted words never run into their neighbours.
      apply_rules = lambda do |rules|
        rules.each do |found, replaced|
          replaced = " #{replaced} " unless replaced =~ /\\1/
          text.gsub!(found, replaced)
        end
      end

      # Special rules for money
      apply_rules.call(
        /(\s|^)\$(\d+)\.(\d+)(\s|$)/ => '\2 dollars \3 cents',
        /(\s|^)£(\d+)\.(\d+)(\s|$)/u => '\2 pounds \3 pence'
      )

      # Special rules for abbreviations
      text.gsub!(/(\s|^)([[:alpha:]](\.[[:alpha:]])+(\.?)[[:alpha:]]*(\s|$))/) do |x|
        x.gsub(".", "")
      end

      # Back to normal rules
      misc_characters = {
        /\s*&\s*/ => "and",
        /\s*#/ => "number",
        /\s*@\s*/ => "at",
        /(\S|^)\.(\S)/ => '\1 dot \2',
        /(\s|^)\$(\d*)(\s|$)/ => '\2 dollars',
        /(\s|^)£(\d*)(\s|$)/u => '\2 pounds',
        /(\s|^)¥(\d*)(\s|$)/u => '\2 yen',
        /\s*\*\s*/ => "star",
        /\s*%\s*/ => "percent",
        /(\s*=\s*)/ => " equals ",
        /\s*\+\s*/ => "plus",
        /\s*÷\s*/ => "divide",
        /\s*°\s*/ => "degrees"
      }
      # Backslash, slash and the fullwidth solidus all read as "slash".
      # NOTE(review): the third alternative was an unescaped "/" (a syntax error);
      # the fullwidth solidus is the presumed original character -- confirm.
      misc_characters[%r{\s*(\\|/|／)\s*}] = 'slash' unless options[:allow_slash]
      apply_rules.call(misc_characters)

      text.gsub(/(^|[[:alpha:]])'|`([[:alpha:]]|$)/, '\1\2').gsub(/[\.,:;()\[\]\/\?!\^'ʼ"_\|]/, " ").strip
    end

    # Replace runs of whitespace in string. Defaults to a single space but any replacement
    # string may be specified as an argument. Examples:
    #
    #   "Foo       bar".replace_whitespace # => "Foo bar"
    #   "Foo       bar".replace_whitespace("-") # => "Foo-bar"
    def replace_whitespace(replace = " ")
      gsub(/\s+/, replace)
    end

    # Removes specified character from the beginning and/or end of the string and then performs
    # String#squeeze(character), condensing runs of the character within the string.
    #
    # The character is matched literally, so regexp metacharacters such as "." or "*"
    # are safe to pass. Trimming is anchored to the whole string (a single trailing
    # newline is tolerated at the end), not to individual lines.
    #
    # Note: This method has been superseded by ActiveSupport's squish method.
    def collapse(character = " ")
      literal = Regexp.escape(character)
      sub(/\A#{literal}*/, "").sub(/#{literal}*\Z/, "").squeeze(character)
    end

    module ClassMethods
      # Returns string of random characters with a length matching the specified limit. Excludes 0
      # to avoid confusion between 0 and O.
      def random(limit)
        strong_alphanumerics = %w{
          a b c d e f g h i j k l m n o p q r s t u v w x y z
          A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
          1 2 3 4 5 6 7 8 9
        }
        # Index range is derived from the alphabet so the two cannot drift apart.
        Array.new(limit) { strong_alphanumerics[rand(strong_alphanumerics.size)] }.join
      end
    end
  end
end