# -*- encoding : utf-8 -*-
class String

  # Returns a copy of the string with every space character (" ") removed.
  # Only the plain space is affected; tabs and newlines are left in place.
  def remove_whitespace
    delete(" ")
  end

  # Returns true when the entire string is an optionally signed run of
  # ASCII digits (e.g. "42", "-7", "+003"), false otherwise.
  #
  # The pattern is anchored with \A and \z rather than ^ and $ so that a
  # multi-line string such as "abc\n123" (or a trailing newline) is rejected:
  # ^ and $ match at every line boundary, not only at the string boundaries.
  def is_i?
    !!(self =~ /\A[-+]?[0-9]+\z/)
  end

  #
  # How many words are in this string.
  # A word is a run of word characters (\w), hyphens and apostrophes,
  # so "well-known" and "it's" each count once. Duplicates are counted.
  # The string is converted with to_utf8 before scanning.
  #
  def word_count
    to_utf8.scan(/[\w'-]+/).size
  end

  #
  # Removes leading and trailing whitespace.
  #
  # The patterns use the line anchors ^ and $, so the stripping is applied
  # line by line; because \s also matches newlines, blank lines next to
  # stripped content can be swallowed as well. Runs of spaces *inside* a
  # line are left untouched.
  #
  def trim
    without_trailing = gsub(/^(.*[^\s])\s+$/, '\1')
    without_trailing.gsub(/^\s*(.*)$/, '\1')
  end

  # Returns the string converted to UTF-16LE; the receiver's bytes are
  # interpreted as UTF-8.
  #
  # Iconv is used when it is loaded, so existing setups keep their current
  # behaviour. Iconv is no longer part of the standard library in modern
  # Ruby, so String#encode is used whenever the Iconv constant is missing
  # instead of failing with a NameError.
  def to_utf16le
    if defined?(Iconv)
      Iconv.conv('utf-16le', 'UTF-8', self)
    else
      encode('UTF-16LE', 'UTF-8')
    end
  end

  # Builds a pattern wrapped in "%" in which every run of whitespace inside
  # the trimmed string is replaced by a single "%",
  # e.g. "  foo   bar " => "%foo%bar%".
  # Any "%" or "_" already present in the string is passed through as-is.
  def to_active_record_condition
    "%#{trim.gsub(/[[:space:]]+/, '%')}%"
  end

  # Returns true when the string is empty or contains only whitespace.
  #
  # On rubies with encoding-aware strings (1.9+) the check runs on a copy
  # forced to UTF-8, otherwise the regex can fail when comparing strings of
  # two different encodings. If the match still raises because of an invalid
  # byte sequence or incompatible encodings, the string is considered blank
  # only when it is empty. Any other ArgumentError is re-raised.
  #
  # TODO : String.blank? - do we need to do force encoding? is UTF-8 a good default?
  def blank?
    # String#force_encoding only exists on ruby 1.9+; probing for it avoids
    # building Gem::Version objects on every call.
    subject = respond_to?(:force_encoding) ? dup.force_encoding("UTF-8") : self
    subject !~ /\S/
  rescue ArgumentError => e
    raise unless e.message =~ /^(invalid\ byte\ sequence|incompatible\ character\ encodings)/
    empty?
  end

  # Returns a UTF-8 copy of the string; the receiver is never modified.
  #
  # Resolution order:
  # 1. Already valid UTF-8: returned unchanged (as a copy).
  # 2. Tagged with another, non-binary encoding and valid in it: transcoded
  #    from that declared encoding. Previously such strings were
  #    reinterpreted byte-by-byte, which garbled e.g. UTF-16 or Windows-1251
  #    text.
  # 3. Otherwise (binary tag, invalid bytes, or no converter available) the
  #    encoding is guessed: the bytes are taken as UTF-8 if they form valid
  #    UTF-8, else as ISO-8859-1. ISO-8859-1 accepts every byte sequence, so
  #    it is the catch-all and no further candidates can ever be reached.
  def to_utf8
    text = dup
    declared = text.encoding

    if declared.name == "UTF-8"
      return text if text.valid_encoding? # already utf-8 yay!
    elsif declared != Encoding::ASCII_8BIT && text.valid_encoding?
      begin
        return text.encode("UTF-8")
      rescue EncodingError
        # No usable conversion from the declared encoding; guess below.
      end
    end

    return text if text.force_encoding("UTF-8").valid_encoding?

    text.force_encoding("ISO-8859-1").encode("UTF-8")
  end

end