# -*- encoding : utf-8 -*-

# Convenience extensions added directly to the core String class:
# small text-cleanup helpers plus best-effort encoding conversion.
class String
  # Returns a copy of the string with every space character removed.
  #
  # Only the plain space (U+0020) is removed; tabs, newlines and other
  # whitespace characters are left in place.
  #
  # @return [String]
  def remove_whitespace
    gsub(' ', '')
  end

  # Tells whether the whole string is a base-10 integer literal: an optional
  # leading sign followed by one or more ASCII digits.
  #
  # The pattern is anchored with \A and \z (not ^ and $) so that a multi-line
  # string such as "abc\n123" is not accepted just because one of its lines
  # looks like an integer.
  #
  # @return [Boolean]
  def is_i?
    !!(self =~ /\A[-+]?[0-9]+\z/)
  end

  #
  # How many words are in this string
  # Includes duplicates
  #
  # A word is a maximal run of word characters, hyphens and apostrophes.
  # [[:word:]] is used instead of \w because \w only matches ASCII on
  # Ruby 1.9+, which would split words such as "naïve" in two.
  #
  # @return [Integer]
  def word_count
    to_utf8.scan(/[[:word:]'-]+/).size
  end

  #
  # Removes leading and trailing whitespace.
  #
  # The patterns use the line anchors ^ and $, so they are applied to each
  # line of a multi-line string. Runs of whitespace between words are NOT
  # collapsed: "  a  b  ".trim returns "a  b".
  #
  # @return [String]
  def trim
    without_trailing = gsub(/^(.*[^\s])\s+$/, '\1')
    without_trailing.gsub(/^\s*(.*)$/, '\1')
  end

  # Transcodes the string to UTF-16LE, interpreting its bytes as UTF-8
  # whatever encoding the receiver is currently tagged with.
  #
  # Uses String#encode because Iconv is no longer part of the standard
  # library as of Ruby 2.0.
  #
  # @return [String] a UTF-16LE encoded copy
  # @raise [Encoding::InvalidByteSequenceError] if the bytes are not valid UTF-8
  def to_utf16le
    encode('UTF-16LE', 'UTF-8')
  end

  # Builds a %-wrapped pattern from the string: the text is trimmed, every
  # run of whitespace becomes a single "%", and a "%" is added at both ends.
  #
  # NOTE(review): presumably used in SQL LIKE conditions. Any "%" or "_"
  # already present in the string is passed through unescaped; confirm that
  # callers expect this.
  #
  # @return [String]
  def to_active_record_condition
    "%#{trim.gsub(/[[:space:]]+/, '%')}%"
  end

  # Tells whether the string is empty or contains only whitespace.
  #
  # need to force encoding for ruby 1.9 otherwise regex fails when comparing string of 2 different encodings
  # TODO : String.blank? - do we need to do force encoding? is UTF-8 a good default?
  #
  # If the regex match fails because of an invalid byte sequence or
  # incompatible encodings, the answer falls back to empty?.
  #
  # @return [Boolean]
  # @raise [ArgumentError] any ArgumentError not related to encoding is re-raised
  def blank?
    # force_encoding only exists on ruby 1.9+; older rubies match the raw string
    candidate = respond_to?(:force_encoding) ? dup.force_encoding('UTF-8') : self
    candidate !~ /\S/
  rescue ArgumentError => e
    raise unless e.message =~ /^(invalid\ byte\ sequence|incompatible\ character\ encodings)/

    empty?
  end

  # converts the encoding to UTF-8 regardless of current encoding
  #
  # The current encoding tag is not trusted; only the bytes are inspected:
  # 1. if the bytes form valid UTF-8 they are returned tagged as UTF-8;
  # 2. otherwise they are read as ISO-8859-1 and transcoded to UTF-8.
  #
  # ISO-8859-1 assigns a character to every possible byte, so step 2 always
  # succeeds and no further candidate encoding could ever be reached.
  #
  # @return [String] a new UTF-8 string; the receiver is not modified
  def to_utf8
    text = dup
    return text if text.encoding.name == 'UTF-8' && text.valid_encoding? # already utf-8 yay!

    text.force_encoding('UTF-8')
    return text if text.valid_encoding?

    text.force_encoding('ISO-8859-1').encode('UTF-8')
  end
end