lib/sup/util.rb in sup-0.7 vs lib/sup/util.rb in sup-0.8

- old
+ new

@@ -170,10 +170,12 @@
     end
   end
 end

 class String
+  def display_length; scan(/./u).size end
+
   def camel_to_hyphy
     self.gsub(/([a-z])([A-Z0-9])/, '\1-\2').downcase
   end

   def find_all_positions x
@@ -186,15 +188,10 @@
       start = pos + 1
     end
     ret
   end

-  ## one of the few things i miss from perl
-  def ucfirst
-    self[0 .. 0].upcase + self[1 .. -1]
-  end
-
   ## a very complicated regex found on teh internets to split on
   ## commas, unless they occurr within double quotes.
   def split_on_commas
     split(/,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/)
   end
@@ -274,10 +271,15 @@
   end

   def normalize_whitespace
     gsub(/\t/, " ").gsub(/\r/, "")
   end
+
+  ## takes a space-separated list of words, and returns an array of symbols.
+  ## typically used in Sup for translating Ferret's representation of a list
+  ## of labels (a string) to an array of label symbols.
+  def symbolistize; split.map { |x| x.intern } end
 end

 class Numeric
   def clamp min, max
     if self < min
@@ -401,10 +403,14 @@

   def to_boolean_h; Hash[*map { |x| [x, true] }.flatten]; end

   def last= e; self[-1] = e end
   def nonempty?; !empty? end
+
+  def to_set_of_symbols
+    map { |x| x.is_a?(Symbol) ? x : x.intern }.uniq
+  end
 end

 class Time
   def to_indexable_s
     sprintf "%012d", self
@@ -618,19 +624,22 @@
   end
 end

 class Iconv
   def self.easy_decode target, charset, text
-    return text if charset =~ /^(x-unknown|unknown[-_]?8bit|ascii[-_]?7[-_]?bit)$/i
+    return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i
     charset = case charset
-                when /UTF[-_]?8/i: "utf-8"
-                when /(iso[-_])?latin[-_]?1$/i: "ISO-8859-1"
-                when /unicode[-_]1[-_]1[-_]utf[-_]7/i: "utf-7"
-                else charset
-              end
+      when /UTF[-_ ]?8/i: "utf-8"
+      when /(iso[-_ ])?latin[-_ ]?1$/i: "ISO-8859-1"
+      when /iso[-_ ]?8859[-_ ]?15/i: 'ISO-8859-15'
+      when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i: "utf-7"
+      else charset
+    end

-    # Convert:
-    #
-    # Remember - Iconv.open(to, from)!
-    Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
+    begin
+      Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
+    rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e
+      Redwood::log "warning: error (#{e.class.name}) decoding text from #{charset} to #{target}: #{text[0 ... 20]}"
+      text
+    end
   end
 end