lib/sup/util.rb in sup-0.7 vs lib/sup/util.rb in sup-0.8
- old
+ new
@@ -170,10 +170,12 @@
end
end
end
class String
+ def display_length; scan(/./u).size end
+
## Turns a camelCase string into its lowercase, hyphen-separated form:
## a "-" is inserted between an ASCII lowercase letter and an immediately
## following uppercase letter or digit, and the result is downcased.
## Returns a new string.
def camel_to_hyphy
  hyphenated = gsub(/([a-z])([A-Z0-9])/, '\1-\2')
  hyphenated.downcase
end
def find_all_positions x
@@ -186,15 +188,10 @@
start = pos + 1
end
ret
end
- ## one of the few things i miss from perl
- def ucfirst
- self[0 .. 0].upcase + self[1 .. -1]
- end
-
## Splits the string on commas (plus any whitespace that follows them),
## except for commas that occur inside a pair of double quotes. The
## lookahead only accepts a comma when the remainder of the string
## consists of complete "..." pairs followed by no further quote.
def split_on_commas
  unquoted_comma = /,\s*(?=(?:[^"]*"[^"]*")*(?![^"]*"))/
  split(unquoted_comma)
end
@@ -274,10 +271,15 @@
end
## Returns a copy of the string with every tab replaced by the literal
## below and every carriage return removed.
## NOTE(review): this listing appears to collapse runs of whitespace, so the
## width of the tab replacement should be confirmed against the real file.
def normalize_whitespace
  detabbed = gsub(/\t/, " ")
  detabbed.gsub(/\r/, "")
end
+
+ ## takes a space-separated list of words, and returns an array of symbols.
+ ## typically used in Sup for translating Ferret's representation of a list
+ ## of labels (a string) to an array of label symbols.
+ def symbolistize; split.map { |x| x.intern } end
end
class Numeric
def clamp min, max
if self < min
@@ -401,10 +403,14 @@
## Builds a lookup hash that maps every element of the receiver to true,
## e.g. [:a, :b] becomes { :a => true, :b => true }.
##
## Every element is used as a key exactly as it is. The hash is filled in
## one key at a time instead of via Hash[*pairs.flatten]: flatten recurses
## into elements that are themselves arrays, which scrambled the key/value
## pairing or raised ArgumentError when the flattened count came out odd.
def to_boolean_h
  inject({}) { |h, x| h[x] = true; h }
end
## Replaces the final element of the receiver with e by assigning to
## index -1.
def last=(e)
  self[-1] = e
end
## Negation of empty?: false when the receiver reports itself empty,
## true otherwise.
def nonempty?
  return false if empty?
  true
end
+
+ def to_set_of_symbols
+ map { |x| x.is_a?(Symbol) ? x : x.intern }.uniq
+ end
end
class Time
def to_indexable_s
sprintf "%012d", self
@@ -618,19 +624,22 @@
end
end
class Iconv
def self.easy_decode target, charset, text
- return text if charset =~ /^(x-unknown|unknown[-_]?8bit|ascii[-_]?7[-_]?bit)$/i
+ return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i
charset = case charset
- when /UTF[-_]?8/i: "utf-8"
- when /(iso[-_])?latin[-_]?1$/i: "ISO-8859-1"
- when /unicode[-_]1[-_]1[-_]utf[-_]7/i: "utf-7"
- else charset
- end
+ when /UTF[-_ ]?8/i: "utf-8"
+ when /(iso[-_ ])?latin[-_ ]?1$/i: "ISO-8859-1"
+ when /iso[-_ ]?8859[-_ ]?15/i: 'ISO-8859-15'
+ when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i: "utf-7"
+ else charset
+ end
- # Convert:
- #
- # Remember - Iconv.open(to, from)!
- Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
+ begin
+ Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
+ rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e
+ Redwood::log "warning: error (#{e.class.name}) decoding text from #{charset} to #{target}: #{text[0 ... 20]}"
+ text
+ end
end
end