lib/sup/util.rb in sup-0.9.1 vs lib/sup/util.rb in sup-0.10
- old
+ new
@@ -175,11 +175,11 @@
class String
## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using
## the utf8 regex and count those. otherwise, use the byte length.
##
## with the RUBY_VERSION guard on the added (+) condition line, the regex
## branch is taken only when the interpreter version string sorts below
## '1.9.1' and $encoding is "UTF-8" or "utf8"; every other case returns
## String#size unchanged. `.` does not match a newline, so newlines are not
## counted in the regex branch.
def display_length
- if $encoding == "UTF-8" || $encoding == "utf8"
## RUBY_VERSION is compared as a string here, not component by component.
+ if RUBY_VERSION < '1.9.1' && ($encoding == "UTF-8" || $encoding == "utf8")
scan(/./u).size
else
size
end
end
@@ -288,16 +288,49 @@
## returns whatever String#[] yields for index 0.
## NOTE(review): looks like a fallback String#ord for interpreters that lack
## one (on ruby 1.8 self[0] is the first byte as an integer) -- the guard
## deciding whether this gets defined lies outside the visible hunk; confirm.
def ord
self[0]
end
end
## define String#each only when the class does not already provide one; the
## fallback simply forwards the caller's block to each_line.
## NOTE(review): presumably restores the ruby 1.8 String#each on newer
## interpreters that dropped it -- confirm against callers.
+ unless method_defined? :each
+ def each &b
+ each_line &b
+ end
+ end
+
## splits this string into words and returns them as a Set of symbols, with
## surrounding whitespace stripped from each word. typically used in Sup to
## turn Ferret's representation of a list of labels (a single string) into
## label symbols.
##
## split_on is handed straight to String#split; leave it nil to split on
## whitespace.
def to_set_of_symbols split_on=nil
  words = split(split_on)
  labels = words.map { |word| word.strip.intern }
  Set.new labels
end
+
## raised by String#check when a string fails validation. subclasses
## ArgumentError, so handlers for ArgumentError also catch it.
+ class CheckError < ArgumentError; end
## validates this string's encoding. raises CheckError when the string reports
## an encoding other than UTF-8 or ASCII, or when valid_encoding? is false.
## each test is skipped on strings that do not respond to the corresponding
## method. returns nil when nothing is wrong.
+ def check
+ begin
+ fail "unexpected encoding #{encoding}" if respond_to?(:encoding) && !(encoding == Encoding::UTF_8 || encoding == Encoding::ASCII)
+ fail "invalid encoding" if respond_to?(:valid_encoding?) && !valid_encoding?
## bare rescue: any StandardError raised above (including the RuntimeErrors
## produced by fail) is re-raised as CheckError carrying the same message.
+ rescue
+ raise CheckError.new($!.message)
+ end
+ end
+
## returns a new string in which every byte with the high bit set is replaced
## by the four-character text \xNN (NN = the byte in lowercase hex) and every
## other byte is copied through unchanged. the receiver is not modified.
+ def ascii
+ out = ""
+ each_byte do |b|
## (b & 128) != 0 means the byte is >= 0x80, i.e. outside 7-bit ASCII.
+ if (b & 128) != 0
+ out << "\\x#{b.to_s 16}"
+ else
+ out << b.chr
+ end
+ end
## the escaped result is tagged as UTF-8 where strings carry an encoding.
+ out.force_encoding Encoding::UTF_8 if out.respond_to? :force_encoding
+ out
+ end
+
## converts this string from src_encoding into $encoding by delegating to
## Iconv.easy_decode, and returns whatever that call returns. src_encoding
## defaults to $encoding.
+ def transcode src_encoding=$encoding
+ Iconv.easy_decode $encoding, src_encoding, self
+ end
end
class Numeric
def clamp min, max
if self < min
@@ -484,13 +517,13 @@
strftime "%b %Y"
elsif month != from.month
strftime "%b %e"
else
if is_the_same_day? from
- strftime("%l:%M%P")
+ strftime("%l:%M%p").downcase # emulate %P (missing on ruby 1.8 darwin)
elsif is_the_day_before? from
- "Yest." + nearest_hour.strftime("%l%P")
+ "Yest." + nearest_hour.strftime("%l%p").downcase # emulate %P
else
strftime "%b %e"
end
end
end
@@ -639,23 +672,28 @@
@m.synchronize { !@over && @over = true }
end
end
class Iconv
## converts text from orig_charset to target and returns the result. the
## notes below describe the added (+) lines of this hunk.
##
## target:: destination charset name, passed to Iconv.iconv as is.
## orig_charset:: charset name as found in the input; a few aliases are
##                mapped to a canonical name by the case expression.
## text:: the string to convert. where strings carry an encoding, a
##        binary-tagged copy is used, so the caller's string is untouched.
##
## if conversion or the follow-up String#check raises one of the errors named
## in the rescue clause, the failure is logged with debug and text.ascii is
## returned instead.
- def self.easy_decode target, charset, text
- return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i
- charset = case charset
+ def self.easy_decode target, orig_charset, text
## work on a copy tagged as raw bytes, on interpreters whose strings have
## encodings.
+ if text.respond_to? :force_encoding
+ text = text.dup
+ text.force_encoding Encoding::BINARY
+ end
+ charset = case orig_charset
when /UTF[-_ ]?8/i then "utf-8"
when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1"
when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15'
when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7"
- else charset
+ when /^euc$/i then 'EUC-JP' # XXX try them all?
## unknown / 8bit / 7bit labels are converted as plain ASCII.
+ when /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i then 'ASCII'
+ else orig_charset
end
begin
- Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
- rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence => e
- warn "couldn't transcode text from #{charset} to #{target} (\"#{text[0 ... 20]}\"...) (got #{e.message}); using original as is"
- text
## a space is appended before conversion and dropped again by [0 .. -2].
## NOTE(review): presumably a workaround for iconv mishandling the end of
## its input -- confirm. `returning` is defined elsewhere; it appears to
## yield the converted string to the block (which validates it with
## String#check) and then return that string -- confirm.
+ returning(Iconv.iconv(target, charset, text + " ").join[0 .. -2]) { |str| str.check }
+ rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence, String::CheckError
## NOTE(review): the message below has an unmatched ")" right after the
## target charset.
+ debug "couldn't transcode text from #{orig_charset} (#{charset}) to #{target}) (#{text[0 ... 20].inspect}...) (got #{$!.message} (#{$!.class}))"
+ text.ascii
end
end
end