util.rb in sup-0.9

- old
+ new

@@ -1,9 +1,10 @@
 require 'thread'
 require 'lockfile'
 require 'mime/types'
 require 'pathname'
+require 'set'
 
 ## time for some monkeypatching!
 class Lockfile
   def gen_lock_id
     Hash[
@@ -22,10 +23,11 @@
     end
 
   def lockinfo_on_disk
     h = load_lock_id IO.read(path)
     h['mtime'] = File.mtime path
+    h['path'] = path
     h
   end
 
   def touch_yourself; touch path end
 end
@@ -88,11 +90,11 @@
   end
 end
 
 class Range
   ## only valid for integer ranges (unless I guess it's exclusive)
-  def size 
+  def size
     last - first + (exclude_end? ? 0 : 1)
   end
 end
 
 class Module
@@ -131,23 +133,23 @@
   def returning x; yield x; x; end
 
   ## clone of java-style whole-method synchronization
   ## assumes a @mutex variable
   ## TODO: clean up, try harder to avoid namespace collisions
-  def synchronized *meth
-    meth.each do
+  def synchronized *methods
+    methods.each do |meth|
       class_eval <<-EOF
         alias unsynchronized_#{meth} #{meth}
         def #{meth}(*a, &b)
           @mutex.synchronize { unsynchronized_#{meth}(*a, &b) }
         end
       EOF
     end
   end
 
-  def ignore_concurrent_calls *meth
-    meth.each do
+  def ignore_concurrent_calls *methods
+    methods.each do |meth|
       mutex = "@__concurrent_protector_#{meth}"
       flag = "@__concurrent_flag_#{meth}"
       oldmeth = "__unprotected_#{meth}"
       class_eval <<-EOF
         alias #{oldmeth} #{meth}
@@ -173,11 +175,11 @@
 
 class String
   ## nasty multibyte hack for ruby 1.8. if it's utf-8, split into chars using
   ## the utf8 regex and count those. otherwise, use the byte length.
   def display_length
-    if $encoding == "UTF-8"
+    if $encoding == "UTF-8" || $encoding == "utf8"
       scan(/./u).size
     else
       size
     end
   end
@@ -211,44 +213,44 @@
     state = :outstring
     pos = 0
     region_start = 0
     while pos <= length
       newpos = case state
-        when :escaped_instring, :escaped_outstring: pos
+        when :escaped_instring, :escaped_outstring then pos
         else index(/[,"\\]/, pos)
-      end 
-      
+      end
+
       if newpos
         char = self[newpos]
       else
         char = nil
         newpos = length
       end
 
       case char
       when ?"
         state = case state
-          when :outstring: :instring
-          when :instring: :outstring
-          when :escaped_instring: :instring
-          when :escaped_outstring: :outstring
+          when :outstring then :instring
+          when :instring then :outstring
+          when :escaped_instring then :instring
+          when :escaped_outstring then :outstring
         end
       when ?,, nil
         state = case state
-          when :outstring, :escaped_outstring:
+          when :outstring, :escaped_outstring then
             ret << self[region_start ... newpos].gsub(/^\s+|\s+$/, "")
             region_start = newpos + 1
             :outstring
-          when :instring: :instring
-          when :escaped_instring: :instring
+          when :instring then :instring
+          when :escaped_instring then :instring
         end
       when ?\\
         state = case state
-          when :instring: :escaped_instring
-          when :outstring: :escaped_outstring
-          when :escaped_instring: :instring
-          when :escaped_outstring: :outstring
+          when :instring then :escaped_instring
+          when :outstring then :escaped_outstring
+          when :escaped_instring then :instring
+          when :escaped_outstring then :outstring
         end
       end
       pos = newpos + 1
     end
 
@@ -280,14 +282,22 @@
 
   def normalize_whitespace
     gsub(/\t/, "    ").gsub(/\r/, "")
   end
 
-  ## takes a space-separated list of words, and returns an array of symbols.
-  ## typically used in Sup for translating Ferret's representation of a list
-  ## of labels (a string) to an array of label symbols.
-  def symbolistize; split.map { |x| x.intern } end
+  unless method_defined? :ord
+    def ord
+      self[0]
+    end
+  end
+
+  ## takes a list of words, and returns a set of symbols.  typically used in
+  ## Sup for translating Ferret's representation of a list of labels (a string)
+  ## to a set of label symbols.
+  ##
+  ## split_on is passed to String#split, so leave it nil to split on whitespace.
+  def to_set_of_symbols split_on=nil; Set.new split(split_on).map { |x| x.strip.intern } end
 end
 
 class Numeric
   def clamp min, max
     if self < min
@@ -411,14 +421,10 @@
 
   def to_boolean_h; Hash[*map { |x| [x, true] }.flatten]; end
 
   def last= e; self[-1] = e end
   def nonempty?; !empty? end
-
-  def to_set_of_symbols
-    map { |x| x.is_a?(Symbol) ? x : x.intern }.uniq
-  end
 end
 
 class Time
   def to_indexable_s
     sprintf "%012d", self
@@ -488,39 +494,41 @@
       end
     end
   end
 end
 
-## simple singleton module. far less complete and insane than the ruby
-## standard library one, but automatically forwards methods calls and
-## allows for constructors that take arguments.
+## simple singleton module. far less complete and insane than the ruby standard
+## library one, but it automatically forwards method calls and allows for
+## constructors that take arguments.
 ##
-## You must have #initialize call "self.class.i_am_the_instance self"
-## at some point or everything will fail horribly.
+## classes that include this can define initialize. however, you cannot call
+## .new on the class. To get the instance of the class, call .instance;
+## to create the instance, call .init.
 module Singleton
   module ClassMethods
     def instance; @instance; end
     def instantiated?; defined?(@instance) && !@instance.nil?; end
     def deinstantiate!; @instance = nil; end
     def method_missing meth, *a, &b
-      raise "no instance defined!" unless defined? @instance
+      raise "no #{name} instance defined in method call to #{meth}!" unless defined? @instance
 
       ## if we've been deinstantiated, just drop all calls. this is
       ## useful because threads that might be active during the
       ## cleanup process (e.g. polling) would otherwise have to
       ## special-case every call to a Singleton object
       return nil if @instance.nil?
 
       @instance.send meth, *a, &b
     end
-    def i_am_the_instance o
+    def init *args
       raise "there can be only one! (instance)" if defined? @instance
-      @instance = o
+      @instance = new(*args)
     end
   end
 
   def self.included klass
+    klass.private_class_method :allocate, :new
     klass.extend ClassMethods
   end
 end
 
 ## wraps an object. if it throws an exception, keeps a copy.
@@ -535,11 +543,11 @@
 
   def clear_error!; @error = nil; end
   def has_errors?; !@error.nil?; end
 
   def method_missing m, *a, &b; __pass m, *a, &b end
-  
+
   def id; __pass :id; end
   def to_s; __pass :to_s; end
   def to_yaml x; __pass :to_yaml, x; end
   def is_a? c; @o.is_a? c; end
 
@@ -634,20 +642,20 @@
 
 class Iconv
   def self.easy_decode target, charset, text
     return text if charset =~ /^(x-unknown|unknown[-_ ]?8bit|ascii[-_ ]?7[-_ ]?bit)$/i
     charset = case charset
-      when /UTF[-_ ]?8/i: "utf-8"
-      when /(iso[-_ ])?latin[-_ ]?1$/i: "ISO-8859-1"
-      when /iso[-_ ]?8859[-_ ]?15/i: 'ISO-8859-15'
-      when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i: "utf-7"
+      when /UTF[-_ ]?8/i then "utf-8"
+      when /(iso[-_ ])?latin[-_ ]?1$/i then "ISO-8859-1"
+      when /iso[-_ ]?8859[-_ ]?15/i then 'ISO-8859-15'
+      when /unicode[-_ ]1[-_ ]1[-_ ]utf[-_]7/i then "utf-7"
       else charset
     end
 
     begin
       Iconv.iconv(target + "//IGNORE", charset, text + " ").join[0 .. -2]
-    rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::IllegalSequence => e
-      Redwood::log "warning: error (#{e.class.name}) decoding text from #{charset} to #{target}: #{text[0 ... 20]}"
+    rescue Errno::EINVAL, Iconv::InvalidEncoding, Iconv::InvalidCharacter, Iconv::IllegalSequence => e
+      warn "couldn't transcode text from #{charset} to #{target} (\"#{text[0 ... 20]}\"...) (got #{e.message}); using original as is"
       text
     end
   end
 end