ascii85.rb in Ascii85-1.0.0

- old
+ new

@@ -1,5 +1,8 @@
+# encoding: utf-8
+
+
 #
 # Ascii85 is an implementation of Adobe's binary-to-text encoding of the same
 # name in pure Ruby.
 #
 # See http://www.adobe.com/products/postscript/pdfs/PLRM.pdf page 131 and
@@ -10,17 +13,17 @@
 #
 
 
 module Ascii85
   # The gem version number
-  VERSION = '0.9.0' # :nodoc:
+  VERSION = '1.0.0' # :nodoc:
 
   #
-  # Encodes the given String as Ascii85.
+  # Encodes the bytes of the given String as Ascii85.
   #
-  # If +wrap_lines+ evaluates to +false+, the output will be returned as a
-  # single long line. Otherwise #encode formats the output into lines of
+  # If +wrap_lines+ evaluates to +false+, the output will be returned as
+  # a single long line. Otherwise #encode formats the output into lines of
   # length +wrap_lines+ (minimum is 2).
   #
   #     Ascii85::encode("Ruby")
   #     => <~;KZGo~>
   #
@@ -34,26 +37,34 @@
   #     => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
   #
   #
   def self.encode(str, wrap_lines = 80)
 
-    return '' if str.to_s.empty?
+    to_encode = str.to_s
+    return '' if to_encode.empty?
 
+    # Deal with multi-byte encodings
+    if to_encode.methods.include?(:bytesize)
+      input_size = to_encode.bytesize
+    else
+      input_size = to_encode.size
+    end
+
     # Compute number of \0s to pad the message with (0..3)
-    padding_length = (-str.to_s.length) % 4
+    padding_length = (-input_size) % 4
 
     # Extract big-endian integers
-    tuples = (str.to_s + ("\0" * padding_length)).unpack('N*')
+    tuples = (to_encode + ("\0" * padding_length)).unpack('N*')
 
     # Encode
     tuples.map! do |tuple|
       if tuple == 0
         'z'
       else
         tmp = ""
         5.times do
-          tmp += ((tuple % 85) + 33).chr
+          tmp << ((tuple % 85) + 33).chr
           tuple /= 85
         end
         tmp.reverse
       end
     end
@@ -64,42 +75,42 @@
     end
 
     # Cut off the padding
     tuples[-1] = tuples[-1][0..(4 - padding_length)]
 
-    # Add start-marker and join into a String
-    result = '<~' + tuples.join
-
-    # If we don't need to wrap the lines to a certain length, add ~> and return
+    # If we don't need to wrap the lines, add delimiters and return
     if (!wrap_lines)
-      return result + '~>'
+      return '<~' + tuples.join + '~>'
     end
 
     # Otherwise we wrap the lines
 
     line_length = [2, wrap_lines.to_i].max
 
     wrapped = []
-    0.step(result.length, line_length) do |index|
-      wrapped << result.slice(index, line_length)
+    to_wrap = '<~' + tuples.join
+
+    0.step(to_wrap.length, line_length) do |index|
+      wrapped << to_wrap.slice(index, line_length)
     end
 
-    # Add end-marker -- on a new line if necessary
+    # Add end-marker – on a new line if necessary
     if (wrapped.last.length + 2) > line_length
       wrapped << '~>'
     else
-      wrapped[-1] += '~>'
+      wrapped[-1] << '~>'
     end
 
     return wrapped.join("\n")
   end
 
   #
   # Searches through +str+ and decodes the _first_ Ascii85-String found.
   #
-  # #decode expects an Ascii85-encoded String enclosed in <~ and ~>. It will
-  # ignore all characters outside these markers.
+  # #decode expects an Ascii85-encoded String enclosed in <~ and ~> — it
+  # will ignore all characters outside these markers. The returned strings are
+  # always encoded as ASCII-8BIT.
   #
   #     Ascii85::decode("<~;KZGo~>")
   #     => "Ruby"
   #
   #     Ascii85::decode("Foo<~;KZGo~>Bar<~;KZGo~>Baz")
@@ -111,20 +122,37 @@
   # #decode will raise Ascii85::DecodingError when malformed input is
   # encountered.
   #
   def self.decode(str)
 
-    # Find the Ascii85 encoded data between <~ and ~>
-    input = str.to_s.match(/<~.*?~>/mn)
+    input = str.to_s
 
-    return '' if input.nil?
+    # Try to compile the regular expression for finding the input between
+    # the <~ and ~> delimiters. In order to work properly with different
+    # input encodings, the RegExp itself is re-encoded to the input encoding
+    # if possible. Thanks to Myrddin Emrys for suggesting this approach
+    # (http://is.gd/5x18O)
+    begin
+      regex = "<~(.*?)?~>"
 
-    # Remove the delimiters
-    input = input.to_s[2..-3]
+      if regex.methods.include?(:encode)
+        regex = regex.encode(input.encoding)
+      end
+      regex = Regexp.compile(regex, Regexp::MULTILINE)
 
-    return '' if input.empty?
+      # Find the actual data to be decoded
+      input = input.match(regex)
 
+    rescue EncodingError
+      raise ArgumentError, "Incompatible input encoding: #{str.encoding.inspect}"
+    end
+
+    return '' if input.nil?
+
+    # Get the matched data as String
+    input = input.captures.first
+
     # Decode
     result = []
 
     count = 0
     word = 0
@@ -180,12 +208,12 @@
       end
 
       count -= 1
       word += 85**(4 - count)
 
-      result += ((word >> 24) & 255).chr if count >= 1
-      result += ((word >> 16) & 255).chr if count >= 2
-      result += ((word >>  8) & 255).chr if count == 3
+      result << ((word >> 24) & 255).chr if count >= 1
+      result << ((word >> 16) & 255).chr if count >= 2
+      result << ((word >>  8) & 255).chr if count == 3
     end
 
     return result
   end