lib/ascii85.rb in Ascii85-0.9.0 vs lib/ascii85.rb in Ascii85-1.0.0

- old
+ new

@@ -1,5 +1,8 @@ +# encoding: utf-8 + + # # Ascii85 is an implementation of Adobe's binary-to-text encoding of the same # name in pure Ruby. # # See http://www.adobe.com/products/postscript/pdfs/PLRM.pdf page 131 and @@ -10,17 +13,17 @@ # module Ascii85 # The gem version number - VERSION = '0.9.0' # :nodoc: + VERSION = '1.0.0' # :nodoc: # - # Encodes the given String as Ascii85. + # Encodes the bytes of the given String as Ascii85. # - # If +wrap_lines+ evaluates to +false+, the output will be returned as a - # single long line. Otherwise #encode formats the output into lines of + # If +wrap_lines+ evaluates to +false+, the output will be returned as + # a single long line. Otherwise #encode formats the output into lines of # length +wrap_lines+ (minimum is 2). # # Ascii85::encode("Ruby") # => <~;KZGo~> # @@ -34,26 +37,34 @@ # => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~> # # def self.encode(str, wrap_lines = 80) - return '' if str.to_s.empty? + to_encode = str.to_s + return '' if to_encode.empty? + # Deal with multi-byte encodings + if to_encode.methods.include?(:bytesize) + input_size = to_encode.bytesize + else + input_size = to_encode.size + end + # Compute number of \0s to pad the message with (0..3) - padding_length = (-str.to_s.length) % 4 + padding_length = (-input_size) % 4 # Extract big-endian integers - tuples = (str.to_s + ("\0" * padding_length)).unpack('N*') + tuples = (to_encode + ("\0" * padding_length)).unpack('N*') # Encode tuples.map! do |tuple| if tuple == 0 'z' else tmp = "" 5.times do - tmp += ((tuple % 85) + 33).chr + tmp << ((tuple % 85) + 33).chr tuple /= 85 end tmp.reverse end end @@ -64,42 +75,42 @@ end # Cut off the padding tuples[-1] = tuples[-1][0..(4 - padding_length)] - # Add start-marker and join into a String - result = '<~' + tuples.join - - # If we don't need to wrap the lines to a certain length, add ~> and return + # If we don't need to wrap the lines, add delimiters and return if (!wrap_lines) - return result + '~>' + return '<~' + tuples.join + '~>' end # Otherwise we wrap the lines line_length = [2, wrap_lines.to_i].max wrapped = [] - 0.step(result.length, line_length) do |index| - wrapped << result.slice(index, line_length) + to_wrap = '<~' + tuples.join + + 0.step(to_wrap.length, line_length) do |index| + wrapped << to_wrap.slice(index, line_length) end - # Add end-marker -- on a new line if necessary + # Add end-marker – on a new line if necessary if (wrapped.last.length + 2) > line_length wrapped << '~>' else - wrapped[-1] += '~>' + wrapped[-1] << '~>' end return wrapped.join("\n") end # # Searches through +str+ and decodes the _first_ Ascii85-String found. # - # #decode expects an Ascii85-encoded String enclosed in <~ and ~>. It will - # ignore all characters outside these markers. + # #decode expects an Ascii85-encoded String enclosed in <~ and ~> — it + # will ignore all characters outside these markers. The returned strings are + # always encoded as ASCII-8BIT. # # Ascii85::decode("<~;KZGo~>") # => "Ruby" # # Ascii85::decode("Foo<~;KZGo~>Bar<~;KZGo~>Baz") @@ -111,20 +122,37 @@ # #decode will raise Ascii85::DecodingError when malformed input is # encountered. # def self.decode(str) - # Find the Ascii85 encoded data between <~ and ~> - input = str.to_s.match(/<~.*?~>/mn) + input = str.to_s - return '' if input.nil? + # Try to compile the regular expression for finding the input between + # the <~ and ~> delimiters. In order to work properly with different + # input encodings, the RegExp itself is re-encoded to the input encoding + # if possible. Thanks to Myrddin Emrys for suggesting this approach + # (http://is.gd/5x18O) + begin + regex = "<~(.*?)?~>" - # Remove the delimiters - input = input.to_s[2..-3] + if regex.methods.include?(:encode) + regex = regex.encode(input.encoding) + end + regex = Regexp.compile(regex, Regexp::MULTILINE) - return '' if input.empty? + # Find the actual data to be decoded + input = input.match(regex) + rescue EncodingError + raise ArgumentError, "Incompatible input encoding: #{str.encoding.inspect}" + end + + return '' if input.nil? + + # Get the matched data as String + input = input.captures.first + # Decode result = [] count = 0 word = 0 @@ -180,12 +208,12 @@ end count -= 1 word += 85**(4 - count) - result += ((word >> 24) & 255).chr if count >= 1 - result += ((word >> 16) & 255).chr if count >= 2 - result += ((word >> 8) & 255).chr if count == 3 + result << ((word >> 24) & 255).chr if count >= 1 + result << ((word >> 16) & 255).chr if count >= 2 + result << ((word >> 8) & 255).chr if count == 3 end return result end