lib/ascii85.rb in Ascii85-0.9.0 vs lib/ascii85.rb in Ascii85-1.0.0
- old
+ new
@@ -1,5 +1,8 @@
+# encoding: utf-8
+
+
#
# Ascii85 is an implementation of Adobe's binary-to-text encoding of the same
# name in pure Ruby.
#
# See http://www.adobe.com/products/postscript/pdfs/PLRM.pdf page 131 and
@@ -10,17 +13,17 @@
#
module Ascii85
# The gem version number
- VERSION = '0.9.0' # :nodoc:
+ VERSION = '1.0.0' # :nodoc:
#
- # Encodes the given String as Ascii85.
+ # Encodes the bytes of the given String as Ascii85.
#
- # If +wrap_lines+ evaluates to +false+, the output will be returned as a
- # single long line. Otherwise #encode formats the output into lines of
+ # If +wrap_lines+ evaluates to +false+, the output will be returned as
+ # a single long line. Otherwise #encode formats the output into lines of
# length +wrap_lines+ (minimum is 2).
#
# Ascii85::encode("Ruby")
# => <~;KZGo~>
#
@@ -34,26 +37,34 @@
# => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
#
#
def self.encode(str, wrap_lines = 80)
- return '' if str.to_s.empty?
+ to_encode = str.to_s
+ return '' if to_encode.empty?
+ # Deal with multi-byte encodings
+ if to_encode.methods.include?(:bytesize)
+ input_size = to_encode.bytesize
+ else
+ input_size = to_encode.size
+ end
+
# Compute number of \0s to pad the message with (0..3)
- padding_length = (-str.to_s.length) % 4
+ padding_length = (-input_size) % 4
# Extract big-endian integers
- tuples = (str.to_s + ("\0" * padding_length)).unpack('N*')
+ tuples = (to_encode + ("\0" * padding_length)).unpack('N*')
# Encode
tuples.map! do |tuple|
if tuple == 0
'z'
else
tmp = ""
5.times do
- tmp += ((tuple % 85) + 33).chr
+ tmp << ((tuple % 85) + 33).chr
tuple /= 85
end
tmp.reverse
end
end
@@ -64,42 +75,42 @@
end
# Cut off the padding
tuples[-1] = tuples[-1][0..(4 - padding_length)]
- # Add start-marker and join into a String
- result = '<~' + tuples.join
-
- # If we don't need to wrap the lines to a certain length, add ~> and return
+ # If we don't need to wrap the lines, add delimiters and return
if (!wrap_lines)
- return result + '~>'
+ return '<~' + tuples.join + '~>'
end
# Otherwise we wrap the lines
line_length = [2, wrap_lines.to_i].max
wrapped = []
- 0.step(result.length, line_length) do |index|
- wrapped << result.slice(index, line_length)
+ to_wrap = '<~' + tuples.join
+
+ 0.step(to_wrap.length, line_length) do |index|
+ wrapped << to_wrap.slice(index, line_length)
end
- # Add end-marker -- on a new line if necessary
+ # Add end-marker – on a new line if necessary
if (wrapped.last.length + 2) > line_length
wrapped << '~>'
else
- wrapped[-1] += '~>'
+ wrapped[-1] << '~>'
end
return wrapped.join("\n")
end
#
# Searches through +str+ and decodes the _first_ Ascii85-String found.
#
- # #decode expects an Ascii85-encoded String enclosed in <~ and ~>. It will
- # ignore all characters outside these markers.
+ # #decode expects an Ascii85-encoded String enclosed in <~ and ~> — it
+ # will ignore all characters outside these markers. The returned strings are
+ # always encoded as ASCII-8BIT.
#
# Ascii85::decode("<~;KZGo~>")
# => "Ruby"
#
# Ascii85::decode("Foo<~;KZGo~>Bar<~;KZGo~>Baz")
@@ -111,20 +122,37 @@
# #decode will raise Ascii85::DecodingError when malformed input is
# encountered.
#
def self.decode(str)
- # Find the Ascii85 encoded data between <~ and ~>
- input = str.to_s.match(/<~.*?~>/mn)
+ input = str.to_s
- return '' if input.nil?
+ # Try to compile the regular expression for finding the input between
+ # the <~ and ~> delimiters. In order to work properly with different
+ # input encodings, the RegExp itself is re-encoded to the input encoding
+ # if possible. Thanks to Myrddin Emrys for suggesting this approach
+ # (http://is.gd/5x18O)
+ begin
+ regex = "<~(.*?)?~>"
- # Remove the delimiters
- input = input.to_s[2..-3]
+ if regex.methods.include?(:encode)
+ regex = regex.encode(input.encoding)
+ end
+ regex = Regexp.compile(regex, Regexp::MULTILINE)
- return '' if input.empty?
+ # Find the actual data to be decoded
+ input = input.match(regex)
+ rescue EncodingError
+ raise ArgumentError, "Incompatible input encoding: #{str.encoding.inspect}"
+ end
+
+ return '' if input.nil?
+
+ # Get the matched data as String
+ input = input.captures.first
+
# Decode
result = []
count = 0
word = 0
@@ -180,12 +208,12 @@
end
count -= 1
word += 85**(4 - count)
- result += ((word >> 24) & 255).chr if count >= 1
- result += ((word >> 16) & 255).chr if count >= 2
- result += ((word >> 8) & 255).chr if count == 3
+ result << ((word >> 24) & 255).chr if count >= 1
+ result << ((word >> 16) & 255).chr if count >= 2
+ result << ((word >> 8) & 255).chr if count == 3
end
return result
end