# encoding: utf-8

#
# Ascii85 is an implementation of Adobe's binary-to-text encoding of the
# same name in pure Ruby.
#
# See http://www.adobe.com/products/postscript/pdfs/PLRM.pdf page 131
# and http://en.wikipedia.org/wiki/Ascii85 for more information about
# the format.
#
# Author::  Johannes Holzfuß (DataWraith@web.de)
# License:: Distributed under the MIT License (see README.rdoc)
#

module Ascii85

  #
  # Encodes the bytes of the given String as Ascii85.
  #
  # +str+ is converted with #to_s first; an empty input yields an empty
  # String (without the <~ ~> delimiters). The input is treated as a raw
  # byte sequence regardless of its encoding.
  #
  # If +wrap_lines+ evaluates to +false+, the output will be returned as
  # a single long line. Otherwise #encode formats the output into lines
  # of length +wrap_lines+ (minimum is 2).
  #
  #     Ascii85.encode("Ruby")
  #     => <~;KZGo~>
  #
  #     Ascii85.encode("Supercalifragilisticexpialidocious", 15)
  #     => <~;g!%jEarNoBkD
  #        BoB5)0rF*),+AU&
  #        0.@;KXgDe!L"F`R
  #        ~>
  #
  #     Ascii85.encode("Supercalifragilisticexpialidocious", false)
  #     => <~;g!%jEarNoBkDBoB5)0rF*),+AU&0.@;KXgDe!L"F`R~>
  #
  def self.encode(str, wrap_lines = 80)
    to_encode = str.to_s
    return '' if to_encode.empty?

    # Deal with multi-byte encodings: we need the number of bytes, not
    # the number of characters
    input_size = if to_encode.respond_to?(:bytesize)
                   to_encode.bytesize
                 else
                   to_encode.size
                 end

    # Compute number of \0s to pad the message with (0..3)
    padding_length = (-input_size) % 4

    # Pad a binary-tagged copy. Appending "\0" directly to a String in a
    # non-ASCII-compatible encoding (e.g. UTF-16) would raise
    # Encoding::CompatibilityError.
    padded = to_encode.dup
    padded.force_encoding('BINARY') if padded.respond_to?(:force_encoding)
    padded << ("\0" * padding_length)

    # Extract big-endian integers
    tuples = padded.unpack('N*')

    # Encode each 32-bit word as five base-85 digits (offset by 33, i.e.
    # '!'); an all-zero word is abbreviated to 'z'
    tuples.map! do |tuple|
      if tuple == 0
        'z'
      else
        digits = Array.new(5) do
          tuple, digit = tuple.divmod(85)
          (digit + 33).chr
        end

        # Digits were produced least-significant first
        digits.reverse.join
      end
    end

    # We can't use the z-abbreviation if we're going to cut off padding
    tuples[-1] = '!!!!!' if padding_length > 0 && tuples.last == 'z'

    # Cut off the padding
    tuples[-1] = tuples.last[0, 5 - padding_length]

    # If we don't need to wrap the lines, add delimiters and return
    return '<~' + tuples.join + '~>' unless wrap_lines

    # Otherwise we wrap the lines
    line_length = [2, wrap_lines.to_i].max

    wrapped = []
    to_wrap = '<~' + tuples.join

    # When the length is an exact multiple of line_length, the final slice
    # is an empty String, which then receives the end-marker below.
    0.step(to_wrap.length, line_length) do |index|
      wrapped << to_wrap.slice(index, line_length)
    end

    # Add end-marker – on a new line if necessary
    if (wrapped.last.length + 2) > line_length
      wrapped << '~>'
    else
      wrapped[-1] << '~>'
    end

    wrapped.join("\n")
  end

  #
  # Searches through +str+ and decodes the _first_ Ascii85-String found.
  #
  # #decode expects an Ascii85-encoded String enclosed in <~ and ~> — it will
  # ignore all characters outside these markers. Decoded data is returned
  # as an ASCII-8BIT String; if no markers are found, an empty String is
  # returned.
  #
  #     Ascii85.decode("<~;KZGo~>")
  #     => "Ruby"
  #
  #     Ascii85.decode("Foo<~;KZGo~>Bar<~;KZGo~>Baz")
  #     => "Ruby"
  #
  #     Ascii85.decode("No markers")
  #     => ""
  #
  # #decode will raise Ascii85::DecodingError when malformed input is
  # encountered, and ArgumentError when an EncodingError occurs while
  # building or applying the delimiter-matching regular expression.
  #
  def self.decode(str)
    input = str.to_s

    # Try to compile the regular expression for finding the input between
    # the <~ and ~> delimiters. In order to work properly with different
    # input encodings, the RegExp itself is re-encoded to the input encoding
    # if possible. Thanks to Myrddin Emrys for suggesting this approach
    # (http://is.gd/5x18O)
    begin
      regex = "<~(.*?)?~>"
      regex = regex.encode(input.encoding) if regex.respond_to?(:encode)
      regex = Regexp.compile(regex, Regexp::MULTILINE)

      # Find the actual data to be decoded
      match = input.match(regex)
    rescue EncodingError
      # Report the encoding of the converted input; +str+ itself need not
      # be a String and may not respond to #encoding.
      raise ArgumentError, "Incompatible input encoding: #{input.encoding.inspect}"
    end

    return '' if match.nil?

    # Get the matched data as String
    data = match.captures.first

    # Decode
    result = []   # completed 32-bit words
    count  = 0    # number of characters consumed for the current 5-tuple
    word   = 0    # value accumulated for the current 5-tuple

    data.each_byte do |byte|
      case byte
      when 0x20, 0x09, 0x0d, 0x0a, 0x0c, 0x00
        # Ignore whitespace (space, \t, \r, \n, \f) and NUL bytes
        next

      when 0x7a # 'z'
        unless count == 0
          raise(Ascii85::DecodingError, "Found 'z' inside Ascii85 5-tuple")
        end

        # Expand z to 0-word
        result << 0

      when 0x21..0x75 # '!'..'u'
        # Decode 5 characters into a 4-byte word
        word  += (byte - 33) * 85**(4 - count)
        count += 1

        if count == 5
          if word > 0xffffffff
            raise(Ascii85::DecodingError,
                  "Invalid Ascii85 5-tuple (#{word} >= 2**32)")
          end

          result << word
          word  = 0
          count = 0
        end

      else
        raise(Ascii85::DecodingError,
              "Illegal character inside Ascii85: #{byte.chr.dump}")
      end
    end

    # Convert result into a String
    result = result.pack('N*')

    if count > 0
      # Finish last, partially decoded 32-bit-word
      if count == 1
        raise(Ascii85::DecodingError,
              "Last 5-tuple consists of single character")
      end

      # A partial tuple of n characters carries n - 1 bytes of data. Adding
      # 85**(4 - count) rounds the truncated value up so that the leading
      # bytes come out as they were before encoding.
      count -= 1
      word  += 85**(4 - count)

      result << ((word >> 24) & 255).chr if count >= 1
      result << ((word >> 16) & 255).chr if count >= 2
      result << ((word >>  8) & 255).chr if count == 3
    end

    result
  end

  #
  # This error is raised when Ascii85.decode encounters one of the following
  # problems in the input:
  #
  # * An invalid character. Valid characters are '!'..'u' and 'z'.
  # * A 'z' character inside a 5-tuple. 'z's are only valid on their own.
  # * An invalid 5-tuple that decodes to >= 2**32
  # * The last tuple consisting of a single character. Valid tuples always have
  #   at least two characters.
  #
  class DecodingError < StandardError; end

end