lib/marshal/structure.rb in marshal-structure-1.1.1 vs lib/marshal/structure.rb in marshal-structure-2.0

- old
+ new

@@ -1,168 +1,77 @@ ## # Marshal::Structure dumps a nested Array describing the structure of a -# Marshal stream. +# Marshal stream. Marshal format 4.8 (Ruby 1.8 through 2.x) is supported. # -# Marshal format 4.8 is supported. +# Examples: +# +# To dump the structure of a Marshal stream: +# +# ruby -rpp -rmarshal/structure \ +# -e 'pp Marshal::Structure.load Marshal.dump "hello"' +# +# Fancier usage: +# +# require 'pp' +# require 'marshal/structure' +# +# ms = Marshal::Structure.new Marshal.dump %w[hello world] +# +# # print the stream structure +# pp ms.structure +# +# # show how many allocations are required to load the stream +# p ms.count_allocations + class Marshal::Structure ## - # Version of Marshal::Structure you are using + # Generic error class for Marshal::Structure - VERSION = '1.1.1' + class Error < RuntimeError + end ## - # Supported major Marshal version + # Raised when the Marshal stream is at the end - MAJOR_VERSION = 4 + class EndOfMarshal < Error - ## - # Supported minor Marshal version + ## + # Number of bytes of Marshal stream consumed - MINOR_VERSION = 8 + attr_reader :consumed - ## - # nil type prefix + ## + # Requested number of bytes that was not fulfillable - TYPE_NIL = '0' + attr_reader :requested - ## - # true type prefix + ## + # Creates a new EndOfMarshal exception. Marshal::Structure previously + # read +consumed+ bytes and was unable to fulfill the request for + # +requested+ additional bytes. - TYPE_TRUE = 'T' + def initialize consumed, requested + @consumed = consumed + @requested = requested - ## - # false type prefix + super "consumed #{consumed} bytes, requested #{requested} more" + end + end - TYPE_FALSE = 'F' - ## - # Fixnum type prefix + # Version of Marshal::Structure you are using - TYPE_FIXNUM = 'i' + VERSION = '2.0' ## - # An object that has been extended with a module + # The Marshal stream - TYPE_EXTENDED = 'e' + attr_reader :stream ## - # A subclass of a built-in type - - TYPE_UCLASS = 'C' - - ## - # A ruby Object - - TYPE_OBJECT = 'o' - - ## - # A wrapped C pointer - - TYPE_DATA = 'd' - - ## - # An object saved with _dump - - TYPE_USERDEF = 'u' - - ## - # An object saved with marshal_dump - - TYPE_USRMARSHAL = 'U' - - ## - # A Float - - TYPE_FLOAT = 'f' - - ## - # A Bignum - - TYPE_BIGNUM = 'l' - - ## - # A String - - TYPE_STRING = '"' - - ## - # A Regexp - - TYPE_REGEXP = '/' - - ## - # An Array - - TYPE_ARRAY = '[' - - ## - # A Hash - - TYPE_HASH = '{' - - ## - # A Hash with a default value (not proc) - - TYPE_HASH_DEF = '}' - - ## - # A Struct - - TYPE_STRUCT = 'S' - - ## - # An old-style Module (reference, not content) - # - # I'm not sure what makes this old. The byte stream is identical to - # TYPE_MODULE - - TYPE_MODULE_OLD = 'M' - - ## - # A class (reference, not content) - - TYPE_CLASS = 'c' - - ## - # A module (reference, not content) - - TYPE_MODULE = 'm' - - ## - # A Symbol - - TYPE_SYMBOL = ':' - - ## - # A reference to a previously Symbol - - TYPE_SYMLINK = ';' - - ## - # Instance variables for a following object - - TYPE_IVAR = 'I' - - ## - # A reference to a previously-stored Object - - TYPE_LINK = '@' - - ## - # Objects found in the Marshal stream. Since objects aren't constructed the - # actual object won't be present in this list. - - attr_reader :objects - - ## - # Symbols found in the Marshal stream - - attr_reader :symbols - - ## # Returns the structure of the Marshaled object +obj+ as nested Arrays. # # For +true+, +false+ and +nil+ the symbol +:true+, +:false+, +:nil+ is # returned, respectively. # @@ -176,28 +85,22 @@ def self.load obj if obj.respond_to? :to_str then data = obj.to_s elsif obj.respond_to? :read then data = obj.read - if data.empty? then - raise EOFError, "end of file reached" - end + raise EOFError, "end of file reached" if data.empty? elsif obj.respond_to? :getc then # FIXME - don't read all of it upfront data = '' - data << c while (c = obj.getc.chr) + + while c = obj.getc do + data << c.chr + end else raise TypeError, "instance of IO needed" end - major = data[0].ord - minor = data[1].ord - - if major != MAJOR_VERSION or minor > MINOR_VERSION then - raise TypeError, "incompatible marshal file format (can't be read)\n\tformat version #{MAJOR_VERSION}.#{MINOR_VERSION} required; #{major}.#{minor} given" - end - - new(data).construct + new(data).structure end ## # Dumps the structure of each item in +argv+. If +argv+ is empty standard # input is dumped. @@ -218,420 +121,49 @@ ## # Prepares processing of +stream+ def initialize stream - @objects = [] - @symbols = [] - - @stream = stream - @byte_array = stream.bytes.to_a - @consumed = 2 + @stream = stream + @tokenizer = Marshal::Structure::Tokenizer.new stream end ## - # Adds +obj+ to the objects list + # Counts allocations required to load the Marshal stream. See + # Marshal::Structure::AllocationsCounter for a description of how counting + # is performed. - def add_object obj - return if - [NilClass, TrueClass, FalseClass, Symbol, Fixnum].any? { |c| c === obj } + def count_allocations + counter = Marshal::Structure::AllocationCounter.new token_stream - index = @objects.size - @objects << obj - index + counter.count end ## - # Adds +symbol+ to the symbols list + # Loads the stream with Marshal.load - def add_symlink symbol - index = @symbols.size - @symbols << symbol - index + def load + Marshal.load @stream end ## - # Creates the structure for the remaining stream. + # Returns the structure of the Marshal stream. - def construct - type = consume_character + def structure + parser = Marshal::Structure::Parser.new token_stream - case type - when TYPE_NIL then - :nil - when TYPE_TRUE then - :true - when TYPE_FALSE then - :false - - when TYPE_ARRAY then - [:array, *construct_array] - when TYPE_BIGNUM then - [:bignum, *construct_bignum] - when TYPE_CLASS then - ref = store_unique_object Object.allocate - - [:class, ref, get_byte_sequence] - when TYPE_DATA then - [:data, *construct_data] - when TYPE_EXTENDED then - [:extended, get_symbol, construct] - when TYPE_FIXNUM then - [:fixnum, construct_integer] - when TYPE_FLOAT then - [:float, *construct_float] - when TYPE_HASH then - [:hash, *construct_hash] - when TYPE_HASH_DEF then - [:hash_default, *construct_hash_def] - when TYPE_IVAR then - [:instance_variables, construct, *construct_instance_variables] - when TYPE_LINK then - [:link, construct_integer] - when TYPE_MODULE, TYPE_MODULE_OLD then - ref = store_unique_object Object.allocate - - [:module, ref, get_byte_sequence] - when TYPE_OBJECT then - [:object, *construct_object] - when TYPE_REGEXP then - [:regexp, *construct_regexp] - when TYPE_STRING then - [:string, *construct_string] - when TYPE_STRUCT then - [:struct, *construct_struct] - when TYPE_SYMBOL then - [:symbol, *construct_symbol] - when TYPE_SYMLINK then - [:symbol_link, construct_integer] - when TYPE_USERDEF then - [:user_defined, *construct_user_defined] - when TYPE_USRMARSHAL then - [:user_marshal, *construct_user_marshal] - when TYPE_UCLASS then - name = get_symbol - - [:user_class, name, construct] - else - raise ArgumentError, "load error, unknown type #{type}" - end + parser.parse end ## - # Creates the body of an +:array+ object + # Returns an Enumerator for the tokens in the Marshal stream. - def construct_array - ref = store_unique_object Object.allocate - - obj = [ref] - - items = construct_integer - - obj << items - - items.times do - obj << construct - end - - obj + def token_stream + @tokenizer.tokens end - ## - # Creates the body of a +:bignum+ object - - def construct_bignum - sign = consume_byte == ?- ? -1 : 1 - size = construct_integer * 2 - - result = 0 - - data = consume_bytes size - - data.each_with_index do |data, exp| - result += (data * 2**(exp*8)) - end - - ref = store_unique_object Object.allocate - - [ref, sign, size, result] - end - - ## - # Creates the body of a wrapped C pointer object - - def construct_data - ref = store_unique_object Object.allocate - - [ref, get_symbol, construct] - end - - ## - # Creates the body of a +:float+ object - - def construct_float - float = get_byte_sequence - - ref = store_unique_object Object.allocate - - [ref, float] - end - - ## - # Creates the body of a +:hash+ object - - def construct_hash - ref = store_unique_object Object.allocate - - obj = [ref] - - pairs = construct_integer - obj << pairs - - pairs.times do - obj << construct - obj << construct - end - - obj - end - - ## - # Creates the body of a +:hash_def+ object - - def construct_hash_def - ref, hash = construct_hash - - [ref, hash, construct] - end - - ## - # Instance variables contain an object followed by a count of instance - # variables and their contents - - def construct_instance_variables - instance_variables = [] - - pairs = construct_integer - instance_variables << pairs - - pairs.times do - instance_variables << get_symbol - instance_variables << construct - end - - instance_variables - end - - ## - # Decodes a stored Fixnum - - def construct_integer - c = consume_byte - - # The format appears to be a simple integer compression format - # - # The 0-123 cases are easy, and use one byte - # We've read c as unsigned char in a way, but we need to honor - # the sign bit. We do that by simply comparing with the +128 values - return 0 if c == 0 - return c - 5 if 4 < c and c < 128 - - # negative, but checked known it's instead in 2's compliment - return c - 251 if 252 > c and c > 127 - - # otherwise c (now in the 1 to 4 range) indicates how many - # bytes to read to construct the value. - # - # Because we're operating on a small number of possible values, - # it's cleaner to just unroll the calculate of each - - case c - when 1 - consume_byte - when 2 - consume_byte | (consume_byte << 8) - when 3 - consume_byte | (consume_byte << 8) | (consume_byte << 16) - when 4 - consume_byte | (consume_byte << 8) | (consume_byte << 16) | - (consume_byte << 24) - - when 255 # -1 - consume_byte - 256 - when 254 # -2 - (consume_byte | (consume_byte << 8)) - 65536 - when 253 # -3 - (consume_byte | - (consume_byte << 8) | - (consume_byte << 16)) - 16777216 # 2 ** 24 - when 252 # -4 - (consume_byte | - (consume_byte << 8) | - (consume_byte << 16) | - (consume_byte << 24)) - 4294967296 - else - raise "Invalid integer size: #{c}" - end - end - - ## - # Creates an Object - - def construct_object - ref = store_unique_object Object.allocate - - [ref, get_symbol, construct_instance_variables] - end - - ## - # Creates a Regexp - - def construct_regexp - ref =store_unique_object Object.allocate - - [ref, get_byte_sequence, consume_byte] - end - - ## - # Creates a String - - def construct_string - ref = store_unique_object Object.allocate - - [ref, get_byte_sequence] - end - - ## - # Creates a Struct - - def construct_struct - symbols = [] - values = [] - - obj_ref = store_unique_object Object.allocate - - obj = [obj_ref, get_symbol] - - members = construct_integer - obj << members - - members.times do - obj << get_symbol - obj << construct - end - - obj - end - - ## - # Creates a Symbol - - def construct_symbol - sym = get_byte_sequence - - ref = store_unique_object sym.to_sym - - [ref, sym] - end - - ## - # Creates an object saved by _dump - - def construct_user_defined - name = get_symbol - - data = get_byte_sequence - - ref = store_unique_object Object.allocate - - [ref, name, data] - end - - ## - # Creates an object saved by marshal_dump - - def construct_user_marshal - name = get_symbol - - obj = Object.allocate - - obj_ref = store_unique_object obj - - [obj_ref, name, construct] - end - - ## - # Consumes +bytes+ from the marshal stream - - def consume bytes - raise ArgumentError, "marshal data too short" if @consumed > @stream.size - data = @stream[@consumed, bytes] - @consumed += bytes - data - end - - ## - # Consumes +count+ bytes from the marshal stream as an Array of bytes - - def consume_bytes count - consume(count).bytes.to_a - end - - ## - # Consumes one byte from the marshal stream - - def consume_byte - raise ArgumentError, "marshal data too short" if - @consumed > @byte_array.size - - data = @byte_array[@consumed] - @consumed += 1 - - data - end - - ## - # Consumes one byte from the marshal stream and returns a character - - def consume_character - consume_byte.chr - end - - ## - # Consumes a sequence of bytes from the marshal stream based on the next - # integer - - def get_byte_sequence - size = construct_integer - consume size - end - - ## - # Constructs a Symbol from a TYPE_SYMBOL or TYPE_SYMLINK - - def get_symbol - type = consume_character - - case type - when TYPE_SYMBOL then - [:symbol, *construct_symbol] - when TYPE_SYMLINK then - num = construct_integer - [:symbol_link, num] - else - raise ArgumentError, "expected TYPE_SYMBOL or TYPE_SYMLINK, got #{type.inspect}" - end - end - - ## - # Stores a reference to +obj+ - - def store_unique_object obj - if Symbol === obj then - add_symlink obj - else - add_object obj - end - end - end + +require 'marshal/structure/allocation_counter' +require 'marshal/structure/parser' +require 'marshal/structure/tokenizer'