lib/tracksperanto/accumulator.rb in tracksperanto-2.1.1 vs lib/tracksperanto/accumulator.rb in tracksperanto-2.2.0

- old
+ new

# An accumulator buffer for Ruby objects. Use it to sequentially store a large number
# of objects in a buffer and then retrieve them one by one. Make sure to call #clear
# when done with it to discard the stored blob.
#
# This object is intended to be used as a Tracksperanto::Import::Base#receiver, but can be used
# in general like a disk-based object buffer.
#
#   a = Tracksperanto::Accumulator.new
#   parse_big_file do | one_node |
#     a.push(one_node)
#   end
#
#   a.size #=> 30932
#   a.each do | node_read_from_disk |
#     # do something with node
#   end
#
#   a.clear # ensure that the file is deleted
class Tracksperanto::Accumulator
  include Enumerable

  # Returns the number of objects stored so far
  attr_reader :size

  def initialize
    @store = Tracksperanto::BufferIO.new
    @size = 0
    # Offset (in bytes) at which the next record will be written
    @byte_size = 0
    super
  end

  # Store an object. The object is serialized with Marshal and appended
  # to the end of the buffer. Returns the object that was passed in.
  def push(object_to_store)
    # Always write at the end of the data written so far: a preceding #each
    # may have left the read/write position somewhere else.
    @store.seek(@byte_size)
    blob = marshal_object(object_to_store)
    @store.write(blob)
    @size += 1
    # Offsets are tracked in bytes, not characters
    @byte_size += blob.bytesize
    object_to_store
  end

  # Retrieve each stored object in succession, in the order the objects
  # were pushed. All other Enumerable methods are also available (but be
  # careful with Enumerable#map, which will load every object into memory).
  #
  # Without a block an Enumerator is returned.
  def each
    return enum_for(:each) unless block_given?

    @store.rewind
    @size.times { yield(recover_object) }
  end

  # Calls close! on the datastore and deletes the objects in it
  def clear
    @store.close!
    @size = 0
    # Reset the write offset together with the object count, otherwise a
    # subsequent #push would seek to a stale position.
    # NOTE(review): whether the store is usable again after close! depends
    # on Tracksperanto::BufferIO - confirm before reusing an accumulator.
    @byte_size = 0
  end

  private

  # Serializes the object into one record of the form
  # "<payload length in bytes>\t<marshaled payload>\n"
  def marshal_object(object_to_store)
    d = Marshal.dump(object_to_store)
    [d.bytesize, "\t", d, "\n"].join
  end

  # Reads one record from the current position of the store and returns
  # the unmarshaled object.
  def recover_object
    # Up to the tab is the amount of bytes to read. The newline that terminates
    # the previous record is still in front of the number, strip takes care of it.
    demarshal_bytes = @store.gets("\t").strip.to_i
    # Then read the bytes and unmarshal it. Marshal.load is only applied to
    # data this object has written itself via #push.
    Marshal.load(@store.read(demarshal_bytes))
  end
end