lib/tracksperanto/accumulator.rb in tracksperanto-2.1.1 vs lib/tracksperanto/accumulator.rb in tracksperanto-2.2.0
- old
+ new
@@ -1,42 +1,68 @@
# An accumulator buffer for Ruby objects. Use it to sequentially store a shitload
-# of objects on disk and then retreive them one by one. Make sure to call #close! when done with it to
-# discard the stored blob. This object is intended to be used as a Tracksperanto::Import::Base#receiver
+# of objects on disk and then retrieve them one by one. Make sure to call #clear when done
+# with it to discard the stored blob.
+#
+# This object is intended to be used as a Tracksperanto::Import::Base#receiver, but can be used
+# in general like a disk-based object buffer.
+#
+# a = Tracksperanto::Accumulator.new
+# parse_big_file do | one_node |
+# a.push(one_node)
+# end
+#
+# a.size #=> 30932
+# a.each do | node_read_from_disk |
+# # do something with node
+# end
+#
+# a.clear # ensure that the file is deleted
class Tracksperanto::Accumulator
+ include Enumerable
- # Stores the number of objects stored so far
- attr_reader :num_objects
- alias_method :length, :num_objects
+ # Returns the number of objects stored so far
+ attr_reader :size
def initialize
@store = Tracksperanto::BufferIO.new
- @num_objects = 0
+ @size = 0
+ @byte_size = 0
+
super
end
# Store an object
def push(object_to_store)
- @num_objects += 1
- d = Marshal.dump(object_to_store)
- [d.size, "\t", d, "\n"].map(&@store.method(:write))
+ @store.seek(@byte_size)
+ blob = marshal_object(object_to_store)
+ @store.write(blob)
+ @size += 1
+ @byte_size = @byte_size + blob.size
+ object_to_store
end
- # Retreive each stored object in succession and unlink the buffer
- def each_object_with_index
- begin
- @store.rewind
- @num_objects.times { |i| yield(recover_object, i - 1) }
- ensure
- @store.close!
- end
+ # Retrieve each stored object in succession. All other Enumerable methods are also
+ # available (but be careful with Enumerable#map, which builds an in-memory Array of results)
+ def each
+ @store.rewind
+ @size.times { yield(recover_object) }
end
-
+ # Calls close! on the datastore and deletes the objects in it
+ def clear
+ @store.close!
+ @size = 0
+ end
+
private
+ def marshal_object(object_to_store)
+ d = Marshal.dump(object_to_store)
+ blob = [d.size, "\t", d, "\n"].join
+ end
+
def recover_object
# Up to the tab is the amount of bytes to read
demarshal_bytes = @store.gets("\t").strip.to_i
- # Then read the bytes and unmarshal it
Marshal.load(@store.read(demarshal_bytes))
end
end