lib/perobs/PersistentObjectCache.rb in perobs-4.0.0 vs lib/perobs/PersistentObjectCache.rb in perobs-4.1.0
- old
+ new
@@ -29,123 +29,109 @@
module PEROBS
class PersistentObjectCache
# This cache class manages the presence of objects that primarily live in
# a backing store but temporarily exist in memory as well. To work with
# these objects, direct references must be only very short lived. Indirect
# references can be done via a unique ID that the object must provide. Due
# to the indirect references the Ruby garbage collector can collect these
- # objects and the cache is notified via a finalizer that the objects must
- # provide. The finalizer must call the _collect() method. To reduce the
- # read and write latencies of the backing store this class keeps a subset
- # of the object in memory which prevents them from being collected. All
- # references to the objects must be resolved via the get() method to
- # prevent duplicate instances in memory of the same object.
- # @param size [Integer] Maximum number of objects to be cached at a time
+ # objects. To reduce the read and write latencies of the backing store
+ # this class keeps a subset of the objects in memory which prevents them
+ # from being collected. All references to the objects must be resolved via
+ # the get() method to prevent duplicate instances in memory of the same
+ # in-store object. The cache uses a least-recently-used (LRU) scheme to
+ # cache objects.
+ # @param size [Integer] Minimum number of objects to be cached at a time
+ # @param flush_delay [Integer] Number of consecutive non-forced flush()
+ # calls needed to trigger a real flush; the calls before that are ignored.
# @param klass [Class] The class of the objects to be cached. Objects must
# provide a uid() method that returns a unique ID for every object.
# @param collection [] The object collection the objects belong to. It
# must provide a ::load method.
- def initialize(size, klass, collection)
+ def initialize(size, flush_delay, klass, collection)
@size = size
@klass = klass
@collection = collection
- @flush_counter = FLUSH_WATERMARK
+ @flush_delay = @flush_counter = flush_delay
+ @flush_times = 0
# Insert an object into the cache.
# @param object [Object] Object to cache
# @param modified [Boolean] True if the object was modified, false otherwise
def insert(object, modified = true)
- # Store the object via its Ruby object ID instead of a direct reference.
- # This allows the object to be collected by the garbage collector.
- @in_memory_objects[object.uid] = object.object_id
+ unless object.is_a?(@klass)
+ raise ArgumentError, "You can insert only #{@klass} objects in this " +
+ "cache. You have tried to insert a #{object.class} instead."
+ end
- @lines[object.uid % @size].insert(object, modified)
+ if modified
+ @modified_entries[object.uid] = object
+ else
+ index = object.uid % @size
+ @unmodified_entries[index] = object
+ end
+ nil
# Retrieve an object reference from the cache.
# @param uid [Integer] uid of the object to retrieve.
- def get(uid)
- if (entry = @lines[uid % @size].get(uid))
- return entry.obj
+ # @param ref [Object] optional reference to be used by the load method
+ def get(uid, ref = nil)
+ # First check if it's a modified object.
+ if (object = @modified_entries[uid])
+ return object
- if (ruby_object_id = @in_memory_objects[uid])
- # We have the object in memory so we can just return it.
- begin
- object = ObjectSpace._id2ref(ruby_object_id)
- # Let's make sure the object is really the object we are looking
- # for. The GC might have recycled it already and the Ruby object ID
- # could now be used for another object.
- if object.is_a?(@klass) && object.uid == uid
- # Let's put the object in the cache. We might need it soon again.
- insert(object, false)
- return object
- end
- rescue RangeError
- # Due to a race condition the object can still be in the
- # @in_memory_objects list but has been collected already by the Ruby
- # GC. In that case we need to load it again. In this case the
- # _collect() call will happen much later, potentially after we have
- # registered a new object with the same ID.
- @in_memory_objects.delete(uid)
- end
+ # Then check the unmodified object list.
+ if (object = @unmodified_entries[uid % @size]) && object.uid == uid
+ return object
- @klass::load(@collection, uid)
+ # If we don't have it in memory we need to load it.
+ @klass::load(@collection, uid, ref)
# Remove an object from the cache.
# @param uid [Integer] unique ID of object to remove.
def delete(uid)
- # The object is likely still in memory, but we really don't want to
- # access it anymore.
- @in_memory_objects.delete(uid)
+ @modified_entries.delete(uid)
- @lines[uid % @size].delete(uid)
- end
- # Remove a object from the in-memory list. This is an internal method
- # and should never be called from user code. It will be called from a
- # finalizer, so many restrictions apply!
- # @param uid [Integer] Object address of the object to remove from
- # the list
- # @param ruby_object_id [Integer] The Ruby object ID of the collected
- # object
- def _collect(address, ruby_object_id)
- if @in_memory_objects[id] == ruby_object_id
- @in_memory_objects.delete(address)
+ index = uid % @size
+ if (object = @unmodified_entries[index]) && object.uid == uid
+ @unmodified_entries[index] = nil
# Write all excess modified objects into the backing store. If now is true
# all modified objects will be written.
# @param now [Boolean]
def flush(now = false)
if now || (@flush_counter -= 1) <= 0
- @lines.each { |line| line.flush(now) }
- @flush_counter = FLUSH_WATERMARK
+ @modified_entries.each do |id, object|
+ end
+ @modified_entries =
+ @flush_counter = @flush_delay
+ @flush_times += 1
# Remove all entries from the cache.
def clear
- # A hash that stores all objects by the Ruby object ID that are
- # currently in memory. Objects are added via insert() and will be
- # removed via delete() or _collect() called from a Object
- # finalizer. It only stores the object Ruby object ID hashed by their
- # address in the file. This allows them to be collected by the
- # Ruby garbage collector.
- @in_memory_objects = {}
- # This is the actual cache. The Array stores objects as Entry objects to
- # also store the modified/not-modified state.
- @lines = { |i| }
+ # This Array stores all unmodified entries. It has a fixed size and uses
+ # a % operation to compute the index from the object ID.
+ @unmodified_entries =
+ # This Hash stores all modified entries. It can grow and shrink as
+ # needed. A flush operation writes all modified objects into the backing
+ # store.
+ @modified_entries =