module Lite3

  # Lite3::DBM encapsulates a single table in a single SQLite3
  # database file and lets you access it as easily as a Hash.
  # Multiple instances may be opened on different tables in the same
  # database.
  #
  # Note that instances do not explicitly own their database
  # connection; instead, they are managed internally and shared across
  # `DBM` instances.
  class Lite3::DBM
    include Enumerable
    include ErrUtil

    PREFIX = "dbmlite3_tbl_".freeze
    META = :dbmlite3_meta
    private_constant(:PREFIX, :META)

    #
    # Construction and setup
    #

    # Create a new `Lite3::DBM` object that opens database file
    # `filename` and performs subsequent operations on `table`.  Both
    # the database file and the table will be created if they do not
    # yet exist.  The table name must be a valid name identifier
    # (i.e. the whole name matches `/\A[a-zA-Z_]\w*\z/`).
    #
    # The optional third argument `serializer` is used to choose the
    # serialization method for converting Ruby values into storable
    # strings.  There are three options:
    #
    # * `:yaml` uses the `Psych` module.
    # * `:marshal` uses the `Marshal` module.
    # * `:string` simply uses the default `to_s` method, just like the
    #   stock `DBM`.
    #
    # Each of these will have their pros and cons.  The default is
    # `:yaml` because that is the most portable.  `:marshal` tends to
    # be faster but is incompatible across minor Ruby versions.
    #
    # Your serializer choice is registered in a metadata table when
    # `tablename` is created in the SQLite3 file.  Afterward, it is an
    # error to attempt to open the table with a different serializer
    # and will result in a Lite3::Error exception.
    #
    # ## Caveats:
    #
    # 1. Both YAML and Marshal serialization have the usual security
    #    issues as described in the documentation for `Marshal` and
    #    `Psych`.  If you are going to let an untrusted entity modify
    #    the database, you should not use these methods and instead
    #    stick to string conversion.
    #
    # 2. `DBM` does not check your Marshal version; a mismatch will
    #    fail dramatically at exactly the wrong time.
    #
    # 3. `filename` is normalized using `File.realpath` and this path
    #    is used to look up an existing database handle if one exists.
    #    Using hard links or other trickery to defeat this mechanism
    #    and open a second handle to the same database is **probably**
    #    still harmless but is not something this API guarantees will
    #    work correctly.
    #
    def initialize(filename, tablename, serializer = :yaml)
      @filename = filename
      @tablename = tablename
      @valenc, @valdec = value_encoders(serializer)
      @handle = HandlePool.get(filename)

      @handle.addref(self)

      # Anchor with \A and \z rather than ^ and $: the latter match at
      # line boundaries, so a name with an embedded newline would
      # otherwise be accepted as long as one of its lines looked valid.
      check("Malformed table name '#{tablename}'; must be a valid identifier") {
        tablename =~ /\A[a-zA-Z_]\w*\z/
      }

      transaction {
        register_serialization_scheme(serializer)
        @handle.create_key_value_table( actual_tbl() )
      }
    rescue Error
      # Release our reference to the handle (if we got that far) before
      # propagating the failure.
      self.close if @handle
      raise
    end

    # Identical to `initialize` except that if a block is provided, it
    # is evaluated with a new Lite3::DBM which is then closed afterward.
    # This is analogous to `File.open`.
    #
    # If the block closes the instance itself, it is not closed a
    # second time.
    def self.open(filename, tablename, serializer = :yaml, &block)
      instance = self.new(filename, tablename, serializer)
      return instance unless block

      begin
        block.call(instance)
      ensure
        instance.close unless instance.closed?
      end
    end

    private

    # Return encode and decode procs for the requested serialization
    # scheme.
    def value_encoders(serializer)
      case serializer
      when :yaml
        enc = proc { |val| Psych.dump(val) }

        # Psych (and module YAML) has gradually moved from defaulting
        # from unsafe loading to safe loading.  This is a pain for us
        # because old versions don't provide `unsafe_load` as an alias
        # to `load` and new versions default `load` to `safe_load`.
        # So we have to do this thing to pick `unsafe_load` if it's
        # available and `load` otherwise.
        if Psych.respond_to? :unsafe_load
          dec = proc { |val| Psych.unsafe_load(val) }
        else
          dec = proc { |val| Psych.load(val) }
        end

      when :marshal
        enc = proc { |val| Marshal.dump(val) }
        dec = proc { |val| Marshal.load(val) }

      when :string
        enc = proc { |val| val.to_s }
        dec = proc { |val| val.to_s }   # sqlite preserves some types

      else
        raise InternalError.new("Invalid serializer selected: '#{serializer}'")
      end

      return enc, dec
    end

    # Add the serialization scheme for this table to META.  If the
    # table is already registered, verify that the requested scheme
    # matches the recorded one instead.
    def register_serialization_scheme(req_ser)
      @handle.create_key_value_table(META)

      transaction {
        srlzr = @handle.lookup(META, @tablename)
        if srlzr
          check("Serializer mismatch for '#{@tablename}'; specified " +
                "#{req_ser} but table uses #{srlzr}") {
            req_ser.to_s == srlzr
          }
        else
          @handle.upsert(META, @tablename, req_ser.to_s)
        end
      }
    end

    #
    # Helpers
    #

    # Return the actual table name we are using.
    def actual_tbl() return "#{PREFIX}#{@tablename}".intern; end

    public

    # Return a short human-readable description of this object: class,
    # object id, file, table name and whether it is open or closed.
    def to_s
      openstr = closed? ? 'CLOSED' : 'OPEN'
      return "<#{self.class}:0x#{object_id.to_s(16)} file='#{@filename}'" +
             " tablename='#{@tablename}' #{openstr}>"
    end
    alias inspect to_s

    # Disassociate `self` from the underlying database.  If this is
    # the last `DBM` using it, the handle will (probably) also be
    # closed.
    #
    # Subsequent attempts to use `self` will fail with an error; the
    # only exception to this is the method `closed?` which will return
    # true.
    def close
      @handle.delref(self)
      @handle = ClosedHandle.new(@filename, @tablename)
    end

    # Test if this object has been closed.  This is safe to call on a
    # closed `DBM`.
    def closed?
      return @handle.is_a? ClosedHandle
    end

    #
    # Transactions
    #

    # Begins a transaction, evaluates the given block and then ends
    # the transaction.  If no error occurred (i.e. no exception was
    # thrown), the transaction is committed; otherwise, it is rolled
    # back.  Returns the block's result.
    #
    # It is safe to call `DBM.transaction` within another
    # `DBM.transaction` block's call chain because `DBM` will not
    # start a new transaction on a database handle that already has
    # one in progress.  (It may be possible to trick `DBM` into trying
    # via fibers or other flow control trickery; don't do that.)
    #
    # Note that it's probably not a good idea to assume too much about
    # the precise semantics; I can't guarantee that the underlying
    # library(es) won't change or be replaced outright.
    #
    # That being said, at present, this is simply a wrapper around
    # `Sequel::Database.transaction` with the default options and so
    # is subject to the quirks therein.  In version 1.0.0,
    # transactions were always executed in `:deferred` mode via the
    # `sqlite3` gem.
    #
    # @return [obj] Returns the block's result.
    #
    # @yield [db] The block takes a reference to the receiver as an
    #             argument.
    #
    def transaction(&block)
      return @handle.transaction { block.call(self) }
    end

    # Test if there is currently a transaction in progress
    def transaction_active?
      return @handle.transaction_active?
    end

    #
    # Basic hash-like access
    #

    # Store `value` at `key` in the database.
    #
    # `key` **must** be a String or a Symbol; Symbols are
    # transparently converted to Strings.
    #
    # `value` **must** be convertible to string by whichever
    # serialization method you have chosen.
    def []=(key, value)
      key = check_key(key)
      valstr = @valenc.call(value)

      @handle.upsert(actual_tbl(), key, valstr)

      return value
    end
    alias store :'[]='

    # Retrieve the value associated with `key` from the database or
    # nil if it is not present.
    def [](key)
      return fetch(key, nil)
    end

    # Retrieve the value associated with `key`.
    #
    # `key` **must** be a String or a Symbol; Symbols are
    # transparently converted to Strings.
    #
    # If it is not present and a block is given, evaluate the block
    # with the key as its argument and return that.
    #
    # If no block was given either but one extra parameter was given,
    # that value is returned instead.
    #
    # Finally, if none of these was given, it raises an `IndexError`
    # exception.
    #
    # It is an error if `fetch` is called with more than two arguments.
    #
    # @yield [key] The fallback block.
    def fetch(key, *args, &default_block)

      # Ensure there are no extra arguments
      nargs = args.size + 1
      check("Too many arguments for 'fetch'; expected 1 or 2; got #{nargs}") {
        nargs <= 2
      }

      # Retrieve the value
      key = check_key(key)

      # Return the value if found.  (nil will always mean the entry
      # isn't present because values are encoded in strings.)
      value = @handle.lookup(actual_tbl(), key)
      return @valdec.call(value) if value

      # Not found.  If a block was given, evaluate it and return its
      # result.
      return default_block.call(key) if default_block

      # Next, see if we have a default value we can return
      return args[0] unless args.empty?

      # And if all else fails, raise an IndexError.
      raise IndexError.new("key '#{key}' not found.")
    end

    # Return a new `Array` containing the values corresponding to the
    # given keys.
    def values_at(*keys)
      return keys.map { |k| self[k] }
    end

    # Return an `Array` of all of the keys in the table.
    #
    # **WARNING:** since this list is being read from disk, it is possible
    # that the result could exceed available memory.
    def keys
      keys = []
      fast_each { |k, _v| keys.push k }
      return keys
    end

    # Return an array of all values in the table.
    #
    # **WARNING:** since this list is being read from disk, it is possible
    # that the result could exceed available memory.
    def values
      values = []
      fast_each { |_k, v| values.push v }
      return values
    end

    # Return `true` if the table contains `key`; otherwise, return
    # `false`.  Keys that are neither a String nor a Symbol are never
    # present, so `false` is returned for them without a lookup.
    def has_key?(key)
      return false unless key.class == String || key.class == Symbol
      fetch( key ) { return false }
      return true
    end
    alias include? has_key?
    alias member? has_key?
    alias key? has_key?

    # Delete all entries from the table.
    def clear
      @handle.clear_table(actual_tbl())
    end

    # Behaves like 'each' with a block--that is, call it for each
    # key/value pair--but (probably) executes faster.
    #
    # The downside is that there is no guarantee of reentrance or
    # safety.  The block *MUST NOT* access the database in any way.
    # In addition, no guarantee is made about element order.
    #
    # (You might be able to infer some ways to safely bend the rules
    # by seeing what the underlying database libraries allow, but your
    # code won't be future-proof if you do that.)
    #
    # @yield [key, value] The block to evaluate
    def fast_each(&block)
      transaction {
        @handle.tbl_each_fast( actual_tbl() ) { |row|
          block.call(row[:key], @valdec.call(row[:value]))
        }
      }
    end

    # Calls the given block with each key-value pair in the usual
    # order, then return self.  The entire call takes place in its own
    # transaction.
    #
    # It is safe to modify `self` inside the block.
    #
    # If no block is given, returns an Enumerator instead.  The
    # Enumerator does *not* start a transaction but individual
    # accesses of it (e.g. calling `next`) each take place in their
    # own transaction.
    #
    # @yield [key, value] The block to evaluate
    def each(&block)
      return self.to_enum(:nt_each) unless block
      transaction { nt_each(&block) }
      return self
    end
    alias each_pair each

    private

    # Back-end for `each`; does not explicitly start a transaction.
    def nt_each(&block)
      @handle.tbl_each(actual_tbl()) do |k, v|
        block.call(k, @valdec.call(v))
      end

      return self
    end

    public

    # Calls the given block with each key; returns self.  Exactly like
    # `each` except for the block argument.
    #
    # @yield [key] The block to evaluate
    def each_key(&block)
      return Enumerator.new { |y| nt_each { |k, _v| y << k } } unless block
      return each { |k, _v| block.call(k) }
    end

    # Calls the given block with each value; returns self.  Exactly like
    # `each` except for the block argument.
    #
    # @yield [value] The block to evaluate
    def each_value(&block)
      return Enumerator.new { |y| nt_each { |_k, v| y << v } } unless block
      return each { |_k, v| block.call(v) }
    end

    # Updates the database with multiple values from the specified
    # object.  Takes any object which implements the each_pair method,
    # including `Hash` and `DBM` objects.  Objects without `each_pair`
    # are iterated with `each` instead, which must yield key/value
    # pairs.  All stores happen inside one transaction.
    def update(hash)
      transaction {
        store_pair = proc { |k, v| self[k] = v }
        if hash.respond_to?(:each_pair)
          hash.each_pair(&store_pair)
        else
          hash.each(&store_pair)
        end
      }
    end

    # Remove `key` and its associated value from `self`.  If `key` is
    # not present, does nothing.
    #
    # Symbols are converted to Strings, as `[]=` and `fetch` do, so
    # that an entry stored under a Symbol key can be deleted with the
    # same Symbol.  Keys of any other type are passed through as given.
    def delete(key)
      key = key.to_s if key.class == Symbol
      @handle.delete(actual_tbl(), key)
    end

    # Evaluate the block on each key-value pair in `self` and delete
    # each entry for which the block returns true.
    #
    # @yield [key, value] The block to evaluate
    def delete_if(&block)
      transaction {
        self.each { |k, v| delete(k) if block.call(k, v) }
      }
    end
    alias reject! delete_if

    # Return the number of entries (key-value pairs) in `self`.
    def size
      return @handle.get_size(actual_tbl())
    end
    alias length size

    # Test if `self` is empty.
    def empty?
      return size == 0
    end

    #
    # Conversion to internal types
    #

    # Copies the table into a `Hash` and returns it.
    #
    # **WARNING:** it is possible for tables to be significantly larger
    # than available RAM; in that case, this will likely crash your
    # program.
    def to_hash
      result = {}
      fast_each { |k, v| result[k] = v }
      return result
    end

    # Returns an `Array` of 2-element `Array` objects each containing a
    # key-value pair from `self`.
    #
    # **WARNING:** it is possible for tables to be significantly larger
    # than available RAM; in that case, this will likely crash your
    # program.
    def to_a
      result = []
      fast_each { |k, v| result.push [k, v] }
      return result
    end

    #
    # Hacky odds and ends
    #

    # Test if `val` is one of the values in this table.
    #
    # Potentially very slow, especially on large tables.
    def has_value?(val)
      fast_each { |_k, v| return true if v == val }
      return false
    end
    alias value? has_value?

    # Return a `Hash` whose keys are the table's values and whose values
    # are the table's keys.
    #
    # **WARNING:** it is possible for tables to be significantly larger
    # than available RAM; in that case, this will likely crash your
    # program.
    def invert
      result = {}
      fast_each { |k, v| result[v] = k }
      return result
    end

    # Remove the first key/value pair from `self` and return it as a
    # two-element Array, or return nil if `self` is empty.  "First"
    # is defined by `self`'s row order, which is the order of insertion
    # as determined by SQLite3.
    def shift
      # The result is produced as the block's value (via `next` or the
      # last expression) instead of a non-local `return`, so the
      # transaction block always finishes normally before we return.
      transaction {
        next nil if empty?

        key, value = self.each.first
        delete(key)

        [key, value]
      }
    end

    private

    # Attempt to turn 'key' to a valid key and raise an exception if
    # that isn't possible.
    def check_key(key)
      key = key.to_s if key.class == Symbol
      raise TypeError.new("Key '#{key}' is not a string or symbol!") unless
        key.class == String

      return key
    end

  end
end