# encoding: UTF-8 # # = BTreeDB.rb -- Persistent Ruby Object Store # # Copyright (c) 2015, 2016 by Chris Schlaeger # # MIT License # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the # "Software"), to deal in the Software without restriction, including # without limitation the rights to use, copy, modify, merge, publish, # distribute, sublicense, and/or sell copies of the Software, and to # permit persons to whom the Software is furnished to do so, subject to # the following conditions: # # The above copyright notice and this permission notice shall be # included in all copies or substantial portions of the Software. # # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF # MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE # LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION # OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION # WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. require 'fileutils' require 'perobs/Log' require 'perobs/RobustFile' require 'perobs/DataBase' require 'perobs/BTreeBlob' module PEROBS # This class implements a BTree database using filesystem directories as # nodes and blob files as leafs. The BTree grows with the number of stored # entries. Each leaf node blob can hold a fixed number of entries. If more # entries need to be stored, the blob is replaced by a node with multiple # new leafs that store the entries of the previous node. The leafs are # implemented by the BTreeBlob class. class BTreeDB < DataBase attr_reader :max_blob_size # Create a new BTreeDB object. # @param db_name [String] name of the DB directory # @param options [Hash] options to customize the behavior. Currently only # the following options are supported: # :serializer : Can be :marshal, :json, :yaml # :dir_bits : The number of bits to use for the BTree nodes. # The value must be between 4 and 14. The larger # the number the more back-end directories are # being used. The default is 12 which results in # 4096 directories per node. # :max_blob_size : The maximum number of entries in the BTree leaf # nodes. The insert/find/delete time grows # linearly with the size. def initialize(db_name, options = {}) super(options[:serializer] || :json) @db_dir = db_name # Create the database directory if it doesn't exist yet. ensure_dir_exists(@db_dir) # Read the existing DB config. @config = get_hash('config') check_option('serializer') # Check and set @dir_bits, the number of bits used for each tree level. @dir_bits = options[:dir_bits] || 12 if @dir_bits < 4 || @dir_bits > 14 PEROBS.log.fatal "dir_bits option (#{@dir_bits}) must be between 4 " + "and 12" end check_option('dir_bits') @max_blob_size = options[:max_blob_size] || 32 if @max_blob_size < 4 || @max_blob_size > 128 PEROBS.log.fatal "max_blob_size option (#{@max_blob_size}) must be " + "between 4 and 128" end check_option('max_blob_size') put_hash('config', @config) # This format string is used to create the directory name. @dir_format_string = "%0#{(@dir_bits / 4) + (@dir_bits % 4 == 0 ? 0 : 1)}X" # Bit mask to extract the dir_bits LSBs. @dir_mask = 2 ** @dir_bits - 1 end # Delete the entire database. The database is no longer usable after this # method was called. def delete_database FileUtils.rm_rf(@db_dir) end def BTreeDB::delete_db(db_name) FileUtils.rm_rf(db_name) end # Return true if the object with given ID exists # @param id [Integer] def include?(id) !(blob = find_blob(id)).nil? && !blob.find(id).nil? end # Store a simple Hash as a JSON encoded file into the DB directory. # @param name [String] Name of the hash. Will be used as file name. # @param hash [Hash] A Hash that maps String objects to strings or # numbers. def put_hash(name, hash) file_name = File.join(@db_dir, name + '.json') begin RobustFile.write(file_name, hash.to_json) rescue IOError => e PEROBS.log.fatal "Cannot write hash file '#{file_name}': #{e.message}" end end # Load the Hash with the given name. # @param name [String] Name of the hash. # @return [Hash] A Hash that maps String objects to strings or numbers. def get_hash(name) file_name = File.join(@db_dir, name + '.json') return ::Hash.new unless File.exist?(file_name) begin json = File.read(file_name) rescue => e PEROBS.log.fatal "Cannot read hash file '#{file_name}': #{e.message}" end JSON.parse(json, :create_additions => true) end # Store the given object into the cluster files. # @param obj [Hash] Object as defined by PEROBS::ObjectBase def put_object(obj, id) find_blob(id, true).write_object(id, serialize(obj)) end # Load the given object from the filesystem. # @param id [Integer] object ID # @return [Hash] Object as defined by PEROBS::ObjectBase or nil if ID does # not exist def get_object(id) return nil unless (blob = find_blob(id)) && (obj = blob.read_object(id)) deserialize(obj) end # This method must be called to initiate the marking process. def clear_marks each_blob { |blob| blob.clear_marks } end # Permanently delete all objects that have not been marked. Those are # orphaned and are no longer referenced by any actively used object. # @return [Array] List of IDs that have been removed from the DB. def delete_unmarked_objects deleted_ids = [] each_blob { |blob| deleted_ids += blob.delete_unmarked_entries } deleted_ids end # Mark an object. # @param id [Integer] ID of the object to mark def mark(id) (blob = find_blob(id)) && blob.mark(id) end # Check if the object is marked. # @param id [Integer] ID of the object to check # @param ignore_errors [Boolean] If set to true no errors will be raised # for non-existing objects. def is_marked?(id, ignore_errors = false) (blob = find_blob(id)) && blob.is_marked?(id, ignore_errors) end # Basic consistency check. # @param repair [TrueClass/FalseClass] True if found errors should be # repaired. def check_db(repair = false) each_blob { |blob| blob.check(repair) } end # Check if the stored object is syntactically correct. # @param id [Integer] Object ID # @param repair [TrueClass/FalseClass] True if an repair attempt should be # made. # @return [TrueClass/FalseClass] True if the object is OK, otherwise # false. def check(id, repair) begin get_object(id) rescue => e PEROBS.log.error "Cannot read object with ID #{id}: #{e.message}" return false end true end # Store the given serialized object into the cluster files. This method is # for internal use only! # @param raw [String] Serialized Object as defined by PEROBS::ObjectBase # @param id [Integer] Object ID def put_raw_object(raw, id) find_blob(id, true).write_object(id, raw) end private def find_blob(id, create_missing_blob = false, dir_name = @db_dir) dir_bits = id & @dir_mask sub_dir_name = File.join(dir_name, @dir_format_string % dir_bits) if Dir.exist?(sub_dir_name) if File.exist?(File.join(sub_dir_name, 'index')) # The directory is a blob directory and not a BTree node dir. return BTreeBlob.new(sub_dir_name, self) end else Dir.glob(File.join(dir_name, '*.index')).each do |fqfn| # Extract the 01-part of the filename lsb_string = File.basename(fqfn)[0..-6] # Convert the lsb_string into a Integer lsb = Integer('0b' + lsb_string) # Bit mask to match the LSBs mask = (2 ** lsb_string.length) - 1 if (id & mask) == lsb return TreeBlob.new(sub_dir_name, lsb_string, self) end end if create_missing_blob # Create the new blob directory. Dir.mkdir(dir_name) # And initialize the blob DB. return BTreeBlob.new(dir_name, self) else return nil end end # Discard the least significant @dir_bits bits and start over again # with the directory that matches the @dir_bits LSBs of the new ID. id = id >> @dir_bits end def each_blob(&block) each_blob_r(@db_dir, &block) end def each_blob_r(dir, &block) Dir.glob(File.join(dir, '*')) do |dir_name| if is_blob_dir?(dir_name) block.call(BTreeBlob.new(dir_name, self)) else each_blob_r(dir_name, &block) end end end def is_blob_dir?(dir_name) # A blob directory contains an 'index' and 'data' file. This is in # contrast to BTree node directories that only contain other # directories. index_file = File.join(dir_name, 'index') File.exist?(index_file) end end end