lib/perobs/BTree.rb in perobs-4.0.0 vs lib/perobs/BTree.rb in perobs-4.1.0

- old
+ new

@@ -1,10 +1,10 @@ # encoding: UTF-8 # # = BTreeNode.rb -- Persistent Ruby Object Store # -# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org> +# Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org> # # MIT License # # Permission is hereby granted, free of charge, to any person obtaining # a copy of this software and associated documentation files (the @@ -30,28 +30,31 @@ require 'perobs/PersistentObjectCache' require 'perobs/BTreeNode' module PEROBS - # This BTree class is very similar to a classic BTree implementation. It + # This BTree class is very similar to a classic B+Tree implementation. It # manages a tree that is always balanced. The BTree is stored in the # specified directory and partially kept in memory to speed up operations. # The order of the tree specifies how many keys each node will be able to # hold. Leaf nodes will have a value associated with each key. Branch nodes # have N + 1 references to child nodes instead. class BTree - attr_reader :order, :nodes, :node_cache + attr_reader :order, :nodes, :node_cache, :first_leaf, :last_leaf, :size # Create a new BTree object. # @param dir [String] Directory to store the tree file # @param name [String] Base name of the BTree related files in 'dir' # @param order [Integer] The maximum number of keys per node. This number # must be odd and larger than 2 and smaller than 2**16 - 1. - def initialize(dir, name, order) + # @param progressmeter [ProgressMeter] reference to a ProgressMeter object + def initialize(dir, name, order, progressmeter) @dir = dir @name = name + @progressmeter = progressmeter + unless order > 2 PEROBS.log.fatal "BTree order must be larger than 2, not #{order}" end unless order % 2 == 1 PEROBS.log.fatal "BTree order must be an uneven number, not #{order}" @@ -60,13 +63,18 @@ PEROBS.log.fatal "BTree order must be smaller than #{2**16 - 1}" end @order = order # This EquiBlobsFile contains the nodes of the BTree. - @nodes = EquiBlobsFile.new(@dir, @name, + @nodes = EquiBlobsFile.new(@dir, @name, @progressmeter, BTreeNode::node_bytes(@order)) - @node_cache = PersistentObjectCache.new(512, BTreeNode, self) + @nodes.register_custom_data('first_leaf') + @nodes.register_custom_data('last_leaf') + @nodes.register_custom_data('btree_size') + @node_cache = PersistentObjectCache.new(16384, 5000, BTreeNode, self) + @root = @first_leaf = @last_leaf = nil + @size = 0 # This BTree implementation uses a write cache to improve write # performance of multiple successive read/write operations. This also # means that data may not be written on the backing store until the # sync() or close() methods have been called. A bug in the program or a @@ -87,64 +95,122 @@ PEROBS.log.fatal "Index file #{@nodes.file_name} does not exist" end @node_cache.clear @nodes.open - node = @nodes.total_entries == 0 ? - BTreeNode::create(self) : - BTreeNode::load(self, @nodes.first_entry) + + if @nodes.total_entries == 0 + # We've created a new nodes file + node = BTreeNode::create(self) + else + # We are loading an existing tree. + node = BTreeNode::load_and_link(self, @nodes.first_entry) + @first_leaf = BTreeNode::load_and_link( + self, @nodes.get_custom_data('first_leaf')) + @last_leaf = BTreeNode::load_and_link( + self, @nodes.get_custom_data('last_leaf')) + end set_root(node) + + # Get the total number of entries that are stored in the tree. + @size = @nodes.get_custom_data('btree_size') end # Close the tree file. def close + + def val_perc(value, total) + "#{value} (#{(value.to_f / total*100.0).to_i}%)" + end + sync @nodes.close @root = nil end + # @return true if file is currently open + def is_open? + !@root.nil? + end + # Clear all pools and forget any registered spaces. def clear @node_cache.clear @nodes.clear + @size = 0 set_root(BTreeNode::create(self)) end # Erase the backing store of the BTree. This method should only be called # when not having the BTree open. And it obviously and permanently erases # all stored data from the BTree. def erase @nodes.erase + @size = 0 @root = nil @dirty_flag.forced_unlock end # Flush all pending modifications into the tree file. def sync @node_cache.flush(true) + @nodes.set_custom_data('btree_size', @size) @nodes.sync @dirty_flag.unlock if @dirty_flag.is_locked? end # Check if the tree file contains any errors. # @return [Boolean] true if no erros were found, false otherwise def check(&block) - @root.check(&block) + sync + return false unless @nodes.check + + entries = 0 + res = true + @progressmeter.start('Checking index structure', @size) do |pm| + res = @root.check do |k, v| + pm.update(entries += 1) + block_given? ? yield(k, v) : true + end + end + + unless entries == @size + PEROBS.log.error "The BTree size (#{@size}) and the number of " + + "found entries (#{entries}) don't match" + return false + end + + res end # Register a new node as root node of the tree. + # @param node [BTreeNode] def set_root(node) @root = node @nodes.first_entry = node.node_address end + # Set the address of the first leaf node. + # @param node [BTreeNode] + def set_first_leaf(node) + @first_leaf = node + @nodes.set_custom_data('first_leaf', node.node_address) + end + + # Set the address of the last leaf node. + # @param node [BTreeNode] + def set_last_leaf(node) + @last_leaf = node + @nodes.set_custom_data('last_leaf', node.node_address) + end + # Insert a new value into the tree using the key as a unique index. If the # key already exists the old value will be overwritten. # @param key [Integer] Unique key # @param value [Integer] value def insert(key, value) - @root.insert(key, value) + @size += 1 if @root.insert(key, value) @node_cache.flush end # Retrieve the value associated with the given key. If no entry was found, # return nil. @@ -155,11 +221,11 @@ end # Find and remove the value associated with the given key. If no entry was # found, return nil, otherwise the found value. def remove(key) - removed_value = @root.remove(key) + @size -= 1 unless (removed_value = @root.remove(key)).nil? # Check if the root node only contains one child link after the delete # operation. Then we can delete that node and pull the tree one level # up. This could happen for a sequence of nodes that all got merged to # single child nodes. @@ -183,9 +249,14 @@ # Delete the node at the given address in the BTree file. # @param address [Integer] address in file def delete_node(address) @node_cache.delete(address) @nodes.delete_blob(address) + end + + # @return [Integer] The number of entries stored in the tree. + def entries_count + @size end # @return [String] Human reable form of the tree. def to_s @root.to_s