lib/perobs/BTree.rb in perobs-4.0.0 vs lib/perobs/BTree.rb in perobs-4.1.0
- old
+ new
@@ -1,10 +1,10 @@
# encoding: UTF-8
#
# = BTreeNode.rb -- Persistent Ruby Object Store
#
-# Copyright (c) 2016, 2017 by Chris Schlaeger <chris@taskjuggler.org>
+# Copyright (c) 2016, 2017, 2018 by Chris Schlaeger <chris@taskjuggler.org>
#
# MIT License
#
# Permission is hereby granted, free of charge, to any person obtaining
# a copy of this software and associated documentation files (the
@@ -30,28 +30,31 @@
require 'perobs/PersistentObjectCache'
require 'perobs/BTreeNode'
module PEROBS
- # This BTree class is very similar to a classic BTree implementation. It
+ # This BTree class is very similar to a classic B+Tree implementation. It
# manages a tree that is always balanced. The BTree is stored in the
# specified directory and partially kept in memory to speed up operations.
# The order of the tree specifies how many keys each node will be able to
# hold. Leaf nodes will have a value associated with each key. Branch nodes
# have N + 1 references to child nodes instead.
class BTree
- attr_reader :order, :nodes, :node_cache
+ attr_reader :order, :nodes, :node_cache, :first_leaf, :last_leaf, :size
# Create a new BTree object.
# @param dir [String] Directory to store the tree file
# @param name [String] Base name of the BTree related files in 'dir'
# @param order [Integer] The maximum number of keys per node. This number
# must be odd and larger than 2 and smaller than 2**16 - 1.
- def initialize(dir, name, order)
+ # @param progressmeter [ProgressMeter] reference to a ProgressMeter object
+ def initialize(dir, name, order, progressmeter)
@dir = dir
@name = name
+ @progressmeter = progressmeter
+
unless order > 2
PEROBS.log.fatal "BTree order must be larger than 2, not #{order}"
end
unless order % 2 == 1
PEROBS.log.fatal "BTree order must be an uneven number, not #{order}"
@@ -60,13 +63,18 @@
PEROBS.log.fatal "BTree order must be smaller than #{2**16 - 1}"
end
@order = order
# This EquiBlobsFile contains the nodes of the BTree.
- @nodes = EquiBlobsFile.new(@dir, @name,
+ @nodes = EquiBlobsFile.new(@dir, @name, @progressmeter,
BTreeNode::node_bytes(@order))
- @node_cache = PersistentObjectCache.new(512, BTreeNode, self)
+ @nodes.register_custom_data('first_leaf')
+ @nodes.register_custom_data('last_leaf')
+ @nodes.register_custom_data('btree_size')
+ @node_cache = PersistentObjectCache.new(16384, 5000, BTreeNode, self)
+ @root = @first_leaf = @last_leaf = nil
+ @size = 0
# This BTree implementation uses a write cache to improve write
# performance of multiple successive read/write operations. This also
# means that data may not be written on the backing store until the
# sync() or close() methods have been called. A bug in the program or a
@@ -87,64 +95,122 @@
PEROBS.log.fatal "Index file #{@nodes.file_name} does not exist"
end
@node_cache.clear
@nodes.open
- node = @nodes.total_entries == 0 ?
- BTreeNode::create(self) :
- BTreeNode::load(self, @nodes.first_entry)
+
+ if @nodes.total_entries == 0
+ # We've created a new nodes file
+ node = BTreeNode::create(self)
+ else
+ # We are loading an existing tree.
+ node = BTreeNode::load_and_link(self, @nodes.first_entry)
+ @first_leaf = BTreeNode::load_and_link(
+ self, @nodes.get_custom_data('first_leaf'))
+ @last_leaf = BTreeNode::load_and_link(
+ self, @nodes.get_custom_data('last_leaf'))
+ end
set_root(node)
+
+ # Get the total number of entries that are stored in the tree.
+ @size = @nodes.get_custom_data('btree_size')
end
# Close the tree file.
def close
+
+ def val_perc(value, total)
+ "#{value} (#{(value.to_f / total*100.0).to_i}%)"
+ end
+
sync
@nodes.close
@root = nil
end
+ # @return true if file is currently open
+ def is_open?
+ !@root.nil?
+ end
+
# Clear all pools and forget any registered spaces.
def clear
@node_cache.clear
@nodes.clear
+ @size = 0
set_root(BTreeNode::create(self))
end
# Erase the backing store of the BTree. This method should only be called
# when not having the BTree open. And it obviously and permanently erases
# all stored data from the BTree.
def erase
@nodes.erase
+ @size = 0
@root = nil
@dirty_flag.forced_unlock
end
# Flush all pending modifications into the tree file.
def sync
@node_cache.flush(true)
+ @nodes.set_custom_data('btree_size', @size)
@nodes.sync
@dirty_flag.unlock if @dirty_flag.is_locked?
end
# Check if the tree file contains any errors.
# @return [Boolean] true if no erros were found, false otherwise
def check(&block)
- @root.check(&block)
+ sync
+ return false unless @nodes.check
+
+ entries = 0
+ res = true
+ @progressmeter.start('Checking index structure', @size) do |pm|
+ res = @root.check do |k, v|
+ pm.update(entries += 1)
+ block_given? ? yield(k, v) : true
+ end
+ end
+
+ unless entries == @size
+ PEROBS.log.error "The BTree size (#{@size}) and the number of " +
+ "found entries (#{entries}) don't match"
+ return false
+ end
+
+ res
end
# Register a new node as root node of the tree.
+ # @param node [BTreeNode]
def set_root(node)
@root = node
@nodes.first_entry = node.node_address
end
+ # Set the address of the first leaf node.
+ # @param node [BTreeNode]
+ def set_first_leaf(node)
+ @first_leaf = node
+ @nodes.set_custom_data('first_leaf', node.node_address)
+ end
+
+ # Set the address of the last leaf node.
+ # @param node [BTreeNode]
+ def set_last_leaf(node)
+ @last_leaf = node
+ @nodes.set_custom_data('last_leaf', node.node_address)
+ end
+
# Insert a new value into the tree using the key as a unique index. If the
# key already exists the old value will be overwritten.
# @param key [Integer] Unique key
# @param value [Integer] value
def insert(key, value)
- @root.insert(key, value)
+ @size += 1 if @root.insert(key, value)
@node_cache.flush
end
# Retrieve the value associated with the given key. If no entry was found,
# return nil.
@@ -155,11 +221,11 @@
end
# Find and remove the value associated with the given key. If no entry was
# found, return nil, otherwise the found value.
def remove(key)
- removed_value = @root.remove(key)
+ @size -= 1 unless (removed_value = @root.remove(key)).nil?
# Check if the root node only contains one child link after the delete
# operation. Then we can delete that node and pull the tree one level
# up. This could happen for a sequence of nodes that all got merged to
# single child nodes.
@@ -183,9 +249,14 @@
# Delete the node at the given address in the BTree file.
# @param address [Integer] address in file
def delete_node(address)
@node_cache.delete(address)
@nodes.delete_blob(address)
+ end
+
+ # @return [Integer] The number of entries stored in the tree.
+ def entries_count
+ @size
end
# @return [String] Human reable form of the tree.
def to_s
@root.to_s