lib/perobs/BTreeNode.rb in perobs-4.1.0 vs lib/perobs/BTreeNode.rb in perobs-4.2.0

- old
+ new

@@ -37,10 +37,12 @@ # of values and no child references. Branch nodes only contain BTree.order + # 1 number of child references but no values. The is_leaf flag is used to # mark a node as leaf or branch node. class BTreeNode + Stats = Struct.new(:branch_depth, :nodes_count, :leave_nodes, :leaves) + attr_reader :node_address, :parent, :is_leaf, :next_sibling, :prev_sibling, :keys, :values, :children # Create a new BTreeNode object for the given tree with the given parent # or recreate the node with the given node_address from the backing store. @@ -223,10 +225,11 @@ if node.is_leaf return node.insert_element(key, value) else # Descend into the right child node to add the value to. node = node.children[node.search_key_index(key)] + node = node.get_node if node end end PEROBS.log.fatal 'Could not find proper node to add to' end @@ -247,16 +250,72 @@ return node.keys[i] == key ? node.values[i] : nil end # Descend into the right child node to continue the search. node = node.children[i] + node = node.get_node if node end PEROBS.log.fatal "Could not find proper node to get from while " + "looking for key #{key}" end + # Return the key/value pair that matches the given key or the next larger + # key/value pair with a key that is at least as large as key + + # min_miss_increment. + # @param key [Integer] key to search for + # @param min_miss_increment [Integer] minimum required key increment in + # case an exact key match could not be found + # @return [Integer or nil] value that matches the key + def get_best_match(key, min_miss_increment) + node = self + + while node do + # Find index of the entry that best fits the key. + i = node.search_key_index(key) + if node.is_leaf + # This is a leaf node. Check if there is an exact match for the + # given key. + if node.keys[i] == key + # Return the corresponding value/value pair. + return [ key, node.values[i] ] + else + # No exact key match. Now search the larger keys for the first + # that is at least key + min_miss_increment large. + keys = node.keys + keys_length = keys.length + while node + if i >= keys_length + # We've reached the end of a node. Continue search in next + # sibling. + return nil unless (node = node.next_sibling) + node = node.get_node + keys = node.keys + keys_length = keys.length + i = -1 + elsif keys[i] >= key + min_miss_increment + # We've found a key that fits the critera. Return the + # corresponding key/value pair. + return [ keys[i], node.values[i] ] + end + + i += 1 + end + + return nil + end + end + + # Descend into the right child node to continue the search. + node = node.children[i] + node = node.get_node if node + end + + PEROBS.log.fatal "Could not find proper node to get from while " + + "looking for key #{key}" + end + # Return the value that matches the given key and remove the value from # the tree. Return nil if the key is unknown. # @param key [Integer] key to search for # @return [Integer or nil] value that matches the key def remove(key) @@ -275,10 +334,11 @@ end end # Descend into the right child node to continue the search. node = node.children[i] + node = node.get_node if node end PEROBS.log.fatal 'Could not find proper node to remove from' end @@ -308,26 +368,10 @@ trim(mid) @parent end - def merge_node(upper_sibling, parent_index) - if upper_sibling == self - PEROBS.log.fatal "Cannot merge node @#{@node_address} with self" - end - unless upper_sibling.is_leaf - insert_element(@parent.keys[parent_index], upper_sibling.children[0]) - end - upper_sibling.copy_elements(0, self, @keys.size, upper_sibling.keys.size) - if (@next_sibling = link(upper_sibling.next_sibling)) - @next_sibling.prev_sibling = link(self) - end - @tree.delete_node(upper_sibling.node_address) - - @parent.remove_element(parent_index) - end - # Insert the given value or child into the current node using the key as # index. # @param key [Integer] key to address the value or child # @param value_or_child [Integer or BTreeNode] value or BTreeNode # reference @@ -426,17 +470,12 @@ borrow_from_next_sibling(@next_sibling) || merge_with_branch_node(@next_sibling) end end - if @parent.nil? && @children.length == 1 - # If the node just below the root only has one child it will become - # the new root node. - new_root = @children.first - new_root.parent = nil - @tree.set_root(new_root) - end + # Delete the node from the cache and backing store. + @tree.delete_node(node.node_address) end def merge_with_leaf_node(node) if @keys.length + node.keys.length > @tree.order PEROBS.log.fatal "Leaf nodes are too big to merge" @@ -559,40 +598,12 @@ # either the index of an exact match or the index of the position where # the given key would have to be inserted. # @param key [Integer] key to search for # @return [Integer] Index of the matching key or the insert position. def search_key_index(key) - # Handle special case for empty keys list. - return 0 if @keys.empty? - - # Keys are unique and always sorted. Use a binary search to find the - # index that fits the given key. - li = pi = 0 - ui = @keys.size - 1 - while li <= ui - # The pivot element is always in the middle between the lower and upper - # index. - pi = li + (ui - li) / 2 - - if key < @keys[pi] - # The pivot element is smaller than the key. Set the upper index to - # the pivot index. - ui = pi - 1 - elsif key > @keys[pi] - # The pivot element is larger than the key. Set the lower index to - # the pivot index. - li = pi + 1 - else - # We've found an exact match. For leaf nodes return the found index. - # For branch nodes we have to add one to the index since the larger - # child is the right one. - return @is_leaf ? pi : pi + 1 - end - end - # No exact match was found. For the insert operaton we need to return - # the index of the first key that is larger than the given key. - @keys[pi] < key ? pi + 1 : pi + (@is_leaf ? @keys.bsearch_index { |x| x >= key } : + @keys.bsearch_index { |x| x > key }) || @keys.length end # Iterate over all the key/value pairs in this node and all sub-nodes. # @yield [key, value] def each @@ -639,117 +650,123 @@ end # Check consistency of the node and all subsequent nodes. In case an error # is found, a message is logged and false is returned. # @yield [key, value] - # @return [Boolean] true if tree has no errors + # @return [nil or Hash] nil in case of errors or a hash with some + # statistical information about the tree def check - branch_depth = nil + stats = Stats.new(nil, 0, 0, 0) traverse do |node, position, stack| if position == 0 + stats.nodes_count += 1 if node.parent # After a split the nodes will only have half the maximum keys. # For branch nodes one of the split nodes will have even 1 key # less as this will become the branch key in a parent node. if node.keys.size < min_keys - (node.is_leaf ? 0 : 1) node.error "BTreeNode #{node.node_address} has too few keys" - return false + return nil end end if node.keys.size > @tree.order node.error "BTreeNode must not have more then #{@tree.order} " + "keys, but has #{node.keys.size} keys" + return nil end last_key = nil node.keys.each do |key| if last_key && key < last_key node.error "Keys are not increasing monotoneously: " + "#{node.keys.inspect}" - return false + return nil end last_key = key end if node.is_leaf - if branch_depth - unless branch_depth == stack.size + if stats.branch_depth + unless stats.branch_depth == node.tree_level node.error "All leaf nodes must have same distance from root " - return false + return nil end else - branch_depth = stack.size + stats.branch_depth = node.tree_level end if node.prev_sibling.nil? && @tree.first_leaf != node node.error "Leaf node #{node.node_address} has no previous " + "sibling but is not the first leaf of the tree" - return false + return nil end if node.next_sibling.nil? && @tree.last_leaf != node node.error "Leaf node #{node.node_address} has no next sibling " + "but is not the last leaf of the tree" - return false + return nil end unless node.keys.size == node.values.size node.error "Key count (#{node.keys.size}) and value " + "count (#{node.values.size}) don't match" - return false + return nil end unless node.children.empty? node.error "@children must be nil for a leaf node" - return false + return nil end + + stats.leave_nodes += 1 + stats.leaves += node.keys.length else unless node.values.empty? node.error "@values must be nil for a branch node" - return false + return nil end unless node.children.size == node.keys.size + 1 node.error "Key count (#{node.keys.size}) must be one " + "less than children count (#{node.children.size})" - return false + return nil end node.children.each_with_index do |child, i| unless child.is_a?(BTreeNodeLink) node.error "Child #{i} is of class #{child.class} " + "instead of BTreeNodeLink" - return false + return nil end unless child.parent.is_a?(BTreeNodeLink) node.error "Parent reference of child #{i} is of class " + "#{child.parent.class} instead of BTreeNodeLink" - return false + return nil end if child == node node.error "Child #{i} points to self" - return false + return nil end if stack.include?(child) node.error "Child #{i} points to ancester node" - return false + return nil end unless child.parent == node node.error "Child #{i} does not have parent pointing " + "to this node" - return false + return nil end if i > 0 unless node.children[i - 1].next_sibling == child node.error "next_sibling of node " + "#{node.children[i - 1].node_address} " + "must point to node #{child.node_address}" - return false + return nil end end if i < node.children.length - 1 unless child == node.children[i + 1].prev_sibling node.error "prev_sibling of node " + "#{node.children[i + 1].node_address} " + "must point to node #{child.node_address}" - return false + return nil end end end end elsif position <= node.keys.size @@ -759,28 +776,28 @@ if !node.is_leaf unless node.children[index].keys.last < node.keys[index] node.error "Child #{node.children[index].node_address} " + "has too large key #{node.children[index].keys.last}. " + "Must be smaller than #{node.keys[index]}." - return false + return nil end unless node.children[position].keys.first >= node.keys[index] node.error "Child #{node.children[position].node_address} " + "has too small key #{node.children[position].keys.first}. " + "Must be larger than or equal to #{node.keys[index]}." - return false + return nil end else if block_given? # If a block was given, call this block with the key and value. - return false unless yield(node.keys[index], node.values[index]) + return nil unless yield(node.keys[index], node.values[index]) end end end end - true + stats end def is_top? @parent.nil? || @parent.parent.nil? || @parent.parent.parent.nil? end @@ -832,10 +849,11 @@ break end str = (is_last_child ? ' ' : ' |') + str node = node.parent + node = node.get_node if node end str end @@ -868,9 +886,20 @@ end end s end + + def tree_level + level = 1 + node = self + while (node = node.parent) + level += 1 + end + + level + end + def error(msg) PEROBS.log.error "Error in BTreeNode @#{@node_address}: #{msg}" end