module Eco::Data::Locations::NodeBase # Generic treeifier # @note expects nodes to have these properties: # 1. `id`, `name` and `parentId` # 2. `parent` # 3. `tracked_level` module Treeify include Eco::Language::AuxiliarLogger # @note if block is no given, it auto-detects the `serializer` **block**. # @yield [NodeBase] for each included node # @yieldreturn [Hash] custom hash model when treeifying (allows to set more keys/properties). # @nodes [Array] list of nodes # @return [Array] a hierarchical tree of nested Hashes via `nodes` key. def treeify(nodes, skipped: [], unlinked_trees: [], &block) return [] if nodes.empty? block ||= nodes.first.class.serializer done_ids = {} warns = [] parents = parents_hash(nodes) get_children(nil, parents, done_ids: done_ids, skipped: skipped, warns: warns, &block).tap do |tree| check_results( tree, nodes, parents, done_ids: done_ids, skipped: skipped, unlinked_trees: unlinked_trees, warns: warns, &block ) log(:warn) { warns.join("\n") } unless warns.empty? end end private # @return [Hash] where `key`s are all the `parentId` of the nodes # and `value` and `Array` of those nodes that have that `parentId` def parents_hash(nodes) nodes.each_with_object({}) do |node, parents| (parents[node.parentId] ||= []).push(node) end end # @note # 1. It tracks the `level` where nodes are discovered # 2. If the node had already a tracked level, it warns and keeps the previous level # 3. The above can translate into some # @yield [node] # @yieldreturn [Hash] custom hash model when treeifying def get_children(node_id, parents, parent: nil, level: 0, done_ids: {}, skipped: [], warns: [], &block) level_ids = [] (parents[node_id] ||= []).each_with_object([]) do |child, results| # Skipping done id. Add proper warnings... # => rely on `done_ids` to identify if an `id` has already been done next report_skipped_node( child, parent, done_ids, level, level_ids, parents, skipped: skipped, warns: warns ) if done_ids[child.id] # Fill in tracking data child.parent = parent child.tracked_level = level + 1 level_ids << child.id node_hash = { "id" => child.id, "name" => child.name, "parent_id" => node_id } node_hash.merge(yield(child)) if block_given? # we must register the `id` before recursing down done_ids[child.id] = child children = get_children( child.id, parents, parent: child, done_ids: done_ids, level: level + 1, skipped: skipped, warns: warns, &block ).tap do |desc| if (nil_count = desc.count(nil)) > 0 log(:debug) { "get_children gave #{nil_count} nil values for nodes of #{child.id}" } end end results << node_hash.merge({ "nodes" => children.compact }) end end def parent_msg(parent) parent ? "child of '#{parent.id}'" : "top level" end def level_msg(level) "at lev: #{level}" end def indent(level) "#{" " * level}" end # Method to ensure the results are consistent # @param skipped [Array] those skipped because repeated # 1. It will add children of them that were skipped. This won't clash with unlinked nodes # because otherwise would be part of `done_ids` anyway. # @param unlinked_trees [Array] by excluding those done and skipped, # it will treeify the unlinked nodes (the exclusion applies to `parants_hash`) def check_results(tree, nodes, parents, done_ids: {}, skipped: [], unlinked_trees: [], warns: [], &block) update_skipped(skipped, parents, done_ids: done_ids) unless skipped.empty? if done_ids.count != nodes.count tracked_nodes = done_ids.values untracked_nodes = nodes - tracked_nodes - skipped # skipped keys is inherent, as they were excluded because of id clash with done_ids unlinked_parent_ids = (parents.keys - done_ids.keys).compact msg = [] # The reason of missing nodes in the output tree is unknown! if skipped.empty? && unlinked_parent_ids.empty? msg << "BUG in this library (open issue with maintainers)." msg << "There were no skipped nodes nor missin referred parents, and yet:" msg << " • the tree nodes count: #{done_ids.count} ..." msg << " • doesn't match the original nodes count: #{nodes.count}" raise msg.join("\n") end unless unlinked_parent_ids.empty? msg << "There are #{unlinked_parent_ids.count} referred parent_id's NOT linked to the root:" msg << " • total_nodes: #{nodes.count}" msg << " • tracked_nodes: #{tracked_nodes.count}" msg << " • untracked_nodes: #{untracked_nodes.count}" msg << " • unlinked_parents: #{unlinked_parent_ids.count}" msg << " • skipped (repeated) nodes: #{skipped.count}" unless skipped.empty? unlinked_nodes = nodes - skipped unlinked_parents = parents.slice(*unlinked_parent_ids) # doesn'thave skipped ones residual_skipped = [] unlinked_trees.concat \ get_unlinked_trees( unlinked_nodes, unlinked_parents, done_ids: done_ids, skipped: residual_skipped, warns: warns, &block ) update_skipped(skipped, parents, with: residual_skipped, done_ids: done_ids) unless residual_skipped.empty? tracked_nodes = done_ids.values untracked_nodes = nodes - tracked_nodes - skipped unlinked_parent_ids = (parents.keys - done_ids.keys).compact msg << "After treeifying via the unlinked_parents:" msg << " • total_nodes: #{nodes.count}" msg << " • tracked_nodes: #{tracked_nodes.count}" msg << " • untracked_nodes: #{untracked_nodes.count}" msg << " • unlinked_parents: #{unlinked_parent_ids.count}" msg << " • skipped in this step: #{residual_skipped.count}" end msg << " • total skipped (repeated) nodes: #{skipped.count} !!" unless skipped.empty? warns << msg.join("\n") nil end end # Treeifies the unlinked nodes by scoping existing parent ids. def get_unlinked_trees(nodes, parents, done_ids: {}, skipped: [], warns: [], &block) node_ids = nodes.map(&:id) parent_ids = parents.keys & node_ids missing_parent_ids = parents.keys - parent_ids missing_parents = parents.slice(*missing_parent_ids) warns << " • missing_parents: #{missing_parents.count}" nil_parent_nodes = missing_parents.each_with_object([]) do |(id, nodes), mem| nodes.each {|node| node.parent_id = nil} mem.concat(nodes) end rest_parents = parents.slice(*parent_ids).merge({ nil => nil_parent_nodes }) get_children(nil, rest_parents, done_ids: done_ids, skipped: skipped, warns: warns, &block) end # Same as `get_children` but not performing checks and with # option to retrieve the source nodes (rather than parsing to `Hash`). # @note serves the purpose to identify what linked children got inherently # skipped, because their parent was skipped. def get_tree_nodes_raw(node_id, parents, src_plain: true, &block) (parents[node_id] ||= []).each_with_object([]) do |child, results| unless src_plain node_hash = { "id" => child.id, "name" => child.name, "parent_id" => node_id } node_hash.merge(yield(child)) if block_given? end descendants = get_tree_nodes_raw(child.id, parents, src_plain: src_plain, &block).tap do |desc| if (nil_count = desc.count(nil)) > 0 puts "get_tree_nodes_raw gave #{nil_count} nil values for nodes of #{child.id}" end end if src_plain results.concat(descendants) else results << node_hash.merge({ "nodes" => descendants.compact }) end end end # It goes through the `with` skipped nodes, and adds them to the `skipped` ones # by including their not tracked/done/included children. def update_skipped(skipped, parents, with: skipped, done_ids: {}) raw_skipped_children = with.each_with_object([]) do |node, mem| mem << node mem.concat get_tree_nodes_raw(node.id, parents) end.uniq skipped_children = raw_skipped_children - done_ids.values skipped.concat(skipped_children).uniq! skipped end # With given a skipped `node` (repeated `id`), it gives different warnings, # provided that the context in which the double-up `id` happened is identified. def report_skipped_node(node, parent, done_ids, level, level_ids, parents, skipped: [], warns: []) skipped << node lev = level + 1 done_node = done_ids[node.id] prev_parent = node.parent prev_level = node.tracked_level node_dup = done_node && (done_node != node) lev_dup = level_ids.include?(node.id) multi_parent = (!prev_parent == !!parent) || (prev_parent && (prev_parent.id != parent.id)) row_num = node.respond_to?(:row_num) ? node.row_num : nil row_str = row_num ? "(Row: #{row_num}) " : '' node_str = "#{row_str}Node '#{node.id}' #{level_msg(lev)} (#{parent_msg(parent)})" msg = [] msg << "#{indent(level)}Skipping #{node_str}." # Implementation integrity guard # => as we don't register in `done_ids` those that are skipped, # when a `node` has already a tracked `parent` or `level`, # it should not happen that the `node.id` retrieves a different node in `node_ids`. if (prev_parent || prev_level) && node_dup # && !done_node str = "Integrity issue in Treeify. " str << "A Node with tracked level or parent should be present in done_ids, but it isn't." str << "\n • #{node_str}." raise str end # From here on, do NOT expect `node_dup` where `node` has tracked `parent` or `level`. # Implementation integrity guard # => as`level_ids` only relates to the current `parent`, # and as `done_ids` don't get those skipped, # when we get an ID double-up in `level_ids`, # there must be a `done_node` AND # `done_node` can only have `tracked_level` matching the current one # Moreover, they should have exactly the same parentId. if lev_dup && (multi_parent || !done_node || done_node.tracked_level != lev) str = "Integrity issue in Treeify. " str << "A Node with ID already in level_ids should have same tracked_level as current level." str << "\n • #{node_str}." raise str end # From here on, do NOT expect `lev_up` where there isn't `done_node` or it has different level or parent. cyclic = multi_parent && done_node == node double_up = node_dup || lev_dup if cyclic str = "#{indent(level + 1)}Cyclic definition. By skipping the node, " str << "it will remain as #{parent_msg(done_node.parent)} (#{level_msg(prev_level)})." msg << str end if double_up str = "#{indent(level + 1)}Node ID was already tracked as #{level_msg(done_node.tracked_level)}, " str << "as #{parent_msg(done_node.parent)} " str << "(same parent)." if lev_dup str << "(different parent)." if multi_parent msg << str end unless cyclic || double_up str = "Integrity issue in Treeify. " str << "Skipping is only applicable to double_ups or cyclic nodes." str << "\n • #{node_str}." raise str end unless (children = parents[node.id] || []).empty? str = "#{indent(level + 1)}Immediate children of skipped node (will probably be missing): " str << children.map {|ch| "'#{ch.id}'"}.join(", ") msg << str end warns << msg.join("\n") nil end end end