require 'nodepile/colspecs.rb'
require 'nodepile/rec_source.rb'
require 'nodepile/pragmas.rb'
require 'nodepile/rule_eval.rb'

module Nodepile

  # Container class for managing a Nodepile. A nodepile consists of a set of
  # entities including nodes, edges, and rules. It includes methods for
  # enumerating the various items in the collection, filtering, and
  # deducing the existence of implied edges using rules.
  class PileOrganizer
    # see nodepile/base_structs.rb for definition of an Nodepile::EntityPacket

    class ERecStack; end # ERecStack defined further down
    class RuleCache; end # defined further down

    SourceData = Struct.new(:source_name, :highest_sequence_num)

    def initialize
      # Nodes and edges are keyed stacks: records sharing a key overlay each other.
      @nodes = Hash.new { |h, k| h[k] = ERecStack.new }
      @edges = Hash.new { |h, k| h[k] = ERecStack.new }
      @rules = Array.new # not subject to overlaying with themselves
      @pragmas = Nodepile::Pragmas.new
      # Sources are registered lazily; a nil name stands in for an unspecified source.
      @sources = Hash.new { |h, k| h[k] = SourceData.new(k, 0) }
      @last_source_name = nil
      @dirty = true # true whenever rule overlays need to be (re)calculated
    end

    # Add one entity record to the pile.
    #
    # If a source name is not specified, then the source is assumed to be
    # the last source that was used to append.
    #
    # NOTE(review): earlier documentation described automatic sequence
    # numbering per source; the code visible here only registers the source
    # name and never updates SourceData#highest_sequence_num.
    #
    # @param kaa [KeyedArrayAccessor] Includes metadata about @type, @key, @is_implied
    # @return [self]
    # @raise [RuntimeError] if the record's '@type' is not :node, :edge, :rule or :pragma
    def append(kaa)
      @last_source_name = kaa.source || @last_source_name
      @sources[@last_source_name] # lookup registers the source via the default block

      # Note that the way things work below deliberately "overlays" items
      # when a matching key is encountered. Rule recalculation is deferred
      # until the next read (see #_update_rule_impacts).
      entity_type = kaa['@type']
      case entity_type
      when :node
        @nodes[kaa['@key']] << kaa
      when :edge
        @edges[kaa['@key']] << kaa
      when :rule
        @rules << RuleCache.new(kaa)
      when :pragma
        @pragmas.parse(kaa['_id'])
      else
        raise "Unhandled entity entity type #{kaa['@type'].inspect}"
      end

      # Any new node, edge or rule can change which rules apply where, so the
      # previously calculated overlays must be considered stale.
      @dirty = true unless entity_type == :pragma
      self
    end

    def node_count = @nodes.length
    def rule_count = @rules.length
    def edge_count = @edges.length
    def pragmas = @pragmas

    # Look up the record stack for a node (String key) or an edge (Array key).
    #
    # An unknown key yields a detached, empty ERecStack; the lookup itself
    # never adds an entry to the pile.
    #
    # @param key [String, Array] node key or edge key
    # @return [ERecStack]
    # @raise [RuntimeError] if key is neither a String nor an Array
    def entity_record(key)
      _update_rule_impacts
      case key
      when String
        @nodes.fetch(key) { ERecStack.new }
      when Array
        @edges.fetch(key) { ERecStack.new }
      else
        raise "Unrecognized key structure/type"
      end
    end

    # Provide summarized edge records in order.
    # Returns an Enumerator when no block is given.
    def edge_records
      return enum_for(:edge_records) unless block_given?

      _update_rule_impacts
      @edges.each_value { |stack| yield(stack.summary) }
    end

    # Provide the summarized node records.
    # Returns an Enumerator when no block is given.
    def node_records
      return enum_for(:node_records) unless block_given?

      _update_rule_impacts
      @nodes.each_value { |stack| yield(stack.summary) }
    end

    # Alias for #append()
    # @param entity_record [Nodepile::KeyedArrayAccessor]
    def <<(entity_record) = append(entity_record)

    # Loads the given file (on top of anything already stored in this object).
    # The first record of the file is treated as the header row.
    #
    # @param tabular_filepath [String] path handed to Nodepile::TabularRecordSource
    # @return [Integer] number of entities appended from the file
    # @raise [Nodepile::InputColumnSpecs::InvalidRecordError] re-raised with
    #        rec_num and file_path filled in
    def load_from_file(tabular_filepath)
      source = Nodepile::TabularRecordSource.new(tabular_filepath, format: :guess)
      specs = nil
      loaded_entity_count = 0
      metadata = Hash.new
      source.each do |(rec, rec_num)|
        if specs.nil? # first row is header
          specs = Nodepile::InputColumnSpecs.new(rec, metadata_key_prefix: '@')
          next
        end

        begin
          specs.parse(rec, source: tabular_filepath,
                           ref_num: rec_num,
                           metadata: metadata) do |keyed_array_accessor|
            append(keyed_array_accessor)
            loaded_entity_count += 1
          end
        rescue Nodepile::InputColumnSpecs::InvalidRecordError => err
          # re-raise but add info about the record number that triggered the error
          err.rec_num = rec_num
          err.file_path = tabular_filepath
          raise # re-raise
        end
      end
      loaded_entity_count
    end

    private

    # This sledgehammer approach deletes all calculated impacts that may
    # have previously been applied to the @edges and @nodes and recalculates
    # all of them.
    #
    # Method is a no-op if the structures are up-to-date.
    #
    # @param force [Boolean] recalculate even when nothing is marked dirty
    def _update_rule_impacts(force = false)
      return nil unless force || @dirty

      @nodes.each_value(&:purge_rule_overlays)
      @edges.each_value(&:purge_rule_overlays)
      @rules.each do |rulecache|
        targets = rulecache.relevant_entity_type == :node ? @nodes : @edges
        targets.each_value do |erstack|
          next unless rulecache.match?(erstack.summary)

          # calculate the rule as applied to the given node/edge
          erstack << rulecache.eval_using(erstack.summary)
        end
      end
      @dirty = false
    end

    # An ERecStack is a data structure used for holding and summarizing
    # overlay-able records related to a given Node or Edge which can include
    # "rules" that apply to that node/stack
    class ERecStack
      def initialize
        @a = Array.new
        @summary = nil
        # NOTE(review): CrudeCalculationCache is not defined in this file and
        # @mc is never read here; presumably provided by a required file.
        @mc = CrudeCalculationCache.new
      end

      def inspect
        "#<#{self.class}:0x#{object_id} type= #{type} key=#{self.key.inspect} depth=#{@a.length}> "
      end

      # Type and key are taken from the first record on the stack, so both
      # raise NoMethodError on an empty stack.
      def type = @a.first['@type']
      def key = @a.first['@key']
      def is_node? = type == :node
      def is_edge? = type == :edge
      def summary = @summary
      def to_a = @a

      # A stack of type :node or :edge is implied if it contains
      # no ERec records where the is_implied attribute is false.
      # NOTE(review): the original comment was truncated; presumably the
      # return value is undefined for stacks of any other type.
      def is_implied
        @a.none? { |kaa| !kaa['@is_implied'] && [:node, :edge].include?(kaa['@type']) }
      end

      # Delete overlayed rule records and rebuild the summary from the
      # records that remain, so no rule-derived values linger in it.
      # @return [void]
      def purge_rule_overlays
        @a.delete_if { |rec| rec['@type'] == :rule }
        _rebuild_summary
      end

      # Note that this method does not verify whether it is appropriate to stack the new
      # record and assumes callers have already done this due-diligence.
      # @param rec [KeyedArrayAccessor]
      # @return [self]
      def <<(rec)
        raise "ERecStack may only hold objects of type Nodepile::KeyedArrayAccessor" unless rec.is_a?(Nodepile::KeyedArrayAccessor)

        # Keep the summary up-to-date: the first record seeds it (as a copy so
        # the stored record is never mutated), later ones are merged in.
        @a << rec
        @summary = if @a.length == 1
                     rec.dup
                   else
                     self.class._update_summary(@summary, rec)
                   end
        self
      end

      # Yield each stored record in stacking order.
      # Returns an Enumerator when no block is given.
      def each_keyed_array
        return enum_for(:each_keyed_array) unless block_given?

        @a.each { |erec| yield erec }
      end

      private

      # Recompute @summary from scratch using the same logic as #<<.
      # Leaves @summary nil when the stack is empty.
      def _rebuild_summary
        @summary = nil
        @a.each do |rec|
          @summary = @summary.nil? ? rec.dup : self.class._update_summary(@summary, rec)
        end
      end

      # Merge one more record into an existing summary.
      # (`private` does not apply to `def self.` methods, so this stays
      # callable as self.class._update_summary.)
      #
      # @param cur_summary [KeyedArrayAccessor, nil] summary so far (mutated in place)
      # @param new_overlay [KeyedArrayAccessor] record being added
      # @return [KeyedArrayAccessor] the updated summary
      def self._update_summary(cur_summary, new_overlay)
        # Copy rather than alias so later merges never mutate a stored record.
        return new_overlay.dup if cur_summary.nil?

        cur_summary.underlay!(new_overlay)
        cur_summary.source = nil if cur_summary.source != new_overlay.source
        cur_summary.ref_num = nil # summary no longer represents a single ref_num
        cur_summary.update_metadata('@is_implied', false) unless new_overlay['@is_implied']
        cur_summary
      end
    end # class Nodepile::PileOrganizer::ERecStack

    # Represents cached information about a single specific rule
    class RuleCache
      # @param rule_erec [KeyedArrayAccessor] created from this rule record
      # @raise [RuntimeError] if the record's '@type' is not :rule
      def initialize(rule_erec)
        @er = rule_erec.freeze # just to avoid casual alteration
        raise "Only ERecs of type :rule may be stored in this structure" unless self.type == :rule

        # One verifier per key element ([*key] wraps a String key into a one-element list).
        @verifiers = [*self.key].map { |s| Nodepile::InputColumnSpecs.make_pattern_match_verifier(s) }
        @rule_eval = RuleRecordEvaluator.new(@er)
      end

      def inspect
        "#<#{self.class}:0x#{object_id} key=#{self.key.inspect} > "
      end

      def type = @er['@type']
      def is_implied = @er['@is_implied']
      def key = @er['@key']

      # Note that a rule that uses dynamic matching cannot precalculate which
      # records it matches and must (to be safe) recalculate the match in response
      # to any changes
      def uses_dynamic_match? = @rule_eval.uses_dynamic_match?

      # @param kaa [KeyedArrayAccessor] a set of field values that will be
      #        tested against this rule for matching.
      def match?(kaa) = @rule_eval.match_record?(kaa)

      # @param kaa [KeyedArrayAccessor] record the rule is evaluated against
      def eval_using(kaa) = @rule_eval.calculate_rule(kaa)

      # A String key marks a node rule; any other key shape is treated as an edge rule.
      def relevant_entity_type = @er['@key'].is_a?(String) ? :node : :edge
    end # class Nodepile::PileOrganizer::RuleCache
  end # class PileOrganizer
end # module Nodepile