lib/tap/support/audit.rb in tap-0.11.1 vs lib/tap/support/audit.rb in tap-0.12.0

- old
+ new

@@ -1,411 +1,332 @@ module Tap - module Support + module Support - # Marks the merge of multiple Audit trails - class AuditMerge < Array - - # True if another is an AuditMerge and passes Array#== - def ==(another) - another.kind_of?(AuditMerge) && super - end - end - - # Marks the expansion of an Audit trail - class AuditIterate - attr_reader :index - def initialize(index) @index = index end - - # True if another is an AuditIterate with the same index. - def ==(another) - another.kind_of?(AuditIterate) && another.index == index - end - - # Returns a string like '_iterate(<index>)'. - def to_s - "_iterate(#{index})" - end - end - - # Audit provides a way to track the values (inputs and results) passed among - # tasks or, more generally, any Executable. Audits allow you to track inputs - # as they make their way through a workflow, and have great utility in - # debugging and record keeping. + # Audit provides a way to track the values passed among tasks or, more + # generally, any Executable. Audits collectively build a {directed + # acyclic graph}[http://en.wikipedia.org/wiki/Directed_acyclic_graph] + # of task execution and have great utility in debugging and record keeping. # - # During execution, the inputs to a task are used to initialize an Audit. - # These inputs are the original value of the audit and mark the beginning - # of an audit trail; every task adds to the trail by recording its result - # and itself as the 'source' of the result. + # Audits record a key, a current value, and the previous audit(s) in the + # trail. Keys are arbitrary identifiers of where the value comes from. + # To illustrate, let's use symbols as keys. 
# - # Audits can take any object as a source, so for illustration let's use some - # symbols: - # # # initialize a new audit - # a = Audit.new(1, nil) + # _a = Audit.new(:one, 1) + # _a.key # => :one + # _a.value # => 1 # - # # record some values - # a._record(:A, 2) - # a._record(:B, 3) + # # build a short trail + # _b = Audit.new(:two, 2, _a) + # _c = Audit.new(:three, 3, _b) # - # Now you can pull up the source and value trails, as well as the current - # and original values: + # _a.sources # => [] + # _b.sources # => [_a] + # _c.sources # => [_b] # - # a._source_trail # => [nil, :A, :B] - # a._value_trail # => [1, 2, 3] + # Audits allow you to track back through the sources of each audit to build + # a trail describing how a particular value was produced. # - # a._original # => 1 - # a._original_source # => nil + # _c.trail # => [_a,_b,_c] + # _c.trail {|audit| audit.key } # => [:one, :two, :three] + # _c.trail {|audit| audit.value } # => [1,2,3] # - # a._current # => 3 - # a._current_source # => :B + # Any number of audits may share the same source, so forks are naturally + # supported. # - # Merges are supported by using an array of the merged trails (actually - # an AuditMerge) as the source, and an array of the merged values as the - # original value. + # _d = Audit.new(:four, 4, _b) + # _d.trail # => [_a,_b,_d] # - # b = Audit.new(10, nil) - # b._record(:C, 11) - # b._record(:D, 12) + # _e = Audit.new(:five, 5, _b) + # _e.trail # => [_a,_b,_e] # - # c = Audit.merge(a, b) - # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]] ] - # c._value_trail # => [ [[1,2,3], [10, 11, 12]] ] - # c._current # => [3, 12] + # Merges are supported by specifying more than one source. 
Merges have + # the effect of nesting audit trails within an array: # - # c._record(:E, "a string value") - # c._record(:F, {'a' => 'hash value'}) - # c._record(:G, ['an', 'array', 'value']) + # _f = Audit.new(:six, 6) + # _g = Audit.new(:seven, 7, _f) + # _h = Audit.new(:eight, 8, [_c,_d,_g]) + # _h.trail # => [[[_a,_b,_c], [_a,_b,_d], [_f,_g]], _h] + # + # Nesting can get quite ugly after a couple merges so Audit provides a + # scalable pretty-print dump that helps visualize the audit trail. # - # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G] - # c._value_trail # => [ [[1,2,3], [10, 11, 12]], "a string value", {'a' => 'hash value'}, ['an', 'array', 'value']] + # "\n" + _h.dump + # # => %q{ + # # o-[one] 1 + # # o-[two] 2 + # # | + # # |-o-[three] 3 + # # | | + # # `---o-[four] 4 + # # | | + # # | | o-[six] 6 + # # | | o-[seven] 7 + # # | | | + # # `-`-`-o-[eight] 8 + # # } # - # Audit supports forks by duplicating the source and value trails. Forks - # can be developed independently. Audits are also forked during a merge; - # notice the additional record in 'a' doesn't change the source trail for - # 'c': + # In practice, tasks are recorded as keys. Thus audit trails can be used + # to access task configurations and other information that may be useful + # when creating reports or making workflow decisions. Note that by + # convention Audits and non-Audit methods that return Audits are + # prefixed with an underscore. # - # a1 = a._fork - # - # a._record(:X, -1) - # a1._record(:Y, -2) - # - # a._source_trail # => [nil, :A, :B, :X] - # a1._source_trail # => [nil, :A, :B, :Y] - # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G] - # - # The data structure for an audit gets nasty after a few merges because - # the lead array gets more and more nested. 
Audit provides iterators - # to help gain access, as well as a printing method to visualize the - # audit trail: - # - # c._to_s - # # => - # # o-[] 1 - # # o-[A] 2 - # # o-[B] 3 - # # | - # # | o-[] 10 - # # | o-[C] 11 - # # | o-[D] 12 - # # | | - # # `-`-o-[E] "a string value" - # # o-[F] {"a"=>"hash value"} - # # o-[G] ["an", "array", "value"] - # - # In practice, tasks are recorded as sources. Thus source trails can be used - # to access task configurations and other information that may be useful - # when creating reports or making workflow decisions. - # #-- - # TODO: - # Track nesting level of ams; see if you can hook this into the _to_s process to make - # extraction/presentation of audits more manageable. + # Note Audit could easily be expanded to track sinks as well as sources. + # In initialize: # - # Create a FirstLastArray to minimize the audit data collected. Allow different audit - # modes: - # - full ([] both) - # - source_only (fl value) - # - minimal (fl source and value) - # - # Try to work a _to_s that doesn't repeat the same audit twice. Think about a format - # like: - # | - # ------|-----+ - # | | - # ------|-----|-----+ - # | | | - # `-----`-----`-o-[j] j5 - # + # @sinks = [] + # sources.each do |source| + # source.sinks << self + # end + # + # The downside is that this may not circumvent cleanly if you want light + # or no auditing. It also adds additional references which will prevent + # garbage collection. On the plus side, sinks will make it easier to + # truly use Audits as a DAG class Audit class << self - - # Creates a new Audit by merging the input audits. The value of the new - # Audit will be an array of the _current values of the inputs. The source - # will be an AuditMerge whose values are forks of the inputs. Non-Audit - # sources may be provided; they are initialized to Audits before merging. 
- # - # a = Audit.new - # a._record(:a, 'a') - # - # b = Audit.new - # b._record(:b, 'b') - # - # c = Audit.merge(a, b, 1) - # c._record(:c, 'c') - # - # c._values # => [['a','b', 1], 'c'] - # c._sources # => [AuditMerge[a, b, Audit.new(1)], :c] - # - # If no audits are provided, merge returns a new Audit. If only one - # audit is provided, merge returns a fork of that audit. - def merge(*audits) - case audits.length - when 0 then Audit.new - when 1 then audits[0]._fork - else - sources = AuditMerge.new - audits.each {|a| sources << (a.kind_of?(Audit) ? a._fork : Audit.new(a)) } - values = audits.collect {|a| a.kind_of?(Audit) ? a._current : a} + + # Produces a pretty-print dump of the specified audits to target. + # A block may be provided to format the trailer of each line. + def dump(audits, target=$stdout) # :yields: audit + return dump(audits, target) do |audit| + "o-[#{audit.key}] #{audit.value.inspect}" + end unless block_given? - Audit.new(values, sources) + # arrayify audits + audits = [audits].flatten + + # the order of audits + order = [] + + # (audit, sinks) hash preventing double iteration over + # audits, and identifying sinks for a particular audit + sinks = {} + + # iterate over all audits, collecting in order + audits.each do |audit| + traverse(audit, order, sinks) end + + # visit each audit, collecting audits into indent groups + groups = [] + current = nil + order.each do |audit| + sources = audit.sources + unless sources.length == 1 && sinks[sources[0]].length <= 1 + current = [] + groups << current + end + + current << audit + end + + # identify nodes at which a fork occurs... 
these are audits + # that have more than one sink, and they cause a fork-style + # leader to be printed + forks = {} + sinks.each_pair do |audit, audit_sinks| + n = audit_sinks.length + forks[audit] = [0, n] if n > 1 + end + + # setup print + index = 0 + leader = "" + + # print each group + groups.each do |group| + sources = group[0].sources + complete = audits.include?(group[-1]) + + case + when sources.length > 1 + # print a merge + # `-`-`-o-[merge] + + leader =~ /^(.*)((\| *){#{sources.length}})$/ + leader = "#{$1}#{' ' * $2.length} " + target << "#{$1}#{$2.gsub('|', '`').gsub(' ', '-')}-#{yield(group.shift)}\n" + + when fork = forks[sources[0]] + # print a fork + # |-o-[a] + # | + # `---o-[b] + + n = fork[0] += 1 + base = leader[0, leader.length - (2 * n - 1)] + target << "#{base}#{fork[0] == fork[1] ? '`-' : '|-'}#{'--' * (n-1)}#{yield(group.shift)}\n" + leader = "#{base}#{fork[0] == fork[1] ? ' ' : '| '}#{'| ' * (n-1)}" + + when index > 0 + # simply get ready to print the next series of audits + # o-[a] + # o-[b] + + leader = "#{leader} " + leader = "" if leader.strip.empty? 
+ end + + # print the next series of audits + group.each do |audit| + target << "#{leader}#{yield(audit)}\n" + end + + # add a continuation line, if necessary + unless group == groups.last + if complete + leader = "#{leader} " + else + leader = "#{leader}|" + end + target << "#{leader}\n" + end + + index += 1 + end + + target end + + protected + + # helper to determine the order and sinks for a node + def traverse(node, order=[], sinks={}) # :nodoc: + return if sinks.has_key?(node) + + node.sources.each do |source| + traverse(source, order, sinks) + (sinks[source] ||= []) << node + end + + order << node + end end - # An array of the sources in self - attr_reader :_sources + # A key for self (typically the task producing value, or + # nil if the value has an unknown origin) + attr_reader :key - # An array of the values in self - attr_reader :_values + # The current value + attr_reader :value - # An arbitrary object used to identify when no inputs have been - # provided to Audit.new. (nil cannot be used since nil is a valid - # initial value) - AUDIT_NIL = Object.new - - # A new audit takes a value and/or source. A nil source is typically given - # for the original value. - def initialize(value=AUDIT_NIL, source=nil) - @_sources = [] - @_values = [] - - _record(source, value) unless value == AUDIT_NIL - end - - # Records the next value produced by the source. When an audit is - # passed as a value, record will record the current value of the audit. - # Record will similarly resolve every audit in an array containing audits. + # Initializes a new Audit. Sources may be an array, a single value + # (which is turned into an array), or nil (indicating no sources). 
# - # Example: + # _a = Audit.new(nil, nil, nil) + # _a.sources # => [] # - # a = Audit.new(1) - # b = Audit.new(2) - # c = Audit.new(3) + # _b = Audit.new(nil, nil, _a) + # _b.sources # => [_a] # - # c.record(:a, a) - # c.sources # => [:a] - # c.values # => [1] - # - # c.record(:ab, [a,b]) - # c.sources # => [:a, :ab] - # c.values # => [1, [1, 2]] - def _record(source, value) - _sources << source - _values << value - self + # _c = Audit.new(nil, nil, [_a,_b]) + # _c.sources # => [_a,_b] + # + def initialize(key=nil, value=nil, sources=nil) + @key = key + @value = value + @source = singularize(sources) end - - # The original value used to initialize the Audit - def _original - _values.first - end - - # The current (ie last) value recorded in the Audit - def _current - _values.last - end - - # The original source used to initialize the Audit - def _original_source - _sources.first - end - # The current (ie last) source recorded in the Audit - def _current_source - _sources.last + # An array of source audits for self. Sources may be empty. + def sources + arrayify(@source) end - - # Searches back and recursively (if the source is an audit) collects all sources - # for the current value. - def _source_trail - _collect_records {|source, value| source} - end - # Searches back and recursively (if the source is an audit) collects all values - # leading to the current value. - def _value_trail - _collect_records {|source, value| value} - end - - def _collect_records(&block) # :yields: source, value + # Produces a fork of self for each item in value, using the index of + # the item as a key. Splat is useful for developing each item of an + # array value along different paths. 
+ # + # _a = Audit.new(nil, [:x, :y, :z]) + # _b,_c,_d = _a.splat + # + # _b.key # => 0 + # _b.value # => :x + # + # _c.key # => 1 + # _c.value # => :y + # + # _d.key # => 2 + # _d.value # => :z + # _d.trail # => [_a,_d] + # + # If value does not respond to 'each', an array with self as the only + # member will be returned. This ensures that the result of splat + # is an array of audits ready for further development. + # + # _a = Audit.new(nil, :value) + # _a.splat # => [_a] + # + def splat + return [self] unless value.respond_to?(:each) + collection = [] - 0.upto(_sources.length-1) do |i| - collection << collect_records(_sources[i], _values[i], &block) + index = 0 + value.each do |obj| + collection << Audit.new(index, obj, self) + index += 1 end collection end - def _each_record(merge_level=0, merge_index=0, &block) # :yields: source, value, merge_level, merge_index, index - 0.upto(_sources.length-1) do |i| - each_record(_sources[i], _values[i], merge_level, merge_index, i, &block) + # Recursively collects an audit trail leading to self. Single sources + # are collected into the trail directly, while multiple sources are + # collected into arrays. + # + # _a = Audit.new(:one, 1) + # _b = Audit.new(:two, 2, _a) + # _b.trail # => [_a,_b] + # + # _a = Audit.new(:one, 1) + # _b = Audit.new(:two, 2) + # _c = Audit.new(:three, 3, [_a, _b]) + # _c.trail # => [[[_a],[_b]],_c] + # + # A block may be provided to collect a specific audit attribute + # instead of the audit itself. + # + # _c.trail {|audit| audit.value } # => [[[1],[2]],3] + # + def trail(trail=[], &block) + trail.unshift(block_given? ? block.call(self) : self) + + case @source + when Audit + @source.trail(trail, &block) + when Array + trail.unshift @source.collect {|audit| audit.trail(&block) } end + + trail end - - # Creates a new Audit by merging self and the input audits, using Audit#merge. 
- def _merge(*audits) - Audit.merge(self, *audits) - end - # Produces a new Audit with duplicate sources and values, suitable for - # independent development. - def _fork - a = Audit.new - a._sources = _sources.dup - a._values = _values.dup - a + # A kind of pretty-print for Audits. + def dump(&block) + Audit.dump(self, "", &block) end - - # Produces a fork of self for each item in the current value (_current). - # Iterate is useful for developing each item of (say) an array along - # different paths. - # - # Records the next value of each fork as [item, AuditIterate.new(<index of item>)]. - # Raises an error if _current does not respond to each. - def _iterate - expanded = [] - _current.each do |value| - expanded << _fork._record(AuditIterate.new(expanded.length), value) - end - expanded - end - # Returns true if the _sources and _values for self are equal - # to those of another. - def ==(another) - another.kind_of?(Audit) && self._sources == another._sources && self._values == another._values - end + private - # A kind of pretty-print for Audits. See the example in the overview. - def _to_s - # TODO -- find a way to avoid repeating groups + # helper to optimize storage of nodes + def singularize(obj) # :nodoc: + return obj unless obj.kind_of?(Array) - group = [] - groups = [group] - extended_groups = [groups] - group_merges = [] - extended_group_merges = [] - current_level = nil - current_index = nil - - _each_record do |source, value, merge_level, merge_index, index| - source_str, value_str = if block_given? - yield(source, value) - else - [source, value == nil ? '' : PP.singleline_pp(value, '')] - end - - if !group.empty? 
&& (merge_level != current_level || index == 0) - unless merge_level <= current_level - groups = [] - extended_groups << groups - end - - group = [] - groups << group - - if merge_level < current_level - if merge_index == 0 - extended_group_merges << group.object_id - end - - unless index == 0 - group_merges << group.object_id - end - end - end - - group << "o-[#{source_str}] #{value_str}" - current_level = merge_level - current_index = merge_index + case obj.length + when 0 then nil + when 1 then obj[0] + else obj end - - lines = [] - group_prefix = "" - extended_groups.each do |ext_groups| - indentation = 0 - - ext_groups.each_with_index do |ext_group, group_num| - ext_group.each_with_index do |line, line_num| - if line_num == 0 - unless lines.empty? - lines << group_prefix + " " * indentation + "| " * (group_num-indentation) - end - - if group_merges.include?(ext_group.object_id) - lines << group_prefix + " " * indentation + "`-" * (group_num-indentation) + line - indentation = group_num - - if extended_group_merges.include?(ext_group.object_id) - lines.last.gsub!(/\| \s*/) {|match| "`-" + "-" * (match.length - 2)} - group_prefix.gsub!(/\| /, " ") - end - next - end - end - - lines << group_prefix + " " * indentation + "| " * (group_num-indentation) + line - end - end - - group_prefix += " " * (ext_groups.length-1) + "| " - end - - lines.join("\n") + "\n" end - protected - - attr_writer :_sources, :_values # :nodoc: - - private - - # helper method to recursively collect the value trail for a given source - def collect_records(source, value, &block) - case source - when AuditMerge - collection = [] - 0.upto(source.length-1) do |i| - collection << collect_records(source[i], value[i], &block) - end - collection - when Audit - source._collect_records(&block) - else - yield(source, value) - end - end - - def each_record(source, value, merge_level, merge_index, index, &block) - case source - when AuditMerge - merge_level += 1 - 0.upto(source.length-1) do |i| - 
each_record(source[i], value[i], merge_level, i, index, &block) - end - when Audit - source._each_record(merge_level, merge_index, &block) - else - yield(source, value, merge_level, merge_index, index) + # helper to optimize storage of nodes + def arrayify(obj) # :nodoc: + case obj + when nil then [] + when Array then obj + else [obj] end end end end end \ No newline at end of file