lib/tap/support/audit.rb in tap-0.11.1 vs lib/tap/support/audit.rb in tap-0.12.0
- old
+ new
@@ -1,411 +1,332 @@
module Tap
- module Support
+ module Support
- # Marks the merge of multiple Audit trails
- class AuditMerge < Array
-
- # True if another is an AuditMerge and passes Array#==
- def ==(another)
- another.kind_of?(AuditMerge) && super
- end
- end
-
- # Marks the expansion of an Audit trail
- class AuditIterate
- attr_reader :index
- def initialize(index) @index = index end
-
- # True if another is an AuditIterate with the same index.
- def ==(another)
- another.kind_of?(AuditIterate) && another.index == index
- end
-
- # Returns a string like '_iterate(<index>)'.
- def to_s
- "_iterate(#{index})"
- end
- end
-
- # Audit provides a way to track the values (inputs and results) passed among
- # tasks or, more generally, any Executable. Audits allow you to track inputs
- # as they make their way through a workflow, and have great utility in
- # debugging and record keeping.
+ # Audit provides a way to track the values passed among tasks or, more
+ # generally, any Executable. Audits collectively build a {directed
+ # acyclic graph}[http://en.wikipedia.org/wiki/Directed_acyclic_graph]
+ # of task execution and have great utility in debugging and record keeping.
#
- # During execution, the inputs to a task are used to initialize an Audit.
- # These inputs are the original value of the audit and mark the begining
- # of an audit trail; every task adds to the trail by recording it's result
- # and itself as the 'source' of the result.
+ # Audits record a key, a current value, and the previous audit(s) in the
+ # trail. Keys are arbitrary identifiers of where the value comes from.
+ # To illustrate, let's use symbols as keys.
#
- # Audits can take any object as a source, so for illustration lets use some
- # symbols:
- #
# # initialize a new audit
- # a = Audit.new(1, nil)
+ # _a = Audit.new(:one, 1)
+ # _a.key # => :one
+ # _a.value # => 1
#
- # # record some values
- # a._record(:A, 2)
- # a._record(:B, 3)
+ # # build a short trail
+ # _b = Audit.new(:two, 2, _a)
+ # _c = Audit.new(:three, 3, _b)
#
- # Now you can pull up the source and value trails, as well as the current
- # and original values:
+ # _a.sources # => []
+ # _b.sources # => [_a]
+ # _c.sources # => [_b]
#
- # a._source_trail # => [nil, :A, :B]
- # a._value_trail # => [1, 2, 3]
+ # Audits allow you to track back through the sources of each audit to build
+ # a trail describing how a particular value was produced.
#
- # a._original # => 1
- # a._original_source # => nil
+ # _c.trail # => [_a,_b,_c]
+ # _c.trail {|audit| audit.key } # => [:one, :two, :three]
+ # _c.trail {|audit| audit.value } # => [1,2,3]
#
- # a._current # => 3
- # a._current_source # => :B
+ # Any number of audits may share the same source, so forks are naturally
+ # supported.
#
- # Merges are supported by using an array of the merged trails (actually
- # an AuditMerge) as the source, and an array of the merged values as the
- # original value.
+ # _d = Audit.new(:four, 4, _b)
+ # _d.trail # => [_a,_b,_d]
#
- # b = Audit.new(10, nil)
- # b._record(:C, 11)
- # b._record(:D, 12)
+ # _e = Audit.new(:five, 5, _b)
+ # _e.trail # => [_a,_b,_e]
#
- # c = Audit.merge(a, b)
- # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]] ]
- # c._value_trail # => [ [[1,2,3], [10, 11, 12]] ]
- # c._current # => [3, 12]
+ # Merges are supported by specifying more than one source. Merges have
+ # the effect of nesting audit trails within an array:
#
- # c._record(:E, "a string value")
- # c._record(:F, {'a' => 'hash value'})
- # c._record(:G, ['an', 'array', 'value'])
+ # _f = Audit.new(:six, 6)
+ # _g = Audit.new(:seven, 7, _f)
+ # _h = Audit.new(:eight, 8, [_c,_d,_g])
+ # _h.trail # => [[[_a,_b,_c], [_a,_b,_d], [_f,_g]], _h]
+ #
+ # Nesting can get quite ugly after a couple merges so Audit provides a
+ # scalable pretty-print dump that helps visualize the audit trail.
#
- # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
- # c._value_trail # => [ [[1,2,3], [10, 11, 12]], "a string value", {'a' => 'hash value'}, ['an', 'array', 'value']]
+ # "\n" + _h.dump
+ # # => %q{
+ # # o-[one] 1
+ # # o-[two] 2
+ # # |
+ # # |-o-[three] 3
+ # # | |
+ # # `---o-[four] 4
+ # # | |
+ # # | | o-[six] 6
+ # # | | o-[seven] 7
+ # # | | |
+ # # `-`-`-o-[eight] 8
+ # # }
#
- # Audit supports forks by duplicating the source and value trails. Forks
- # can be developed independently. Audits are also forked during a merge;
- # notice the additional record in 'a' doesn't change the source trail for
- # 'c':
+ # In practice, tasks are recorded as keys. Thus audit trails can be used
+ # to access task configurations and other information that may be useful
+ # when creating reports or making workflow decisions. Note that by
+ # convention Audits and non-Audit methods that return Audits are
+ # prefixed with an underscore.
#
- # a1 = a._fork
- #
- # a._record(:X, -1)
- # a1._record(:Y, -2)
- #
- # a._source_trail # => [nil, :A, :B, :X]
- # a1._source_trail # => [nil, :A, :B, :Y]
- # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
- #
- # The data structure for an audit gets nasty after a few merges because
- # the lead array gets more and more nested. Audit provides iterators
- # to help gain access, as well as a printing method to visualize the
- # audit trail:
- #
- # c._to_s
- # # =>
- # # o-[] 1
- # # o-[A] 2
- # # o-[B] 3
- # # |
- # # | o-[] 10
- # # | o-[C] 11
- # # | o-[D] 12
- # # | |
- # # `-`-o-[E] "a string value"
- # # o-[F] {"a"=>"hash value"}
- # # o-[G] ["an", "array", "value"]
- #
- # In practice, tasks are recored as sources. Thus source trails can be used
- # to access task configurations and other information that may be useful
- # when creating reports or making workflow decisions.
- #
#--
- # TODO:
- # Track nesting level of ams; see if you can hook this into the _to_s process to make
- # extraction/presentation of audits more managable.
+ # Note Audit could easily be expanded to track sinks as well as sources.
+ # In initialize:
#
- # Create a FirstLastArray to minimize the audit data collected. Allow different audit
- # modes:
- # - full ([] both)
- # - source_only (fl value)
- # - minimal (fl source and value)
- #
- # Try to work a _to_s that doesn't repeat the same audit twice. Think about a format
- # like:
- # |
- # ------|-----+
- # | |
- # ------|-----|-----+
- # | | |
- # `-----`-----`-o-[j] j5
- #
+ # @sinks = []
+ # sources.each do |source|
+ # source.sinks << self
+ # end
+ #
+ # The downside is that sink tracking may be hard to circumvent cleanly if
+ # you want light or no auditing. It also adds additional references which
+ # will prevent garbage collection. On the plus side, sinks will make it
+ # easier to truly use Audits as a DAG.
class Audit
class << self
-
- # Creates a new Audit by merging the input audits. The value of the new
- # Audit will be an array of the _current values of the inputs. The source
- # will be an AuditMerge whose values are forks of the inputs. Non-Audit
- # sources may be provided; they are initialized to Audits before merging.
- #
- # a = Audit.new
- # a._record(:a, 'a')
- #
- # b = Audit.new
- # b._record(:b, 'b')
- #
- # c = Audit.merge(a, b, 1)
- # c._record(:c, 'c')
- #
- # c._values # => [['a','b', 1], 'c']
- # c._sources # => [AuditMerge[a, b, Audit.new(1)], :c]
- #
- # If no audits are provided, merge returns a new Audit. If only one
- # audit is provided, merge returns a fork of that audit.
- def merge(*audits)
- case audits.length
- when 0 then Audit.new
- when 1 then audits[0]._fork
- else
- sources = AuditMerge.new
- audits.each {|a| sources << (a.kind_of?(Audit) ? a._fork : Audit.new(a)) }
- values = audits.collect {|a| a.kind_of?(Audit) ? a._current : a}
+
+ # Produces a pretty-print dump of the specified audits to target.
+ # A block may be provided to format the trailer of each line.
+ def dump(audits, target=$stdout) # :yields: audit
+ return dump(audits, target) do |audit|
+ "o-[#{audit.key}] #{audit.value.inspect}"
+ end unless block_given?
- Audit.new(values, sources)
+ # arrayify audits
+ audits = [audits].flatten
+
+ # the order of audits
+ order = []
+
+ # (audit, sinks) hash preventing double iteration over
+ # audits, and identifying sinks for a particular audit
+ sinks = {}
+
+ # iterate over all audits, collecting in order
+ audits.each do |audit|
+ traverse(audit, order, sinks)
end
+
+ # visit each audit, collecting audits into indent groups
+ groups = []
+ current = nil
+ order.each do |audit|
+ sources = audit.sources
+ unless sources.length == 1 && sinks[sources[0]].length <= 1
+ current = []
+ groups << current
+ end
+
+ current << audit
+ end
+
+ # identify nodes at which a fork occurs... these are audits
+ # that have more than one sink, and they cause a fork-style
+ # leader to be printed
+ forks = {}
+ sinks.each_pair do |audit, audit_sinks|
+ n = audit_sinks.length
+ forks[audit] = [0, n] if n > 1
+ end
+
+ # setup print
+ index = 0
+ leader = ""
+
+ # print each group
+ groups.each do |group|
+ sources = group[0].sources
+ complete = audits.include?(group[-1])
+
+ case
+ when sources.length > 1
+ # print a merge
+ # `-`-`-o-[merge]
+
+ leader =~ /^(.*)((\| *){#{sources.length}})$/
+ leader = "#{$1}#{' ' * $2.length} "
+ target << "#{$1}#{$2.gsub('|', '`').gsub(' ', '-')}-#{yield(group.shift)}\n"
+
+ when fork = forks[sources[0]]
+ # print a fork
+ # |-o-[a]
+ # |
+ # `---o-[b]
+
+ n = fork[0] += 1
+ base = leader[0, leader.length - (2 * n - 1)]
+ target << "#{base}#{fork[0] == fork[1] ? '`-' : '|-'}#{'--' * (n-1)}#{yield(group.shift)}\n"
+ leader = "#{base}#{fork[0] == fork[1] ? ' ' : '| '}#{'| ' * (n-1)}"
+
+ when index > 0
+ # simply get ready to print the next series of audits
+ # o-[a]
+ # o-[b]
+
+ leader = "#{leader} "
+ leader = "" if leader.strip.empty?
+ end
+
+ # print the next series of audits
+ group.each do |audit|
+ target << "#{leader}#{yield(audit)}\n"
+ end
+
+ # add a continuation line, if necessary
+ unless group == groups.last
+ if complete
+ leader = "#{leader} "
+ else
+ leader = "#{leader}|"
+ end
+ target << "#{leader}\n"
+ end
+
+ index += 1
+ end
+
+ target
end
+
+ protected
+
+ # helper to determine the order and sinks for a node
+ def traverse(node, order=[], sinks={}) # :nodoc:
+ return if sinks.has_key?(node)
+
+ node.sources.each do |source|
+ traverse(source, order, sinks)
+ (sinks[source] ||= []) << node
+ end
+
+ order << node
+ end
end
- # An array of the sources in self
- attr_reader :_sources
+ # A key for self (typically the task producing value, or
+ # nil if the value has an unknown origin)
+ attr_reader :key
- # An array of the values in self
- attr_reader :_values
+ # The current value
+ attr_reader :value
- # An arbitrary object used to identify when no inputs have been
- # provided to Audit.new. (nil cannot be used since nil is a valid
- # initial value)
- AUDIT_NIL = Object.new
-
- # A new audit takes a value and/or source. A nil source is typically given
- # for the original value.
- def initialize(value=AUDIT_NIL, source=nil)
- @_sources = []
- @_values = []
-
- _record(source, value) unless value == AUDIT_NIL
- end
-
- # Records the next value produced by the source. When an audit is
- # passed as a value, record will record the current value of the audit.
- # Record will similarly resolve every audit in an array containing audits.
+ # Initializes a new Audit. Sources may be an array, a single value
+ # (which is turned into an array), or nil (indicating no sources).
#
- # Example:
+ # _a = Audit.new(nil, nil, nil)
+ # _a.sources # => []
#
- # a = Audit.new(1)
- # b = Audit.new(2)
- # c = Audit.new(3)
+ # _b = Audit.new(nil, nil, _a)
+ # _b.sources # => [_a]
#
- # c.record(:a, a)
- # c.sources # => [:a]
- # c.values # => [1]
- #
- # c.record(:ab, [a,b])
- # c.sources # => [:a, :ab]
- # c.values # => [1, [1, 2]]
- def _record(source, value)
- _sources << source
- _values << value
- self
+ # _c = Audit.new(nil, nil, [_a,_b])
+ # _c.sources # => [_a,_b]
+ #
+ def initialize(key=nil, value=nil, sources=nil)
+ @key = key
+ @value = value
+ @source = singularize(sources)
end
-
- # The original value used to initialize the Audit
- def _original
- _values.first
- end
-
- # The current (ie last) value recorded in the Audit
- def _current
- _values.last
- end
-
- # The original source used to initialize the Audit
- def _original_source
- _sources.first
- end
- # The current (ie last) source recorded in the Audit
- def _current_source
- _sources.last
+ # An array of source audits for self. Sources may be empty.
+ def sources
+ arrayify(@source)
end
-
- # Searches back and recursively (if the source is an audit) collects all sources
- # for the current value.
- def _source_trail
- _collect_records {|source, value| source}
- end
- # Searches back and recursively (if the source is an audit) collects all values
- # leading to the current value.
- def _value_trail
- _collect_records {|source, value| value}
- end
-
- def _collect_records(&block) # :yields: source, value
+ # Produces a fork of self for each item in value, using the index of
+ # the item as a key. Splat is useful for developing each item of an
+ # array value along different paths.
+ #
+ # _a = Audit.new(nil, [:x, :y, :z])
+ # _b,_c,_d = _a.splat
+ #
+ # _b.key # => 0
+ # _b.value # => :x
+ #
+ # _c.key # => 1
+ # _c.value # => :y
+ #
+ # _d.key # => 2
+ # _d.value # => :z
+ # _d.trail # => [_a,_d]
+ #
+ # If value does not respond to 'each', an array with self as the only
+ # member will be returned. This ensures that the result of splat
+ # is an array of audits ready for further development.
+ #
+ # _a = Audit.new(nil, :value)
+ # _a.splat # => [_a]
+ #
+ def splat
+ return [self] unless value.respond_to?(:each)
+
collection = []
- 0.upto(_sources.length-1) do |i|
- collection << collect_records(_sources[i], _values[i], &block)
+ index = 0
+ value.each do |obj|
+ collection << Audit.new(index, obj, self)
+ index += 1
end
collection
end
- def _each_record(merge_level=0, merge_index=0, &block) # :yields: source, value, merge_level, merge_index, index
- 0.upto(_sources.length-1) do |i|
- each_record(_sources[i], _values[i], merge_level, merge_index, i, &block)
+ # Recursively collects an audit trail leading to self. Single sources
+ # are collected into the trail directly, while multiple sources are
+ # collected into arrays.
+ #
+ # _a = Audit.new(:one, 1)
+ # _b = Audit.new(:two, 2, _a)
+ # _b.trail # => [_a,_b]
+ #
+ # _a = Audit.new(:one, 1)
+ # _b = Audit.new(:two, 2)
+ # _c = Audit.new(:three, 3, [_a, _b])
+ # _c.trail # => [[[_a],[_b]],_c]
+ #
+ # A block may be provided to collect a specific audit attribute
+ # instead of the audit itself.
+ #
+ # _c.trail {|audit| audit.value } # => [[[1],[2]],3]
+ #
+ def trail(trail=[], &block)
+ trail.unshift(block_given? ? block.call(self) : self)
+
+ case @source
+ when Audit
+ @source.trail(trail, &block)
+ when Array
+ trail.unshift @source.collect {|audit| audit.trail(&block) }
end
+
+ trail
end
-
- # Creates a new Audit by merging self and the input audits, using Audit#merge.
- def _merge(*audits)
- Audit.merge(self, *audits)
- end
- # Produces a new Audit with duplicate sources and values, suitable for
- # independent development.
- def _fork
- a = Audit.new
- a._sources = _sources.dup
- a._values = _values.dup
- a
+ # A kind of pretty-print for Audits.
+ def dump(&block)
+ Audit.dump(self, "", &block)
end
-
- # Produces a fork of self for each item in the current value (_current).
- # Iterate is useful for developing each item of (say) an array along
- # different paths.
- #
- # Records the next value of each fork as [item, AuditIterate.new(<index of item>)].
- # Raises an error if _current does not respond to each.
- def _iterate
- expanded = []
- _current.each do |value|
- expanded << _fork._record(AuditIterate.new(expanded.length), value)
- end
- expanded
- end
- # Returns true if the _sources and _values for self are equal
- # to those of another.
- def ==(another)
- another.kind_of?(Audit) && self._sources == another._sources && self._values == another._values
- end
+ private
- # A kind of pretty-print for Audits. See the example in the overview.
- def _to_s
- # TODO -- find a way to avoid repeating groups
+ # helper to optimize storage of nodes: an empty array is stored as nil and a one-item array as the item itself
+ def singularize(obj) # :nodoc:
+ return obj unless obj.kind_of?(Array)
- group = []
- groups = [group]
- extended_groups = [groups]
- group_merges = []
- extended_group_merges = []
- current_level = nil
- current_index = nil
-
- _each_record do |source, value, merge_level, merge_index, index|
- source_str, value_str = if block_given?
- yield(source, value)
- else
- [source, value == nil ? '' : PP.singleline_pp(value, '')]
- end
-
- if !group.empty? && (merge_level != current_level || index == 0)
- unless merge_level <= current_level
- groups = []
- extended_groups << groups
- end
-
- group = []
- groups << group
-
- if merge_level < current_level
- if merge_index == 0
- extended_group_merges << group.object_id
- end
-
- unless index == 0
- group_merges << group.object_id
- end
- end
- end
-
- group << "o-[#{source_str}] #{value_str}"
- current_level = merge_level
- current_index = merge_index
+ case obj.length
+ when 0 then nil
+ when 1 then obj[0]
+ else obj
end
-
- lines = []
- group_prefix = ""
- extended_groups.each do |ext_groups|
- indentation = 0
-
- ext_groups.each_with_index do |ext_group, group_num|
- ext_group.each_with_index do |line, line_num|
- if line_num == 0
- unless lines.empty?
- lines << group_prefix + " " * indentation + "| " * (group_num-indentation)
- end
-
- if group_merges.include?(ext_group.object_id)
- lines << group_prefix + " " * indentation + "`-" * (group_num-indentation) + line
- indentation = group_num
-
- if extended_group_merges.include?(ext_group.object_id)
- lines.last.gsub!(/\| \s*/) {|match| "`-" + "-" * (match.length - 2)}
- group_prefix.gsub!(/\| /, " ")
- end
- next
- end
- end
-
- lines << group_prefix + " " * indentation + "| " * (group_num-indentation) + line
- end
- end
-
- group_prefix += " " * (ext_groups.length-1) + "| "
- end
-
- lines.join("\n") + "\n"
end
- protected
-
- attr_writer :_sources, :_values # :nodoc:
-
- private
-
- # helper method to recursively collect the value trail for a given source
- def collect_records(source, value, &block)
- case source
- when AuditMerge
- collection = []
- 0.upto(source.length-1) do |i|
- collection << collect_records(source[i], value[i], &block)
- end
- collection
- when Audit
- source._collect_records(&block)
- else
- yield(source, value)
- end
- end
-
- def each_record(source, value, merge_level, merge_index, index, &block)
- case source
- when AuditMerge
- merge_level += 1
- 0.upto(source.length-1) do |i|
- each_record(source[i], value[i], merge_level, i, index, &block)
- end
- when Audit
- source._each_record(merge_level, merge_index, &block)
- else
- yield(source, value, merge_level, merge_index, index)
+ # helper to expand the stored source(s) back into an array (nil becomes [])
+ def arrayify(obj) # :nodoc:
+ case obj
+ when nil then []
+ when Array then obj
+ else [obj]
end
end
end
end
end
\ No newline at end of file