lib/tap/support/audit.rb in tap-0.8.0 vs lib/tap/support/audit.rb in tap-0.9.0
- old
+ new
@@ -1,187 +1,198 @@
module Tap
module Support
+
+ # Marks the merge of multiple Audit trails
+ class AuditMerge < Array
+ def ==(another)
+ another.kind_of?(AuditMerge) && super
+ end
+ end
+
+ # Marks a split in an Audit trail
+ class AuditSplit
+ attr_reader :block
+ def initialize(block) @block = block end
+
+ def ==(another)
+ another.kind_of?(AuditSplit) && another.block == block
+ end
+ end
+
+ # Marks the expansion of an Audit trail
+ class AuditExpand
+ attr_reader :index
+ def initialize(index) @index = index end
+
+ def ==(another)
+ another.kind_of?(AuditExpand) && another.index == index
+ end
+ end
- # === Overview
- #
- # Audit tracks input and result values passed among tasks within a workflow. At the end
- # of a run, each result will have an audit trail detailing the values it has obtained
- # at various stages, and the source of that value. The ability to do track back all the
- # places where a value was changed or modified is very important during workflow debugging.
+ # == Overview
#
- # Audit is designed so you can ask a result 'hey where did you come from?' rather than
- # being able to ask an input 'what are all the results you ultimately produce?'. Say your
- # workflowconsists of 3 sequential tasks [:a, :b, :c]. Tasks :a and :b add one to their input
- # value, while :c adds two. Behind the scences, this is what happens when we run the workflow
- # with an initial input value of 3:
- #
- # # task :a initializes a new audit with the original
- # # value upon execution
- # ... run :a with input 3 ...
- # audit = Audit.new(3)
+ # Audit provides a way to track the values (inputs and results) passed
+ # among tasks. Audits allow you to track inputs as they make their
+ # way through a workflow, and have a great deal of importance for
+ # debugging and record keeping.
#
- # # when task :a finishes, it records the new value and
- # # the source of the value (ie task ':a')
- # ... task :a adds one ...
- # audit._record(:a, 4)
+ # During execution, the group of inputs for a task is used to initialize
+ # an Audit. These inputs mark the beginning of an audit trail; every
+ # task that processes them (including the first) records its result in
+ # the trail with the task as the 'source' of the result.
#
- # # next the audit is passed to task :b, then task :c
- # # each of which records the next source and value
- # ... task :b adds one ...
- # audit._record(:b, 5)
- # ... task :c adds two ...
- # audit._record(:c, 7)
+ # Since Audits are meant to be fairly general structures, they can take
+ # any object as a source, so for illustration let's use some symbols:
#
- # # at the end, if you want to know how your final
- # # value got to be 7, you can look at the source_trail
- # # (note the very first source is nil)
- # audit._source_trail # => [nil, :a, :b, :c]
+ # # initialize a new audit
+ # a = Audit.new(1, nil)
#
- # Audit supports forks by duplicating an audit trail (ie the recorded sources and values) and
- # merges by storing the various sources and values in an array. For example:
+ # # record some values
+ # a._record(:A, 2)
+ # a._record(:B, 3)
#
- # # now let :a fork its results to both :b and :c
- # audit = Audit.new(3)
- # audit._record(:a, 4)
- # fork_b = audit._fork
- # fork_c = audit._fork
+ # Now you can pull up the trails of sources and values, as well as
+ # information like the current and original values:
#
- # ... tasks :b adds one and :c adds two ...
- # fork_b._record(:b, 5)
- # fork_c._record(:c, 6)
+ # a._source_trail # => [nil, :A, :B]
+ # a._value_trail # => [1, 2, 3]
#
- # # at the end you have a separate source trail for
- # # each result.
- # fork_b._source_trail # => [nil, :a, :b]
- # fork_c._source_trail # => [nil, :a, :c]
+ # a._original # => 1
+ # a._original_source # => nil
#
- # # now lets say you decided to merge both of
- # # these trails into a new task :d which adds
- # # all values that come to it.
- # ... task :d recieves results from :b and :c and adds them ...
- # merged_audit = Audit.merge(fork_b, fork_c)
- # merged_audit._record(:d, 11)
+ # a._current # => 3
+ # a._current_source # => :B
#
- # # now you can look back at the full source trail
- # # where an array of sources indicates two trails
- # # that merged
- # merged_audit._source_trail # => [[[nil,:a,:b], [nil,:a,:c]], :d]
+ # Merges are supported by using an array of the merging trails as the
+ # source, and an array of the merging values as the initial value.
#
- # An important thing to note is that while in these examples symbols have been used
- # to represent the tasks, the actual tasks themselves are recorded as sources in practice.
- # Thus the source trails can be used to access task configurations and other information
- # that may be useful when assessing an audit. Incidentally, this is one of the reasons why Tap
- # is designed to be used with configurations that DO NOT change during execution; if they don't
- # change then you're able to look back at your handiwork.
+ # b = Audit.new(10, nil)
+ # b._record(:C, 11)
+ # b._record(:D, 12)
#
- # === Working with Audits
+ # c = Audit.merge(a, b)
+ # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]] ]
+ # c._value_trail # => [ [[1,2,3], [10, 11, 12]] ]
+ # c._current # => [3, 12]
#
- # Once an input enters the execution stream, it will be used to initialize an Audit.
- # From this point on, the Audit and not the value will be passed among tasks and ultimately
- # passed out in the results array.
+ # c._record(:E, "a string value")
+ # c._record(:F, {'a' => 'hash value'})
+ # c._record(:G, ['an', 'array', 'value'])
#
- # This must be kept in mind when building tasks into a workflow. For convenience, Audits are
- # constructed to pass unknown methods and most comparision methods to the current value, such
- # that they behave like the current value. It's important to realize that <em>workflow blocks
- # (ex: on_complete and condition) recieve Audits and NOT values</em>.
+ # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
+ # c._value_trail # => [ [[1,2,3], [10, 11, 12]], "a string value", {'a' => 'hash value'}, ['an', 'array', 'value']]
#
- # t = Tap::Task.new
- # t.on_complete do |results|
- # results.each do |result|
- # # you might expect result to be a value like 10 or "str"
- # # in fact it's an Audit, but it passes unknown methods
- # # along to it's current value
+ # Audit supports forks by duplicating the source and value trails. Forks
+ # can be developed independently. Importantly, Audits are forked during
+ # a merge; notice the additional record in +a+ doesn't change the source
+ # trail for +c+
#
- # result.class # => Audit
- # result._current # => "str"
- # result == "str" # => true
- # result.upcase # => "STR"
- #
- # # the forwarding behavior is for convenience when
- # # making decisions about what to do with a result
- # # but be sure you don't get caught! The only object
- # # methods forwarded are '==' and '=~'. Other methods
- # # are NOT forwarded, and =~ cannot (due to context
- # # issues) capture match strings
- #
- # result.kind_of?(String) # => false
- # result =~ /s(\w+)/ # => true
- # $1 # => nil (watch out! you may expect "tr")
- #
- # end
- # end
- #
- # Audits and NOT values are passed into these workflow blocks because you may need to make
- # a workflow decision based on where a value came from (ie you may need the source trail).
- # The same does not hold true when processing inputs. <em>The process method recieves the
- # values themselves.</em>
+ # a1 = a._fork
#
- # t = Tap::Task.new do |task, input|
- # # here in the process block, the input is the current value
- # # and NOT the Audit tracking the inputs and results
- # input.class # => Fixnum (given that we execute t with 3)
- # input += 1
- # end
+ # a._record(:X, -1)
+ # a1._record(:Y, -2)
#
- # results = t.execute(3)
- # results.class # => Array
- # results.length # => 1
- # results.first.class # => Audit
- # results.first._current # => 4
+ # a._source_trail # => [nil, :A, :B, :X]
+ # a1._source_trail # => [nil, :A, :B, :Y]
+ # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
#
- # === Summing it up:
+ # The data structure for an audit gets nasty after a few merges because
+ # the lead array gets more and more nested. Audit provides iterators
+ # to help gain access, as well as a printing method to visualize the
+ # audit trail:
#
- # - Task inputs may be values or Audits
- # - Task results are always an array of Audits
- # - Workflow blocks (ex: on_complete and condition) recieve Audits and not values
- # - The process method recieves the values themselves
+ # [c._to_s]
+ # o-[] 1
+ # o-[A] 2
+ # o-[B] 3
+ # |
+ # | o-[] 10
+ # | o-[C] 11
+ # | o-[D] 12
+ # | |
+ # `-`-o-[E] "a string value"
+ # o-[F] {"a"=>"hash value"}
+ # o-[G] ["an", "array", "value"]
#
+ # In practice, tasks are recorded as sources. Thus source trails can be used
+ # to access task configurations and other information that may be useful
+ # when creating reports or making workflow decisions (ex: raise an
+ # error after looping to a given task too many times).
+ #
+ #--
+ # TODO:
+ # Create an AuditMerge class to mark merges (don't use arrays). Track nesting level
+ # of ams; see if you can hook this into _to_s process to make extraction/presentation
+ # of audits more manageable.
+ #
+ # Create a FirstLastArray to minimize the audit data collected. Allow different audit
+ # modes:
+ # - full ([] both)
+ # - source_only (fl value)
+ # - minimal (fl source and value)
+ #
+ # Try to work a _to_s that doesn't repeat the same audit twice. Think about a format
+ # like:
+ # |
+ # ------|-----+
+ # | |
+ # ------|-----|-----+
+ # | | |
+ # `-----`-----`-o-[j] j5
+ #
class Audit
+ autoload(:PP, 'pp')
+
class << self
- # Convenience method to create a new Audit for each of the inputs, if the
- # input is not already an Audit. Returns an array of Audits.
- def register(*inputs)
- inputs.collect {|input| input.kind_of?(Audit) ? input : Audit.new(input) }
- end
-
- # Creates a new Audit from the inputs. The value of the new Audit will be the inputs
- # array where any Audits are replaced by their _current value. The source of the
- # new Audit will be a corresponding array of nils, or Audits if provided.
+ # Creates a new Audit by merging the input audits. The value of the new
+ # Audit will be an array of the _current values of the audits. The source
+ # will be an AuditMerge whose values are forks of the audits. Non-Audit
+ # sources can be provided; they are initialized to Audits before merging.
#
- # a = Audit.new(1)
- # b = Audit.merge(a, 2)
- # b._values # => [[1, 2]]
- # b._sources # => [[a, nil]]
+ # a = Audit.new
+ # a._record(:a, 'a')
+ #
+ # b = Audit.new
+ # b._record(:b, 'b')
+ #
+ # c = Audit.merge(a, b, 1)
+ # c._record(:c, 'c')
+ #
+ # c._values # => [['a','b', 1], 'c']
+ # c._sources # => [AuditMerge[a, b, Audit.new(1)], :c]
#
- # If no inputs are provided, then merge a new Audit with an initial value of nil.
- # If only one input is provided, then merge returns a new Audit initialized to
- # the input, or a _fork of the input if it is already an Audit.
- def merge(*inputs)
- case inputs.length
+ # If no audits are provided, merge returns a new Audit. If only one
+ # input is provided, merge returns a fork of it; in that case the input
+ # is sent _fork directly, so it must already be an Audit.
+ def merge(*audits)
+ case audits.length
when 0 then Audit.new
- when 1
- input = inputs.first
- input.kind_of?(Audit) ? input._fork : Audit.new(input)
+ when 1 then audits[0]._fork
else
- values = inputs.collect {|input| input.kind_of?(Audit) ? input._current : input}
- sources = inputs.collect {|input| input.kind_of?(Audit) ? input : nil}
+ sources = AuditMerge.new
+ audits.each {|a| sources << (a.kind_of?(Audit) ? a._fork : Audit.new(a)) }
+ values = audits.collect {|a| a.kind_of?(Audit) ? a._current : a}
+
Audit.new(values, sources)
end
end
-
end
attr_reader :_sources, :_values
-
+
+ # An arbitrary constant used to identify when no inputs have been
+ # provided to Audit.new. (nil itself cannot be used as nil is a
+ # valid initial value for an audit trail)
+ AUDIT_NIL = Object.new
+
# A new audit takes a value and/or source. A nil source is typically given
# for the original value.
- def initialize(value=nil, source=nil)
+ def initialize(value=AUDIT_NIL, source=nil)
@_sources = []
@_values = []
- _record(source, value)
+ _record(source, value) unless value == AUDIT_NIL
end
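# Records the next value produced by the source by appending source to
# _sources and value to _values, then returns self.
# NOTE(review): the value is stored exactly as given -- an audit (or an array
# containing audits) passed as a value is not resolved to its current value
# by this method, so the example below looks stale; confirm before relying on it.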
@@ -191,16 +202,16 @@
# a = Audit.new(1)
# b = Audit.new(2)
# c = Audit.new(3)
#
# c.record(:a, a)
- # c.sources # => [:a]
- # c.values # => [1]
+ # c.sources # => [:a]
+ # c.values # => [1]
#
# c.record(:ab, [a,b])
- # c.sources # => [:a, :ab]
- # c.values # => [1, [1, 2]]
+ # c.sources # => [:a, :ab]
+ # c.values # => [1, [1, 2]]
def _record(source, value)
  # the two trails are parallel arrays: entry i of _sources produced entry i of _values
  _sources.push(source)
  _values.push(value)
  # hand back the audit itself rather than the array returned by push
  self
end
@@ -223,87 +234,39 @@
# Returns the most recently recorded source in the Audit (the final
# entry of _sources), or nil when nothing has been recorded yet.
def _current_source
  _sources[-1]
end
- # The index of the last occurence of source in the audit (Note
- # equality is based on the object id of the specified source)
- def _index_last(source)
- _sources.rindex(source)
- end
-
- # The value recorded with the last occurence of source in the audit. (Note
- # equality is based on the object id of the specified source)
- def _last(source)
- index = _index_last(source)
- index.nil? ? nil : _values[index]
- end
-
- # Returns the value at the specified index.
- def _input(index)
- _values[index]
- end
-
- # Returns the input to the last occurence of source in the audit (ie
- # the value prior to this source). Example:
- #
- # a = Audit.new
- # a.record(:a, 'a')
- # a.record(:b, 'b')
- #
- # a.input_last(:a) # => nil
- # a.input_last(:b) # => 'a'
- def _input_last(source)
- index = _index_last(source)
- _input(index-1)
- end
-
- # Returns the value after the specfied index
- def _output(index)
- _values[index+1]
- end
-
- # Returns the output of the last occurence of source in the audit (ie
- # the value at this source). Example:
- #
- # a = Audit.new
- # a.record(:a, 'a')
- # a.record(:b, 'b')
- #
- # a.output_last(:a) # => 'a'
- # a.output_last(:b) # => 'b'
- def _output_last(source)
- index = _index_last(source)
- _output(index-1)
- end
-
# Searches back and recursively (if the source is an audit) collects all sources
# for the current value.
def _source_trail
- _sources.collect do |source|
- source_trail(source)
- end
+ _collect_records {|source, value| source}
end
# Searches back and recursively (if the source is an audit) collects all values
# leading to the current value.
def _value_trail
- trail = []
- 0.upto(_sources.length-1) do |index|
- trail << value_trail(_sources[index], _values[index])
+ _collect_records {|source, value| value}
+ end
+
+ def _collect_records(&block) # :yields: source, value
+ collection = []
+ 0.upto(_sources.length-1) do |i|
+ collection << collect_records(_sources[i], _values[i], &block)
end
- trail
+ collection
end
+
+ def _each_record(merge_level=0, merge_index=0, &block) # :yields: source, value, merge_level, merge_index, index
+ 0.upto(_sources.length-1) do |i|
+ each_record(_sources[i], _values[i], merge_level, merge_index, i, &block)
+ end
+ end
- # Produces a new Audit suitable for development along a separate path, by merging
- # the input sources with self. The value of the new audit will be an array of the
- # current values of the input sources and self.
- #
- # If no sources are provided, then _merge returns _fork.
- def _merge(*sources)
- sources.unshift(self)
- Audit.merge(*sources)
+ # Creates a new Audit by merging self and the input audits, using Audit.merge.
+ def _merge(*audits)
+ Audit.merge(self, *audits)
end
# Produces a new Audit with duplicate sources and values, suitable for
# separate development along a separate path.
def _fork
@@ -311,104 +274,142 @@
a._sources = _sources.dup
a._values = _values.dup
a
end
- # Produces a new Audit from self and records the next value as
- # the return value of the block. The block itself will be recored
- # as the value's source.
- def _split(&block)
- sp = Audit.new(nil, self)
- sp._record(block, yield(_current))
- sp
+ # _forks self and records the next value as [<return from block>, AuditSplit.new(block)]
+ def _split(&block) # :yields: _current
+ _fork._record(AuditSplit.new(block), yield(_current))
end
-
- alias _eql ==
- # Compares _current with another using ==
- def ==(another)
- _current == another
+ # _forks self for each member in _current. Records the next value as
+ # [item, AuditExpand.new(<index of item>)]. Raises an error if _current
+ # does not respond to each.
+ def _expand
+ expanded = []
+ _current.each do |value|
+ expanded << _fork._record(AuditExpand.new(expanded.length), value)
+ end
+ expanded
end
-
- alias _match =~
- # The method =~ does NOT work properly with an audit. As with other
- # methods, =~ is aliased to work on _current. However, variables like
- # $1, $2, etc cannot be passed back. As a result:
- #
- # a = Audit.new "abcd"
- # a =~ /ab(\w)/ # => true
- # $1 # => nil (should be 'c')
- #
- # # instead use _current directly...
- # a._current =~ /ab(\w)/ # => true
- # $1 # => 'c'
- #
- # Note the same applies to !~, as it executes through =~
- def =~(regexp)
- # note: this is not ideal... the variables $1, $2,
- # etc are not sent back to the executing context (binding)
- _current =~ regexp
+ # Returns true if the _sources and _values for self are equal
+ # to those of another.
+ def ==(another)
+ another.kind_of?(Audit) && self._sources == another._sources && self._values == another._values
end
+
+ # A kind of pretty-print for Audits. See the example in the overview.
+ def _to_s
+ # TODO -- find a way to avoid repeating groups
+
+ group = []
+ groups = [group]
+ extended_groups = [groups]
+ group_merges = []
+ extended_group_merges = []
+ current_level = nil
+ current_index = nil
+
+ _each_record do |source, value, merge_level, merge_index, index|
+ source_str, value_str = if block_given?
+ yield(source, value)
+ else
+ [source, value == nil ? '' : PP.singleline_pp(value, '')]
+ end
+
+ if !group.empty? && (merge_level != current_level || index == 0)
+ unless merge_level <= current_level
+ groups = []
+ extended_groups << groups
+ end
- # this shouldn't be necessary as Comparable feeds all it's methods
- #include Comparable
+ group = []
+ groups << group
+
+ if merge_level < current_level
+ if merge_index == 0
+ extended_group_merges << group.object_id
+ end
+
+ unless index == 0
+ group_merges << group.object_id
+ end
+ end
+ end
- # Compares _current with another using <=> if <=> is defined
- # for _current. Otherwise returns 0.
- #def <=>(another)
- # _current.respond_to?(:<=>) ? _current <=> another : 0
- #end
+ group << "o-[#{source_str}] #{value_str}"
+ current_level = merge_level
+ current_index = merge_index
+ end
- # CONSIDER FORWARDING ALL OF THESE!
- #
- #[:eql?, :equal?, :is_a?, :kind_of?, :nil?, :respond_to?, :tainted?, :to_str].each do |method|
- # alias_name = "_#{method}".to_sym
- # alias alias_name method
- # define_method(method) do |*args|
- # _current.send(method, *args)
- # end
- #end
+ lines = []
+ group_prefix = ""
+ extended_groups.each do |ext_groups|
+ indentation = 0
- #alias _cmp ===
- #def ===(another) _current.send('===', another) end
-
+ ext_groups.each_with_index do |ext_group, group_num|
+ ext_group.each_with_index do |line, line_num|
+ if line_num == 0
+ unless lines.empty?
+ lines << group_prefix + " " * indentation + "| " * (group_num-indentation)
+ end
+
+ if group_merges.include?(ext_group.object_id)
+ lines << group_prefix + " " * indentation + "`-" * (group_num-indentation) + line
+ indentation = group_num
+
+ if extended_group_merges.include?(ext_group.object_id)
+ lines.last.gsub!(/\| \s*/) {|match| "`-" + "-" * (match.length - 2)}
+ group_prefix.gsub!(/\| /, " ")
+ end
+ next
+ end
+ end
+
+ lines << group_prefix + " " * indentation + "| " * (group_num-indentation) + line
+ end
+ end
+
+ group_prefix += " " * (ext_groups.length-1) + "| "
+ end
+
+ lines.join("\n") + "\n"
+ end
+
protected
attr_writer :_sources, :_values
-
- # Forwards all missing methods to _current
- def method_missing(sym, *args, &block)
- _current.send(sym, *args, &block)
- end
private
- # helper method to recursively collect the source trail for a given source
- def source_trail(source)
+ # helper method that recursively walks a source (descending into AuditMerge
+ # and Audit sources) and collects what the block returns for each record
+ def collect_records(source, value, &block)
case source
- when Array
- source.collect {|s| source_trail(s)}
+ when AuditMerge
+ collection = []
+ 0.upto(source.length-1) do |i|
+ collection << collect_records(source[i], value[i], &block)
+ end
+ collection
when Audit
- source._source_trail
+ source._collect_records(&block)
else
- source
+ yield(source, value)
end
end
- # helper method to recursively collect the value trail for a given source
- def value_trail(source, value)
+ def each_record(source, value, merge_level, merge_index, index, &block)
case source
- when Array
- trail = []
- 0.upto(source.length-1) do |index|
- trail << value_trail(source[index], value[index])
+ when AuditMerge
+ merge_level += 1
+ 0.upto(source.length-1) do |i|
+ each_record(source[i], value[i], merge_level, i, index, &block)
end
- trail
when Audit
- source._value_trail
+ source._each_record(merge_level, merge_index, &block)
else
- value
+ yield(source, value, merge_level, merge_index, index)
end
end
end
end
end
\ No newline at end of file