lib/tap/support/audit.rb in tap-0.8.0 vs lib/tap/support/audit.rb in tap-0.9.0

- old
+ new

@@ -1,187 +1,198 @@ module Tap module Support + + # Marks the merge of multiple Audit trails + class AuditMerge < Array + def ==(another) + another.kind_of?(AuditMerge) && super + end + end + + # Marks a split in an Audit trail + class AuditSplit + attr_reader :block + def initialize(block) @block = block end + + def ==(another) + another.kind_of?(AuditSplit) && another.block == block + end + end + + # Marks the expansion of an Audit trail + class AuditExpand + attr_reader :index + def initialize(index) @index = index end + + def ==(another) + another.kind_of?(AuditExpand) && another.index == index + end + end - # === Overview - # - # Audit tracks input and result values passed among tasks within a workflow. At the end - # of a run, each result will have an audit trail detailing the values it has obtained - # at various stages, and the source of that value. The ability to do track back all the - # places where a value was changed or modified is very important during workflow debugging. + # == Overview # - # Audit is designed so you can ask a result 'hey where did you come from?' rather than - # being able to ask an input 'what are all the results you ultimately produce?'. Say your - # workflowconsists of 3 sequential tasks [:a, :b, :c]. Tasks :a and :b add one to their input - # value, while :c adds two. Behind the scences, this is what happens when we run the workflow - # with an initial input value of 3: - # - # # task :a initializes a new audit with the original - # # value upon execution - # ... run :a with input 3 ... - # audit = Audit.new(3) + # Audit provides a way to track the values (inputs and results) passed + # among tasks. Audits allow you to track inputs as they make their + # way through a workflow, and have a great deal of importance for + # debugging and record keeping. # - # # when task :a finishes, it records the new value and - # # the source of the value (ie task ':a') - # ... task :a adds one ... 
- # audit._record(:a, 4) + # During execution, the group of inputs for a task are used to initialize + # an Audit. These inputs mark the beginning of an audit trail; every + # task that processes them (including the first) records its result in + # the trail with the task as the 'source' of the result. # - # # next the audit is passed to task :b, then task :c - # # each of which records the next source and value - # ... task :b adds one ... - # audit._record(:b, 5) - # ... task :c adds two ... - # audit._record(:c, 7) + # Since Audits are meant to be fairly general structures, they can take + # any object as a source, so for illustration let's use some symbols: # - # # at the end, if you want to know how your final - # # value got to be 7, you can look at the source_trail - # # (note the very first source is nil) - # audit._source_trail # => [nil, :a, :b, :c] + # # initialize a new audit + # a = Audit.new(1, nil) # - # Audit supports forks by duplicating an audit trail (ie the recorded sources and values) and - # merges by storing the various sources and values in an array. For example: + # # record some values + # a._record(:A, 2) + # a._record(:B, 3) # - # # now let :a fork its results to both :b and :c - # audit = Audit.new(3) - # audit._record(:a, 4) - # fork_b = audit._fork - # fork_c = audit._fork + # Now you can pull up the trails of sources and values, as well as + # information like the current and original values: # - # ... tasks :b adds one and :c adds two ... - # fork_b._record(:b, 5) - # fork_c._record(:c, 6) + # a._source_trail # => [nil, :A, :B] + # a._value_trail # => [1, 2, 3] # - # # at the end you have a separate source trail for - # # each result. - # fork_b._source_trail # => [nil, :a, :b] - # fork_c._source_trail # => [nil, :a, :c] + # a._original # => 1 + # a._original_source # => nil # - # # now lets say you decided to merge both of - # # these trails into a new task :d which adds - # # all values that come to it. - # ...
task :d recieves results from :b and :c and adds them ... - # merged_audit = Audit.merge(fork_b, fork_c) - # merged_audit._record(:d, 11) + # a._current # => 3 + # a._current_source # => :B # - # # now you can look back at the full source trail - # # where an array of sources indicates two trails - # # that merged - # merged_audit._source_trail # => [[[nil,:a,:b], [nil,:a,:c]], :d] + # Merges are supported by using an array of the merging trails as the + # source, and an array of the merging values as the initial value. # - # An important thing to note is that while in these examples symbols have been used - # to represent the tasks, the actual tasks themselves are recorded as sources in practice. - # Thus the source trails can be used to access task configurations and other information - # that may be useful when assessing an audit. Incidentally, this is one of the reasons why Tap - # is designed to be used with configurations that DO NOT change during execution; if they don't - # change then you're able to look back at your handiwork. + # b = Audit.new(10, nil) + # b._record(:C, 11) + # b._record(:D, 12) # - # === Working with Audits + # c = Audit.merge(a, b) + # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]] ] + # c._value_trail # => [ [[1,2,3], [10, 11, 12]] ] + # c._current # => [3, 12] # - # Once an input enters the execution stream, it will be used to initialize an Audit. - # From this point on, the Audit and not the value will be passed among tasks and ultimately - # passed out in the results array. + # c._record(:E, "a string value") + # c._record(:F, {'a' => 'hash value'}) + # c._record(:G, ['an', 'array', 'value']) # - # This must be kept in mind when building tasks into a workflow. For convenience, Audits are - # constructed to pass unknown methods and most comparision methods to the current value, such - # that they behave like the current value. 
It's important to realize that <em>workflow blocks - # (ex: on_complete and condition) recieve Audits and NOT values</em>. + # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G] + # c._value_trail # => [ [[1,2,3], [10, 11, 12]], "a string value", {'a' => 'hash value'}, ['an', 'array', 'value']] # - # t = Tap::Task.new - # t.on_complete do |results| - # results.each do |result| - # # you might expect result to be a value like 10 or "str" - # # in fact it's an Audit, but it passes unknown methods - # # along to it's current value + # Audit supports forks by duplicating the source and value trails. Forks + # can be developed independently. Importantly, Audits are forked during + # a merge; notice the additional record in +a+ doesn't change the source + # trail for +c+ # - # result.class # => Audit - # result._current # => "str" - # result == "str" # => true - # result.upcase # => "STR" - # - # # the forwarding behavior is for convenience when - # # making decisions about what to do with a result - # # but be sure you don't get caught! The only object - # # methods forwarded are '==' and '=~'. Other methods - # # are NOT forwarded, and =~ cannot (due to context - # # issues) capture match strings - # - # result.kind_of?(String) # => false - # result =~ /s(\w+)/ # => true - # $1 # => nil (watch out! you may expect "tr") - # - # end - # end - # - # Audits and NOT values are passed into these workflow blocks because you may need to make - # a workflow decision based on where a value came from (ie you may need the source trail). - # The same does not hold true when processing inputs. 
<em>The process method recieves the - # values themselves.</em> + # a1 = a._fork # - # t = Tap::Task.new do |task, input| - # # here in the process block, the input is the current value - # # and NOT the Audit tracking the inputs and results - # input.class # => Fixnum (given that we execute t with 3) - # input += 1 - # end + # a._record(:X, -1) + # a1._record(:Y, -2) # - # results = t.execute(3) - # results.class # => Array - # results.length # => 1 - # results.first.class # => Audit - # results.first._current # => 4 + # a._source_trail # => [nil, :A, :B, :X] + # a1._source_trail # => [nil, :A, :B, :Y] + # c._source_trail # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G] # - # === Summing it up: + # The data structure for an audit gets nasty after a few merges because + # the lead array gets more and more nested. Audit provides iterators + # to help gain access, as well as a printing method to visualize the + # audit trail: # - # - Task inputs may be values or Audits - # - Task results are always an array of Audits - # - Workflow blocks (ex: on_complete and condition) recieve Audits and not values - # - The process method recieves the values themselves + # [c._to_s] + # o-[] 1 + # o-[A] 2 + # o-[B] 3 + # | + # | o-[] 10 + # | o-[C] 11 + # | o-[D] 12 + # | | + # `-`-o-[E] "a string value" + # o-[F] {"a"=>"hash value"} + # o-[G] ["an", "array", "value"] # + # In practice, tasks are recorded as sources. Thus source trails can be used + # to access task configurations and other information that may be useful + # when creating reports or making workflow decisions (ex: raise an + # error after looping to a given task too many times). + # + #-- + # TODO: + # Create an AuditMerge class to mark merges (don't use arrays). Track nesting level + # of ams; see if you can hook this into _to_s process to make extraction/presentation + # of audits more manageable. + # + # Create a FirstLastArray to minimize the audit data collected.
Allow different audit + # modes: + # - full ([] both) + # - source_only (fl value) + # - minimal (fl source and value) + # + # Try to work a _to_s that doesn't repeat the same audit twice. Think about a format + # like: + # | + # ------|-----+ + # | | + # ------|-----|-----+ + # | | | + # `-----`-----`-o-[j] j5 + # class Audit + autoload(:PP, 'pp') + class << self - # Convenience method to create a new Audit for each of the inputs, if the - # input is not already an Audit. Returns an array of Audits. - def register(*inputs) - inputs.collect {|input| input.kind_of?(Audit) ? input : Audit.new(input) } - end - - # Creates a new Audit from the inputs. The value of the new Audit will be the inputs - # array where any Audits are replaced by their _current value. The source of the - # new Audit will be a corresponding array of nils, or Audits if provided. + # Creates a new Audit by merging the input audits. The value of the new + # Audit will be an array of the _current values of the audits. The source + # will be an AuditMerge whose values are forks of the audits. Non-Audit + # sources can be provided; they are initialized to Audits before merging. # - # a = Audit.new(1) - # b = Audit.merge(a, 2) - # b._values # => [[1, 2]] - # b._sources # => [[a, nil]] + # a = Audit.new + # a._record(:a, 'a') + # + # b = Audit.new + # b._record(:b, 'b') + # + # c = Audit.merge(a, b, 1) + # c._record(:c, 'c') + # + # c._values # => [['a','b', 1], 'c'] + # c._sources # => [AuditMerge[a, b, Audit.new(1)], :c] # - # If no inputs are provided, then merge a new Audit with an initial value of nil. - # If only one input is provided, then merge returns a new Audit initialized to - # the input, or a _fork of the input if it is already an Audit. - def merge(*inputs) - case inputs.length + # If no audits are provided, merge returns a new Audit. If only one + # audit is provided, merge returns a fork of that audit. 
+ def merge(*audits) + case audits.length when 0 then Audit.new - when 1 - input = inputs.first - input.kind_of?(Audit) ? input._fork : Audit.new(input) + when 1 then audits[0]._fork else - values = inputs.collect {|input| input.kind_of?(Audit) ? input._current : input} - sources = inputs.collect {|input| input.kind_of?(Audit) ? input : nil} + sources = AuditMerge.new + audits.each {|a| sources << (a.kind_of?(Audit) ? a._fork : Audit.new(a)) } + values = audits.collect {|a| a.kind_of?(Audit) ? a._current : a} + Audit.new(values, sources) end end - end attr_reader :_sources, :_values - + + # An arbitrary constant used to identify when no inputs have been + # provided to Audit.new. (nil itself cannot be used as nil is a + # valid initial value for an audit trail) + AUDIT_NIL = Object.new + # A new audit takes a value and/or source. A nil source is typically given # for the original value. - def initialize(value=nil, source=nil) + def initialize(value=AUDIT_NIL, source=nil) @_sources = [] @_values = [] - _record(source, value) + _record(source, value) unless value == AUDIT_NIL end # Records the next value produced by the source. When an audit is # passed as a value, record will record the current value of the audit. # Record will similarly resolve every audit in an array containing audits. 
@@ -191,16 +202,16 @@ # a = Audit.new(1) # b = Audit.new(2) # c = Audit.new(3) # # c.record(:a, a) - # c.sources # => [:a] - # c.values # => [1] + # c.sources # => [:a] + # c.values # => [1] # # c.record(:ab, [a,b]) - # c.sources # => [:a, :ab] - # c.values # => [1, [1, 2]] + # c.sources # => [:a, :ab] + # c.values # => [1, [1, 2]] def _record(source, value) _sources << source _values << value self end @@ -223,87 +234,39 @@ # The current (ie last) source recorded in the Audit def _current_source _sources.last end - # The index of the last occurence of source in the audit (Note - # equality is based on the object id of the specified source) - def _index_last(source) - _sources.rindex(source) - end - - # The value recorded with the last occurence of source in the audit. (Note - # equality is based on the object id of the specified source) - def _last(source) - index = _index_last(source) - index.nil? ? nil : _values[index] - end - - # Returns the value at the specified index. - def _input(index) - _values[index] - end - - # Returns the input to the last occurence of source in the audit (ie - # the value prior to this source). Example: - # - # a = Audit.new - # a.record(:a, 'a') - # a.record(:b, 'b') - # - # a.input_last(:a) # => nil - # a.input_last(:b) # => 'a' - def _input_last(source) - index = _index_last(source) - _input(index-1) - end - - # Returns the value after the specfied index - def _output(index) - _values[index+1] - end - - # Returns the output of the last occurence of source in the audit (ie - # the value at this source). Example: - # - # a = Audit.new - # a.record(:a, 'a') - # a.record(:b, 'b') - # - # a.output_last(:a) # => 'a' - # a.output_last(:b) # => 'b' - def _output_last(source) - index = _index_last(source) - _output(index-1) - end - # Searches back and recursively (if the source is an audit) collects all sources # for the current value. 
def _source_trail - _sources.collect do |source| - source_trail(source) - end + _collect_records {|source, value| source} end # Searches back and recursively (if the source is an audit) collects all values # leading to the current value. def _value_trail - trail = [] - 0.upto(_sources.length-1) do |index| - trail << value_trail(_sources[index], _values[index]) + _collect_records {|source, value| value} + end + + def _collect_records(&block) # :yields: source, value + collection = [] + 0.upto(_sources.length-1) do |i| + collection << collect_records(_sources[i], _values[i], &block) end - trail + collection end + + def _each_record(merge_level=0, merge_index=0, &block) # :yields: source, value, merge_level, merge_index, index + 0.upto(_sources.length-1) do |i| + each_record(_sources[i], _values[i], merge_level, merge_index, i, &block) + end + end - # Produces a new Audit suitable for development along a separate path, by merging - # the input sources with self. The value of the new audit will be an array of the - # current values of the input sources and self. - # - # If no sources are provided, then _merge returns _fork. - def _merge(*sources) - sources.unshift(self) - Audit.merge(*sources) + # Creates a new Audit by merging self and the input audits, using Audit#merge. + def _merge(*audits) + Audit.merge(self, *audits) end # Produces a new Audit with duplicate sources and values, suitable for # separate development along a separate path. def _fork @@ -311,104 +274,142 @@ a._sources = _sources.dup a._values = _values.dup a end - # Produces a new Audit from self and records the next value as - # the return value of the block. The block itself will be recored - # as the value's source. 
- def _split(&block) - sp = Audit.new(nil, self) - sp._record(block, yield(_current)) - sp + # _forks self and records the next value as [<return from block>, AuditSplit.new(block)] + def _split(&block) # :yields: _current + _fork._record(AuditSplit.new(block), yield(_current)) end - - alias _eql == - # Compares _current with another using == - def ==(another) - _current == another + # _forks self for each member in _current. Records the next value as + # [item, AuditExpand.new(<index of item>)]. Raises an error if _current + # does not respond to each. + def _expand + expanded = [] + _current.each do |value| + expanded << _fork._record(AuditExpand.new(expanded.length), value) + end + expanded end - - alias _match =~ - # The method =~ does NOT work properly with an audit. As with other - # methods, =~ is aliased to work on _current. However, variables like - # $1, $2, etc cannot be passed back. As a result: - # - # a = Audit.new "abcd" - # a =~ /ab(\w)/ # => true - # $1 # => nil (should be 'c') - # - # # instead use _current directly... - # a._current =~ /ab(\w)/ # => true - # $1 # => 'c' - # - # Note the same applies to !~, as it executes through =~ - def =~(regexp) - # note: this is not ideal... the variables $1, $2, - # etc are not sent back to the executing context (binding) - _current =~ regexp + # Returns true if the _sources and _values for self are equal + # to those of another. + def ==(another) + another.kind_of?(Audit) && self._sources == another._sources && self._values == another._values end + + # A kind of pretty-print for Audits. See the example in the overview. + def _to_s + # TODO -- find a way to avoid repeating groups + + group = [] + groups = [group] + extended_groups = [groups] + group_merges = [] + extended_group_merges = [] + current_level = nil + current_index = nil + + _each_record do |source, value, merge_level, merge_index, index| + source_str, value_str = if block_given? + yield(source, value) + else + [source, value == nil ? 
'' : PP.singleline_pp(value, '')] + end + + if !group.empty? && (merge_level != current_level || index == 0) + unless merge_level <= current_level + groups = [] + extended_groups << groups + end - # this shouldn't be necessary as Comparable feeds all it's methods - #include Comparable + group = [] + groups << group + + if merge_level < current_level + if merge_index == 0 + extended_group_merges << group.object_id + end + + unless index == 0 + group_merges << group.object_id + end + end + end - # Compares _current with another using <=> if <=> is defined - # for _current. Otherwise returns 0. - #def <=>(another) - # _current.respond_to?(:<=>) ? _current <=> another : 0 - #end + group << "o-[#{source_str}] #{value_str}" + current_level = merge_level + current_index = merge_index + end - # CONSIDER FORWARDING ALL OF THESE! - # - #[:eql?, :equal?, :is_a?, :kind_of?, :nil?, :respond_to?, :tainted?, :to_str].each do |method| - # alias_name = "_#{method}".to_sym - # alias alias_name method - # define_method(method) do |*args| - # _current.send(method, *args) - # end - #end + lines = [] + group_prefix = "" + extended_groups.each do |ext_groups| + indentation = 0 - #alias _cmp === - #def ===(another) _current.send('===', another) end - + ext_groups.each_with_index do |ext_group, group_num| + ext_group.each_with_index do |line, line_num| + if line_num == 0 + unless lines.empty? 
+ lines << group_prefix + " " * indentation + "| " * (group_num-indentation) + end + + if group_merges.include?(ext_group.object_id) + lines << group_prefix + " " * indentation + "`-" * (group_num-indentation) + line + indentation = group_num + + if extended_group_merges.include?(ext_group.object_id) + lines.last.gsub!(/\| \s*/) {|match| "`-" + "-" * (match.length - 2)} + group_prefix.gsub!(/\| /, " ") + end + next + end + end + + lines << group_prefix + " " * indentation + "| " * (group_num-indentation) + line + end + end + + group_prefix += " " * (ext_groups.length-1) + "| " + end + + lines.join("\n") + "\n" + end + protected attr_writer :_sources, :_values - - # Forwards all missing methods to _current - def method_missing(sym, *args, &block) - _current.send(sym, *args, &block) - end private - # helper method to recursively collect the source trail for a given source - def source_trail(source) + # helper method to recursively collect the value trail for a given source + def collect_records(source, value, &block) case source - when Array - source.collect {|s| source_trail(s)} + when AuditMerge + collection = [] + 0.upto(source.length-1) do |i| + collection << collect_records(source[i], value[i], &block) + end + collection when Audit - source._source_trail + source._collect_records(&block) else - source + yield(source, value) end end - # helper method to recursively collect the value trail for a given source - def value_trail(source, value) + def each_record(source, value, merge_level, merge_index, index, &block) case source - when Array - trail = [] - 0.upto(source.length-1) do |index| - trail << value_trail(source[index], value[index]) + when AuditMerge + merge_level += 1 + 0.upto(source.length-1) do |i| + each_record(source[i], value[i], merge_level, i, index, &block) end - trail when Audit - source._value_trail + source._each_record(merge_level, merge_index, &block) else - value + yield(source, value, merge_level, merge_index, index) end end end end end \ No 
newline at end of file