audit.rb in tap-0.9.0

- old
+ new

@@ -1,187 +1,198 @@
 module Tap
   module Support 
+    
+    # Marks the merge of multiple Audit trails
+    class AuditMerge < Array  # an Array subclass, so a merge marker can be told apart from a plain Array
+      def ==(another)  # equal only to another AuditMerge; super then applies the inherited comparison
+        another.kind_of?(AuditMerge) && super
+      end
+    end
+    
+    # Marks a split in an Audit trail
+    class AuditSplit
+      attr_reader :block  # the object handed to initialize (Audit#_split passes its block)
+      def initialize(block) @block = block end
+        
+      def ==(another)  # equal when another is an AuditSplit whose block == this block
+        another.kind_of?(AuditSplit) && another.block == block
+      end
+    end
+    
+    # Marks the expansion of an Audit trail
+    class AuditExpand
+      attr_reader :index  # the value handed to initialize (Audit#_expand passes the item's position)
+      def initialize(index) @index = index end
+        
+      def ==(another)  # equal when another is an AuditExpand with an == index
+        another.kind_of?(AuditExpand) && another.index == index
+      end
+    end
 
-    # === Overview
-    # 
-    # Audit tracks input and result values passed among tasks within a workflow.  At the end
-    # of a run, each result will have an audit trail detailing the values it has obtained
-    # at various stages, and the source of that value.  The ability to do track back all the
-    # places where a value was changed or modified is very important during workflow debugging.
+    # == Overview
     #
-    # Audit is designed so you can ask a result 'hey where did you come from?' rather than 
-    # being able to ask an input 'what are all the results you ultimately produce?'. Say your 
-    # workflowconsists of 3 sequential tasks [:a, :b, :c]. Tasks :a and :b add one to their input
-    # value, while :c adds two.  Behind the scences, this is what happens when we run the workflow
-    # with an initial input value of 3:
-    #   
-    #   # task :a initializes a new audit with the original 
-    #   # value upon execution
-    #   ... run :a with input 3 ...
-    #   audit = Audit.new(3)
+    # Audit provides a way to track the values (inputs and results) passed
+    # among tasks.  Audits allow you to track inputs as they make their
+    # way through a workflow, and have a great deal of importance for 
+    # debugging and record keeping. 
     #
-    #   # when task :a finishes, it records the new value and 
-    #   # the source of the value (ie task ':a')
-    #   ... task :a adds one ...
-    #   audit._record(:a, 4)
+    # During execution, the group of inputs for a task are used to initialize 
+    # an Audit.  These inputs mark the beginning of an audit trail; every 
+    # task that processes them (including the first) records its result in 
+    # the trail with the task as the 'source' of the result.
     #
-    #   # next the audit is passed to task :b, then task :c
-    #   # each of which records the next source and value
-    #   ... task :b adds one ...
-    #   audit._record(:b, 5)
-    #   ... task :c adds two ...
-    #   audit._record(:c, 7)
+    # Since Audits are meant to be fairly general structures, they can take 
+    # any object as a source, so for illustration let's use some symbols:
     #   
-    #   # at the end, if you want to know how your final
-    #   # value got to be 7, you can look at the source_trail
-    #   # (note the very first source is nil)
-    #   audit._source_trail       # => [nil, :a, :b, :c]
+    #   # initialize a new audit
+    #   a = Audit.new(1, nil)
     #
-    # Audit supports forks by duplicating an audit trail (ie the recorded sources and values) and  
-    # merges by storing the various sources and values in an array.  For example:
+    #   # record some values
+    #   a._record(:A, 2)
+    #   a._record(:B, 3)
     #
-    #   # now let :a fork its results to both :b and :c
-    #   audit = Audit.new(3)
-    #   audit._record(:a, 4)
-    #   fork_b = audit._fork
-    #   fork_c = audit._fork 
+    # Now you can pull up the trails of sources and values, as well as 
+    # information like the current and original values:
     #
-    #   ... tasks :b adds one and :c adds two ...
-    #   fork_b._record(:b, 5)
-    #   fork_c._record(:c, 6)
+    #   a._source_trail      # => [nil, :A, :B]
+    #   a._value_trail       # => [1, 2, 3]
     #
-    #   # at the end you have a separate source trail for 
-    #   # each result.
-    #   fork_b._source_trail      # => [nil, :a, :b]
-    #   fork_c._source_trail      # => [nil, :a, :c]
+    #   a._original          # => 1
+    #   a._original_source   # => nil
     #
-    #   # now lets say you decided to merge both of
-    #   # these trails into a new task :d which adds
-    #   # all values that come to it.
-    #   ... task :d recieves results from :b and :c and adds them ...
-    #   merged_audit = Audit.merge(fork_b, fork_c)
-    #   merged_audit._record(:d, 11)
+    #   a._current           # => 3
+    #   a._current_source    # => :B
     #
-    #   # now you can look back at the full source trail
-    #   # where an array of sources indicates two trails
-    #   # that merged
-    #   merged_audit._source_trail   # => [[[nil,:a,:b], [nil,:a,:c]], :d]
+    # Merges are supported by using an array of the merging trails as the 
+    # source, and an array of the merging values as the initial value.  
     #
-    # An important thing to note is that while in these examples symbols have been used
-    # to represent the tasks, the actual tasks themselves are recorded as sources in practice.
-    # Thus the source trails can be used to access task configurations and other information
-    # that may be useful when assessing an audit. Incidentally, this is one of the reasons why Tap 
-    # is designed to be used with configurations that DO NOT change during execution; if they don't 
-    # change then you're able to look back at your handiwork.
+    #   b = Audit.new(10, nil)
+    #   b._record(:C, 11)
+    #   b._record(:D, 12)  
     #
-    # === Working with Audits
+    #   c = Audit.merge(a, b)
+    #   c._source_trail      # => [ [[nil, :A, :B], [nil, :C, :D]] ]
+    #   c._value_trail       # => [ [[1,2,3], [10, 11, 12]] ]
+    #   c._current           # => [3, 12]
     #
-    # Once an input enters the execution stream, it will be used to initialize an Audit.  
-    # From this point on, the Audit and not the value will be passed among tasks and ultimately
-    # passed out in the results array. 
+    #   c._record(:E, "a string value")
+    #   c._record(:F, {'a' => 'hash value'})
+    #   c._record(:G, ['an', 'array', 'value'])
     #
-    # This must be kept in mind when building tasks into a workflow.  For convenience, Audits are 
-    # constructed to pass unknown methods and most comparision methods to the current value, such 
-    # that they behave like the current value.  It's important to realize that <em>workflow blocks 
-    # (ex: on_complete and condition) recieve Audits and NOT values</em>.
+    #   c._source_trail      # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
+    #   c._value_trail       # => [ [[1,2,3], [10, 11, 12]], "a string value", {'a' => 'hash value'}, ['an', 'array', 'value']]
     #
-    #   t = Tap::Task.new
-    #   t.on_complete do |results|
-    #     results.each do |result|
-    #       # you might expect result to be a value like 10 or "str"
-    #       # in fact it's an Audit, but it passes unknown methods
-    #       # along to it's current value
+    # Audit supports forks by duplicating the source and value trails.  Forks
+    # can be developed independently.  Importantly, Audits are forked during 
+    # a merge; notice the additional record in +a+ doesn't change the source 
+    # trail for +c+ 
     #
-    #       result.class            # => Audit
-    #       result._current         # => "str"
-    #       result == "str"         # => true
-    #       result.upcase           # => "STR"
-    #       
-    #       # the forwarding behavior is for convenience when 
-    #       # making decisions about what to do with a result
-    #       # but be sure you don't get caught!  The only object
-    #       # methods forwarded are '==' and '=~'.  Other methods
-    #       # are NOT forwarded, and =~ cannot (due to context 
-    #       # issues) capture match strings
-    #       
-    #       result.kind_of?(String) # => false
-    #       result =~ /s(\w+)/      # => true
-    #       $1                      # => nil  (watch out! you may expect "tr")
-    #  
-    #     end
-    #   end
-    # 
-    # Audits and NOT values are passed into these workflow blocks because you may need to make 
-    # a workflow decision based on where a value came from (ie you may need the source trail).  
-    # The same does not hold true when processing inputs.  <em>The process method recieves the 
-    # values themselves.</em>  
+    #   a1 = a._fork
     #
-    #   t = Tap::Task.new do |task, input|
-    #     # here in the process block, the input is the current value
-    #     # and NOT the Audit tracking the inputs and results
-    #     input.class               # => Fixnum  (given that we execute t with 3)
-    #     input += 1
-    #   end
+    #   a._record(:X, -1)
+    #   a1._record(:Y, -2)
     #
-    #   results = t.execute(3)
-    #   results.class               # => Array
-    #   results.length              # => 1
-    #   results.first.class         # => Audit
-    #   results.first._current      # => 4
+    #   a._source_trail      # => [nil, :A, :B, :X]
+    #   a1._source_trail     # => [nil, :A, :B, :Y]
+    #   c._source_trail      # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
     #
-    # === Summing it up:
+    # The data structure for an audit gets nasty after a few merges because
+    # the lead array gets more and more nested.  Audit provides iterators
+    # to help gain access, as well as a printing method to visualize the
+    # audit trail:
     #
-    # - Task inputs may be values or Audits
-    # - Task results are always an array of Audits
-    # - Workflow blocks (ex: on_complete and condition) recieve Audits and not values
-    # - The process method recieves the values themselves
+    #   [c._to_s]
+    #   o-[] 1
+    #   o-[A] 2
+    #   o-[B] 3
+    #   | 
+    #   | o-[] 10
+    #   | o-[C] 11
+    #   | o-[D] 12
+    #   | | 
+    #   `-`-o-[E] "a string value"
+    #       o-[F] {"a"=>"hash value"}
+    #       o-[G] ["an", "array", "value"]
     #
+    # In practice, tasks are recorded as sources. Thus source trails can be used  
+    # to access task configurations and other information that may be useful 
+    # when creating reports or making workflow decisions (ex: raise an 
+    # error after looping to a given task too many times). 
+    #
+    #--
+    # TODO:
+    # Create an AuditMerge class to mark merges (don't use arrays).  Track nesting level
+    # of ams; see if you can hook this into _to_s process to make extraction/presentation
+    # of audits more manageable.
+    #
+    # Create a FirstLastArray to minimize the audit data collected.  Allow different audit
+    # modes:
+    # - full        ([] both)
+    # - source_only (fl value)
+    # - minimal     (fl source and value)
+    #
+    # Try to work a _to_s that doesn't repeat the same audit twice.  Think about a format
+    # like:
+    #         | 
+    #   ------|-----+
+    #         |     | 
+    #   ------|-----|-----+ 
+    #         |     |     | 
+    #         `-----`-----`-o-[j] j5
+    #
     class Audit
+      autoload(:PP, 'pp')  # registers PP to be loaded from 'pp' on first use; _to_s references PP.singleline_pp
+      
       class << self
 
-        # Convenience method to create a new Audit for each of the inputs, if the 
-        # input is not already an Audit.  Returns an array of Audits.
-        def register(*inputs)
-          inputs.collect {|input| input.kind_of?(Audit) ? input : Audit.new(input) }
-        end
-
-        # Creates a new Audit from the inputs. The value of the new Audit will be the inputs
-        # array where any Audits are replaced by their _current value.  The source of the
-        # new Audit will be a corresponding array of nils, or Audits if provided.
+        # Creates a new Audit by merging the input audits. The value of the new 
+        # Audit will be an array of the _current values of the audits.  The source 
+        # will be an AuditMerge whose values are forks of the audits. Non-Audit 
+        # sources can be provided; they are initialized to Audits before merging.
         #
-        #   a = Audit.new(1)
-        #   b = Audit.merge(a, 2)
-        #   b._values               # => [[1, 2]]
-        #   b._sources              # => [[a, nil]]
+        #   a = Audit.new
+        #   a._record(:a, 'a')
+        # 
+        #   b = Audit.new
+        #   b._record(:b, 'b')
+        # 
+        #   c = Audit.merge(a, b, 1)
+        #   c._record(:c, 'c')
+        # 
+        #   c._values        # => [['a','b', 1], 'c']
+        #   c._sources       # => [AuditMerge[a, b, Audit.new(1)], :c]
         #
-        # If no inputs are provided, then merge a new Audit with an initial value of nil.
-        # If only one input is provided, then merge returns a new Audit initialized to
-        # the input, or a _fork of the input if it is already an Audit.
-        def merge(*inputs)
-          case inputs.length
+        # If no audits are provided, merge returns a new Audit.  If only one
+        # audit is provided, merge returns a fork of that audit.
+        def merge(*audits)
+          case audits.length
           when 0 then Audit.new
-          when 1
-            input = inputs.first
-            input.kind_of?(Audit) ? input._fork : Audit.new(input)
+          when 1 then audits[0]._fork  # NOTE(review): _fork is called directly; a lone non-Audit input is not wrapped in Audit.new as it is in the else branch
           else
-            values = inputs.collect {|input| input.kind_of?(Audit) ? input._current : input}
-            sources =  inputs.collect {|input| input.kind_of?(Audit) ? input : nil}
+            sources = AuditMerge.new  # the source of the new trail is an AuditMerge, not a plain Array
+            audits.each {|a| sources << (a.kind_of?(Audit) ? a._fork : Audit.new(a)) }  # Audits are forked; anything else starts a new Audit
+            values = audits.collect {|a| a.kind_of?(Audit) ? a._current : a}  # _current of each Audit, or the raw input itself
+          
             Audit.new(values, sources)
           end
         end
-        
       end
       
       attr_reader :_sources, :_values
-
+      
+      # An arbitrary constant used to identify when no inputs have been
+      # provided to Audit.new.  (nil itself cannot be used as nil is a 
+      # valid initial value for an audit trail)
+      AUDIT_NIL = Object.new  # sentinel default for initialize's value argument, compared there with ==
+      
       # A new audit takes a value and/or source.  A nil source is typically given
       # for the original value.  
-      def initialize(value=nil, source=nil)
+      def initialize(value=AUDIT_NIL, source=nil)  # the AUDIT_NIL default tells "no value given" apart from an explicit nil
         @_sources = []
         @_values = []
         
-        _record(source, value)
+        _record(source, value) unless value == AUDIT_NIL  # with no value the trail starts empty
       end
 
       # Records the next value produced by the source.  When an audit is
       # passed as a value, record will record the current value of the audit.
       # Record will similarly resolve every audit in an array containing audits.
@@ -191,16 +202,16 @@
       #    a = Audit.new(1)
       #    b = Audit.new(2)
       #    c = Audit.new(3)
       #
       #    c.record(:a, a) 
-      #    c.sources # => [:a]
-      #    c.values # => [1]
+      #    c.sources           # => [:a]
+      #    c.values            # => [1]
       # 
       #    c.record(:ab, [a,b])
-      #    c.sources # => [:a, :ab]
-      #    c.values # => [1, [1, 2]]
+      #    c.sources           # => [:a, :ab]
+      #    c.values            # => [1, [1, 2]]
       def _record(source, value)
         _sources << source
         _values << value
         self
       end
@@ -223,87 +234,39 @@
       # The current (ie last) source recorded in the Audit
       def _current_source
         _sources.last
       end
 
-      # The index of the last occurence of source in the audit (Note
-      # equality is based on the object id of the specified source)
-      def _index_last(source)
-        _sources.rindex(source)
-      end
-
-      # The value recorded with the last occurence of source in the audit.  (Note
-      # equality is based on the object id of the specified source)
-      def _last(source)
-        index = _index_last(source)
-        index.nil? ? nil : _values[index]
-      end
-
-      # Returns the value at the specified index.
-      def _input(index)
-        _values[index]
-      end
-
-      # Returns the input to the last occurence of source in the audit (ie
-      # the value prior to this source).  Example:
-      #
-      #    a = Audit.new
-      #    a.record(:a, 'a') 
-      #    a.record(:b, 'b')
-      #
-      #    a.input_last(:a) # => nil
-      #    a.input_last(:b) # => 'a'
-      def _input_last(source)
-        index = _index_last(source)
-        _input(index-1)
-      end
-
-      # Returns the value after the specfied index
-      def _output(index)
-        _values[index+1]
-      end
-
-      # Returns the output of the last occurence of source in the audit (ie
-      # the value at this source).  Example:
-      #
-      #    a = Audit.new
-      #    a.record(:a, 'a') 
-      #    a.record(:b, 'b')
-      #
-      #    a.output_last(:a) # => 'a'
-      #    a.output_last(:b) # => 'b'
-      def _output_last(source)
-        index = _index_last(source)
-        _output(index-1)
-      end
-
       # Searches back and recursively (if the source is an audit) collects all sources 
       # for the current value.
       def _source_trail
-        _sources.collect do |source|
-          source_trail(source)
-        end
+        _collect_records {|source, value| source}  # keep only the source of each record
       end
       
       # Searches back and recursively (if the source is an audit) collects all values 
       # leading to the current value.
       def _value_trail
-        trail = []
-        0.upto(_sources.length-1) do |index|
-          trail << value_trail(_sources[index], _values[index])
+        _collect_records {|source, value| value}  # keep only the value of each record
+      end
+      
+      def _collect_records(&block) # :yields: source, value
+        collection = []  # one entry per recorded (source, value) pair, in recording order
+        0.upto(_sources.length-1) do |i|
+          collection << collect_records(_sources[i], _values[i], &block)  # the private helper descends into AuditMerge and Audit sources
         end
-        trail
+        collection
       end
+      
+      def _each_record(merge_level=0, merge_index=0, &block) # :yields: source, value, merge_level, merge_index, index
+        0.upto(_sources.length-1) do |i|
+          each_record(_sources[i], _values[i], merge_level, merge_index, i, &block)  # i is handed on as the record's index within this audit
+        end
+      end
 
-      # Produces a new Audit suitable for development along a separate path, by merging 
-      # the input sources with self.  The value of the new audit will be an array of the 
-      # current values of the input sources and self.
-      #
-      # If no sources are provided, then _merge returns _fork.
-      def _merge(*sources)
-        sources.unshift(self)
-        Audit.merge(*sources)
+      # Creates a new Audit by merging self and the input audits, using Audit.merge.
+      def _merge(*audits)
+        Audit.merge(self, *audits)  # self is always the first of the merged audits
       end
       
       # Produces a new Audit with duplicate sources and values, suitable for
       # separate development along a separate path.
       def _fork
@@ -311,104 +274,142 @@
         a._sources = _sources.dup
         a._values = _values.dup
         a
       end
 
-      # Produces a new Audit from self and records the next value as
-      # the return value of the block.  The block itself will be recored
-      # as the value's source.
-      def _split(&block)
-        sp = Audit.new(nil, self)
-        sp._record(block, yield(_current))
-        sp
+      # _forks self and records the return value of the block as the next value, with AuditSplit.new(block) as its source.
+      def _split(&block) # :yields: _current
+        _fork._record(AuditSplit.new(block), yield(_current))
       end
-
-      alias _eql ==
       
-      # Compares _current with another using == 
-      def ==(another) 
-        _current == another 
+      # _forks self for each member in _current.  Records the next value as
+      # [item, AuditExpand.new(<index of item>)].  Raises an error if _current 
+      # does not respond to each.
+      def _expand
+        expanded = []  # one fork of self per item yielded by _current.each
+        _current.each do |value|
+          expanded << _fork._record(AuditExpand.new(expanded.length), value)  # expanded.length is the position of value among the items seen so far
+        end
+        expanded
       end
-
-      alias _match =~
       
-      # The method =~ does NOT work properly with an audit.  As with other 
-      # methods, =~ is aliased to work on _current.  However, variables like 
-      # $1, $2, etc cannot be passed back.  As a result:
-      #
-      #   a = Audit.new "abcd"
-      #   a =~ /ab(\w)/           # => true
-      #   $1                      # => nil (should be 'c')
-      #   
-      #   # instead use _current directly...
-      #   a._current =~ /ab(\w)/  # => true
-      #   $1                      # => 'c'
-      #
-      # Note the same applies to !~, as it executes through =~
-      def =~(regexp)
-        # note: this is not ideal... the variables $1, $2,
-        # etc are not sent back to the executing context (binding)
-        _current =~ regexp
+      # Returns true if the _sources and _values for self are equal
+      # to those of another.
+      def ==(another)
+        another.kind_of?(Audit) && self._sources == another._sources && self._values == another._values  # a non-Audit is never equal, even if it equals _current
       end
+      
+      # A kind of pretty-print for Audits.  See the example in the overview.
+      def _to_s
+        # TODO -- find a way to avoid repeating groups
+        
+        group = []  # the formatted lines of the current run of records
+        groups = [group]  # the groups collected since the last move to a deeper merge level
+        extended_groups = [groups]  # every list of groups, in the order they were opened
+        group_merges = []  # object_ids of groups drawn with corner connectors
+        extended_group_merges = []  # object_ids of groups whose connector row also rewrites the prefix rails
+        current_level = nil
+        current_index = nil
+        
+        _each_record do |source, value, merge_level, merge_index, index|  
+          source_str, value_str = if block_given?  # a block, when given, supplies [source_str, value_str]
+            yield(source, value)
+          else
+            [source, value == nil ? '' : PP.singleline_pp(value, '')]  # default: source as-is; a nil value prints as ''
+          end
+          
+          if !group.empty? && (merge_level != current_level || index == 0)  # open a new group when the level changes or a trail restarts at index 0
+            unless merge_level <= current_level  # deeper than the previous record: open a new list of groups
+              groups = [] 
+              extended_groups << groups
+            end
 
-      # this shouldn't be necessary as Comparable feeds all it's methods
-      #include Comparable
+            group = []
+            groups << group 
+            
+            if merge_level < current_level  # shallower than the previous record
+              if merge_index == 0
+                extended_group_merges << group.object_id
+              end
+              
+              unless index == 0   
+                group_merges << group.object_id
+              end
+            end
+          end
       
-      # Compares _current with another using <=> if <=> is defined
-      # for _current.  Otherwise returns 0.
-      #def <=>(another) 
-      #  _current.respond_to?(:<=>) ? _current <=> another : 0
-      #end
+          group << "o-[#{source_str}] #{value_str}"
+          current_level = merge_level
+          current_index = merge_index  # NOTE(review): current_index is assigned here but never read in this method
+        end
 
-      # CONSIDER FORWARDING ALL OF THESE!
-      # 
-      #[:eql?, :equal?, :is_a?, :kind_of?, :nil?, :respond_to?, :tainted?, :to_str].each do |method|
-      #  alias_name = "_#{method}".to_sym
-      #  alias alias_name method
-      #  define_method(method) do |*args|
-      #    _current.send(method, *args)
-      #  end
-      #end
+        lines = []  # second pass: render the collected groups with their prefixes
+        group_prefix = ""
+        extended_groups.each do |ext_groups|
+          indentation = 0  # number of leading groups drawn as blank space instead of rails
 
-      #alias _cmp ===
-      #def ===(another) _current.send('===', another) end
-
+          ext_groups.each_with_index do |ext_group, group_num|
+            ext_group.each_with_index do |line, line_num|
+              if line_num == 0
+                unless lines.empty?
+                  lines << group_prefix + "  " * indentation + "| " * (group_num-indentation)  # spacer row of rails ahead of each new group
+                end
+                
+                if group_merges.include?(ext_group.object_id)  # flagged group: its first line gets corner connectors instead of rails
+                  lines << group_prefix + "  " * indentation + "`-" * (group_num-indentation) + line
+                  indentation = group_num
+                  
+                  if extended_group_merges.include?(ext_group.object_id) 
+                    lines.last.gsub!(/\| \s*/) {|match| "`-" + "-" * (match.length - 2)}  # turn each rail and its padding in that line into a corner plus dashes
+                    group_prefix.gsub!(/\| /, " ")
+                  end
+                  next  # the first line of this group has already been emitted
+                end
+              end
+              
+              lines << group_prefix + "  " * indentation + "| " * (group_num-indentation) + line
+            end
+          end
+          
+          group_prefix += "  " * (ext_groups.length-1) + "| "  # later lists of groups are pushed right and gain one more rail
+        end
+        
+        lines.join("\n") + "\n"  # one rendered line per row, with a trailing newline
+      end
+      
       protected  
 
       attr_writer :_sources, :_values
-      
-      # Forwards all missing methods to _current
-      def method_missing(sym, *args, &block)
-        _current.send(sym, *args, &block)
-      end
 
       private
       
-      # helper method to recursively collect the source trail for a given source
-      def source_trail(source)
+      # helper method to recursively collect the block's result for each record reachable from a given source
+      def collect_records(source, value, &block)
         case source
-        when Array
-          source.collect {|s| source_trail(s)}
+        when AuditMerge  # a merge: recurse into each merged source with the value at the same position
+          collection = []
+          0.upto(source.length-1) do |i|
+            collection << collect_records(source[i], value[i], &block)
+          end
+          collection
         when Audit
-          source._source_trail
+          source._collect_records(&block)  # a nested audit contributes its own collection
         else
-          source
+          yield(source, value)  # any other source: the block's return becomes the entry
         end
       end
       
-      # helper method to recursively collect the value trail for a given source
-      def value_trail(source, value)
+      def each_record(source, value, merge_level, merge_index, index, &block)
         case source
-        when Array
-          trail = []
-          0.upto(source.length-1) do |index|
-            trail << value_trail(source[index], value[index])
+        when AuditMerge
+          merge_level += 1  # records found beneath a merge are reported one level deeper
+          0.upto(source.length-1) do |i|
+            each_record(source[i], value[i], merge_level, i, index, &block)  # i becomes the merge_index of that branch
           end
-          trail
         when Audit
-          source._value_trail
+          source._each_record(merge_level, merge_index, &block)  # a nested audit yields its own records at the given level
         else
-          value
+          yield(source, value, merge_level, merge_index, index)
         end
       end
     end
   end
 end
\ No newline at end of file