audit.rb in tap-0.12.0

- old
+ new
@@ -1,411 +1,332 @@
 module Tap
-  module Support 
+  module Support
     
-    # Marks the merge of multiple Audit trails
-    class AuditMerge < Array
-      
-      # True if another is an AuditMerge and passes Array#==
-      def ==(another)
-        another.kind_of?(AuditMerge) && super
-      end
-    end
-    
-    # Marks the expansion of an Audit trail
-    class AuditIterate
-      attr_reader :index
-      def initialize(index) @index = index end
-      
-      # True if another is an AuditIterate with the same index.
-      def ==(another)
-        another.kind_of?(AuditIterate) && another.index == index
-      end
-      
-      # Returns a string like '_iterate(<index>)'.
-      def to_s
-        "_iterate(#{index})"
-      end
-    end
-
-    # Audit provides a way to track the values (inputs and results) passed among 
-    # tasks or, more generally, any Executable.  Audits allow you to track inputs
-    # as they make their way through a workflow, and have great utility in 
-    # debugging and record keeping. 
+    # Audit provides a way to track the values passed among tasks or, more 
+    # generally, any Executable.  Audits collectively build a {directed
+    # acyclic graph}[http://en.wikipedia.org/wiki/Directed_acyclic_graph] 
+    # of task execution and have great utility in debugging and record keeping.
     #
-    # During execution, the inputs to a task are used to initialize an Audit.
-    # These inputs are the original value of the audit and mark the begining 
-    # of an audit trail; every task adds to the trail by recording it's result
-    # and itself as the 'source' of the result.
+    # Audits record a key, a current value, and the previous audit(s) in the
+    # trail.  Keys are arbitrary identifiers of where the value comes from.
+    # To illustrate, let's use symbols as keys.
     #
-    # Audits can take any object as a source, so for illustration lets use some
-    # symbols:
-    #   
     #   # initialize a new audit
-    #   a = Audit.new(1, nil)
+    #   _a = Audit.new(:one, 1)
+    #   _a.key                              # => :one
+    #   _a.value                            # => 1
     #
-    #   # record some values
-    #   a._record(:A, 2)
-    #   a._record(:B, 3)
+    #   # build a short trail
+    #   _b = Audit.new(:two, 2, _a)
+    #   _c = Audit.new(:three, 3, _b)
     #
-    # Now you can pull up the source and value trails, as well as the current
-    # and original values:
+    #   _a.sources                          # => []
+    #   _b.sources                          # => [_a]
+    #   _c.sources                          # => [_b]
     #
-    #   a._source_trail      # => [nil, :A, :B]
-    #   a._value_trail       # => [1, 2, 3]
+    # Audits allow you to track back through the sources of each audit to build
+    # a trail describing how a particular value was produced.
     #
-    #   a._original          # => 1
-    #   a._original_source   # => nil
+    #   _c.trail                            # => [_a,_b,_c]
+    #   _c.trail {|audit| audit.key }       # => [:one, :two, :three]
+    #   _c.trail {|audit| audit.value }     # => [1,2,3]
     #
-    #   a._current           # => 3
-    #   a._current_source    # => :B
+    # Any number of audits may share the same source, so forks are naturally
+    # supported.
     #
-    # Merges are supported by using an array of the merged trails (actually
-    # an AuditMerge) as the source, and an array of the merged values as the 
-    # original value.  
+    #   _d = Audit.new(:four, 4, _b)
+    #   _d.trail                            # => [_a,_b,_d]
     #
-    #   b = Audit.new(10, nil)
-    #   b._record(:C, 11)
-    #   b._record(:D, 12)  
+    #   _e = Audit.new(:five, 5, _b)
+    #   _e.trail                            # => [_a,_b,_e]
     #
-    #   c = Audit.merge(a, b)
-    #   c._source_trail      # => [ [[nil, :A, :B], [nil, :C, :D]] ]
-    #   c._value_trail       # => [ [[1,2,3], [10, 11, 12]] ]
-    #   c._current           # => [3, 12]
+    # Merges are supported by specifying more than one source.  Merges have 
+    # the effect of nesting audit trails within an array:
     #
-    #   c._record(:E, "a string value")
-    #   c._record(:F, {'a' => 'hash value'})
-    #   c._record(:G, ['an', 'array', 'value'])
+    #   _f = Audit.new(:six, 6)
+    #   _g = Audit.new(:seven, 7, _f)
+    #   _h = Audit.new(:eight, 8, [_c,_d,_g])
+    #   _h.trail                            # => [[[_a,_b,_c], [_a,_b,_d], [_f,_g]], _h]
+    #   
+    # Nesting can get quite ugly after a couple of merges, so Audit provides a
+    # scalable pretty-print dump that helps visualize the audit trail.
     #
-    #   c._source_trail      # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
-    #   c._value_trail       # => [ [[1,2,3], [10, 11, 12]], "a string value", {'a' => 'hash value'}, ['an', 'array', 'value']]
+    #   "\n" + _h.dump
+    #   # => %q{
+    #   # o-[one] 1
+    #   # o-[two] 2
+    #   # |
+    #   # |-o-[three] 3
+    #   # | |
+    #   # `---o-[four] 4
+    #   #   | |
+    #   #   | | o-[six] 6
+    #   #   | | o-[seven] 7
+    #   #   | | |
+    #   #   `-`-`-o-[eight] 8
+    #   # }
     #
-    # Audit supports forks by duplicating the source and value trails.  Forks
-    # can be developed independently.  Audits are also forked during a merge; 
-    # notice the additional record in 'a' doesn't change the source trail for
-    # 'c':
+    # In practice, tasks are recorded as keys. Thus audit trails can be used
+    # to access task configurations and other information that may be useful
+    # when creating reports or making workflow decisions.  Note that by
+    # convention Audits and non-Audit methods that return Audits are
+    # prefixed with an underscore.
     #
-    #   a1 = a._fork
-    #
-    #   a._record(:X, -1)
-    #   a1._record(:Y, -2)
-    #
-    #   a._source_trail      # => [nil, :A, :B, :X]
-    #   a1._source_trail     # => [nil, :A, :B, :Y]
-    #   c._source_trail      # => [ [[nil, :A, :B], [nil, :C, :D]], :E, :F, :G]
-    #
-    # The data structure for an audit gets nasty after a few merges because
-    # the lead array gets more and more nested.  Audit provides iterators
-    # to help gain access, as well as a printing method to visualize the
-    # audit trail:
-    #
-    #   c._to_s
-    #   # =>
-    #   # o-[] 1
-    #   # o-[A] 2
-    #   # o-[B] 3
-    #   # | 
-    #   # | o-[] 10
-    #   # | o-[C] 11
-    #   # | o-[D] 12
-    #   # | | 
-    #   # `-`-o-[E] "a string value"
-    #   #     o-[F] {"a"=>"hash value"}
-    #   #     o-[G] ["an", "array", "value"]
-    #
-    # In practice, tasks are recored as sources. Thus source trails can be used  
-    # to access task configurations and other information that may be useful 
-    # when creating reports or making workflow decisions. 
-    #
     #--
-    # TODO:
-    # Track nesting level of ams; see if you can hook this into the _to_s process to make 
-    # extraction/presentation of audits more managable.
+    # Note Audit could easily be expanded to track sinks as well as sources.
+    # In initialize:
     #
-    # Create a FirstLastArray to minimize the audit data collected.  Allow different audit
-    # modes:
-    # - full        ([] both)
-    # - source_only (fl value)
-    # - minimal     (fl source and value)
-    #
-    # Try to work a _to_s that doesn't repeat the same audit twice.  Think about a format
-    # like:
-    #         | 
-    #   ------|-----+
-    #         |     | 
-    #   ------|-----|-----+ 
-    #         |     |     | 
-    #         `-----`-----`-o-[j] j5
-    #
+    #   @sinks = []
+    #   sources.each do |source|
+    #     source.sinks << self
+    #   end
+    # 
+    # The downside is that this may not be cleanly circumvented if you want light
+    # or no auditing.  It also adds additional references which will prevent
+    # garbage collection.  On the plus side, sinks will make it easier to
+    # truly use Audits as a DAG.
     class Audit
       class << self
-
-        # Creates a new Audit by merging the input audits. The value of the new 
-        # Audit will be an array of the _current values of the inputs.  The source 
-        # will be an AuditMerge whose values are forks of the inputs. Non-Audit 
-        # sources may be provided; they are initialized to Audits before merging.
-        #
-        #   a = Audit.new
-        #   a._record(:a, 'a')
-        # 
-        #   b = Audit.new
-        #   b._record(:b, 'b')
-        # 
-        #   c = Audit.merge(a, b, 1)
-        #   c._record(:c, 'c')
-        # 
-        #   c._values        # => [['a','b', 1], 'c']
-        #   c._sources       # => [AuditMerge[a, b, Audit.new(1)], :c]
-        #
-        # If no audits are provided, merge returns a new Audit.  If only one
-        # audit is provided, merge returns a fork of that audit.
-        def merge(*audits)
-          case audits.length
-          when 0 then Audit.new
-          when 1 then audits[0]._fork
-          else
-            sources = AuditMerge.new
-            audits.each {|a| sources << (a.kind_of?(Audit) ? a._fork : Audit.new(a)) }
-            values = audits.collect {|a| a.kind_of?(Audit) ? a._current : a}
+        
+        # Produces a pretty-print dump of the specified audits to target. 
+        # A block may be provided to format the trailer of each line.
+        def dump(audits, target=$stdout) # :yields: audit
+          return dump(audits, target) do |audit| 
+            "o-[#{audit.key}] #{audit.value.inspect}"
+          end unless block_given?
           
-            Audit.new(values, sources)
+          # arrayify audits
+          audits = [audits].flatten
+          
+          # the order of audits
+          order = []
+          
+          # (audit, sinks) hash preventing double iteration over 
+          # audits, and identifying sinks for a particular audit
+          sinks = {}
+          
+          # iterate over all audits, collecting in order
+          audits.each do |audit|
+            traverse(audit, order, sinks)
           end
+          
+          # visit each audit, collecting audits into indent groups
+          groups = []
+          current = nil
+          order.each do |audit|
+            sources = audit.sources
+            unless sources.length == 1 && sinks[sources[0]].length <= 1
+              current = []
+              groups << current
+            end
+            
+            current << audit
+          end
+          
+          # identify nodes at which a fork occurs... these are audits
+          # that have more than one sink, and they cause a fork-style
+          # leader to be printed
+          forks = {}
+          sinks.each_pair do |audit, audit_sinks|
+            n = audit_sinks.length
+            forks[audit] = [0, n] if n > 1
+          end
+          
+          # setup print
+          index = 0
+          leader = ""
+          
+          # print each group
+          groups.each do |group|
+            sources = group[0].sources
+            complete = audits.include?(group[-1])
+            
+            case 
+            when sources.length > 1
+              # print a merge
+              # `-`-`-o-[merge]
+              
+              leader =~ /^(.*)((\| *){#{sources.length}})$/
+              leader = "#{$1}#{' ' * $2.length} "
+              target << "#{$1}#{$2.gsub('|', '`').gsub(' ', '-')}-#{yield(group.shift)}\n"
+              
+            when fork = forks[sources[0]]
+              # print a fork
+              # |-o-[a]
+              # |
+              # `---o-[b]
+              
+              n = fork[0] += 1
+              base = leader[0, leader.length - (2 * n - 1)]
+              target << "#{base}#{fork[0] == fork[1] ? '`-' : '|-'}#{'--' * (n-1)}#{yield(group.shift)}\n"
+              leader  = "#{base}#{fork[0] == fork[1] ? '  ' : '| '}#{'| ' * (n-1)}"
+              
+            when index > 0
+              # simply get ready to print the next series of audits
+              # o-[a]
+              # o-[b]
+              
+              leader = "#{leader} "
+              leader = "" if leader.strip.empty?
+            end
+            
+            # print the next series of audits
+            group.each do |audit|
+              target << "#{leader}#{yield(audit)}\n"
+            end
+            
+            # add a continuation line, if necessary
+            unless group == groups.last
+              if complete
+                leader = "#{leader} "
+              else
+                leader = "#{leader}|"
+              end
+              target << "#{leader}\n"
+            end
+            
+            index += 1
+          end
+          
+          target
         end
+        
+        protected
+        
+        # helper to determine the order and sinks for a node
+        def traverse(node, order=[], sinks={}) # :nodoc:
+          return if sinks.has_key?(node)
+          
+          node.sources.each do |source|
+            traverse(source, order, sinks)
+            (sinks[source] ||= []) << node
+          end
+          
+          order << node
+        end
       end
       
-      # An array of the sources in self
-      attr_reader :_sources
+      # A key for self (typically the task producing value, or
+      # nil if the value has an unknown origin)
+      attr_reader :key
       
-      # An array of the values in self
-      attr_reader :_values
+      # The current value
+      attr_reader :value
       
-      # An arbitrary object used to identify when no inputs have been
-      # provided to Audit.new.  (nil cannot be used since nil is a valid 
-      # initial value)
-      AUDIT_NIL = Object.new
-      
-      # A new audit takes a value and/or source.  A nil source is typically given
-      # for the original value.  
-      def initialize(value=AUDIT_NIL, source=nil)
-        @_sources = []
-        @_values = []
-        
-        _record(source, value) unless value == AUDIT_NIL
-      end
-
-      # Records the next value produced by the source.  When an audit is
-      # passed as a value, record will record the current value of the audit.
-      # Record will similarly resolve every audit in an array containing audits.
+      # Initializes a new Audit.  Sources may be an array, a single value
+      # (which is turned into an array), or nil (indicating no sources).
       #
-      # Example:
+      #   _a = Audit.new(nil, nil, nil)
+      #   _a.sources                        # => []
       #
-      #    a = Audit.new(1)
-      #    b = Audit.new(2)
-      #    c = Audit.new(3)
+      #   _b = Audit.new(nil, nil, _a)
+      #   _b.sources                        # => [_a]
       #
-      #    c.record(:a, a) 
-      #    c.sources           # => [:a]
-      #    c.values            # => [1]
-      # 
-      #    c.record(:ab, [a,b])
-      #    c.sources           # => [:a, :ab]
-      #    c.values            # => [1, [1, 2]]
-      def _record(source, value)
-        _sources << source
-        _values << value
-        self
+      #   _c = Audit.new(nil, nil, [_a,_b])
+      #   _c.sources                        # => [_a,_b]
+      #
+      def initialize(key=nil, value=nil, sources=nil)
+        @key = key
+        @value = value
+        @source = singularize(sources)
       end
-
-      # The original value used to initialize the Audit
-      def _original
-        _values.first
-      end
-
-      # The current (ie last) value recorded in the Audit
-      def _current
-        _values.last
-      end
-
-      # The original source used to initialize the Audit
-      def _original_source
-        _sources.first
-      end
       
-      # The current (ie last) source recorded in the Audit
-      def _current_source
-        _sources.last
+      # An array of source audits for self.  Sources may be empty.
+      def sources
+        arrayify(@source)
       end
-
-      # Searches back and recursively (if the source is an audit) collects all sources 
-      # for the current value.
-      def _source_trail
-        _collect_records {|source, value| source}
-      end
       
-      # Searches back and recursively (if the source is an audit) collects all values 
-      # leading to the current value.
-      def _value_trail
-        _collect_records {|source, value| value}
-      end
-      
-      def _collect_records(&block) # :yields: source, value
+      # Produces a fork of self for each item in value, using the index of
+      # the item as a key.  Splat is useful for developing each item of an
+      # array value along different paths.
+      #
+      #   _a = Audit.new(nil, [:x, :y, :z])
+      #   _b,_c,_d = _a.splat
+      #
+      #   _b.key                            # => 0
+      #   _b.value                          # => :x
+      #
+      #   _c.key                            # => 1
+      #   _c.value                          # => :y
+      #
+      #   _d.key                            # => 2
+      #   _d.value                          # => :z
+      #   _d.trail                          # => [_a,_d]
+      # 
+      # If value does not respond to 'each', an array with self as the only
+      # member will be returned.  This ensures that the result of splat
+      # is an array of audits ready for further development.
+      # 
+      #   _a = Audit.new(nil, :value)
+      #   _a.splat                          # => [_a]
+      #
+      def splat
+        return [self] unless value.respond_to?(:each)
+        
         collection = []
-        0.upto(_sources.length-1) do |i|
-          collection << collect_records(_sources[i], _values[i], &block)
+        index = 0
+        value.each do |obj|
+          collection << Audit.new(index, obj, self)
+          index += 1
         end
         collection
       end
       
-      def _each_record(merge_level=0, merge_index=0, &block) # :yields: source, value, merge_level, merge_index, index
-        0.upto(_sources.length-1) do |i|
-          each_record(_sources[i], _values[i], merge_level, merge_index, i, &block)
+      # Recursively collects an audit trail leading to self.  Single sources
+      # are collected into the trail directly, while multiple sources are
+      # collected into arrays.
+      #
+      #   _a = Audit.new(:one, 1)
+      #   _b = Audit.new(:two, 2, _a)
+      #   _b.trail                          # => [_a,_b]
+      #
+      #   _a = Audit.new(:one, 1)
+      #   _b = Audit.new(:two, 2)
+      #   _c = Audit.new(:three, 3, [_a, _b])
+      #   _c.trail                          # => [[[_a],[_b]],_c]
+      #
+      # A block may be provided to collect a specific audit attribute
+      # instead of the audit itself.
+      #
+      #   _c.trail {|audit| audit.value }   # => [[[1],[2]],3]
+      #
+      def trail(trail=[], &block)
+        trail.unshift(block_given? ? block.call(self) : self)
+        
+        case @source
+        when Audit
+          @source.trail(trail, &block)
+        when Array
+          trail.unshift @source.collect {|audit| audit.trail(&block) }
         end
+        
+        trail
       end
-
-      # Creates a new Audit by merging self and the input audits, using Audit#merge.
-      def _merge(*audits)
-        Audit.merge(self, *audits)
-      end
       
-      # Produces a new Audit with duplicate sources and values, suitable for
-      # independent development.
-      def _fork
-        a = Audit.new
-        a._sources = _sources.dup
-        a._values = _values.dup
-        a
+      # A kind of pretty-print for Audits.
+      def dump(&block)
+        Audit.dump(self, "", &block)
       end
-
-      # Produces a fork of self for each item in the current value (_current).
-      # Iterate is useful for developing each item of (say) an array along 
-      # different paths.
-      # 
-      # Records the next value of each fork as [item, AuditIterate.new(<index of item>)].  
-      # Raises an error if _current does not respond to each.
-      def _iterate
-        expanded = []
-        _current.each do |value|
-          expanded << _fork._record(AuditIterate.new(expanded.length), value)
-        end
-        expanded
-      end
       
-      # Returns true if the _sources and _values for self are equal
-      # to those of another.
-      def ==(another)
-        another.kind_of?(Audit) && self._sources == another._sources && self._values == another._values
-      end
+      private
       
-      # A kind of pretty-print for Audits.  See the example in the overview.
-      def _to_s
-        # TODO -- find a way to avoid repeating groups
+      # helper to optimize storage of nodes
+      def singularize(obj) # :nodoc:
+        return obj unless obj.kind_of?(Array)
         
-        group = []
-        groups = [group]
-        extended_groups = [groups]
-        group_merges = []
-        extended_group_merges = []
-        current_level = nil
-        current_index = nil
-        
-        _each_record do |source, value, merge_level, merge_index, index|  
-          source_str, value_str = if block_given?
-            yield(source, value)
-          else
-            [source, value == nil ? '' : PP.singleline_pp(value, '')]
-          end
-          
-          if !group.empty? && (merge_level != current_level || index == 0)
-            unless merge_level <= current_level
-              groups = [] 
-              extended_groups << groups
-            end
-
-            group = []
-            groups << group 
-            
-            if merge_level < current_level
-              if merge_index == 0
-                extended_group_merges << group.object_id
-              end
-              
-              unless index == 0   
-                group_merges << group.object_id
-              end
-            end
-          end
-      
-          group << "o-[#{source_str}] #{value_str}"
-          current_level = merge_level
-          current_index = merge_index
+        case obj.length
+        when 0 then nil
+        when 1 then obj[0]
+        else obj
         end
-
-        lines = []
-        group_prefix = ""
-        extended_groups.each do |ext_groups|
-          indentation = 0
-
-          ext_groups.each_with_index do |ext_group, group_num|
-            ext_group.each_with_index do |line, line_num|
-              if line_num == 0
-                unless lines.empty?
-                  lines << group_prefix + "  " * indentation + "| " * (group_num-indentation) 
-                end
-                
-                if group_merges.include?(ext_group.object_id)
-                  lines << group_prefix + "  " * indentation + "`-" * (group_num-indentation) + line
-                  indentation = group_num
-                  
-                  if extended_group_merges.include?(ext_group.object_id) 
-                    lines.last.gsub!(/\| \s*/) {|match| "`-" + "-" * (match.length - 2)}
-                    group_prefix.gsub!(/\| /, " ")
-                  end
-                  next
-                end
-              end
-              
-              lines << group_prefix + "  " * indentation + "| " * (group_num-indentation) + line
-            end
-          end
-          
-          group_prefix += "  " * (ext_groups.length-1) + "| "
-        end
-        
-        lines.join("\n") + "\n"
       end
       
-      protected  
-
-      attr_writer :_sources, :_values # :nodoc:
-
-      private
-      
-      # helper method to recursively collect the value trail for a given source
-      def collect_records(source, value, &block)
-        case source
-        when AuditMerge
-          collection = []
-          0.upto(source.length-1) do |i|
-            collection << collect_records(source[i], value[i], &block)
-          end
-          collection
-        when Audit
-          source._collect_records(&block)
-        else
-          yield(source, value)
-        end
-      end
-      
-      def each_record(source, value, merge_level, merge_index, index, &block)
-        case source
-        when AuditMerge
-          merge_level += 1 
-          0.upto(source.length-1) do |i|
-            each_record(source[i], value[i], merge_level, i, index, &block)
-          end
-        when Audit
-          source._each_record(merge_level, merge_index, &block)
-        else
-          yield(source, value, merge_level, merge_index, index)
+      # helper to expand the optimized node storage back into an array
+      def arrayify(obj) # :nodoc:
+        case obj
+        when nil then []
+        when Array then obj
+        else [obj]
         end
       end
     end
   end
 end
\ No newline at end of file