comment.rb in lazydoc-0.3.0

- old
+ new

@@ -1,277 +1,33 @@
-require 'strscan'
+require 'lazydoc/utils'
 
 module Lazydoc
-  # Comment represents a code comment parsed by Lazydoc.  Comments consist
-  # of a subject and content.
-  #   
-  #   sample_comment = %Q{
-  #   # this is the content
-  #   #
-  #   # content may stretch across
-  #   # multiple lines
-  #   this is the subject
-  #   }
-  #   
-  # Normally the subject is the first non-comment line following the content,
-  # although in some cases the subject will be manually set to something else
-  # (as in a Lazydoc constant attribute). The content is an array of comment
-  # fragments organized by line:
-  #
-  #   c = Comment.parse(sample_comment)
-  #   c.subject      # => "this is the subject"
-  #   c.content      
-  #   # => [
-  #   # ["this is the content"], 
-  #   # [""], 
-  #   # ["content may stretch across", "multiple lines"]]
-  #
-  # Comments may be initialized to the subject line and then resolved later:
-  #
-  #   doc = %Q{
-  #   module Sample
-  #     # this is the content of the comment
-  #     # for method_one
-  #     def method_one
-  #     end
-  #
-  #     # this is the content of the comment
-  #     # for method_two
-  #     def method_two
-  #     end
-  #   end}
-  #
-  #   c1 = Comment.new(4).resolve(doc)
-  #   c1.subject     # => "  def method_one"
-  #   c1.content     # => [["this is the content of the comment", "for method_one"]]
-  #
-  #   c2 = Comment.new(9).resolve(doc)
-  #   c2.subject     # => "  def method_two"
-  #   c2.content     # => [["this is the content of the comment", "for method_two"]]
-  # 
-  # A Regexp (or Proc) may be used in place of a line number; during resolve,
-  # the lines will be scanned and the first matching line will be used.
-  #
-  #   c3 = Comment.new(/def method_two/).resolve(doc)
-  #   c3.subject     # => "  def method_two"
-  #   c3.content     # => [["this is the content of the comment", "for method_two"]]
-  #
-  class Comment
-
-    class << self
   
-      # Parses the input string into a comment.  Takes a string or a 
-      # StringScanner and returns the comment.
-      #
-      #   comment_string = %Q{
-      #   # comments spanning multiple
-      #   # lines are collected
-      #   #
-      #   #   while indented lines
-      #   #   are preserved individually
-      #   #    
-      #   this is the subject line
-      #
-      #   # this line is not parsed
-      #   }
-      #
-      #   c = Comment.parse(comment_string)
-      #   c.content   
-      #   # => [
-      #   # ['comments spanning multiple', 'lines are collected'],
-      #   # [''],
-      #   # ['  while indented lines'],
-      #   # ['  are preserved individually'],
-      #   # [''],
-      #   # []]
-      #   c.subject   # => "this is the subject line"
-      #
-      # Parsing may be manually ended by providing a block; parse yields
-      # each line fragment to the block and stops parsing when the block
-      # returns true.  Note that no subject will be parsed under these
-      # circumstances.
-      #
-      #   c = Comment.parse(comment_string) {|frag| frag.strip.empty? }
-      #   c.content   
-      #   # => [
-      #   # ['comments spanning multiple', 'lines are collected']]
-      #   c.subject   # => nil
-      #
-      # Subject parsing may also be suppressed by setting parse_subject
-      # to false.
-      def parse(str, parse_subject=true) # :yields: fragment
-        scanner = case str
-        when StringScanner then str
-        when String then StringScanner.new(str)
-        else raise TypeError, "can't convert #{str.class} into StringScanner or String"
-        end
+  # Comment represents a code comment parsed by Lazydoc.  
+  class Comment
+    include Utils
     
-        comment = self.new
-        while scanner.scan(/\r?\n?[ \t]*#[ \t]?(([ \t]*).*?)\r?$/)
-          fragment = scanner[1]
-          indent = scanner[2]
-        
-          # collect continuous description line
-          # fragments and join into a single line
-          if block_given? && yield(fragment)
-            # break on comment if the description end is reached
-            parse_subject = false
-            break
-          else
-            categorize(fragment, indent) {|f| comment.push(f) }
-          end
-        end
+    # Returns the line number for the subject line, if known.
+    # Although normally an integer, line_number may be
+    # set to a Regexp or Proc to dynamically determine
+    # the subject line during resolve
+    attr_accessor :line_number
     
-        if parse_subject
-          scanner.skip(/\s+/)
-          unless scanner.peek(1) == '#'
-            if subject = scanner.scan(/.+?$/) 
-              subject.strip!
-            end
-            comment.subject = subject
-          end
-        end
+    # A back-reference to the Document that registered self
+    attr_accessor :document
     
-        comment
-      end
-    
-      # Scan determines if and how to add a line fragment to a comment and
-      # yields the appropriate fragments to the block.  Returns true if
-      # fragments are yielded and false otherwise.  
-      #
-      # Content may be built from an array of lines using scan like so:
-      #
-      #   lines = [
-      #     "# comments spanning multiple",
-      #     "# lines are collected",
-      #     "#",
-      #     "#   while indented lines",
-      #     "#   are preserved individually",
-      #     "#    ",
-      #     "not a comment line",
-      #     "# skipped since the loop breaks",
-      #     "# at the first non-comment line"]
-      #
-      #   c = Comment.new
-      #   lines.each do |line|
-      #     break unless Comment.scan(line) do |fragment|
-      #       c.push(fragment)  
-      #     end
-      #   end
-      #
-      #   c.content   
-      #   # => [
-      #   # ['comments spanning multiple', 'lines are collected'],
-      #   # [''],
-      #   # ['  while indented lines'],
-      #   # ['  are preserved individually'],
-      #   # [''],
-      #   # []]
-      #
-      def scan(line) # :yields: fragment
-        return false unless line =~ /^[ \t]*#[ \t]?(([ \t]*).*?)\r?$/
-        categorize($1, $2) do |fragment|
-          yield(fragment)
-        end
-        true
-      end
-      
-      # Scans a stripped trailing comment off of str, tolerant to a leader
-      # that uses '#' within a string.  Returns nil for strings without a 
-      # trailing comment.
-      #
-      #   Comment.scan_trailer "str with # trailer"           # => "trailer"
-      #   Comment.scan_trailer "'# in str' # trailer"         # => "trailer"
-      #   Comment.scan_trailer "str with without trailer"     # => nil
-      # 
-      # Note the %-syntax for strings is not fully supported, ie %Q, %q,
-      # etc. may not parse correctly.  Accepts Strings or a StringScanner.
-      def scan_trailer(str)
-        scanner = case str
-        when StringScanner then str
-        when String then StringScanner.new(str)
-        else raise TypeError, "can't convert #{str.class} into StringScanner or String"
-        end
-
-        args = []
-        brakets = braces = parens = 0
-        start = scanner.pos
-        while scanner.skip(/.*?['"#]/)
-          pos = scanner.pos - 1
-          
-          case str[pos]
-          when ?# then return scanner.rest.strip     # return the trailer
-          when ?' then skip_quote(scanner, /'/)      # parse over quoted strings
-          when ?" then skip_quote(scanner, /"/)      # parse over double-quoted string
-          end
-        end
-        
-        return nil
-      end
-      
-      # Splits a line of text along whitespace breaks into fragments of cols
-      # width.  Tabs in the line will be expanded into tabsize spaces; 
-      # fragments are rstripped of whitespace.
-      # 
-      #   Comment.wrap("some line that will wrap", 10)       # => ["some line", "that will", "wrap"]
-      #   Comment.wrap("     line that will wrap    ", 10)   # => ["     line", "that will", "wrap"]
-      #   Comment.wrap("                            ", 10)   # => []
-      #
-      # The wrapping algorithm is slightly modified from:
-      # http://blog.macromates.com/2006/wrapping-text-with-regular-expressions/
-      def wrap(line, cols=80, tabsize=2)
-        line = line.gsub(/\t/, " " * tabsize) unless tabsize == nil
-        line.gsub(/(.{1,#{cols}})( +|$\r?\n?)|(.{1,#{cols}})/, "\\1\\3\n").split(/\s*?\n/)
-      end
-    
-      private
-    
-      # utility method used by scan to categorize and yield
-      # the appropriate objects to add the fragment to a
-      # comment
-      def categorize(fragment, indent) # :nodoc:
-        case
-        when fragment == indent
-          # empty comment line
-          yield [""]
-          yield []
-        when indent.empty?
-          # continuation line
-          yield fragment.rstrip
-        else 
-          # indented line
-          yield [fragment.rstrip]
-          yield []
-        end
-      end
-      
-      # helper method to skip to the next non-escaped instance
-      # matching the quote regexp (/'/ or /"/).
-      def skip_quote(scanner, regexp) # :nodoc:
-        scanner.skip_until(regexp)
-        scanner.skip_until(regexp) while scanner.string[scanner.pos-2] == ?\\
-      end
-    end
-
     # An array of comment fragments organized into lines
     attr_reader :content
 
-    # The subject of the comment (normally set to the next 
-    # non-comment line after the content ends; ie the line 
-    # that would receive the comment in RDoc documentation)
+    # The subject of the comment
     attr_accessor :subject
-  
-    # Returns the line number for the subject line, if known.
-    # Although normally an integer, line_number may be
-    # set to a Regexp or Proc to dynamically determine
-    # the subject line during resolve
-    attr_accessor :line_number
-  
-    def initialize(line_number=nil)
+
+    def initialize(line_number=nil, document=nil)
+      @line_number = line_number
+      @document = document
       @content = []
       @subject = nil
-      @line_number = line_number
     end
     
     # Alias for subject
     def value
       subject
@@ -279,14 +35,19 @@
     
     # Alias for subject=
     def value=(value)
       self.subject = value
     end
+    
+    # Returns the comment trailing the subject.
+    def trailer
+      subject ? scan_trailer(subject) : nil
+    end
 
-    # Pushes the fragment onto the last line array of content.  If the
-    # fragment is an array itself then it will be pushed onto content 
-    # as a new line.
+    # Pushes the fragment onto the last line of content.  If the
+    # fragment is an array itself then it will be pushed onto
+    # content as a new line.
     #
     #   c = Comment.new
     #   c.push "some line"
     #   c.push "fragments"
     #   c.push ["a", "whole", "new line"]
@@ -340,16 +101,16 @@
     #   # ['  indented line two'],
     #   # [''],
     #   # []]
     #
     def append(line)
-      Comment.scan(line) {|f| push(f) }
+      scan(line) {|f| push(f) }
     end
   
-    # Unshifts the fragment to the first line array of content.  If the
-    # fragment is an array itself then it will be unshifted onto content
-    # as a new line.
+    # Unshifts the fragment to the first line of content.  If the
+    # fragment is an array itself then it will be unshifted onto
+    # content as a new line.
     #
     #   c = Comment.new
     #   c.unshift "some line"
     #   c.unshift "fragments"
     #   c.unshift ["a", "whole", "new line"]
@@ -397,152 +158,198 @@
     #   # ['  indented line one'],
     #   # ['  indented line two'],
     #   # ['']]
     #
     def prepend(line)
-      Comment.scan(line) {|f| unshift(f) }
+      scan(line) {|f| unshift(f) }
     end
-  
-    # Builds the subject and content of self using lines; resolve sets
-    # the subject to the line at line_number, and parses content up
-    # from there.  Any previously set subject and content is overridden.  
-    # Returns self.
+    
+    # Builds the content of self by parsing comments up from line_number.
+    # Whitespace lines between line_number and the preceding comment are
+    # skipped.  Previous content is overridden.  Returns self.
     #
     #   document = %Q{
     #   module Sample
+    #
     #     # this is the content of the comment
     #     # for method_one
     #     def method_one
     #     end
     # 
     #     # this is the content of the comment
     #     # for method_two
+    #
     #     def method_two
     #     end
     #   end}
     #
     #   c = Comment.new 4
-    #   c.resolve(document)
-    #   c.subject     # => "  def method_one"
-    #   c.content     # => [["this is the content of the comment", "for method_one"]]
+    #   c.parse_up(document)
+    #   c.comment      # => "this is the content of the comment for method_one"
     #
-    # Lines may be an array or a string; string inputs are split into an
-    # array along newline boundaries.
+    # The input may be a String or StringScanner and, for optimal parsing of
+    # multiple comments from the same document, may also take an array of lines
+    # representing the input split along newline boundaries.
     #
-    # === dynamic line numbers
-    # The line_number used by resolve may be determined dynamically from
-    # lines by setting line_number to a Regexp and Proc. In the case
+    # ==== Stop Block
+    # 
+    # A block may be provided to determine when to stop parsing comment
+    # content.  When the block returns true, parsing stops.
+    #
+    #   c = Comment.new 4
+    #   c.parse_up(document) {|line| line =~ /# this is/ }
+    #   c.comment      # => "for method_one"
+    #
+    # ==== Dynamic Line Numbers
+    #
+    # The line_number used by parse_up may be determined dynamically from
+    # the input by setting line_number to a Regexp and Proc. In the case
     # of a Regexp, the first line matching the regexp is used:
     #
     #   c = Comment.new(/def method/)
-    #   c.resolve(document)
-    #   c.line_number = 4
-    #   c.subject     # => "  def method_one"
-    #   c.content     # => [["this is the content of the comment", "for method_one"]]
+    #   c.parse_up(document)
+    #   c.line_number  # => 4
+    #   c.comment      # => "this is the content of the comment for method_one"
     #
     # Procs are called with lines and are expected to return the
     # actual line number.  
     #
-    #   c = Comment.new lambda {|lines| 9 }
-    #   c.resolve(document)
-    #   c.line_number = 9
-    #   c.subject     # => "  def method_two"
-    #   c.content     # => [["this is the content of the comment", "for method_two"]]
+    #   c = Comment.new lambda {|scanner, lines| 9 }
+    #   c.parse_up(document)
+    #   c.line_number  # => 9
+    #   c.comment      # => "this is the content of the comment for method_two"
     #
-    # As shown in the examples, in both cases the dynamically determined
-    # line_number overwrites the Regexp or Proc.
-    def resolve(lines)
-      lines = lines.split(/\r?\n/) if lines.kind_of?(String)
-    
-      # resolve late-evaluation line numbers
-      n = case line_number
-      when Regexp then match_index(line_number, lines)
-      when Proc then line_number.call(lines)
-      else line_number
+    # As shown in the examples, the dynamically determined line_number
+    # overwrites the Regexp or Proc.
+    def parse_up(str, lines=nil, skip_subject=true)
+      parse(str, lines) do |n, lines|
+        # remove whitespace lines
+        n -= 1 if skip_subject
+        n -= 1 while n >=0 && lines[n].strip.empty?
+
+        # put together the comment
+        while n >= 0
+          line = lines[n]
+          break if block_given? && yield(line)
+          break unless prepend(line)
+          n -= 1
+        end
       end
-     
-      # quietly exit if a line number was not found
-      return self unless n.kind_of?(Integer)
-      
-      # update negative line numbers
-      n += lines.length if n < 0
-      unless n < lines.length
-        raise RangeError, "line_number outside of lines: #{n} (#{lines.length})"
-      end
-      
-      self.line_number = n
-      self.subject = lines[n]
-      self.content.clear
+    end
     
-      # remove whitespace lines
-      n -= 1
-      n -= 1 while n >=0 && lines[n].strip.empty?
-
-      # put together the comment
-      while n >= 0
-        break unless prepend(lines[n])
-        n -= 1
+    # Like parse_up but builds the content of self by parsing comments down
+    # from line_number.  Parsing begins immediately after line_number (no
+    # whitespace lines are skipped).  Previous content is overridden.
+    # Returns self.
+    #
+    #   document = %Q{
+    #   # == Section One
+    #   # documentation for section one
+    #   #   'with' + 'indentation'
+    #   #
+    #   # == Section Two
+    #   # documentation for section two
+    #   }
+    #
+    #   c = Comment.new 1
+    #   c.parse_down(document) {|line| line =~ /Section Two/}
+    #   c.comment      # => "documentation for section one\n  'with' + 'indentation'"
+    #
+    #   c = Comment.new /Section Two/
+    #   c.parse_down(document)
+    #   c.line_number  # => 5
+    #   c.comment      # => "documentation for section two"
+    #
+    def parse_down(str, lines=nil, skip_subject=true)
+      parse(str, lines) do |n, lines|
+        # skip the subject line
+        n += 1 if skip_subject
+        
+        # put together the comment
+        while line = lines[n]
+          break if block_given? && yield(line)
+          break unless append(line)
+          n += 1
+        end
       end
-     
+    end
+    
+    # Resolves the document for self, if set.
+    def resolve(str=nil, force=false)
+      document.resolve(str, force) if document
       self
     end
   
     # Removes leading and trailing lines from content that are
-    # empty ([]) or whitespace (['']).  Returns self.
+    # empty or whitespace.  Returns self.
     def trim
       content.shift while !content.empty? && (content[0].empty? || content[0].join.strip.empty?)
       content.pop   while !content.empty? && (content[-1].empty? || content[-1].join.strip.empty?)
       self
     end
-  
-    # True if all lines in content are empty.
-    def empty?
-      !content.find {|line| !line.empty?}
-    end
-  
-    # Returns a comment trailing the subject.
-    def trailer
-      subject ? Comment.scan_trailer(subject) : nil
-    end
     
     # Returns content as a string where line fragments are joined by
     # fragment_sep and lines are joined by line_sep. 
-    def to_s(fragment_sep=" ", line_sep="\n", strip=true)
+    def comment(fragment_sep=" ", line_sep="\n", strip=true)
       lines = content.collect {|line| line.join(fragment_sep)}
     
       # strip leading an trailing whitespace lines
       if strip
         lines.shift while !lines.empty? && lines[0].empty?
         lines.pop while !lines.empty? && lines[-1].empty?
       end
     
       line_sep ? lines.join(line_sep) : lines
     end
-  
-    # Like to_s, but wraps the content to the specified number of cols
+
+    # Like comment, but wraps the content to the specified number of cols
     # and expands tabs to tabsize spaces.
     def wrap(cols=80, tabsize=2, line_sep="\n", fragment_sep=" ", strip=true)
-      lines = Comment.wrap(to_s(fragment_sep, "\n", strip), cols, tabsize)
+      lines = super(comment(fragment_sep, "\n", strip), cols, tabsize)
       line_sep ? lines.join(line_sep) : lines
     end
-  
-    # Returns true if another is a Comment with the same
-    # line_number, subject, and content as self
-    def ==(another)
-      another.kind_of?(Comment) && 
-      self.line_number == another.line_number &&
-      self.subject == another.subject &&
-      self.content == another.content
+    
+    # True if to_s is empty.
+    def empty?
+      to_s.empty?
     end
-  
+    
+    # Self-resolves and returns comment.
+    def to_s
+      resolve
+      comment
+    end
+    
     private
-  
-    # utility method used to by resolve to find the index
-    # of a line matching a regexp line_number.
-    def match_index(regexp, lines) # :nodoc:
-      lines.each_with_index do |line, index|
-        return index if line =~ regexp
+    
+    # helper standardizing the shared code of parse up/down
+    def parse(str, lines) # :nodoc:
+      scanner = convert_to_scanner(str)
+      lines ||= split_lines(scanner.string)
+    
+      # resolve late-evaluation line numbers
+      n = case line_number
+      when nil then determine_line_number(scanner)
+      when Regexp then scan_index(scanner, line_number)
+      when Proc then line_number.call(scanner, lines)
+      else line_number
       end
-      nil
+     
+      # do nothing unless a line number was found
+      unless n.kind_of?(Integer)
+        raise "invalid dynamic line number: #{line_number.inspect}"
+      end
+        
+      # update negative line numbers
+      n += lines.length if n < 0
+      unless n < lines.length
+        raise RangeError, "line_number outside of lines: #{n} (#{lines.length})"
+      end
+    
+      self.line_number = n
+      self.content.clear
+      yield(n, lines)
+      
+      self
     end
   end
 end