require 'strscan' module Lazydoc # Comment represents a code comment parsed by Lazydoc. Comments consist # of a subject and content. # # sample_comment = %Q{ # # this is the content # # # # content may stretch across # # multiple lines # this is the subject # } # # Normally the subject is the first non-comment line following the content, # although in some cases the subject will be manually set to something else # (as in a Lazydoc constant attribute). The content is an array of comment # fragments organized by line: # # c = Comment.parse(sample_comment) # c.subject # => "this is the subject" # c.content # # => [ # # ["this is the content"], # # [""], # # ["content may stretch across", "multiple lines"]] # # Comments may be initialized to the subject line and then resolved later: # # doc = %Q{ # module Sample # # this is the content of the comment # # for method_one # def method_one # end # # # this is the content of the comment # # for method_two # def method_two # end # end} # # c1 = Comment.new(4).resolve(doc) # c1.subject # => " def method_one" # c1.content # => [["this is the content of the comment", "for method_one"]] # # c2 = Comment.new(9).resolve(doc) # c2.subject # => " def method_two" # c2.content # => [["this is the content of the comment", "for method_two"]] # # A Regexp (or Proc) may be used in place of a line number; during resolve, # the lines will be scanned and the first matching line will be used. # # c3 = Comment.new(/def method_two/).resolve(doc) # c3.subject # => " def method_two" # c3.content # => [["this is the content of the comment", "for method_two"]] # class Comment class << self # Parses the input string into a comment. Takes a string or a # StringScanner and returns the comment. # # comment_string = %Q{ # # comments spanning multiple # # lines are collected # # # # while indented lines # # are preserved individually # # # this is the subject line # # # this line is not parsed # } # # c = Comment.parse(comment_string) # c.content # # => [ # # ['comments spanning multiple', 'lines are collected'], # # [''], # # [' while indented lines'], # # [' are preserved individually'], # # [''], # # []] # c.subject # => "this is the subject line" # # Parsing may be manually ended by providing a block; parse yields # each line fragment to the block and stops parsing when the block # returns true. Note that no subject will be parsed under these # circumstances. # # c = Comment.parse(comment_string) {|frag| frag.strip.empty? } # c.content # # => [ # # ['comments spanning multiple', 'lines are collected']] # c.subject # => nil # # Subject parsing may also be suppressed by setting parse_subject # to false. def parse(str, parse_subject=true) # :yields: fragment scanner = case str when StringScanner then str when String then StringScanner.new(str) else raise TypeError, "can't convert #{str.class} into StringScanner or String" end comment = self.new while scanner.scan(/\r?\n?[ \t]*#[ \t]?(([ \t]*).*?)\r?$/) fragment = scanner[1] indent = scanner[2] # collect continuous description line # fragments and join into a single line if block_given? && yield(fragment) # break on comment if the description end is reached parse_subject = false break else categorize(fragment, indent) {|f| comment.push(f) } end end if parse_subject scanner.skip(/\s+/) unless scanner.peek(1) == '#' if subject = scanner.scan(/.+?$/) subject.strip! end comment.subject = subject end end comment end # Scan determines if and how to add a line fragment to a comment and # yields the appropriate fragments to the block. Returns true if # fragments are yielded and false otherwise. # # Content may be built from an array of lines using scan like so: # # lines = [ # "# comments spanning multiple", # "# lines are collected", # "#", # "# while indented lines", # "# are preserved individually", # "# ", # "not a comment line", # "# skipped since the loop breaks", # "# at the first non-comment line"] # # c = Comment.new # lines.each do |line| # break unless Comment.scan(line) do |fragment| # c.push(fragment) # end # end # # c.content # # => [ # # ['comments spanning multiple', 'lines are collected'], # # [''], # # [' while indented lines'], # # [' are preserved individually'], # # [''], # # []] # def scan(line) # :yields: fragment return false unless line =~ /^[ \t]*#[ \t]?(([ \t]*).*?)\r?$/ categorize($1, $2) do |fragment| yield(fragment) end true end # Scans a stripped trailing comment off of str, tolerant to a leader # that uses '#' within a string. Returns nil for strings without a # trailing comment. # # Comment.scan_trailer "str with # trailer" # => "trailer" # Comment.scan_trailer "'# in str' # trailer" # => "trailer" # Comment.scan_trailer "str with without trailer" # => nil # # Note the %-syntax for strings is not fully supported, ie %Q, %q, # etc. may not parse correctly. Accepts Strings or a StringScanner. def scan_trailer(str) scanner = case str when StringScanner then str when String then StringScanner.new(str) else raise TypeError, "can't convert #{str.class} into StringScanner or String" end args = [] brakets = braces = parens = 0 start = scanner.pos while scanner.skip(/.*?['"#]/) pos = scanner.pos - 1 case str[pos] when ?# then return scanner.rest.strip # return the trailer when ?' then skip_quote(scanner, /'/) # parse over quoted strings when ?" then skip_quote(scanner, /"/) # parse over double-quoted string end end return nil end # Splits a line of text along whitespace breaks into fragments of cols # width. Tabs in the line will be expanded into tabsize spaces; # fragments are rstripped of whitespace. # # Comment.wrap("some line that will wrap", 10) # => ["some line", "that will", "wrap"] # Comment.wrap(" line that will wrap ", 10) # => [" line", "that will", "wrap"] # Comment.wrap(" ", 10) # => [] # # The wrapping algorithm is slightly modified from: # http://blog.macromates.com/2006/wrapping-text-with-regular-expressions/ def wrap(line, cols=80, tabsize=2) line = line.gsub(/\t/, " " * tabsize) unless tabsize == nil line.gsub(/(.{1,#{cols}})( +|$\r?\n?)|(.{1,#{cols}})/, "\\1\\3\n").split(/\s*?\n/) end private # utility method used by scan to categorize and yield # the appropriate objects to add the fragment to a # comment def categorize(fragment, indent) # :nodoc: case when fragment == indent # empty comment line yield [""] yield [] when indent.empty? # continuation line yield fragment.rstrip else # indented line yield [fragment.rstrip] yield [] end end # helper method to skip to the next non-escaped instance # matching the quote regexp (/'/ or /"/). def skip_quote(scanner, regexp) # :nodoc: scanner.skip_until(regexp) scanner.skip_until(regexp) while scanner.string[scanner.pos-2] == ?\\ end end # An array of comment fragments organized into lines attr_reader :content # The subject of the comment (normally set to the next # non-comment line after the content ends; ie the line # that would receive the comment in RDoc documentation) attr_accessor :subject # Returns the line number for the subject line, if known. # Although normally an integer, line_number may be # set to a Regexp or Proc to dynamically determine # the subject line during resolve attr_accessor :line_number def initialize(line_number=nil) @content = [] @subject = nil @line_number = line_number end # Alias for subject def value subject end # Alias for subject= def value=(value) self.subject = value end # Pushes the fragment onto the last line array of content. If the # fragment is an array itself then it will be pushed onto content # as a new line. # # c = Comment.new # c.push "some line" # c.push "fragments" # c.push ["a", "whole", "new line"] # # c.content # # => [ # # ["some line", "fragments"], # # ["a", "whole", "new line"]] # def push(fragment) content << [] if content.empty? case fragment when Array if content[-1].empty? content[-1] = fragment else content.push fragment end else content[-1].push fragment end end # Alias for push. def <<(fragment) push(fragment) end # Scans the comment line using Comment.scan and pushes the appropriate # fragments onto self. Used to build a content by scanning down a set # of lines. # # lines = [ # "# comment spanning multiple", # "# lines", # "#", # "# indented line one", # "# indented line two", # "# ", # "not a comment line"] # # c = Comment.new # lines.each {|line| c.append(line) } # # c.content # # => [ # # ['comment spanning multiple', 'lines'], # # [''], # # [' indented line one'], # # [' indented line two'], # # [''], # # []] # def append(line) Comment.scan(line) {|f| push(f) } end # Unshifts the fragment to the first line array of content. If the # fragment is an array itself then it will be unshifted onto content # as a new line. # # c = Comment.new # c.unshift "some line" # c.unshift "fragments" # c.unshift ["a", "whole", "new line"] # # c.content # # => [ # # ["a", "whole", "new line"], # # ["fragments", "some line"]] # def unshift(fragment) content << [] if content.empty? case fragment when Array if content[0].empty? content[0] = fragment else content.unshift fragment end else content[0].unshift fragment end end # Scans the comment line using Comment.scan and unshifts the appropriate # fragments onto self. Used to build a content by scanning up a set of # lines. # # lines = [ # "# comment spanning multiple", # "# lines", # "#", # "# indented line one", # "# indented line two", # "# ", # "not a comment line"] # # c = Comment.new # lines.reverse_each {|line| c.prepend(line) } # # c.content # # => [ # # ['comment spanning multiple', 'lines'], # # [''], # # [' indented line one'], # # [' indented line two'], # # ['']] # def prepend(line) Comment.scan(line) {|f| unshift(f) } end # Builds the subject and content of self using lines; resolve sets # the subject to the line at line_number, and parses content up # from there. Any previously set subject and content is overridden. # Returns self. # # document = %Q{ # module Sample # # this is the content of the comment # # for method_one # def method_one # end # # # this is the content of the comment # # for method_two # def method_two # end # end} # # c = Comment.new 4 # c.resolve(document) # c.subject # => " def method_one" # c.content # => [["this is the content of the comment", "for method_one"]] # # Lines may be an array or a string; string inputs are split into an # array along newline boundaries. # # === dynamic line numbers # The line_number used by resolve may be determined dynamically from # lines by setting line_number to a Regexp and Proc. In the case # of a Regexp, the first line matching the regexp is used: # # c = Comment.new(/def method/) # c.resolve(document) # c.line_number = 4 # c.subject # => " def method_one" # c.content # => [["this is the content of the comment", "for method_one"]] # # Procs are called with lines and are expected to return the # actual line number. # # c = Comment.new lambda {|lines| 9 } # c.resolve(document) # c.line_number = 9 # c.subject # => " def method_two" # c.content # => [["this is the content of the comment", "for method_two"]] # # As shown in the examples, in both cases the dynamically determined # line_number overwrites the Regexp or Proc. def resolve(lines) lines = lines.split(/\r?\n/) if lines.kind_of?(String) # resolve late-evaluation line numbers n = case line_number when Regexp then match_index(line_number, lines) when Proc then line_number.call(lines) else line_number end # quietly exit if a line number was not found return self unless n.kind_of?(Integer) # update negative line numbers n += lines.length if n < 0 unless n < lines.length raise RangeError, "line_number outside of lines: #{n} (#{lines.length})" end self.line_number = n self.subject = lines[n] self.content.clear # remove whitespace lines n -= 1 n -= 1 while n >=0 && lines[n].strip.empty? # put together the comment while n >= 0 break unless prepend(lines[n]) n -= 1 end self end # Removes leading and trailing lines from content that are # empty ([]) or whitespace (['']). Returns self. def trim content.shift while !content.empty? && (content[0].empty? || content[0].join.strip.empty?) content.pop while !content.empty? && (content[-1].empty? || content[-1].join.strip.empty?) self end # True if all lines in content are empty. def empty? !content.find {|line| !line.empty?} end # Returns a comment trailing the subject. def trailer subject ? Comment.scan_trailer(subject) : nil end # Returns content as a string where line fragments are joined by # fragment_sep and lines are joined by line_sep. def to_s(fragment_sep=" ", line_sep="\n", strip=true) lines = content.collect {|line| line.join(fragment_sep)} # strip leading an trailing whitespace lines if strip lines.shift while !lines.empty? && lines[0].empty? lines.pop while !lines.empty? && lines[-1].empty? end line_sep ? lines.join(line_sep) : lines end # Like to_s, but wraps the content to the specified number of cols # and expands tabs to tabsize spaces. def wrap(cols=80, tabsize=2, line_sep="\n", fragment_sep=" ", strip=true) lines = Comment.wrap(to_s(fragment_sep, "\n", strip), cols, tabsize) line_sep ? lines.join(line_sep) : lines end # Returns true if another is a Comment with the same # line_number, subject, and content as self def ==(another) another.kind_of?(Comment) && self.line_number == another.line_number && self.subject == another.subject && self.content == another.content end private # utility method used to by resolve to find the index # of a line matching a regexp line_number. def match_index(regexp, lines) # :nodoc: lines.each_with_index do |line, index| return index if line =~ regexp end nil end end end