require 'polytrix/documentation/comment_styles' module Polytrix module Documentation # This class was extracted from the [Rocco](http://rtomayko.github.com/rocco/) project # which was in turn based on the [Docco](http://jashkenas.github.com/docco/). class CodeSegmenter # rubocop:disable all # Cops are disabled because the code is from Rocco include CommentStyles DEFAULT_OPTIONS = { language: 'rb', comment_chars: '#' } def initialize(options = {}) @options = DEFAULT_OPTIONS.merge options @options[:comment_chars] = generate_comment_chars if @options[:comment_chars].is_a? String end # Internal Parsing and Highlighting # --------------------------------- # Parse the raw file source_code into a list of two-tuples. Each tuple has the # form `[docs, code]` where both elements are arrays containing the # raw lines parsed from the input file, comment characters stripped. def segment(source_code) sections, docs, code = [], [], [] lines = source_code.split("\n") # The first line is ignored if it is a shebang line. We also ignore the # PEP 263 encoding information in python sourcefiles, and the similar ruby # 1.9 syntax. lines.shift if lines[0] =~ /^\#\!/ lines.shift if lines[0] =~ /coding[:=]\s*[-\w.]+/ && %w(python rb).include?(@options[:language]) # To detect both block comments and single-line comments, we'll set # up a tiny state machine, and loop through each line of the file. # This requires an `in_comment_block` boolean, and a few regular # expressions for line tests. We'll do the same for fake heredoc parsing. in_comment_block = false in_heredoc = false single_line_comment, block_comment_start, block_comment_mid, block_comment_end = nil, nil, nil, nil if not @options[:comment_chars][:single].nil? single_line_comment = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:single])}\\s?") end if not @options[:comment_chars][:multi].nil? block_comment_start = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*$") block_comment_end = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$") block_comment_one_liner = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$") block_comment_start_with = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:start])}\\s*(.*?)$") block_comment_end_with = Regexp.new("\\s*(.*?)\\s*#{Regexp.escape(@options[:comment_chars][:multi][:end])}\\s*$") if @options[:comment_chars][:multi][:middle] block_comment_mid = Regexp.new("^\\s*#{Regexp.escape(@options[:comment_chars][:multi][:middle])}\\s?") end end if not @options[:comment_chars][:heredoc].nil? heredoc_start = Regexp.new("#{Regexp.escape(@options[:comment_chars][:heredoc])}(\\S+)$") end lines.each do |line| # If we're currently in a comment block, check whether the line matches # the _end_ of a comment block or the _end_ of a comment block with a # comment. if in_comment_block if block_comment_end && line.match(block_comment_end) in_comment_block = false elsif block_comment_end_with && line.match(block_comment_end_with) in_comment_block = false docs << line.match(block_comment_end_with).captures.first. sub(block_comment_mid || '', '') else docs << line.sub(block_comment_mid || '', '') end # If we're currently in a heredoc, we're looking for the end of the # heredoc, and everything it contains is code. elsif in_heredoc if line.match(Regexp.new("^#{Regexp.escape(in_heredoc)}$")) in_heredoc = false end code << line # Otherwise, check whether the line starts a heredoc. If so, note the end # pattern, and the line is code. Otherwise check whether the line matches # the beginning of a block, or a single-line comment all on it's lonesome. # In either case, if there's code, start a new section. else if heredoc_start && line.match(heredoc_start) in_heredoc = Regexp.last_match[1] code << line elsif block_comment_one_liner && line.match(block_comment_one_liner) if code.any? sections << [docs, code] docs, code = [], [] end docs << line.match(block_comment_one_liner).captures.first elsif block_comment_start && line.match(block_comment_start) in_comment_block = true if code.any? sections << [docs, code] docs, code = [], [] end elsif block_comment_start_with && line.match(block_comment_start_with) in_comment_block = true if code.any? sections << [docs, code] docs, code = [], [] end docs << line.match(block_comment_start_with).captures.first elsif single_line_comment && line.match(single_line_comment) if code.any? sections << [docs, code] docs, code = [], [] end docs << line.sub(single_line_comment || '', '') else code << line end end end sections << [docs, code] if docs.any? || code.any? normalize_leading_spaces(sections) end # Normalizes documentation whitespace by checking for leading whitespace, # removing it, and then removing the same amount of whitespace from each # succeeding line. That is: # # def func(): # """ # Comment 1 # Comment 2 # """ # print "omg!" # # should yield a comment block of `Comment 1\nComment 2` and code of # `def func():\n print "omg!"` def normalize_leading_spaces(sections) sections.map do |section| if section.any? && section[0].any? leading_space = section[0][0].match("^\s+") if leading_space section[0] = section[0].map { |line| line.sub(/^#{leading_space.to_s}/, '') } end end section end end def comment(lines) lines.map do | line | "#{@options[:comment_chars][:single]} #{line}" end.join "\n" end private def generate_comment_chars @_commentchar ||= if COMMENT_STYLES[@options[:language]] COMMENT_STYLES[@options[:language]] else { single: @options[:comment_chars], multi: nil, heredoc: nil } end end end # rubocop:enable all end end