# Public: Methods to parse and build objects from Asciidoc lines
#
# All functionality is exposed as class-level methods that pull lines from a
# Reader and build Block, Section and ListItem objects. The class is never
# instantiated.
class Asciidoctor::Lexer

  include Asciidoctor

  # Public: Make sure the Lexer object doesn't get initialized.
  #
  # Raises RuntimeError unconditionally.
  def initialize
    raise 'Au contraire, mon frere. No lexer instances will be running around.'
  end

  # Internal: Resolve the Document associated with a parent node.
  #
  # parent - either a Document or an object responding to #document.
  #
  # Returns parent itself when it is a Document, otherwise parent.document.
  def self.document_from_parent(parent)
    if parent.is_a? Document
      parent
    else
      parent.document
    end
  end

  # Return the next block from the Reader.
  #
  # * Skip over blank lines to find the start of the next content block.
  # * Use defined regular expressions to determine the type of content block.
  # * Based on the type of content block, grab lines to the end of the block.
  # * Return a new Asciidoctor::Block or Asciidoctor::Section instance with the
  #   content set to the grabbed lines.
  #
  # reader - the Reader instance from which to get input.
  # parent - the object passed as parent to the Block/Section created.
  #
  # Returns the new Block or Section, or nil if the reader has no content
  # lines or runs out of lines before any block could be built.
  def self.next_block(reader, parent = self)
    # Skip ahead to the block content
    reader.skip_blank

    return nil unless reader.has_lines?

    # NOTE: An anchor looks like this:
    #   [[foo]]
    # with the inside [foo] (including brackets) as match[1]
    if match = reader.peek_line.match(REGEXP[:anchor])
      Asciidoctor.debug "Found an anchor in line:\n\t#{reader.peek_line}"
      # NOTE: This expression conditionally strips off the brackets from
      # [foo], though REGEXP[:anchor] won't actually match without
      # match[1] being bracketed, so the condition isn't necessary.
      anchor = match[1].match(/^\[(.*)\]/) ? $1 : match[1]
      # NOTE: Set @references['foo'] = '[foo]'
      document_from_parent(parent).references[anchor] = match[1]
      reader.get_line
    else
      anchor = nil
    end

    Asciidoctor.debug "/"*64
    Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:"
    Asciidoctor.debug reader.peek_line
    tmp_line = reader.get_line
    Asciidoctor.debug reader.peek_line
    # The anchor may have been the last line, in which case there is nothing
    # to push back.
    reader.unshift tmp_line unless tmp_line.nil?
    Asciidoctor.debug "/"*64

    block = nil
    title = nil
    caption = nil
    source_type = nil
    buffer = []
    while reader.has_lines? && block.nil?
      buffer.clear
      this_line = reader.get_line
      next_line = reader.peek_line || ''

      if this_line.match(REGEXP[:comment])
        next

      elsif match = this_line.match(REGEXP[:title])
        title = match[1]
        reader.skip_blank

      elsif match = this_line.match(REGEXP[:listing_source])
        source_type = match[1]
        reader.skip_blank

      elsif match = this_line.match(REGEXP[:caption])
        caption = match[1]

      elsif is_section_heading?(this_line, next_line)
        # If we've come to a new section, then we've found the end of this
        # current block.  Likewise if we'd found an unassigned anchor, push
        # it back as well, so it can go with this next heading.
        # NOTE - I don't think this will assign the anchor properly. Anchors
        # only match with double brackets - [[foo]], but what's stored in
        # `anchor` at this point is only the `foo` part that was stripped out
        # after matching.  TODO: Need a way to test this.
        reader.unshift(this_line)
        reader.unshift(anchor) unless anchor.nil?
        Asciidoctor.debug "#{__method__}: SENDING to next_section with lines[0] = #{reader.peek_line}"
        block = next_section(reader, parent)

      elsif this_line.match(REGEXP[:oblock])
        # oblock is surrounded by '--' lines and has zero or more blocks inside
        buffer = Reader.new(reader.grab_lines_until { |line| line.match(REGEXP[:oblock]) })

        # Strip lines off end of block - not implemented yet
        # while buffer.has_lines? && buffer.last.strip.empty?
        #   buffer.pop
        # end

        block = Block.new(parent, :oblock, [])
        while buffer.has_lines?
          new_block = next_block(buffer, block)
          block.blocks << new_block unless new_block.nil?
        end

      elsif list_type = [:olist, :colist].detect{|l| this_line.match( REGEXP[l] )}
        items = []
        Asciidoctor.debug "Creating block of type: #{list_type}"
        block = Block.new(parent, list_type)
        while !this_line.nil? && match = this_line.match(REGEXP[list_type])
          item = ListItem.new

          # Push the item text back (escaping a leading '.') so that
          # list_item_segment picks it up as the first line of the segment.
          reader.unshift match[2].lstrip.sub(/^\./, '\.')
          item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
          while item_segment.has_lines?
            new_block = next_block(item_segment, block)
            item.blocks << new_block unless new_block.nil?
          end

          # A leading paragraph/literal block becomes the item's own content
          if item.blocks.any? && item.blocks.first.is_a?(Block) && (item.blocks.first.context == :paragraph || item.blocks.first.context == :literal)
            item.content = item.blocks.shift.buffer.map{|l| l.strip}.join("\n")
          end

          items << item

          reader.skip_blank

          this_line = reader.get_line
        end
        reader.unshift(this_line) unless this_line.nil?

        block.buffer = items

      elsif match = this_line.match(REGEXP[:ulist])
        reader.unshift(this_line)
        block = build_ulist(reader, parent)

      elsif match = this_line.match(REGEXP[:dlist])
        pairs = []
        block = Block.new(parent, :dlist)

        this_dlist = Regexp.new(/^#{match[1]}(.*)#{match[3]}\s*$/)

        while !this_line.nil? && match = this_line.match(this_dlist)
          # Use a dedicated local for the term's embedded anchor so that the
          # block-level `anchor` captured above is not overwritten.
          if dt_anchor = match[1].match( /\[\[([^\]]+)\]\]/ )
            dt = ListItem.new( dt_anchor.pre_match + dt_anchor.post_match )
            dt.anchor = dt_anchor[1]
          else
            dt = ListItem.new( match[1] )
          end
          dd = ListItem.new
          # workaround eg. git-config OPTIONS --get-colorbool
          reader.get_line if reader.has_lines? && reader.peek_line.strip.empty?

          dd_segment = Reader.new(list_item_segment(reader, :alt_ending => this_dlist))
          while dd_segment.has_lines?
            new_block = next_block(dd_segment, block)
            dd.blocks << new_block unless new_block.nil?
          end

          # A leading paragraph/literal block becomes the definition's content
          if dd.blocks.any? && dd.blocks.first.is_a?(Block) && (dd.blocks.first.context == :paragraph || dd.blocks.first.context == :literal)
            dd.content = dd.blocks.shift.buffer.map{|l| l.strip}.join("\n")
          end

          pairs << [dt, dd]

          reader.skip_blank

          this_line = reader.get_line
        end
        reader.unshift(this_line) unless this_line.nil?
        block.buffer = pairs

      elsif this_line.match(REGEXP[:verse])
        # verse is preceded by [verse] and lasts until a blank line
        buffer = reader.grab_lines_until(:break_on_blank_lines => true)
        block = Block.new(parent, :verse, buffer)

      elsif this_line.match(REGEXP[:note])
        # note is an admonition preceded by [NOTE] and lasts until a blank line
        buffer = reader.grab_lines_until(:break_on_blank_lines => true)
        block = Block.new(parent, :note, buffer)

      elsif block_type = [:listing, :example].detect{|t| this_line.match( REGEXP[t] )}
        buffer = reader.grab_lines_until {|line| line.match( REGEXP[block_type] )}
        block = Block.new(parent, block_type, buffer)

      elsif this_line.match( REGEXP[:quote] )
        block = Block.new(parent, :quote)
        buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:quote] ) })

        while buffer.has_lines?
          new_block = next_block(buffer, block)
          block.blocks << new_block unless new_block.nil?
        end

      elsif this_line.match(REGEXP[:lit_blk])
        # literal block is surrounded by '....' (4 or more '.' chars) lines
        buffer = reader.grab_lines_until {|line| line.match( REGEXP[:lit_blk] ) }
        block = Block.new(parent, :literal, buffer)

      elsif this_line.match(REGEXP[:lit_par])
        # literal paragraph is contiguous lines starting with
        # one or more space or tab characters

        # So we need to actually include this one in the grab_lines group
        reader.unshift this_line
        buffer = reader.grab_lines_until(:preserve_last_line => true) {|line| ! line.match( REGEXP[:lit_par] ) }

        block = Block.new(parent, :literal, buffer)

      elsif this_line.match(REGEXP[:sidebar_blk])
        # sidebar is surrounded by '****' (4 or more '*' chars) lines
        buffer = reader.grab_lines_until {|line| line.match( REGEXP[:sidebar_blk] ) }
        block = Block.new(parent, :sidebar, buffer)

      else
        # paragraph is contiguous nonblank/noncontinuation lines
        while !this_line.nil? && !this_line.strip.empty?
          if this_line.match( REGEXP[:listing] ) || this_line.match( REGEXP[:oblock] )
            reader.unshift this_line
            break
          end
          buffer << this_line
          this_line = reader.get_line
        end

        if buffer.any? && admonition = buffer.first.match(/^NOTE:\s*/)
          buffer[0] = admonition.post_match
          block = Block.new(parent, :note, buffer)
        elsif source_type
          block = Block.new(parent, :listing, buffer)
        else
          Asciidoctor.debug "Proud parent #{parent} getting a new paragraph with buffer: #{buffer}"
          block = Block.new(parent, :paragraph, buffer)
        end
      end
    end

    # The reader can run dry after only comment/title/caption lines were
    # consumed, leaving no block to decorate. Callers already skip nil.
    return nil if block.nil?

    block.anchor  ||= anchor
    block.title   ||= title
    block.caption ||= caption

    block
  end

  # Private: Return the Array of lines constituting the next list item
  # segment, removing them from the Reader passed in.
  #
  # reader  - the Reader instance from which to get input.
  # options - an optional Hash of processing options:
  #           * :alt_ending may be used to specify a regular expression match
  #             other than a blank line to signify the end of the segment.
  #           * :list_types may be used to specify list item patterns to
  #             include. May be either a single Symbol or an Array of Symbols.
  #             A caller-supplied Array is not modified.
  #           * :list_level may be used to specify a minimum list item level
  #             to include. If this is specified, then break if we find a list
  #             item of a lower level.
  #
  # Returns the Array of lines forming the next segment.
  #
  # Examples
  #
  #   reader = Asciidoctor::Reader.new(
  #     ["First paragraph\n", "+\n", "Second paragraph\n", "--\n",
  #      "Open block\n", "\n", "Can have blank lines\n", "--\n", "\n",
  #      "In a different segment\n"])
  #
  #   list_item_segment(reader)
  #   => ["First paragraph\n", "+\n", "Second paragraph\n", "--\n",
  #       "Open block\n", "\n", "Can have blank lines\n", "--\n"]
  #
  #   reader.peek_line
  #   => "In a different segment\n"
  def self.list_item_segment(reader, options={})
    alternate_ending = options[:alt_ending]
    # NOTE(review): Array(nil) is [] (truthy), so the fallback on the right of
    # `||` never applies and the effective default is just [:lit_par].
    # Applying the fallback would change which blank-separated lines get
    # absorbed into a segment, so the expression is intentionally left as is.
    list_types = Array(options[:list_types]) || [:ulist, :olist, :colist, :dlist]
    list_level = options[:list_level].to_i

    # We know we want to include :lit_par types, even if we have specified,
    # say, only :ulist type list entries. Build a new Array (+=) rather than
    # appending in place, because Array(x) returns x itself when x is an Array.
    list_types += [:lit_par] unless list_types.include? :lit_par
    segment = []

    reader.skip_blank

    # Grab lines until the first blank line not inside an open block
    # or listing
    in_oblock = false
    in_listing = false
    while reader.has_lines?
      this_line = reader.get_line
      Asciidoctor.debug "-----> Processing: #{this_line}"
      in_oblock = !in_oblock if this_line.match(REGEXP[:oblock])
      in_listing = !in_listing if this_line.match(REGEXP[:listing])
      if !in_oblock && !in_listing
        if this_line.strip.empty?
          # TODO - FIX THIS BEFORE ANY MORE KITTENS DIE AUGGGHHH!!!
          next_nonblank = reader.instance_variable_get(:@lines).detect{|l| !l.strip.empty?}

          # If there are blank lines ahead, but there's at least one
          # more non-blank line that doesn't trigger an alternate_ending
          # for the block of lines, then vacuum up all the blank lines
          # into this segment and continue with the next non-blank line.
          if next_nonblank &&
             ( alternate_ending.nil? ||
               !next_nonblank.match(alternate_ending)
             ) && list_types.find { |list_type| next_nonblank.match(REGEXP[list_type]) }

            while reader.has_lines? && reader.peek_line.strip.empty?
              segment << this_line
              this_line = reader.get_line
            end
          else
            break
          end

        # Have we come to a line matching an alternate_ending regexp?
        elsif alternate_ending && this_line.match(alternate_ending)
          reader.unshift this_line
          break

        # Do we have a minimum list_level, and have come to a list item
        # line with a lower level?
        # ($1 is the first capture of the list-type match made inside the
        # find block just before it.)
        elsif list_level &&
              list_types.find { |list_type| this_line.match(REGEXP[list_type]) } &&
              ($1.length < list_level)
          reader.unshift this_line
          break
        end

        # From the Asciidoc user's guide:
        #   Another list or a literal paragraph immediately following
        #   a list item will be implicitly included in the list item

        # Thus, the list_level stuff may be wrong here.
      end

      segment << this_line
    end

    Asciidoctor.debug "*"*40
    Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__}: Returning this:"
    Asciidoctor.debug segment.inspect
    Asciidoctor.debug "*"*10
    Asciidoctor.debug "Leaving #{__method__}: Top of reader queue is:"
    Asciidoctor.debug reader.peek_line
    Asciidoctor.debug "*"*40
    segment
  end

  # Private: Get the Integer ulist level based on the characters
  # in front of the list item text.
  #
  # line - the String line containing the list item
  #
  # Returns the Integer length of the leading marker ('-' or one to five
  # '*' characters), or nil if the line does not start with such a marker
  # followed by whitespace.
  def self.ulist_level(line)
    if m = line.strip.match(/^(- | \*{1,5})\s+/x)
      return m[1].length
    end
  end

  # Private: Build a single unordered-list ListItem from the reader.
  #
  # reader - the Reader instance from which to get input.
  # block  - the object passed as parent to any blocks nested in the item.
  # match  - an optional match of REGEXP[:ulist] against the reader's next
  #          line; computed from that line when omitted.
  #
  # Returns the new ListItem, or nil if the reader has no line or the next
  # line is not a ulist item (in which case the line is pushed back).
  def self.build_ulist_item(reader, block, match = nil)
    list_type = :ulist
    this_line = reader.get_line
    return nil unless this_line

    match ||= this_line.match(REGEXP[list_type])
    if match.nil?
      reader.unshift(this_line)
      return nil
    end

    level = match[1].length

    list_item = ListItem.new
    list_item.level = level
    Asciidoctor.debug "#{__FILE__}:#{__LINE__}: Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"

    # Prevent bullet list text starting with . from being treated as a paragraph
    # title or some other unseemly thing in list_item_segment. I think. (NOTE)
    reader.unshift match[2].lstrip.sub(/^\./, '\.')

    item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
    # item_segment = list_item_segment(reader)
    while item_segment.has_lines?
      new_block = next_block(item_segment, block)
      list_item.blocks << new_block unless new_block.nil?
    end

    Asciidoctor.debug "\n\nlist_item has #{list_item.blocks.count} blocks, and first is a #{list_item.blocks.first.class} with context #{list_item.blocks.first.context rescue 'n/a'}\n\n"

    # A leading paragraph/literal block becomes the item's own content
    first_block = list_item.blocks.first
    if first_block.is_a?(Block) && (first_block.context == :paragraph || first_block.context == :literal)
      list_item.content = first_block.buffer.map{|l| l.strip}.join("\n")
      list_item.blocks.shift
    end

    list_item
  end

  # Private: Build a :ulist Block from consecutive ulist item lines.
  #
  # Items deeper than the first item's level are delegated to next_block so
  # they end up in a Block of their own.
  #
  # reader - the Reader instance from which to get input.
  # parent - the object passed as parent to the new Block.
  #
  # Returns the new Block, with its buffer set to the collected items.
  def self.build_ulist(reader, parent = nil)
    items = []
    list_type = :ulist
    block = Block.new(parent, list_type)
    Asciidoctor.debug "Created :ulist block: #{block}"
    first_item_level = nil

    while reader.has_lines? && match = reader.peek_line.match(REGEXP[list_type])

      this_item_level = match[1].length

      if first_item_level && first_item_level < this_item_level
        # If this next :uline level is down one from the
        # current Block's, put it in a Block of its own
        list_item = next_block(reader, block)
      else
        list_item = build_ulist_item(reader, block, match)
        # Set the base item level for this Block
        first_item_level ||= list_item.level
      end

      items << list_item

      reader.skip_blank
    end

    block.buffer = items
    block
  end

  # Private: Array-based variant of build_ulist.
  #
  # NOTE(review): this method hands a plain Array to list_item_segment and
  # next_block, both of which call Reader methods on their input, and it
  # calls skip_blank(lines), which is not defined in this class. It looks
  # like a leftover reference implementation - confirm before relying on it.
  #
  # lines  - the Array of String lines to consume.
  # parent - the object passed as parent to the new Block.
  #
  # Returns the new Block, with its buffer set to the collected items.
  def self.build_ulist_ref(lines, parent = nil)
    items = []
    list_type = :ulist
    block = Block.new(parent, list_type)
    Asciidoctor.debug "Created :ulist block: #{block}"
    last_item_level = nil
    this_line = lines.shift

    while this_line && match = this_line.match(REGEXP[list_type])
      level = match[1].length

      list_item = ListItem.new
      list_item.level = level
      Asciidoctor.debug "Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"

      lines.unshift match[2].lstrip.sub(/^\./, '\.')
      item_segment = list_item_segment(lines, :alt_ending => REGEXP[list_type], :list_level => level)
      while item_segment.any?
        new_block = next_block(item_segment, block)
        list_item.blocks << new_block unless new_block.nil?
      end

      first_block = list_item.blocks.first
      if first_block.is_a?(Block) && (first_block.context == :paragraph || first_block.context == :literal)
        list_item.content = first_block.buffer.map{|l| l.strip}.join("\n")
        list_item.blocks.shift
      end

      if items.any? && (level > items.last.level)
        Asciidoctor.debug "--> Putting this new level #{level} ListItem under my pops, #{items.last} (level: #{items.last.level})"
        items.last.blocks << list_item
      else
        Asciidoctor.debug "Stacking new list item in parent block's blocks"
        items << list_item
      end

      last_item_level = list_item.level

      # TODO: This has to come from a Reader object
      skip_blank(lines)

      this_line = lines.shift
    end
    lines.unshift(this_line) unless this_line.nil?

    block.buffer = items
    block
  end

  # Private: Get the Integer section level based on the characters
  # used in the ASCII line under the section name.
  #
  # line - the String line from under the section name.
  #
  # Returns 0..4 for a line made solely of '=', '-', '~', '^' or '+'
  # respectively, or nil for anything else.
  def self.section_level(line)
    char = line.strip.chars.to_a.uniq
    case char
    when ['=']; 0
    when ['-']; 1
    when ['~']; 2
    when ['^']; 3
    when ['+']; 4
    end
  end

  # Private: Get the Integer section level from the marker of a single-line
  # heading: = is level 0, == is level 1, etc.
  #
  # line - the String marker (e.g. "===").
  #
  # Returns the marker length minus one, never less than 0.
  def self.single_line_section_level(line)
    [line.length - 1, 0].max
  end

  # Private: Does the line match REGEXP[:level_title]?
  #
  # Returns a truthy match object, or false/nil otherwise.
  def self.is_single_line_section_heading?(line)
    !line.nil? && line.match(REGEXP[:level_title])
  end

  # Private: Do the two lines form a name/underline heading pair, i.e.
  # line1 matches REGEXP[:name], line2 matches REGEXP[:line] and their
  # sizes differ by at most one?
  #
  # Returns true, or false/nil otherwise.
  def self.is_two_line_section_heading?(line1, line2)
    !line1.nil? && !line2.nil? &&
    line1.match(REGEXP[:name]) && line2.match(REGEXP[:line]) &&
    (line1.size - line2.size).abs <= 1
  end

  # Private: Is line1 a single-line heading, or do line1 and line2 together
  # form a two-line heading?
  #
  # Returns a truthy value if so, false/nil otherwise.
  def self.is_section_heading?(line1, line2 = nil)
    is_single_line_section_heading?(line1) ||
    is_two_line_section_heading?(line1, line2)
  end

  # Private: Extracts the name, level and (optional) embedded anchor from a
  # 1- or 2-line section heading.
  #
  # Returns an array of a String, Integer, and String or nil.
  #
  # Examples
  #
  #   line1
  #   => "Foo\n"
  #   line2
  #   => "~~~\n"
  #
  #   name, level, anchor = extract_section_heading(line1, line2)
  #
  #   name
  #   => "Foo"
  #   level
  #   => 2
  #   anchor
  #   => nil
  #
  #   line1
  #   => "==== Foo\n"
  #
  #   name, level, anchor = extract_section_heading(line1)
  #
  #   name
  #   => "Foo"
  #   level
  #   => 3
  #   anchor
  #   => nil
  #
  def self.extract_section_heading(line1, line2 = nil)
    Asciidoctor.debug "#{__method__} -> line1: #{line1.chomp rescue 'nil'}, line2: #{line2.chomp rescue 'nil'}"
    sect_name = sect_anchor = nil
    sect_level = 0

    if is_single_line_section_heading?(line1)
      header_match = line1.match(REGEXP[:level_title])
      sect_name = header_match[2]
      sect_level = single_line_section_level(header_match[1])
    elsif is_two_line_section_heading?(line1, line2)
      header_match = line1.match(REGEXP[:name])
      if anchor_match = header_match[1].match(REGEXP[:anchor_embedded])
        sect_name = anchor_match[1]
        sect_anchor = anchor_match[2]
      else
        sect_name = header_match[1]
      end
      sect_level = section_level(line2)
    end
    Asciidoctor.debug "#{__method__} -> Returning #{sect_name}, #{sect_level} (anchor: '#{sect_anchor || ''}')"
    return [sect_name, sect_level, sect_anchor]
  end

  # Private: Return the next section from the Reader.
  #
  # Reads up to the next section heading, collects every following line that
  # belongs to the section (stopping at a heading of the same or a lower
  # level number) and parses those lines into blocks appended to the section.
  #
  # reader - the Reader instance from which to get input.
  # parent - the object passed as parent to the new Section.
  #
  # Returns the new Section.
  #
  # Examples
  #
  #   source
  #   => "GREETINGS\n---------\nThis is my doc.\n\nSALUTATIONS\n-----------\nIt is awesome."
  #
  #   TODO: doc = Asciidoctor::Document.new(source)
  #
  #   doc.next_section
  #   ["GREETINGS", [:paragraph, "This is my doc."]]
  #
  #   doc.next_section
  #   ["SALUTATIONS", [:paragraph, "It is awesome."]]
  def self.next_section(reader, parent = self)
    section = Section.new(parent)

    Asciidoctor.debug "%"*64
    Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:"
    Asciidoctor.debug reader.peek_line
    tmp_line = reader.get_line
    Asciidoctor.debug reader.peek_line
    # Nothing to push back if the reader was already empty
    reader.unshift tmp_line unless tmp_line.nil?
    Asciidoctor.debug "%"*64

    # Skip ahead to the next section definition
    while reader.has_lines? && section.name.nil?
      this_line = reader.get_line
      next_line = reader.peek_line || ''
      if match = this_line.match(REGEXP[:anchor])
        section.anchor = match[1]
      elsif is_section_heading?(this_line, next_line)
        section.name, section.level, embedded_anchor = extract_section_heading(this_line, next_line)
        # Only an anchor embedded in the heading overrides one captured from
        # a preceding [[anchor]] line; a heading without one must not wipe
        # the earlier anchor out.
        section.anchor = embedded_anchor unless embedded_anchor.nil?
        reader.get_line unless is_single_line_section_heading?(this_line)
      end
    end

    if !section.anchor.nil?
      anchor_id = section.anchor.match(/^\[(.*)\]/) ? $1 : section.anchor
      document_from_parent(parent).references[anchor_id] = section.anchor
      section.anchor = anchor_id
    end

    # Grab all the lines that belong to this section
    section_lines = []
    while reader.has_lines?
      this_line = reader.get_line
      next_line = reader.peek_line

      if is_section_heading?(this_line, next_line)
        _, this_level, _ = extract_section_heading(this_line, next_line)

        if this_level <= section.level
          # A section can't contain a section level lower than itself,
          # so this signifies the end of the section.
          reader.unshift this_line
          if section_lines.any? && section_lines.last.match(REGEXP[:anchor])
            # Put back the anchor that came before this new-section line
            # on which we're bailing.
            reader.unshift section_lines.pop
          end
          break
        else
          section_lines << this_line
          section_lines << reader.get_line unless is_single_line_section_heading?(this_line)
        end
      elsif this_line.match(REGEXP[:listing])
        # Take the whole listing verbatim so that heading-like lines inside
        # it are not mistaken for section boundaries.
        section_lines << this_line
        section_lines.concat(reader.grab_lines_until {|line| line.match( REGEXP[:listing] ) })
        # Also grab the last line, if there is one
        this_line = reader.get_line
        section_lines << this_line unless this_line.nil?
      else
        section_lines << this_line
      end
    end

    section_reader = Reader.new(section_lines)
    # Now parse section_lines into Blocks belonging to the current Section
    while section_reader.has_lines?
      section_reader.skip_blank
      # Only blank lines were left: stop, rather than re-appending the block
      # from the previous iteration.
      break unless section_reader.has_lines?

      new_block = next_block(section_reader, section)
      section << new_block unless new_block.nil?
    end

    section
  end

end