lib/asciidoctor/lexer.rb in asciidoctor-0.0.5 vs lib/asciidoctor/lexer.rb in asciidoctor-0.0.6

- old
+ new

@@ -26,10 +26,11 @@ def self.next_block(reader, parent = self) # Skip ahead to the block content reader.skip_blank return nil unless reader.has_lines? + context = parent.is_a?(Block) ? parent.context : nil # NOTE: An anchor looks like this: # [[foo]] # with the inside [foo] (including brackets) as match[1] if match = reader.peek_line.match(REGEXP[:anchor]) @@ -43,10 +44,15 @@ reader.get_line else anchor = nil end + # skip a list continuation character if we're processing a list + if LIST_CONTEXTS.include?(context) + reader.skip_list_continuation + end + Asciidoctor.debug "/"*64 Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:" Asciidoctor.debug reader.peek_line tmp_line = reader.get_line Asciidoctor.debug reader.peek_line @@ -54,48 +60,50 @@ Asciidoctor.debug "/"*64 block = nil title = nil caption = nil - source_type = nil buffer = [] + attributes = {} + context = parent.is_a?(Block) ? parent.context : nil while reader.has_lines? && block.nil? buffer.clear this_line = reader.get_line next_line = reader.peek_line || '' if this_line.match(REGEXP[:comment_blk]) Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:comment_blk] ) }) - next elsif this_line.match(REGEXP[:comment]) - next - - elsif match = this_line.match(REGEXP[:title]) - title = match[1] reader.skip_blank - elsif match = this_line.match(REGEXP[:listing_source]) - source_type = match[1] + elsif match = this_line.match(REGEXP[:attr_list_blk]) + collect_attributes(match[1], attributes) reader.skip_blank - elsif match = this_line.match(REGEXP[:caption]) - caption = match[1] - elsif is_section_heading?(this_line, next_line) # If we've come to a new section, then we've found the end of this # current block. Likewise if we'd found an unassigned anchor, push - # it back as well, so it can go with this next heading. - # NOTE - I don't think this will assign the anchor properly. Anchors - # only match with double brackets - [[foo]], but what's stored in - # `anchor` at this point is only the `foo` part that was stripped out - # after matching. TODO: Need a way to test this. + # + # FIXME when slurping up next section, give back trailing anchor to following section reader.unshift(this_line) - reader.unshift(anchor) unless anchor.nil? Asciidoctor.debug "#{__method__}: SENDING to next_section with lines[0] = #{reader.peek_line}" block = next_section(reader, parent) + elsif match = this_line.match(REGEXP[:title]) + title = match[1] + reader.skip_blank + + elsif match = this_line.match(REGEXP[:image_blk]) + collect_attributes(match[2], attributes, ['alt', 'width', 'height']) + block = Block.new(parent, :image) + # FIXME this seems kind of one-off here + target = block.sub_attributes(match[1]) + attributes['target'] = target + attributes['alt'] ||= File.basename(target, File.extname(target)) + reader.skip_blank + elsif this_line.match(REGEXP[:oblock]) # oblock is surrounded by '--' lines and has zero or more blocks inside buffer = Reader.new(reader.grab_lines_until { |line| line.match(REGEXP[:oblock]) }) # Strip lines off end of block - not implemented yet @@ -107,29 +115,38 @@ while buffer.has_lines? new_block = next_block(buffer, block) block.blocks << new_block unless new_block.nil? end + # needs to come before list detection + elsif this_line.match(REGEXP[:sidebar_blk]) + # sidebar is surrounded by '****' (4 or more '*' chars) lines + # FIXME violates DRY because it's a duplication of quote parsing + block = Block.new(parent, :sidebar) + buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:sidebar_blk] ) }) + + while buffer.has_lines? + new_block = next_block(buffer, block) + block.blocks << new_block unless new_block.nil? + end + elsif list_type = [:olist, :colist].detect{|l| this_line.match( REGEXP[l] )} items = [] Asciidoctor.debug "Creating block of type: #{list_type}" block = Block.new(parent, list_type) + attributes['style'] ||= 'arabic' while !this_line.nil? && match = this_line.match(REGEXP[list_type]) - item = ListItem.new + item = ListItem.new(block) reader.unshift match[2].lstrip.sub(/^\./, '\.') item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type])) while item_segment.has_lines? new_block = next_block(item_segment, block) item.blocks << new_block unless new_block.nil? end - if item.blocks.any? && - item.blocks.first.is_a?(Block) && - (item.blocks.first.context == :paragraph || item.blocks.first.context == :literal) - item.content = item.blocks.shift.buffer.map{|l| l.strip}.join("\n") - end + item.fold_first items << item reader.skip_blank @@ -138,121 +155,173 @@ reader.unshift(this_line) unless this_line.nil? block.buffer = items elsif match = this_line.match(REGEXP[:ulist]) - reader.unshift(this_line) block = build_ulist(reader, parent) elsif match = this_line.match(REGEXP[:dlist]) + # TODO build_dlist method? pairs = [] block = Block.new(parent, :dlist) + # allows us to capture until we find a labeled item using the same delimiter (::, :::, :::: or ;;) + sibling_matcher = REGEXP[:dlist_siblings][match[3]] - this_dlist = Regexp.new(/^#{match[1]}(.*)#{match[3]}\s*$/) + begin + dt = ListItem.new(block, match[2]) + dt.anchor = match[1] unless match[1].nil? + dd = ListItem.new(block, match[5]) - while !this_line.nil? && match = this_line.match(this_dlist) - if anchor = match[1].match( /\[\[([^\]]+)\]\]/ ) - dt = ListItem.new( $` + $' ) - dt.anchor = anchor[1] - else - dt = ListItem.new( match[1] ) - end - dd = ListItem.new - # workaround eg. git-config OPTIONS --get-colorbool - reader.get_line if reader.has_lines? && reader.peek_line.strip.empty? - - dd_segment = Reader.new(list_item_segment(reader, :alt_ending => this_dlist)) + dd_segment = Reader.new(list_item_segment(reader, :alt_ending => sibling_matcher)) while dd_segment.has_lines? new_block = next_block(dd_segment, block) dd.blocks << new_block unless new_block.nil? end - if dd.blocks.any? && - dd.blocks.first.is_a?(Block) && - (dd.blocks.first.context == :paragraph || dd.blocks.first.context == :literal) - dd.content = dd.blocks.shift.buffer.map{|l| l.strip}.join("\n") - end + dd.fold_first pairs << [dt, dd] + # this skip_blank might be redundant reader.skip_blank - this_line = reader.get_line - end + end while !this_line.nil? && match = this_line.match(sibling_matcher) + reader.unshift(this_line) unless this_line.nil? block.buffer = pairs + + # FIXME violates DRY because it's a duplication of other block parsing + elsif this_line.match(REGEXP[:example]) + # example is surrounded by lines with 4 or more '=' chars + rekey_positional_attributes(attributes, ['style']) + if admonition_style = ADMONITION_STYLES.detect {|s| attributes['style'] == s} + block = Block.new(parent, :admonition) + attributes['name'] = admonition_style.downcase + attributes['caption'] ||= admonition_style.capitalize + else + block = Block.new(parent, :example) + end + buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:example] ) }) - elsif this_line.match(REGEXP[:verse]) - # verse is preceded by [verse] and lasts until a blank line - buffer = reader.grab_lines_until(:break_on_blank_lines => true) - block = Block.new(parent, :verse, buffer) + while buffer.has_lines? + new_block = next_block(buffer, block) + block.blocks << new_block unless new_block.nil? + end - elsif this_line.match(REGEXP[:note]) - # note is an admonition preceded by [NOTE] and lasts until a blank line - buffer = reader.grab_lines_until(:break_on_blank_lines => true) - block = Block.new(parent, :note, buffer) + # FIXME violates DRY w/ non-delimited block listing + elsif this_line.match(REGEXP[:listing]) + rekey_positional_attributes(attributes, ['style', 'language', 'linenums']) + buffer = reader.grab_lines_until {|line| line.match( REGEXP[:listing] )} + buffer.last.chomp! unless buffer.empty? + block = Block.new(parent, :listing, buffer) - elsif block_type = [:listing, :example].detect{|t| this_line.match( REGEXP[t] )} - buffer = reader.grab_lines_until {|line| line.match( REGEXP[block_type] )} - block = Block.new(parent, block_type, buffer) - - elsif this_line.match( REGEXP[:quote] ) - block = Block.new(parent, :quote) + elsif this_line.match(REGEXP[:quote]) + # multi-line verse or quote is surrounded by a block delimiter + rekey_positional_attributes(attributes, ['style', 'attribution', 'citetitle']) + quote_context = (attributes['style'] == 'verse' ? :verse : :quote) buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:quote] ) }) - while buffer.has_lines? - new_block = next_block(buffer, block) - block.blocks << new_block unless new_block.nil? + # only quote can have other section elements (as as section block) + section_body = (quote_context == :quote) + + if section_body + block = Block.new(parent, quote_context) + while buffer.has_lines? + new_block = next_block(buffer, block) + block.blocks << new_block unless new_block.nil? + end + else + block = Block.new(parent, quote_context, buffer.lines) end elsif this_line.match(REGEXP[:lit_blk]) # example is surrounded by '....' (4 or more '.' chars) lines buffer = reader.grab_lines_until {|line| line.match( REGEXP[:lit_blk] ) } + buffer.last.chomp! unless buffer.empty? block = Block.new(parent, :literal, buffer) elsif this_line.match(REGEXP[:lit_par]) # literal paragraph is contiguous lines starting with # one or more space or tab characters # So we need to actually include this one in the grab_lines group reader.unshift this_line - buffer = reader.grab_lines_until(:preserve_last_line => true) {|line| ! line.match( REGEXP[:lit_par] ) } + buffer = reader.grab_lines_until(:preserve_last_line => true) {|line| + (context == :dlist && line.match(REGEXP[:dlist])) || !line.match(REGEXP[:lit_par]) + } + # trim off the indentation that put us in this literal paragraph + if !buffer.empty? && match = buffer.first.match(/^([[:blank:]]+)/) + offset = match[1].length + buffer = buffer.map {|l| l.slice(offset..-1)} + buffer.last.chomp! + end + block = Block.new(parent, :literal, buffer) - elsif this_line.match(REGEXP[:sidebar_blk]) - # example is surrounded by '****' (4 or more '*' chars) lines - buffer = reader.grab_lines_until {|line| line.match( REGEXP[:sidebar_blk] ) } - block = Block.new(parent, :sidebar, buffer) + ## these switches based on style need to come immediately before the else ## + elsif attributes[0] == 'source' + rekey_positional_attributes(attributes, ['style', 'language', 'linenums']) + reader.unshift(this_line) + buffer = reader.grab_lines_until(:break_on_blank_lines => true) + buffer.last.chomp! unless buffer.empty? + block = Block.new(parent, :listing, buffer) + + elsif admonition_style = ADMONITION_STYLES.detect{|s| attributes[0] == s} + # an admonition preceded by [*TYPE*] and lasts until a blank line + reader.unshift(this_line) + buffer = reader.grab_lines_until(:break_on_blank_lines => true) + block = Block.new(parent, :admonition, buffer) + attributes['style'] = admonition_style + attributes['name'] = admonition_style.downcase + attributes['caption'] ||= admonition_style.capitalize + + elsif quote_context = [:quote, :verse].detect{|s| attributes[0] == s.to_s} + # single-paragraph verse or quote is preceded by [verse] or [quote], respectively, and lasts until a blank line + rekey_positional_attributes(attributes, ['style', 'attribution', 'citetitle']) + reader.unshift(this_line) + buffer = reader.grab_lines_until(:break_on_blank_lines => true) + block = Block.new(parent, quote_context, buffer) + else # paragraph is contiguous nonblank/noncontinuation lines - while !this_line.nil? && !this_line.strip.empty? - if this_line.match( REGEXP[:listing] ) || this_line.match( REGEXP[:oblock] ) - reader.unshift this_line - break - end - buffer << this_line - this_line = reader.get_line + reader.unshift this_line + buffer = reader.grab_lines_until(:break_on_blank_lines => true, :preserve_last_line => true) {|line| + (context == :dlist && line.match(REGEXP[:dlist])) || + ([:ulist, :olist, :dlist].include?(context) && line.chomp == LIST_CONTINUATION) || + line.match(REGEXP[:oblock]) + } + + if LIST_CONTEXTS.include?(context) + reader.skip_list_continuation end - if buffer.any? && admonition = buffer.first.match(/^NOTE:\s*/) + if !buffer.empty? && admonition = buffer.first.match(Regexp.new('^(' + ADMONITION_STYLES.join('|') + '):\s+')) buffer[0] = admonition.post_match - block = Block.new(parent, :note, buffer) - elsif source_type - block = Block.new(parent, :listing, buffer) + block = Block.new(parent, :admonition, buffer) + attributes['style'] = admonition[1] + attributes['name'] = admonition[1].downcase + attributes['caption'] ||= admonition[1].capitalize else + buffer.last.chomp! unless buffer.empty? Asciidoctor.debug "Proud parent #{parent} getting a new paragraph with buffer: #{buffer}" block = Block.new(parent, :paragraph, buffer) end end end - block.anchor ||= anchor - block.title ||= title - block.caption ||= caption + # when looking for nested content, a series of + # line comments or a comment block could leave us + # without a block + if !block.nil? + block.anchor ||= (anchor || attributes['id']) + block.title ||= title + block.caption ||= caption + block.update_attributes(attributes) + end block end # Private: Return the Array of lines constituting the next list item @@ -350,11 +419,11 @@ segment << this_line end Asciidoctor.debug "*"*40 Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__}: Returning this:" - Asciidoctor.debug segment.inspect + #Asciidoctor.debug segment.inspect Asciidoctor.debug "*"*10 Asciidoctor.debug "Leaving #{__method__}: Top of reader queue is:" Asciidoctor.debug reader.peek_line Asciidoctor.debug "*"*40 segment @@ -381,15 +450,16 @@ return nil end level = match[1].length - list_item = ListItem.new + list_item = ListItem.new(block) list_item.level = level Asciidoctor.debug "#{__FILE__}:#{__LINE__}: Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}" - # Prevent bullet list text starting with . from being treated as a paragraph + # Restore first line of list item + # Also prevent bullet list text starting with . from being treated as a paragraph # title or some other unseemly thing in list_item_segment. I think. (NOTE) reader.unshift match[2].lstrip.sub(/^\./, '\.') item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type])) # item_segment = list_item_segment(reader) @@ -398,16 +468,11 @@ list_item.blocks << new_block unless new_block.nil? end Asciidoctor.debug "\n\nlist_item has #{list_item.blocks.count} blocks, and first is a #{list_item.blocks.first.class} with context #{list_item.blocks.first.context rescue 'n/a'}\n\n" - first_block = list_item.blocks.first - if first_block.is_a?(Block) && - (first_block.context == :paragraph || first_block.context == :literal) - list_item.content = first_block.buffer.map{|l| l.strip}.join("\n") - list_item.blocks.shift - end + list_item.fold_first list_item end def self.build_ulist(reader, parent = nil) @@ -421,19 +486,22 @@ this_item_level = match[1].length if first_item_level && first_item_level < this_item_level # If this next :uline level is down one from the - # current Block's, put it in a Block of its own - list_item = next_block(reader, block) + # current Block's, append it to content of the current list item + items.last.blocks << next_block(reader, block) + elsif first_item_level && first_item_level > this_item_level + break else list_item = build_ulist_item(reader, block, match) # Set the base item level for this Block first_item_level ||= list_item.level end - items << list_item + items << list_item unless list_item.nil? + list_item = nil reader.skip_blank end block.buffer = items @@ -449,27 +517,22 @@ this_line = lines.shift while this_line && match = this_line.match(REGEXP[list_type]) level = match[1].length - list_item = ListItem.new + list_item = ListItem.new(block) list_item.level = level Asciidoctor.debug "Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}" lines.unshift match[2].lstrip.sub(/^\./, '\.') item_segment = list_item_segment(lines, :alt_ending => REGEXP[list_type], :list_level => level) while item_segment.any? new_block = next_block(item_segment, block) list_item.blocks << new_block unless new_block.nil? end - first_block = list_item.blocks.first - if first_block.is_a?(Block) && - (first_block.context == :paragraph || first_block.context == :literal) - list_item.content = first_block.buffer.map{|l| l.strip}.join("\n") - list_item.blocks.shift - end + list_item.fold_first if items.any? && (level > items.last.level) Asciidoctor.debug "--> Putting this new level #{level} ListItem under my pops, #{items.last} (level: #{items.last.level})" items.last.blocks << list_item else @@ -488,10 +551,36 @@ block.buffer = items block end + def self.collect_attributes(attrs, attributes, posattrs = []) + # TODO walk be properly rather than using split + attrs.split(/\s*,\s*/).each_with_index do |entry, i| + key, val = entry.split(/\s*=\s*/) + if !val.nil? + val.gsub!(/^(['"])(.*)\1$/, '\2') unless val.nil? + attributes[key] = val + else + attributes[i] = key + # positional attribute has a known key + if posattrs.size >= (i + 1) + attributes[posattrs[i]] = key + end + end + end + end + + def self.rekey_positional_attributes(attributes, posattrs) + posattrs.each_with_index do |key, i| + val = attributes[i] + if !val.nil? + attributes[key] = val + end + end + end + # Private: Get the Integer section level based on the characters # used in the ASCII line under the section name. # # line - the String line from under the section name. def self.section_level(line) @@ -515,11 +604,12 @@ end def self.is_two_line_section_heading?(line1, line2) !line1.nil? && !line2.nil? && line1.match(REGEXP[:name]) && line2.match(REGEXP[:line]) && - (line1.size - line2.size).abs <= 1 + # chomp so that a (non-visible) endline does not impact calculation + (line1.chomp.size - line2.chomp.size).abs <= 1 end def self.is_section_heading?(line1, line2 = nil) is_single_line_section_heading?(line1) || is_two_line_section_heading?(line1, line2) @@ -645,16 +735,10 @@ break else section_lines << this_line section_lines << reader.get_line unless is_single_line_section_heading?(this_line) end - elsif this_line.match(REGEXP[:listing]) - section_lines << this_line - section_lines.concat reader.grab_lines_until {|line| line.match( REGEXP[:listing] ) } - # Also grab the last line, if there is one - this_line = reader.get_line - section_lines << this_line unless this_line.nil? else section_lines << this_line end end @@ -664,9 +748,22 @@ section_reader.skip_blank if section_reader.has_lines? new_block = next_block(section_reader, section) section << new_block unless new_block.nil? + end + end + + # detect preamble and push it into a block + # QUESTION make this an operation on Section? + if section.level == 0 + blocks = section.blocks.take_while {|b| !b.is_a? Section} + if !blocks.empty? + # QUESTION Should we propagate the buffer? + #preamble = Block.new(section, :preamble, blocks.reduce {|a, b| a.buffer + b.buffer}) + preamble = Block.new(section, :preamble) + blocks.each { preamble << section.delete_at(0) } + section.insert(0, preamble) end end section end