lib/asciidoctor/lexer.rb in asciidoctor-0.0.5 vs lib/asciidoctor/lexer.rb in asciidoctor-0.0.6
- old
+ new
@@ -26,10 +26,11 @@
def self.next_block(reader, parent = self)
# Skip ahead to the block content
reader.skip_blank
return nil unless reader.has_lines?
+ context = parent.is_a?(Block) ? parent.context : nil
# NOTE: An anchor looks like this:
# [[foo]]
# with the inside [foo] (including brackets) as match[1]
if match = reader.peek_line.match(REGEXP[:anchor])
@@ -43,10 +44,15 @@
reader.get_line
else
anchor = nil
end
+ # skip a list continuation character if we're processing a list
+ if LIST_CONTEXTS.include?(context)
+ reader.skip_list_continuation
+ end
+
Asciidoctor.debug "/"*64
Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__} - First two lines are:"
Asciidoctor.debug reader.peek_line
tmp_line = reader.get_line
Asciidoctor.debug reader.peek_line
@@ -54,48 +60,50 @@
Asciidoctor.debug "/"*64
block = nil
title = nil
caption = nil
- source_type = nil
buffer = []
+ attributes = {}
+ context = parent.is_a?(Block) ? parent.context : nil
while reader.has_lines? && block.nil?
buffer.clear
this_line = reader.get_line
next_line = reader.peek_line || ''
if this_line.match(REGEXP[:comment_blk])
Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:comment_blk] ) })
- next
elsif this_line.match(REGEXP[:comment])
- next
-
- elsif match = this_line.match(REGEXP[:title])
- title = match[1]
reader.skip_blank
- elsif match = this_line.match(REGEXP[:listing_source])
- source_type = match[1]
+ elsif match = this_line.match(REGEXP[:attr_list_blk])
+ collect_attributes(match[1], attributes)
reader.skip_blank
- elsif match = this_line.match(REGEXP[:caption])
- caption = match[1]
-
elsif is_section_heading?(this_line, next_line)
# If we've come to a new section, then we've found the end of this
# current block. Likewise if we'd found an unassigned anchor, push
- # it back as well, so it can go with this next heading.
- # NOTE - I don't think this will assign the anchor properly. Anchors
- # only match with double brackets - [[foo]], but what's stored in
- # `anchor` at this point is only the `foo` part that was stripped out
- # after matching. TODO: Need a way to test this.
+ #
+ # FIXME when slurping up next section, give back trailing anchor to following section
reader.unshift(this_line)
- reader.unshift(anchor) unless anchor.nil?
Asciidoctor.debug "#{__method__}: SENDING to next_section with lines[0] = #{reader.peek_line}"
block = next_section(reader, parent)
+ elsif match = this_line.match(REGEXP[:title])
+ title = match[1]
+ reader.skip_blank
+
+ elsif match = this_line.match(REGEXP[:image_blk])
+ collect_attributes(match[2], attributes, ['alt', 'width', 'height'])
+ block = Block.new(parent, :image)
+ # FIXME this seems kind of one-off here
+ target = block.sub_attributes(match[1])
+ attributes['target'] = target
+ attributes['alt'] ||= File.basename(target, File.extname(target))
+ reader.skip_blank
+
elsif this_line.match(REGEXP[:oblock])
# oblock is surrounded by '--' lines and has zero or more blocks inside
buffer = Reader.new(reader.grab_lines_until { |line| line.match(REGEXP[:oblock]) })
# Strip lines off end of block - not implemented yet
@@ -107,29 +115,38 @@
while buffer.has_lines?
new_block = next_block(buffer, block)
block.blocks << new_block unless new_block.nil?
end
+ # needs to come before list detection
+ elsif this_line.match(REGEXP[:sidebar_blk])
+ # sidebar is surrounded by '****' (4 or more '*' chars) lines
+ # FIXME violates DRY because it's a duplication of quote parsing
+ block = Block.new(parent, :sidebar)
+ buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:sidebar_blk] ) })
+
+ while buffer.has_lines?
+ new_block = next_block(buffer, block)
+ block.blocks << new_block unless new_block.nil?
+ end
+
elsif list_type = [:olist, :colist].detect{|l| this_line.match( REGEXP[l] )}
items = []
Asciidoctor.debug "Creating block of type: #{list_type}"
block = Block.new(parent, list_type)
+ attributes['style'] ||= 'arabic'
while !this_line.nil? && match = this_line.match(REGEXP[list_type])
- item = ListItem.new
+ item = ListItem.new(block)
reader.unshift match[2].lstrip.sub(/^\./, '\.')
item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
while item_segment.has_lines?
new_block = next_block(item_segment, block)
item.blocks << new_block unless new_block.nil?
end
- if item.blocks.any? &&
- item.blocks.first.is_a?(Block) &&
- (item.blocks.first.context == :paragraph || item.blocks.first.context == :literal)
- item.content = item.blocks.shift.buffer.map{|l| l.strip}.join("\n")
- end
+ item.fold_first
items << item
reader.skip_blank
@@ -138,121 +155,173 @@
reader.unshift(this_line) unless this_line.nil?
block.buffer = items
elsif match = this_line.match(REGEXP[:ulist])
-
reader.unshift(this_line)
block = build_ulist(reader, parent)
elsif match = this_line.match(REGEXP[:dlist])
+ # TODO build_dlist method?
pairs = []
block = Block.new(parent, :dlist)
+ # allows us to capture until we find a labeled item using the same delimiter (::, :::, :::: or ;;)
+ sibling_matcher = REGEXP[:dlist_siblings][match[3]]
- this_dlist = Regexp.new(/^#{match[1]}(.*)#{match[3]}\s*$/)
+ begin
+ dt = ListItem.new(block, match[2])
+ dt.anchor = match[1] unless match[1].nil?
+ dd = ListItem.new(block, match[5])
- while !this_line.nil? && match = this_line.match(this_dlist)
- if anchor = match[1].match( /\[\[([^\]]+)\]\]/ )
- dt = ListItem.new( $` + $' )
- dt.anchor = anchor[1]
- else
- dt = ListItem.new( match[1] )
- end
- dd = ListItem.new
- # workaround eg. git-config OPTIONS --get-colorbool
- reader.get_line if reader.has_lines? && reader.peek_line.strip.empty?
-
- dd_segment = Reader.new(list_item_segment(reader, :alt_ending => this_dlist))
+ dd_segment = Reader.new(list_item_segment(reader, :alt_ending => sibling_matcher))
while dd_segment.has_lines?
new_block = next_block(dd_segment, block)
dd.blocks << new_block unless new_block.nil?
end
- if dd.blocks.any? &&
- dd.blocks.first.is_a?(Block) &&
- (dd.blocks.first.context == :paragraph || dd.blocks.first.context == :literal)
- dd.content = dd.blocks.shift.buffer.map{|l| l.strip}.join("\n")
- end
+ dd.fold_first
pairs << [dt, dd]
+ # this skip_blank might be redundant
reader.skip_blank
-
this_line = reader.get_line
- end
+ end while !this_line.nil? && match = this_line.match(sibling_matcher)
+
reader.unshift(this_line) unless this_line.nil?
block.buffer = pairs
+
+ # FIXME violates DRY because it's a duplication of other block parsing
+ elsif this_line.match(REGEXP[:example])
+ # example is surrounded by lines with 4 or more '=' chars
+ rekey_positional_attributes(attributes, ['style'])
+ if admonition_style = ADMONITION_STYLES.detect {|s| attributes['style'] == s}
+ block = Block.new(parent, :admonition)
+ attributes['name'] = admonition_style.downcase
+ attributes['caption'] ||= admonition_style.capitalize
+ else
+ block = Block.new(parent, :example)
+ end
+ buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:example] ) })
- elsif this_line.match(REGEXP[:verse])
- # verse is preceded by [verse] and lasts until a blank line
- buffer = reader.grab_lines_until(:break_on_blank_lines => true)
- block = Block.new(parent, :verse, buffer)
+ while buffer.has_lines?
+ new_block = next_block(buffer, block)
+ block.blocks << new_block unless new_block.nil?
+ end
- elsif this_line.match(REGEXP[:note])
- # note is an admonition preceded by [NOTE] and lasts until a blank line
- buffer = reader.grab_lines_until(:break_on_blank_lines => true)
- block = Block.new(parent, :note, buffer)
+ # FIXME violates DRY w/ non-delimited block listing
+ elsif this_line.match(REGEXP[:listing])
+ rekey_positional_attributes(attributes, ['style', 'language', 'linenums'])
+ buffer = reader.grab_lines_until {|line| line.match( REGEXP[:listing] )}
+ buffer.last.chomp! unless buffer.empty?
+ block = Block.new(parent, :listing, buffer)
- elsif block_type = [:listing, :example].detect{|t| this_line.match( REGEXP[t] )}
- buffer = reader.grab_lines_until {|line| line.match( REGEXP[block_type] )}
- block = Block.new(parent, block_type, buffer)
-
- elsif this_line.match( REGEXP[:quote] )
- block = Block.new(parent, :quote)
+ elsif this_line.match(REGEXP[:quote])
+ # multi-line verse or quote is surrounded by a block delimiter
+ rekey_positional_attributes(attributes, ['style', 'attribution', 'citetitle'])
+ quote_context = (attributes['style'] == 'verse' ? :verse : :quote)
buffer = Reader.new(reader.grab_lines_until {|line| line.match( REGEXP[:quote] ) })
- while buffer.has_lines?
- new_block = next_block(buffer, block)
- block.blocks << new_block unless new_block.nil?
+ # only quote can have other section elements (as a section block)
+ section_body = (quote_context == :quote)
+
+ if section_body
+ block = Block.new(parent, quote_context)
+ while buffer.has_lines?
+ new_block = next_block(buffer, block)
+ block.blocks << new_block unless new_block.nil?
+ end
+ else
+ block = Block.new(parent, quote_context, buffer.lines)
end
elsif this_line.match(REGEXP[:lit_blk])
# literal block is surrounded by '....' (4 or more '.' chars) lines
buffer = reader.grab_lines_until {|line| line.match( REGEXP[:lit_blk] ) }
+ buffer.last.chomp! unless buffer.empty?
block = Block.new(parent, :literal, buffer)
elsif this_line.match(REGEXP[:lit_par])
# literal paragraph is contiguous lines starting with
# one or more space or tab characters
# So we need to actually include this one in the grab_lines group
reader.unshift this_line
- buffer = reader.grab_lines_until(:preserve_last_line => true) {|line| ! line.match( REGEXP[:lit_par] ) }
+ buffer = reader.grab_lines_until(:preserve_last_line => true) {|line|
+ (context == :dlist && line.match(REGEXP[:dlist])) || !line.match(REGEXP[:lit_par])
+ }
+ # trim off the indentation that put us in this literal paragraph
+ if !buffer.empty? && match = buffer.first.match(/^([[:blank:]]+)/)
+ offset = match[1].length
+ buffer = buffer.map {|l| l.slice(offset..-1)}
+ buffer.last.chomp!
+ end
+
block = Block.new(parent, :literal, buffer)
- elsif this_line.match(REGEXP[:sidebar_blk])
- # example is surrounded by '****' (4 or more '*' chars) lines
- buffer = reader.grab_lines_until {|line| line.match( REGEXP[:sidebar_blk] ) }
- block = Block.new(parent, :sidebar, buffer)
+ ## these switches based on style need to come immediately before the else ##
+ elsif attributes[0] == 'source'
+ rekey_positional_attributes(attributes, ['style', 'language', 'linenums'])
+ reader.unshift(this_line)
+ buffer = reader.grab_lines_until(:break_on_blank_lines => true)
+ buffer.last.chomp! unless buffer.empty?
+ block = Block.new(parent, :listing, buffer)
+
+ elsif admonition_style = ADMONITION_STYLES.detect{|s| attributes[0] == s}
+ # an admonition preceded by [*TYPE*] and lasts until a blank line
+ reader.unshift(this_line)
+ buffer = reader.grab_lines_until(:break_on_blank_lines => true)
+ block = Block.new(parent, :admonition, buffer)
+ attributes['style'] = admonition_style
+ attributes['name'] = admonition_style.downcase
+ attributes['caption'] ||= admonition_style.capitalize
+
+ elsif quote_context = [:quote, :verse].detect{|s| attributes[0] == s.to_s}
+ # single-paragraph verse or quote is preceded by [verse] or [quote], respectively, and lasts until a blank line
+ rekey_positional_attributes(attributes, ['style', 'attribution', 'citetitle'])
+ reader.unshift(this_line)
+ buffer = reader.grab_lines_until(:break_on_blank_lines => true)
+ block = Block.new(parent, quote_context, buffer)
+
else
# paragraph is contiguous nonblank/noncontinuation lines
- while !this_line.nil? && !this_line.strip.empty?
- if this_line.match( REGEXP[:listing] ) || this_line.match( REGEXP[:oblock] )
- reader.unshift this_line
- break
- end
- buffer << this_line
- this_line = reader.get_line
+ reader.unshift this_line
+ buffer = reader.grab_lines_until(:break_on_blank_lines => true, :preserve_last_line => true) {|line|
+ (context == :dlist && line.match(REGEXP[:dlist])) ||
+ ([:ulist, :olist, :dlist].include?(context) && line.chomp == LIST_CONTINUATION) ||
+ line.match(REGEXP[:oblock])
+ }
+
+ if LIST_CONTEXTS.include?(context)
+ reader.skip_list_continuation
end
- if buffer.any? && admonition = buffer.first.match(/^NOTE:\s*/)
+ if !buffer.empty? && admonition = buffer.first.match(Regexp.new('^(' + ADMONITION_STYLES.join('|') + '):\s+'))
buffer[0] = admonition.post_match
- block = Block.new(parent, :note, buffer)
- elsif source_type
- block = Block.new(parent, :listing, buffer)
+ block = Block.new(parent, :admonition, buffer)
+ attributes['style'] = admonition[1]
+ attributes['name'] = admonition[1].downcase
+ attributes['caption'] ||= admonition[1].capitalize
else
+ buffer.last.chomp! unless buffer.empty?
Asciidoctor.debug "Proud parent #{parent} getting a new paragraph with buffer: #{buffer}"
block = Block.new(parent, :paragraph, buffer)
end
end
end
- block.anchor ||= anchor
- block.title ||= title
- block.caption ||= caption
+ # when looking for nested content, a series of
+ # line comments or a comment block could leave us
+ # without a block
+ if !block.nil?
+ block.anchor ||= (anchor || attributes['id'])
+ block.title ||= title
+ block.caption ||= caption
+ block.update_attributes(attributes)
+ end
block
end
# Private: Return the Array of lines constituting the next list item
@@ -350,11 +419,11 @@
segment << this_line
end
Asciidoctor.debug "*"*40
Asciidoctor.debug "#{File.basename(__FILE__)}:#{__LINE__} -> #{__method__}: Returning this:"
- Asciidoctor.debug segment.inspect
+ #Asciidoctor.debug segment.inspect
Asciidoctor.debug "*"*10
Asciidoctor.debug "Leaving #{__method__}: Top of reader queue is:"
Asciidoctor.debug reader.peek_line
Asciidoctor.debug "*"*40
segment
@@ -381,15 +450,16 @@
return nil
end
level = match[1].length
- list_item = ListItem.new
+ list_item = ListItem.new(block)
list_item.level = level
Asciidoctor.debug "#{__FILE__}:#{__LINE__}: Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"
- # Prevent bullet list text starting with . from being treated as a paragraph
+ # Restore first line of list item
+ # Also prevent bullet list text starting with . from being treated as a paragraph
# title or some other unseemly thing in list_item_segment. I think. (NOTE)
reader.unshift match[2].lstrip.sub(/^\./, '\.')
item_segment = Reader.new(list_item_segment(reader, :alt_ending => REGEXP[list_type]))
# item_segment = list_item_segment(reader)
@@ -398,16 +468,11 @@
list_item.blocks << new_block unless new_block.nil?
end
Asciidoctor.debug "\n\nlist_item has #{list_item.blocks.count} blocks, and first is a #{list_item.blocks.first.class} with context #{list_item.blocks.first.context rescue 'n/a'}\n\n"
- first_block = list_item.blocks.first
- if first_block.is_a?(Block) &&
- (first_block.context == :paragraph || first_block.context == :literal)
- list_item.content = first_block.buffer.map{|l| l.strip}.join("\n")
- list_item.blocks.shift
- end
+ list_item.fold_first
list_item
end
def self.build_ulist(reader, parent = nil)
@@ -421,19 +486,22 @@
this_item_level = match[1].length
if first_item_level && first_item_level < this_item_level
# If this next :uline level is down one from the
- # current Block's, put it in a Block of its own
- list_item = next_block(reader, block)
+ # current Block's, append it to content of the current list item
+ items.last.blocks << next_block(reader, block)
+ elsif first_item_level && first_item_level > this_item_level
+ break
else
list_item = build_ulist_item(reader, block, match)
# Set the base item level for this Block
first_item_level ||= list_item.level
end
- items << list_item
+ items << list_item unless list_item.nil?
+ list_item = nil
reader.skip_blank
end
block.buffer = items
@@ -449,27 +517,22 @@
this_line = lines.shift
while this_line && match = this_line.match(REGEXP[list_type])
level = match[1].length
- list_item = ListItem.new
+ list_item = ListItem.new(block)
list_item.level = level
Asciidoctor.debug "Created ListItem #{list_item} with match[2]: #{match[2]} and level: #{list_item.level}"
lines.unshift match[2].lstrip.sub(/^\./, '\.')
item_segment = list_item_segment(lines, :alt_ending => REGEXP[list_type], :list_level => level)
while item_segment.any?
new_block = next_block(item_segment, block)
list_item.blocks << new_block unless new_block.nil?
end
- first_block = list_item.blocks.first
- if first_block.is_a?(Block) &&
- (first_block.context == :paragraph || first_block.context == :literal)
- list_item.content = first_block.buffer.map{|l| l.strip}.join("\n")
- list_item.blocks.shift
- end
+ list_item.fold_first
if items.any? && (level > items.last.level)
Asciidoctor.debug "--> Putting this new level #{level} ListItem under my pops, #{items.last} (level: #{items.last.level})"
items.last.blocks << list_item
else
@@ -488,10 +551,36 @@
block.buffer = items
block
end
# Parse the contents of an attribute list (the text between the square
# brackets) and store the result in the supplied attributes Hash.
#
# Entries are separated by commas. An entry of the form name=value is
# stored under the String name, with one pair of matching surrounding
# quotes removed from the value. Only the first '=' separates the name
# from the value, so a value may itself contain '=' characters. Any other
# non-empty entry is positional: it is stored under its Integer index in
# the list and, when posattrs supplies a name for that index, under that
# name as well. An empty slot (e.g. the middle of "a,,c") still occupies
# an index but stores nothing, so it cannot overwrite an existing value.
#
# attrs      - the String attribute list without the enclosing brackets;
#              nil is treated as an empty list
# attributes - the Hash that receives the parsed entries (modified in place)
# posattrs   - an Array of String names for the positional entries,
#              in order (default: [])
#
# Examples
#
#   attributes = {}
#   collect_attributes('Sunset, 300, link="a=b"', attributes, ['alt', 'width'])
#   attributes
#   # => {0=>"Sunset", "alt"=>"Sunset", 1=>"300", "width"=>"300", "link"=>"a=b"}
def self.collect_attributes(attrs, attributes, posattrs = [])
  # TODO walk the string properly rather than using split, so that a quoted
  # value may contain a comma
  attrs.to_s.strip.split(/\s*,\s*/).each_with_index do |entry, i|
    # an empty slot consumes a position but must not store anything
    next if entry.empty?

    # limit of 2: everything after the first '=' belongs to the value
    key, val = entry.split(/\s*=\s*/, 2)
    if val.nil?
      attributes[i] = key
      # positional attribute has a known key
      attributes[posattrs[i]] = key if i < posattrs.size
    else
      attributes[key] = val.sub(/\A(['"])(.*)\1\z/, '\2')
    end
  end
end

# Copy positional attributes (stored under Integer indexes) to the given
# names. The value at index i, when present, is also stored under
# posattrs[i]; indexes that have no value are left untouched. The Integer
# keys themselves are kept.
#
# attributes - the Hash of attributes to update (modified in place)
# posattrs   - an Array of String names, where the position of each name
#              is the positional index it maps to
def self.rekey_positional_attributes(attributes, posattrs)
  posattrs.each_with_index do |key, i|
    val = attributes[i]
    attributes[key] = val unless val.nil?
  end
end
+
# Private: Get the Integer section level based on the characters
# used in the ASCII line under the section name.
#
# line - the String line from under the section name.
def self.section_level(line)
@@ -515,11 +604,12 @@
end
# Check whether two consecutive lines form a two-line section heading,
# i.e. a section name followed by the line under it.
#
# The result is truthy only when both lines are non-nil, line1 matches
# REGEXP[:name], line2 matches REGEXP[:line], and the two lines differ in
# length by at most one character. The new revision chomps both lines
# before comparing their lengths; the old revision compared raw sizes.
#
# line1 - the String candidate section name line (may be nil)
# line2 - the String candidate line under the name (may be nil)
def self.is_two_line_section_heading?(line1, line2)
!line1.nil? && !line2.nil? &&
line1.match(REGEXP[:name]) && line2.match(REGEXP[:line]) &&
- (line1.size - line2.size).abs <= 1
+ # chomp so that a (non-visible) endline does not impact calculation
+ (line1.chomp.size - line2.chomp.size).abs <= 1
end
def self.is_section_heading?(line1, line2 = nil)
is_single_line_section_heading?(line1) ||
is_two_line_section_heading?(line1, line2)
@@ -645,16 +735,10 @@
break
else
section_lines << this_line
section_lines << reader.get_line unless is_single_line_section_heading?(this_line)
end
- elsif this_line.match(REGEXP[:listing])
- section_lines << this_line
- section_lines.concat reader.grab_lines_until {|line| line.match( REGEXP[:listing] ) }
- # Also grab the last line, if there is one
- this_line = reader.get_line
- section_lines << this_line unless this_line.nil?
else
section_lines << this_line
end
end
@@ -664,9 +748,22 @@
section_reader.skip_blank
if section_reader.has_lines?
new_block = next_block(section_reader, section)
section << new_block unless new_block.nil?
+ end
+ end
+
+ # detect preamble and push it into a block
+ # QUESTION make this an operation on Section?
+ if section.level == 0
+ blocks = section.blocks.take_while {|b| !b.is_a? Section}
+ if !blocks.empty?
+ # QUESTION Should we propagate the buffer?
+ #preamble = Block.new(section, :preamble, blocks.reduce {|a, b| a.buffer + b.buffer})
+ preamble = Block.new(section, :preamble)
+ blocks.each { preamble << section.delete_at(0) }
+ section.insert(0, preamble)
end
end
section
end