lib/asciidoctor/lexer.rb in asciidoctor-0.1.0 vs lib/asciidoctor/lexer.rb in asciidoctor-0.1.1

- old
+ new

@@ -1,5 +1,6 @@ +module Asciidoctor # Public: Methods to parse lines of AsciiDoc into an object hierarchy # representing the structure of the document. All methods are class methods and # should be invoked from the Lexer class. The main entry point is ::next_block. # No Lexer instances shall be discovered running around. (Any attempt to # instantiate a Lexer will be futile). @@ -18,13 +19,13 @@ # doc = Document.new # reader = Reader.new lines # block = Lexer.next_block(reader, doc) # block.class # # => Asciidoctor::Block -class Asciidoctor::Lexer +class Lexer - include Asciidoctor + BlockMatchData = Struct.new(:name, :tip, :terminator) # Public: Make sure the Lexer object doesn't get initialized. # # Raises RuntimeError if this constructor is invoked. def initialize @@ -38,32 +39,64 @@ # proceeds to iterate through the lines in the Reader, parsing the document # into nested Sections and Blocks. # # reader - the Reader holding the source lines of the document # document - the empty Document into which the lines will be parsed + # options - a Hash of options to control processing # # returns the Document object - def self.parse(reader, document) - # process and plow away any attribute lines that proceed the first block so - # we can get at the document title, if present, then begin parsing blocks - reader.skip_blank_lines - attributes = parse_block_metadata_lines(reader, document) + def self.parse(reader, document, options = {}) + block_attributes = parse_document_header(reader, document) - # by processing the header here, we enforce its position at head of the document - next_level = is_next_line_section? reader, attributes - if next_level == 0 - title_info = parse_section_title(reader) - document.title = title_info[1] - parse_header_metadata(reader, document) + unless options[:header_only] + while reader.has_more_lines? + new_section, block_attributes = next_section(reader, document, block_attributes) + document << new_section unless new_section.nil? + end end - while reader.has_lines? - new_section, attributes = next_section(reader, document, attributes) - document << new_section unless new_section.nil? + document + end + + # Public: Parses the document header of the AsciiDoc source read from the Reader + # + # Reads the AsciiDoc source from the Reader until the end of the document + # header is reached. The Document object is populated with information from + # the header (document title, document attributes, etc). The document + # attributes are then saved to establish a save point to which to rollback + # after parsing is complete. + # + # This method assumes that there are no blank lines at the start of the document, + # which are automatically removed by the reader. + # + # returns the Hash of orphan block attributes captured above the header + def self.parse_document_header(reader, document) + # capture any lines of block-level metadata and plow away any comment lines + # that precede first block + block_attributes = parse_block_metadata_lines(reader, document) + + # check if the first line is the document title + # if so, add a header to the document and parse the header metadata + if is_next_line_document_title?(reader, block_attributes) + document.id, document.title, _, _ = parse_section_title(reader) + # QUESTION: should this be encapsulated in document? + if document.id.nil? && block_attributes.has_key?('id') + document.id = block_attributes.delete('id') + end + parse_header_metadata(reader, document) end - document + if document.attributes.has_key? 'doctitle' + document.title = document.attributes['doctitle'] + end + + document.clear_playback_attributes block_attributes + document.save_attributes + + # NOTE these are the block-level attributes (not document attributes) that + # precede the first line of content (document title, first section or first block) + block_attributes end # Public: Return the next section from the Reader. # # This method process block metadata, content and subsections for this @@ -143,22 +176,22 @@ # 2. then look for a section, recurse if found # 3. then process blocks # # We have to parse all the metadata lines before continuing with the loop, # otherwise subsequent metadata lines get interpreted as block content - while reader.has_lines? + while reader.has_more_lines? parse_block_metadata_lines(reader, section, attributes) next_level = is_next_line_section? reader, attributes if next_level doctype = parent.document.doctype if next_level == 0 && doctype != 'book' - puts "asciidoctor: ERROR: only book doctypes can contain level 0 sections" + puts "asciidoctor: ERROR: line #{reader.lineno + 1}: only book doctypes can contain level 0 sections" end if next_level > current_level || (section.is_a?(Document) && next_level == 0) unless expected_next_levels.nil? || expected_next_levels.include?(next_level) - puts "asciidoctor: WARNING: section title out of sequence: " + + puts "asciidoctor: WARNING: line #{reader.lineno + 1}: section title out of sequence: " + "expected #{expected_next_levels.size > 1 ? 'levels' : 'level'} #{expected_next_levels * ' or '}, " + "got level #{next_level}" end # the attributes returned are those that are orphaned new_section, attributes = next_section(reader, section, attributes) @@ -208,27 +241,24 @@ # parent - The Document, Section or Block to which the next block belongs # # Returns a Section or Block object holding the parsed content of the processed lines def self.next_block(reader, parent, attributes = {}, options = {}) # Skip ahead to the block content - skipped = reader.skip_blank + skipped = reader.skip_blank_lines # bail if we've reached the end of the section content - return nil unless reader.has_lines? + return nil unless reader.has_more_lines? if options[:text] && skipped > 0 options.delete(:text) end - Asciidoctor.debug { + Debug.debug { msg = [] msg << '/' * 64 msg << 'next_block() - First two lines are:' - msg << reader.peek_line - tmp_line = reader.get_line - msg << reader.peek_line - reader.unshift tmp_line + msg.concat reader.peek_lines(2) msg << '/' * 64 msg * "\n" } parse_metadata = options[:parse_metadata] || true @@ -236,121 +266,115 @@ document = parent.document context = parent.is_a?(Block) ? parent.context : nil block = nil - while reader.has_lines? && block.nil? + while reader.has_more_lines? && block.nil? if parse_metadata && parse_block_metadata_line(reader, document, attributes, options) - reader.next_line + reader.advance next elsif parse_sections && context.nil? && is_next_line_section?(reader, attributes) block, attributes = next_section(reader, parent, attributes) break end this_line = reader.get_line - delimited_blk = delimited_block? this_line + block_context = nil + terminator = nil + if delimited_blk_match = is_delimited_block?(this_line, true) + block_context = delimited_blk_match.name + terminator = delimited_blk_match.terminator + end - # NOTE I've haven't decided whether I want this check here or in - # parse_block_metadata (where it is currently) - #if this_line.match(REGEXP[:comment_blk]) - # reader.grab_lines_until {|line| line.match( REGEXP[:comment_blk] ) } - # reader.skip_blank - # # NOTE we should break here because we have found a block, it - # # just happens to be nil...if we keep going we potentially overrun - # # a section heading which is not processed in this anymore - # break + # NOTE we're letting break lines (ruler, page_break, etc) have attributes + if !options[:text] && block_context.nil? && (match = this_line.match(REGEXP[:break_line])) + block = Block.new(parent, BREAK_LINES[match[0][0..2]]) + reader.skip_blank_lines - # NOTE we're letting ruler have attributes - if !options[:text] && this_line.match(REGEXP[:ruler]) - block = Block.new(parent, :ruler) - reader.skip_blank - - elsif !options[:text] && (match = this_line.match(REGEXP[:image_blk])) + elsif !options[:text] && block_context.nil? && (match = this_line.match(REGEXP[:image_blk])) block = Block.new(parent, :image) AttributeList.new(document.sub_attributes(match[2])).parse_into(attributes, ['alt', 'width', 'height']) target = block.sub_attributes(match[1]) if !target.to_s.empty? attributes['target'] = target document.register(:images, target) attributes['alt'] ||= File.basename(target, File.extname(target)) - # hmmm, this assignment seems like a one-off block.title = attributes['title'] - if block.title? && attributes['caption'].nil? - attributes['caption'] = "Figure #{document.counter('figure-number')}. " + if block.title? && !attributes.has_key?('caption') && !block.attr?('caption') + number = document.counter('figure-number') + attributes['caption'] = "#{document.attributes['figure-caption']} #{number}. " + Document::AttributeEntry.new('figure-number', number).save_to(attributes) end else # drop the line if target resolves to nothing block = nil end - reader.skip_blank + reader.skip_blank_lines - elsif delimited_blk && (match = this_line.match(REGEXP[:open_blk])) + elsif block_context == :open # an open block is surrounded by '--' lines and has zero or more blocks inside - terminator = match[0] buffer = Reader.new reader.grab_lines_until(:terminator => terminator) # Strip lines off end of block - not implemented yet - # while buffer.has_lines? && buffer.last.strip.empty? + # while buffer.has_more_lines? && buffer.last.strip.empty? # buffer.pop # end - block = Block.new(parent, :open) - while buffer.has_lines? + block = Block.new(parent, block_context) + while buffer.has_more_lines? new_block = next_block(buffer, block) block.blocks << new_block unless new_block.nil? end # needs to come before list detection - elsif delimited_blk && (match = this_line.match(REGEXP[:sidebar_blk])) + elsif block_context == :sidebar # sidebar is surrounded by '****' (4 or more '*' chars) lines - terminator = match[0] # FIXME violates DRY because it's a duplication of quote parsing - block = Block.new(parent, :sidebar) + block = Block.new(parent, block_context) buffer = Reader.new reader.grab_lines_until(:terminator => terminator) - while buffer.has_lines? + while buffer.has_more_lines? new_block = next_block(buffer, block) block.blocks << new_block unless new_block.nil? end - elsif match = this_line.match(REGEXP[:colist]) + elsif block_context.nil? && (match = this_line.match(REGEXP[:colist])) block = Block.new(parent, :colist) attributes['style'] = 'arabic' items = [] block.buffer = items - reader.unshift this_line + reader.unshift_line this_line expected_index = 1 begin # might want to move this check to a validate method if match[1].to_i != expected_index - puts "asciidoctor: WARNING: callout list item index: expected #{expected_index} got #{match[1]}" + puts "asciidoctor: WARNING: line #{reader.lineno + 1}: callout list item index: expected #{expected_index} got #{match[1]}" end list_item = next_list_item(reader, block, match) expected_index += 1 if !list_item.nil? items << list_item coids = document.callouts.callout_ids(items.size) if !coids.empty? list_item.attributes['coids'] = coids else - puts 'asciidoctor: WARNING: no callouts refer to list item ' + items.size.to_s + puts "asciidoctor: WARNING: line #{reader.lineno}: no callouts refer to list item #{items.size}" end end - end while reader.has_lines? && match = reader.peek_line.match(REGEXP[:colist]) + end while reader.has_more_lines? && match = reader.peek_line.match(REGEXP[:colist]) document.callouts.next_list - elsif match = this_line.match(REGEXP[:ulist]) + elsif block_context.nil? && (match = this_line.match(REGEXP[:ulist])) AttributeList.rekey(attributes, ['style']) - reader.unshift(this_line) + reader.unshift_line this_line block = next_outline_list(reader, :ulist, parent) - elsif match = this_line.match(REGEXP[:olist]) + elsif block_context.nil? && (match = this_line.match(REGEXP[:olist])) AttributeList.rekey(attributes, ['style']) - reader.unshift(this_line) + reader.unshift_line this_line block = next_outline_list(reader, :olist, parent) # QUESTION move this logic to next_outline_list? if !(attributes.has_key? 'style') && !(block.attributes.has_key? 'style') marker = block.buffer.first.marker if marker.start_with? '.' @@ -361,103 +385,114 @@ style = ORDERED_LIST_STYLES.detect{|s| marker.match(ORDERED_LIST_MARKER_PATTERNS[s]) } attributes['style'] = (style || ORDERED_LIST_STYLES.first).to_s end end - elsif match = this_line.match(REGEXP[:dlist]) - reader.unshift this_line + elsif block_context.nil? && (match = this_line.match(REGEXP[:dlist])) + reader.unshift_line this_line block = next_labeled_list(reader, match, parent) AttributeList.rekey(attributes, ['style']) - elsif delimited_blk && (match = this_line.match(document.nested? ? REGEXP[:table_nested] : REGEXP[:table])) + elsif block_context == :table # table is surrounded by lines starting with a | followed by 3 or more '=' chars - terminator = match[0] AttributeList.rekey(attributes, ['style']) table_reader = Reader.new reader.grab_lines_until(:terminator => terminator, :skip_line_comments => true) block = next_table(table_reader, parent, attributes) - # hmmm, this assignment seems like a one-off block.title = attributes['title'] - if block.title? && attributes['caption'].nil? - attributes['caption'] = "Table #{document.counter('table-number')}. " + if block.title? && !attributes.has_key?('caption') && !block.attr?('caption') + number = document.counter('table-number') + attributes['caption'] = "#{document.attributes['table-caption']} #{number}. " + Document::AttributeEntry.new('table-number', number).save_to(attributes) end # FIXME violates DRY because it's a duplication of other block parsing - elsif delimited_blk && (match = this_line.match(REGEXP[:example])) + elsif block_context == :example # example is surrounded by lines with 4 or more '=' chars - terminator = match[0] AttributeList.rekey(attributes, ['style']) if admonition_style = ADMONITION_STYLES.detect {|s| attributes['style'] == s} block = Block.new(parent, :admonition) - attributes['name'] = admonition_style.downcase - attributes['caption'] ||= admonition_style.capitalize + attributes['name'] = admonition_name = admonition_style.downcase + attributes['caption'] ||= document.attributes["#{admonition_name}-caption"] else - block = Block.new(parent, :example) - # hmmm, this assignment seems like a one-off + block = Block.new(parent, block_context) block.title = attributes['title'] - if block.title? && attributes['caption'].nil? - attributes['caption'] = "Example #{document.counter('example-number')}. " + if block.title? && !attributes.has_key?('caption') && !block.attr?('caption') + number = document.counter('example-number') + attributes['caption'] = "#{document.attributes['example-caption']} #{number}. " + Document::AttributeEntry.new('example-number', number).save_to(attributes) end end buffer = Reader.new reader.grab_lines_until(:terminator => terminator) - while buffer.has_lines? + while buffer.has_more_lines? new_block = next_block(buffer, block) block.blocks << new_block unless new_block.nil? end # FIXME violates DRY w/ non-delimited block listing - elsif delimited_blk && (match = this_line.match(REGEXP[:listing])) - terminator = match[0] - AttributeList.rekey(attributes, ['style', 'language', 'linenums']) + elsif block_context == :listing || block_context == :fenced_code + if block_context == :fenced_code + attributes['style'] = 'source' + lang = this_line[3..-1].strip + attributes['language'] = lang unless lang.empty? + terminator = terminator[0..2] if terminator.length > 3 + else + AttributeList.rekey(attributes, ['style', 'language', 'linenums']) + end buffer = reader.grab_lines_until(:terminator => terminator) buffer.last.chomp! unless buffer.empty? block = Block.new(parent, :listing, buffer) + block.title = attributes['title'] + if document.attributes.has_key?('listing-caption') && + block.title? && !attributes.has_key?('caption') && !block.attr?('caption') + number = document.counter('listing-number') + attributes['caption'] = "#{document.attributes['listing-caption']} #{number}. " + Document::AttributeEntry.new('listing-number', number).save_to(attributes) + end - elsif delimited_blk && (match = this_line.match(REGEXP[:quote])) + elsif block_context == :quote # multi-line verse or quote is surrounded by a block delimiter - terminator = match[0] AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle']) quote_context = (attributes['style'] == 'verse' ? :verse : :quote) block_reader = Reader.new reader.grab_lines_until(:terminator => terminator) - # only quote can have other section elements (as as section block) + # only quote can have other section elements (as section block) section_body = (quote_context == :quote) if section_body block = Block.new(parent, quote_context) - while block_reader.has_lines? + while block_reader.has_more_lines? new_block = next_block(block_reader, block) block.blocks << new_block unless new_block.nil? end else block_reader.chomp_last! block = Block.new(parent, quote_context, block_reader.lines) end - elsif delimited_blk && (blk_ctx = [:literal, :pass].detect{|t| this_line.match(REGEXP[t])}) + elsif block_context == :literal || block_context == :pass # literal is surrounded by '....' (4 or more '.' chars) lines # pass is surrounded by '++++' (4 or more '+' chars) lines - terminator = $~[0] buffer = reader.grab_lines_until(:terminator => terminator) buffer.last.chomp! unless buffer.empty? # a literal can masquerade as a listing if attributes[1] == 'listing' - blk_ctx = :listing + block_context = :listing end - block = Block.new(parent, blk_ctx, buffer) + block = Block.new(parent, block_context, buffer) elsif this_line.match(REGEXP[:lit_par]) # literal paragraph is contiguous lines starting with # one or more space or tab characters # So we need to actually include this one in the grab_lines group - reader.unshift this_line + reader.unshift_line this_line buffer = reader.grab_lines_until(:preserve_last_line => true, :break_on_blank_lines => true) {|line| # labeled list terms can be indented, but a preceding blank indicates # we are in a list continuation and therefore literals should be strictly literal (context == :dlist && skipped == 0 && line.match(REGEXP[:dlist])) || - delimited_block?(line) + is_delimited_block?(line) } # trim off the indentation equivalent to the size of the least indented line if !buffer.empty? offset = buffer.map {|line| line.match(REGEXP[:leading_blanks])[1].length }.min @@ -475,39 +510,47 @@ attributes['options'] << 'listparagraph' end ## these switches based on style need to come immediately before the else ## - elsif attributes[1] == 'source' - AttributeList.rekey(attributes, ['style', 'language', 'linenums']) - reader.unshift(this_line) + elsif attributes[1] == 'source' || attributes[1] == 'listing' + if attributes[1] == 'source' + AttributeList.rekey(attributes, ['style', 'language', 'linenums']) + end + reader.unshift_line this_line buffer = reader.grab_lines_until(:break_on_blank_lines => true) buffer.last.chomp! unless buffer.empty? block = Block.new(parent, :listing, buffer) + elsif attributes[1] == 'literal' + reader.unshift_line this_line + buffer = reader.grab_lines_until(:break_on_blank_lines => true) + buffer.last.chomp! unless buffer.empty? + block = Block.new(parent, :literal, buffer) + elsif admonition_style = ADMONITION_STYLES.detect{|s| attributes[1] == s} # an admonition preceded by [<TYPE>] and lasts until a blank line - reader.unshift(this_line) + reader.unshift_line this_line buffer = reader.grab_lines_until(:break_on_blank_lines => true) buffer.last.chomp! unless buffer.empty? block = Block.new(parent, :admonition, buffer) attributes['style'] = admonition_style - attributes['name'] = admonition_style.downcase - attributes['caption'] ||= admonition_style.capitalize + attributes['name'] = admonition_name = admonition_style.downcase + attributes['caption'] ||= document.attributes["#{admonition_name}-caption"] elsif quote_context = [:quote, :verse].detect{|s| attributes[1] == s.to_s} # single-paragraph verse or quote is preceded by [verse] or [quote], respectively, and lasts until a blank line AttributeList.rekey(attributes, ['style', 'attribution', 'citetitle']) - reader.unshift(this_line) + reader.unshift_line this_line buffer = reader.grab_lines_until(:break_on_blank_lines => true) buffer.last.chomp! unless buffer.empty? block = Block.new(parent, quote_context, buffer) # a floating (i.e., discrete) title elsif ['float', 'discrete'].include?(attributes[1]) && is_section_title?(this_line, reader.peek_line) attributes['style'] = attributes[1] - reader.unshift this_line + reader.unshift_line this_line float_id, float_title, float_level, _ = parse_section_title reader block = Block.new(parent, :floating_title) if float_id.nil? || float_id.empty? # FIXME remove hack of creating throwaway Section to get at the generate_id method tmp_sect = Section.new(parent) @@ -520,13 +563,13 @@ block.level = float_level block.title = float_title # a paragraph - contiguous nonblank/noncontinuation lines else - reader.unshift this_line + reader.unshift_line this_line buffer = reader.grab_lines_until(:break_on_blank_lines => true, :preserve_last_line => true, :skip_line_comments => true) {|line| - delimited_block?(line) || line.match(REGEXP[:attr_line]) || + is_delimited_block?(line) || line.match(REGEXP[:attr_line]) || # next list item can be directly adjacent to paragraph of previous list item context == :dlist && line.match(REGEXP[:dlist]) # not sure if there are any cases when we need this check for other list types #LIST_CONTEXTS.include?(context) && line.match(REGEXP[context]) } @@ -542,12 +585,12 @@ if !options[:text] && (admonition = buffer.first.match(Regexp.new('^(' + ADMONITION_STYLES.join('|') + '):\s+'))) buffer[0] = admonition.post_match block = Block.new(parent, :admonition, buffer) attributes['style'] = admonition[1] - attributes['name'] = admonition[1].downcase - attributes['caption'] ||= admonition[1].capitalize + attributes['name'] = admonition_name = admonition[1].downcase + attributes['caption'] ||= document.attributes["#{admonition_name}-caption"] else buffer.last.chomp! block = Block.new(parent, :paragraph, buffer) end end @@ -576,21 +619,41 @@ end # Public: Determines whether this line is the start of any of the delimited blocks # # returns the match data if this line is the first line of a delimited block or nil if not - #-- - # TODO could use the match value as a lookup for the block type so we don't have - # to do any subsequent regexp - def self.delimited_block?(line) - # naive match - #line.match(REGEXP[:any_blk]) + def self.is_delimited_block?(line, return_match_data = false) + line_len = line.length + # optimized for best performance + if line_len > 2 + if line_len == 3 + tip = line.chop + tl = 2 + else + tip = line[0..3] + tl = 4 - # attempt at better performance - if line.length > 0 - # NOTE accessing the first element before calling ord is first Ruby 1.8.7 compat - REGEXP[:any_blk_ord].include?(line[0..0][0].ord) ? line.match(REGEXP[:any_blk]) : nil + # special case for fenced code blocks + tip_alt = tip.chop + if tip_alt == '```' || tip_alt == '~~~' + tip = tip_alt + tl = 3 + end + end + + if DELIMITED_BLOCKS.has_key? tip + # if tip is the full line + if tl == line_len - 1 + return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, tip) : true + elsif match = line.match(REGEXP[:any_blk]) + return_match_data ? BlockMatchData.new(DELIMITED_BLOCKS[tip], tip, match[0]) : true + else + nil + end + else + nil + end else nil end end @@ -608,13 +671,13 @@ if parent.context == list_type list_block.level = parent.level + 1 else list_block.level = 1 end - Asciidoctor.debug { "Created #{list_type} block: #{list_block}" } + Debug.debug { "Created #{list_type} block: #{list_block}" } - while reader.has_lines? && (match = reader.peek_line.match(REGEXP[list_type])) + while reader.has_more_lines? && (match = reader.peek_line.match(REGEXP[list_type])) marker = resolve_list_marker(list_type, match[1]) # if we are moving to the next item, and the marker is different # determine if we are moving up or down in nesting @@ -646,11 +709,11 @@ end items << list_item unless list_item.nil? list_item = nil - reader.skip_blank + reader.skip_blank_lines end list_block end @@ -705,11 +768,11 @@ # that uses the same delimiter (::, :::, :::: or ;;) sibling_pattern = REGEXP[:dlist_siblings][match[2]] begin pairs << next_list_item(reader, block, match, sibling_pattern) - end while reader.has_lines? && match = reader.peek_line.match(sibling_pattern) + end while reader.has_more_lines? && match = reader.peek_line.match(sibling_pattern) block end # Internal: Parse and construct the next ListItem for the current bulleted @@ -748,27 +811,32 @@ end # first skip the line with the marker / term reader.get_line list_item_reader = Reader.new grab_lines_for_list_item(reader, list_type, sibling_trait, has_text) - if list_item_reader.has_lines? + if list_item_reader.has_more_lines? comment_lines = list_item_reader.consume_line_comments subsequent_line = list_item_reader.peek_line list_item_reader.unshift(*comment_lines) unless comment_lines.empty? if !subsequent_line.nil? continuation_connects_first_block = (subsequent_line == "\n") - content_adjacent = !subsequent_line.strip.empty? + # if there's no continuation connecting the first block, then + # treat the lines as paragraph text (activated when has_text = false) + if !continuation_connects_first_block && list_type != :dlist + has_text = false + end + content_adjacent = !subsequent_line.chomp.empty? else continuation_connects_first_block = false content_adjacent = false end # only relevant for :dlist options = {:text => !has_text} - while list_item_reader.has_lines? + while list_item_reader.has_more_lines? new_block = next_block(list_item_reader, list_block, {}, options) list_item.blocks << new_block unless new_block.nil? end list_item.fold_first(continuation_connects_first_block, content_adjacent) @@ -813,11 +881,11 @@ # a detached continuation is a list continuation that follows a blank line # it gets associated with the outermost block detached_continuation = nil - while reader.has_lines? + while reader.has_more_lines? this_line = reader.get_line # if we've arrived at a sibling item in this list, we've captured # the complete list item and can begin processing it # the remainder of the method determines whether we've reached @@ -844,17 +912,16 @@ end end # a delimited block immediately breaks the list unless preceded # by a list continuation (they are harsh like that ;0) - if match = delimited_block?(this_line) + if match = is_delimited_block?(this_line, true) if continuation == :active buffer << this_line # grab all the lines in the block, leaving the delimiters in place # we're being more strict here about the terminator, but I think that's a good thing - terminator = match[0] - buffer.concat reader.grab_lines_until(:terminator => terminator, :grab_last_line => true) + buffer.concat reader.grab_lines_until(:terminator => match.terminator, :grab_last_line => true) continuation = :inactive else break end # technically attr_line only breaks if ensuing line is not a list item @@ -866,22 +933,22 @@ # literal paragraphs have special considerations (and this is one of # two entry points into one) # if we don't process it as a whole, then a line in it that looks like a # list item will throw off the exit from it if this_line.match(REGEXP[:lit_par]) - reader.unshift this_line + reader.unshift_line this_line buffer.concat reader.grab_lines_until( :preserve_last_line => true, :break_on_blank_lines => true, :break_on_list_continuation => true) {|line| # we may be in an indented list disguised as a literal paragraph # so we need to make sure we don't slurp up a legitimate sibling list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait) } continuation = :inactive # let block metadata play out until we find the block - elsif this_line.match(REGEXP[:blk_title]) || this_line.match(REGEXP[:attr_line]) + elsif this_line.match(REGEXP[:blk_title]) || this_line.match(REGEXP[:attr_line]) || this_line.match(REGEXP[:attr_entry]) buffer << this_line else if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| this_line.match(REGEXP[ctx]) } within_nested_list = true if nested_list_type == :dlist && $~[3].to_s.empty? @@ -909,11 +976,11 @@ # for all other lists, has_text is always true # in this block, we have to see whether we stay in the list if has_text # slurp up any literal paragraph offset by blank lines if this_line.match(REGEXP[:lit_par]) - reader.unshift this_line + reader.unshift_line this_line buffer.concat reader.grab_lines_until( :preserve_last_line => true, :break_on_blank_lines => true, :break_on_list_continuation => true) {|line| # we may be in an indented list disguised as a literal paragraph @@ -953,24 +1020,25 @@ end end this_line = nil end - reader.unshift this_line if !this_line.nil? + reader.unshift_line this_line if !this_line.nil? if detached_continuation buffer.delete_at detached_continuation end # strip trailing blank lines to prevent empty blocks - buffer.pop while !buffer.empty? && buffer.last.strip.empty? + buffer.pop while !buffer.empty? && buffer.last.chomp.empty? # We do need to replace the optional trailing continuation # a blank line would have served the same purpose in the document if !buffer.empty? && buffer.last.chomp == LIST_CONTINUATION buffer.pop end + #puts "BUFFER[#{list_type},#{sibling_trait}]>#{buffer.join}<BUFFER" #puts "BUFFER[#{list_type},#{sibling_trait}]>#{buffer}<BUFFER" buffer end @@ -995,28 +1063,33 @@ end if attributes[1] section.sectname = attributes[1] section.special = true - if section.sectname == 'appendix' - attributes['caption'] ||= "Appendix #{parent.document.counter('appendix-number', 'A')}: " + document = parent.document + if section.sectname == 'appendix' && + !attributes.has_key?('caption') && + !document.attributes.has_key?('caption') + number = document.counter('appendix-number', 'A') + attributes['caption'] = "#{document.attributes['appendix-caption']} #{number}: " + Document::AttributeEntry.new('appendix-number', number).save_to(attributes) end else section.sectname = "sect#{section.level}" end section.update_attributes(attributes) - reader.skip_blank + reader.skip_blank_lines section end # Private: Get the Integer section level based on the characters # used in the ASCII line under the section title. # # line - the String line from under the section title. def self.section_level(line) - char = line.strip.chars.to_a.uniq + char = line.chomp.chars.to_a.uniq case char when ['=']; 0 when ['-']; 1 when ['~']; 2 when ['^']; 3 @@ -1030,25 +1103,29 @@ [line.length - 1, 0].max end # Internal: Checks if the next line on the Reader is a section title # - # reader - the source Reader + # reader - the source Reader + # attributes - a Hash of attributes collected above the current line # # returns the section level if the Reader is positioned at a section title, # false otherwise def self.is_next_line_section?(reader, attributes) return false if !attributes[1].nil? && ['float', 'discrete'].include?(attributes[1]) - if reader.has_lines? - line1 = reader.get_line - line2 = reader.peek_line - reader.unshift line1 - else - return false - end + return false if !reader.has_more_lines? + is_section_title?(*reader.peek_lines(2)) + end - is_section_title?(line1, line2) + # Internal: Convenience API for checking if the next line on the Reader is the document title + # + # reader - the source Reader + # attributes - a Hash of attributes collected above the current line + # + # returns true if the Reader is positioned at the document title, false otherwise + def self.is_next_line_document_title?(reader, attributes) + is_next_line_section?(reader, attributes) == 0 end # Public: Checks if these lines are a section title # # line1 - the first line as a String @@ -1170,19 +1247,18 @@ # # parse_header_metadata(Reader.new ["Author Name <author@example.org>\n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"]) # # => {'author' => 'Author Name', 'firstname' => 'Author', 'lastname' => 'Name', 'email' => 'author@example.org', # # 'revnumber' => '1.0', 'revdate' => '2012-12-21', 'revremark' => 'Coincide w/ end of world.'} def self.parse_header_metadata(reader, document = nil) - # capture consecutive comment lines so we can reinsert them after the header - comment_lines = reader.consume_comments + # NOTE this will discard away any comment lines, but not skip blank lines + process_attribute_entries(reader, document) - metadata = !document.nil? ? document.attributes : {} - author_initials = metadata['authorinitials'] - if reader.has_lines? && !reader.peek_line.strip.empty? + metadata = {} + + if reader.has_more_lines? && !reader.peek_line.chomp.empty? author_line = reader.get_line - match = author_line.match(REGEXP[:author_info]) - if match + if match = author_line.match(REGEXP[:author_info]) metadata['firstname'] = fname = match[1].tr('_', ' ') metadata['author'] = fname metadata['authorinitials'] = fname[0, 1] if !match[2].nil? && !match[3].nil? metadata['middlename'] = mname = match[2].tr('_', ' ') @@ -1198,32 +1274,40 @@ else metadata['author'] = metadata['firstname'] = author_line.strip.squeeze(' ') metadata['authorinitials'] = metadata['firstname'][0, 1] end - # hack because of incorrect order of attribute processing - metadata['authorinitials'] = author_initials unless author_initials.nil? + # NOTE this will discard away any comment lines, but not skip blank lines + process_attribute_entries(reader, document) - # capture consecutive comment lines so we can reinsert them after the header - comment_lines += reader.consume_comments - - if reader.has_lines? && !reader.peek_line.strip.empty? + if reader.has_more_lines? && !reader.peek_line.chomp.empty? rev_line = reader.get_line - match = rev_line.match(REGEXP[:revision_info]) - if match - metadata['revdate'] = match[2] - metadata['revnumber'] = match[1] unless match[1].nil? - metadata['revremark'] = match[3] unless match[3].nil? + if match = rev_line.match(REGEXP[:revision_info]) + metadata['revdate'] = match[2].strip + metadata['revnumber'] = match[1].rstrip unless match[1].nil? + metadata['revremark'] = match[3].rstrip unless match[3].nil? else - metadata['revdate'] = rev_line.strip + # throw it back + reader.unshift_line rev_line end end - reader.skip_blank + # NOTE this will discard away any comment lines, but not skip blank lines + process_attribute_entries(reader, document) + + reader.skip_blank_lines + + # apply header subs and assign to document + if !document.nil? + metadata.map do |key, val| + val = document.apply_header_subs(val) + document.attributes[key] = val if !document.attributes.has_key?(key) + val + end + end end - reader.unshift(*comment_lines) metadata end # Internal: Parse lines of metadata until a line of metadata is not found. # @@ -1239,11 +1323,11 @@ # # returns the Hash of attributes including any metadata found def self.parse_block_metadata_lines(reader, parent, attributes = {}, options = {}) while parse_block_metadata_line(reader, parent, attributes, options) # discard the line just processed - reader.next_line + reader.advance reader.skip_blank_lines end attributes end @@ -1266,19 +1350,19 @@ # * :text indicates that lexer is only looking for text content # and thus the block title should not be captured # # returns true if the line contains metadata, otherwise false def self.parse_block_metadata_line(reader, parent, attributes, options = {}) - return false if !reader.has_lines? + return false if !reader.has_more_lines? next_line = reader.peek_line - if next_line.match(REGEXP[:comment]) - # do nothing, we'll skip it - # QUESTION should we parse block comments here instead of next_block? - # disable until we can agree what the current line is coming in - elsif match = next_line.match(REGEXP[:comment_blk]) + if (commentish = next_line.start_with?('//')) && (match = next_line.match(REGEXP[:comment_blk])) terminator = match[0] - reader.grab_lines_until(:skip_first_line => true, :preserve_last_line => true, :terminator => terminator) + reader.grab_lines_until(:skip_first_line => true, :preserve_last_line => true, :terminator => terminator, :preprocess => false) + elsif commentish && next_line.match(REGEXP[:comment]) + # do nothing, we'll skip it + elsif !options[:text] && (match = next_line.match(REGEXP[:attr_entry])) + process_attribute_entry(reader, parent, attributes, match) elsif match = next_line.match(REGEXP[:anchor]) id, reftext = match[1].split(',') attributes['id'] = id # AsciiDoc always use [id] as the reftext in HTML output, # but I'd like to do better in Asciidoctor @@ -1288,21 +1372,71 @@ parent.document.register(:ids, [id, reftext]) end elsif match = next_line.match(REGEXP[:blk_attr_list]) AttributeList.new(parent.document.sub_attributes(match[1]), parent.document).parse_into(attributes) # NOTE title doesn't apply to section, but we need to stash it for the first block - # TODO need test for this getting passed on to first block after section if found above section # TODO should issue an error if this is found above the document title elsif !options[:text] && (match = next_line.match(REGEXP[:blk_title])) attributes['title'] = match[1] else return false end true end + def self.process_attribute_entries(reader, parent, attributes = nil) + reader.skip_comment_lines + while process_attribute_entry(reader, parent, attributes) + # discard line just processed + reader.advance + reader.skip_comment_lines + end + end + + def self.process_attribute_entry(reader, parent, attributes = nil, match = nil) + match ||= reader.has_more_lines? ? reader.peek_line.match(REGEXP[:attr_entry]) : nil + if match + name = match[1] + value = match[2].nil? ? '' : match[2] + if value.end_with? LINE_BREAK + value.chop!.rstrip! + while reader.advance + next_line = reader.peek_line.strip + break if next_line.empty? + if next_line.end_with? LINE_BREAK + value = "#{value} #{next_line.chop.rstrip}" + else + value = "#{value} #{next_line}" + break + end + end + end + + if name.end_with?('!') + # a nil value signals the attribute should be deleted (undefined) + value = nil + name = name.chop + end + + name = sanitize_attribute_name(name) + accessible = true + if !parent.nil? + accessible = value.nil? ? + parent.document.delete_attribute(name) : + parent.document.set_attribute(name, value) + end + + if !attributes.nil? + Document::AttributeEntry.new(name, value).save_to(attributes) if accessible + end + true + else + false + end + end + # Internal: Resolve the 0-index marker for this list item # # For ordered lists, match the marker used for this list item against the # known list markers and determine which marker is the first (0-index) marker # in its number series. @@ -1385,10 +1519,11 @@ end marker = 'I)' end if validate && expected != actual + # FIXME I need a reader reference or line number to report line number puts "asciidoctor: WARNING: list item index: expected #{expected}, got #{actual}" end marker end @@ -1439,12 +1574,12 @@ explicit_col_specs = false end table_reader.skip_blank_lines - parser_ctx = Asciidoctor::Table::ParserContext.new(table, attributes) - while table_reader.has_lines? + parser_ctx = Table::ParserContext.new(table, attributes) + while table_reader.has_more_lines? line = table_reader.get_line if parser_ctx.format == 'psv' if parser_ctx.starts_with_delimiter? line line = line[1..-1] @@ -1505,11 +1640,11 @@ end end table_reader.skip_blank_lines unless parser_ctx.cell_open? - if !table_reader.has_lines? + if !table_reader.has_more_lines? parser_ctx.close_cell true end end table.attributes['colcount'] ||= parser_ctx.col_count @@ -1595,11 +1730,11 @@ spec = (pos == :end ? {} : nil) rest = line if m = line.match(REGEXP[:table_cellspec][pos]) spec = {} - return [spec, line] if m[0].strip.empty? + return [spec, line] if m[0].chomp.empty? rest = (pos == :start ? m.post_match : m.pre_match) if m[1] colspec, rowspec = m[1].split '.' colspec = colspec.to_s.empty? ? 1 : colspec.to_i rowspec = rowspec.to_s.empty? ? 1 : rowspec.to_i @@ -1627,10 +1762,30 @@ end [spec, rest] end + # Public: Convert a string to a legal attribute name. + # + # name - the String name of the attribute + # + # Returns a String with the legal AsciiDoc attribute name. + # + # Examples + # + # sanitize_attribute_name('Foo Bar') + # => 'foobar' + # + # sanitize_attribute_name('foo') + # => 'foo' + # + # sanitize_attribute_name('Foo 3 #-Billy') + # => 'foo3-billy' + def self.sanitize_attribute_name(name) + name.gsub(REGEXP[:illegal_attr_name_chars], '').downcase + end + # Internal: Converts a Roman numeral to an integer value. # # value - The String Roman numeral to convert # # Returns the Integer for this Roman numeral @@ -1648,6 +1803,7 @@ end } result end +end end