lib/asciidoctor/parser.rb in asciidoctor-1.5.6.2 vs lib/asciidoctor/parser.rb in asciidoctor-1.5.7

- old
+ new

@@ -21,10 +21,11 @@ # reader = Reader.new lines # block = Parser.next_block(reader, doc) # block.class # # => Asciidoctor::Block class Parser + include Logging BlockMatchData = Struct.new :context, :masq, :tip, :terminator # Regexp for replacing tab character TabRx = /\t/ @@ -91,11 +92,14 @@ def self.parse(reader, document, options = {}) block_attributes = parse_document_header(reader, document) while reader.has_more_lines? new_section, block_attributes = next_section(reader, document, block_attributes) - document << new_section if new_section + if new_section + document.assign_numeral new_section + document.blocks << new_section + end end unless options[:header_only] document end @@ -111,109 +115,115 @@ # which are automatically removed by the reader. # # returns the Hash of orphan block attributes captured above the header def self.parse_document_header(reader, document) # capture lines of block-level metadata and plow away comment lines that precede first block - block_attributes = parse_block_metadata_lines reader, document + block_attrs = parse_block_metadata_lines reader, document + doc_attrs = document.attributes # special case, block title is not allowed above document title, # carry attributes over to the document body - if (implicit_doctitle = is_next_line_doctitle? reader, block_attributes, document.attributes['leveloffset']) && - (block_attributes.key? 'title') - return document.finalize_header block_attributes, false + if (implicit_doctitle = is_next_line_doctitle? reader, block_attrs, doc_attrs['leveloffset']) && block_attrs['title'] + return document.finalize_header block_attrs, false end # yep, document title logic in AsciiDoc is just insanity # definitely an area for spec refinement assigned_doctitle = nil - unless (val = document.attributes['doctitle']).nil_or_empty? + unless (val = doc_attrs['doctitle']).nil_or_empty? document.title = assigned_doctitle = val end - section_title = nil # if the first line is the document title, add a header to the document and parse the header metadata if implicit_doctitle source_location = reader.cursor if document.sourcemap document.id, _, doctitle, _, atx = parse_section_title reader, document - unless assigned_doctitle - document.title = assigned_doctitle = doctitle - end + document.title = assigned_doctitle = doctitle unless assigned_doctitle + document.header.source_location = source_location if source_location # default to compat-mode if document uses atx-style doctitle - document.set_attr 'compat-mode' unless atx || (document.attribute_locked? 'compat-mode') - if (separator = block_attributes.delete 'separator') - document.set_attr 'title-separator', separator unless document.attribute_locked? 'title-separator' + doc_attrs['compat-mode'] = '' unless atx || (document.attribute_locked? 'compat-mode') + if (separator = block_attrs['separator']) + doc_attrs['title-separator'] = separator unless document.attribute_locked? 'title-separator' end - document.header.source_location = source_location if source_location - document.attributes['doctitle'] = section_title = doctitle - # QUESTION: should the id assignment on Document be encapsulated in the Document class? - if document.id - block_attributes.delete 1 - block_attributes.delete 'id' + doc_attrs['doctitle'] = section_title = doctitle + if (doc_id = block_attrs['id']) + document.id = doc_id else - if (style = block_attributes.delete 1) - style_attrs = { 1 => style } - parse_style_attribute style_attrs, reader - block_attributes['id'] = style_attrs['id'] if style_attrs.key? 'id' - end - document.id = block_attributes.delete 'id' + doc_id = document.id end + if (doc_role = block_attrs['role']) + doc_attrs['docrole'] = doc_role + end + if (doc_reftext = block_attrs['reftext']) + doc_attrs['reftext'] = doc_reftext + end + block_attrs = {} parse_header_metadata reader, document + document.register :refs, [doc_id, document] if doc_id end - unless (val = document.attributes['doctitle']).nil_or_empty? || val == section_title + unless (val = doc_attrs['doctitle']).nil_or_empty? || val == section_title document.title = assigned_doctitle = val end # restore doctitle attribute to original assignment - document.attributes['doctitle'] = assigned_doctitle if assigned_doctitle + doc_attrs['doctitle'] = assigned_doctitle if assigned_doctitle # parse title and consume name section of manpage document parse_manpage_header(reader, document) if document.doctype == 'manpage' - # NOTE block_attributes are the block-level attributes (not document attributes) that + # NOTE block_attrs are the block-level attributes (not document attributes) that # precede the first line of content (document title, first section or first block) - document.finalize_header block_attributes + document.finalize_header block_attrs end # Public: Parses the manpage header of the AsciiDoc source read from the Reader # # returns Nothing def self.parse_manpage_header(reader, document) if ManpageTitleVolnumRx =~ document.attributes['doctitle'] - document.attributes['mantitle'] = document.sub_attributes $1.downcase + document.attributes['mantitle'] = (($1.include? ATTR_REF_HEAD) ? (document.sub_attributes $1) : $1).downcase document.attributes['manvolnum'] = $2 else - warn %(asciidoctor: ERROR: #{reader.prev_line_info}: malformed manpage title) + logger.error message_with_context 'malformed manpage title', :source_location => reader.cursor_at_prev_line # provide sensible fallbacks document.attributes['mantitle'] = document.attributes['doctitle'] document.attributes['manvolnum'] = '1' end reader.skip_blank_lines - if is_next_line_section?(reader, {}) - name_section = initialize_section(reader, document, {}) + if is_next_line_section? reader, {} + name_section = initialize_section reader, document, {} if name_section.level == 1 - name_section_buffer = reader.read_lines_until(:break_on_blank_lines => true) * ' ' - if (m = ManpageNamePurposeRx.match(name_section_buffer)) - document.attributes['manname'] = document.sub_attributes m[1] - document.attributes['manpurpose'] = m[2] - # TODO parse multiple man names + name_section_buffer = (reader.read_lines_until :break_on_blank_lines => true, :skip_line_comments => true) * ' ' + if ManpageNamePurposeRx =~ name_section_buffer + document.attributes['manname-title'] ||= name_section.title + document.attributes['manname-id'] = name_section.id if name_section.id + document.attributes['manpurpose'] = $2 + if (manname = ($1.include? ATTR_REF_HEAD) ? (document.sub_attributes $1) : $1).include? ',' + manname = (mannames = (manname.split ',').map {|n| n.lstrip })[0] + else + mannames = [manname] + end + document.attributes['manname'] = manname + document.attributes['mannames'] = mannames if document.backend == 'manpage' - document.attributes['docname'] = document.attributes['manname'] + document.attributes['docname'] = manname document.attributes['outfilesuffix'] = %(.#{document.attributes['manvolnum']}) end else - warn %(asciidoctor: ERROR: #{reader.prev_line_info}: malformed name section body) + logger.error message_with_context 'malformed name section body', :source_location => reader.cursor_at_prev_line end else - warn %(asciidoctor: ERROR: #{reader.prev_line_info}: name section title must be at level 1) + logger.error message_with_context 'name section title must be at level 1', :source_location => reader.cursor_at_prev_line end else - warn %(asciidoctor: ERROR: #{reader.prev_line_info}: name section expected) + logger.error message_with_context 'name section expected', :source_location => reader.cursor_at_prev_line end + nil end # Public: Return the next section from the Reader. # # This method process block metadata, content and subsections for this @@ -252,41 +262,45 @@ # # returns a two-element Array containing the Section and Hash of orphaned attributes def self.next_section reader, parent, attributes = {} preamble = intro = part = false - # FIXME if attributes[1] is a verbatim style, then don't check for section - # check if we are at the start of processing the document # NOTE we could drop a hint in the attributes to indicate # that we are at a section title (so we don't have to check) if parent.context == :document && parent.blocks.empty? && ((has_header = parent.has_header?) || (attributes.delete 'invalid-header') || !(is_next_line_section? reader, attributes)) - doctype = (document = parent).doctype - if has_header || (doctype == 'book' && attributes[1] != 'abstract') + book = (document = parent).doctype == 'book' + if has_header || (book && attributes[1] != 'abstract') preamble = intro = (Block.new parent, :preamble, :content_model => :compound) - preamble.title = parent.attr 'preface-title' if doctype == 'book' && (parent.attr? 'preface-title') - parent << preamble + preamble.title = parent.attr 'preface-title' if book && (parent.attr? 'preface-title') + parent.blocks << preamble end section = parent - current_level = 0 if parent.attributes.key? 'fragment' - expected_next_levels = nil + expected_next_level = -1 # small tweak to allow subsequent level-0 sections for book doctype - elsif doctype == 'book' - expected_next_levels = [0, 1] + elsif book + expected_next_level, expected_next_level_alt = 1, 0 else - expected_next_levels = [1] + expected_next_level = 1 end else - doctype = (document = parent.document).doctype + book = (document = parent.document).doctype == 'book' section = initialize_section reader, parent, attributes # clear attributes except for title attribute, which must be carried over to next content block attributes = (title = attributes['title']) ? { 'title' => title } : {} - part = section.sectname == 'part' - expected_next_levels = [(current_level = section.level) + 1] + expected_next_level = (current_level = section.level) + 1 + if current_level == 0 + part = book + elsif current_level == 1 && section.special + # NOTE technically preface and abstract sections are only permitted in the book doctype + unless (sectname = section.sectname) == 'appendix' || sectname == 'preface' || sectname == 'abstract' + expected_next_level = nil + end + end end reader.skip_blank_lines # Parse lines belonging to this section and its subsections until we @@ -298,32 +312,36 @@ # # We have to parse all the metadata lines before continuing with the loop, # otherwise subsequent metadata lines get interpreted as block content while reader.has_more_lines? parse_block_metadata_lines reader, document, attributes - if (next_level = is_next_line_section?(reader, attributes)) next_level += document.attr('leveloffset').to_i if document.attr?('leveloffset') - if next_level > current_level || (next_level == 0 && section.context == :document) - if next_level == 0 && doctype != 'book' - warn %(asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections) - elsif expected_next_levels && !expected_next_levels.include?(next_level) - warn %(asciidoctor: WARNING: #{reader.line_info}: section title out of sequence: expected #{expected_next_levels.size > 1 ? 'levels' : 'level'} #{expected_next_levels * ' or '}, got level #{next_level}) + if next_level > current_level + if expected_next_level + unless next_level == expected_next_level || (expected_next_level_alt && next_level == expected_next_level_alt) || expected_next_level < 0 + expected_condition = expected_next_level_alt ? %(expected levels #{expected_next_level_alt} or #{expected_next_level}) : %(expected level #{expected_next_level}) + logger.warn message_with_context %(section title out of sequence: #{expected_condition}, got level #{next_level}), :source_location => reader.cursor + end + else + logger.error message_with_context %(#{sectname} sections do not support nested sections), :source_location => reader.cursor end - # the attributes returned are those that are orphaned new_section, attributes = next_section reader, section, attributes - section << new_section + section.assign_numeral new_section + section.blocks << new_section + elsif next_level == 0 && section == document + logger.error message_with_context 'level 0 sections can only be used when doctype is book', :source_location => reader.cursor unless book + new_section, attributes = next_section reader, section, attributes + section.assign_numeral new_section + section.blocks << new_section else - if next_level == 0 && doctype != 'book' - warn %(asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections) - end # close this section (and break out of the nesting) to begin a new one break end else # just take one block or else we run the risk of overrunning section boundaries - block_line_info = reader.line_info + block_cursor = reader.cursor if (new_block = next_block reader, intro || section, attributes, :parse_metadata => false) # REVIEW this may be doing too much if part if !section.blocks? # if this block wasn't marked as [partintro], emulate behavior as if it had @@ -332,40 +350,36 @@ if new_block.context == :paragraph new_block.context = :open new_block.style = 'partintro' # emulate [partintro] open block else - intro = Block.new section, :open, :content_model => :compound + new_block.parent = (intro = Block.new section, :open, :content_model => :compound) intro.style = 'partintro' - new_block.parent = intro - section << intro + section.blocks << intro end end elsif section.blocks.size == 1 first_block = section.blocks[0] # open the [partintro] open block for appending if !intro && first_block.content_model == :compound - #new_block.parent = (intro = first_block) - warn %(asciidoctor: ERROR: #{block_line_info}: illegal block content outside of partintro block) + logger.error message_with_context 'illegal block content outside of partintro block', :source_location => block_cursor # rebuild [partintro] paragraph as an open block elsif first_block.content_model != :compound - intro = Block.new section, :open, :content_model => :compound + new_block.parent = (intro = Block.new section, :open, :content_model => :compound) intro.style = 'partintro' section.blocks.shift if first_block.style == 'partintro' first_block.context = :paragraph first_block.style = nil end - first_block.parent = intro intro << first_block - new_block.parent = intro - section << intro + section.blocks << intro end end end - (intro || section) << new_block + (intro || section).blocks << new_block attributes = {} #else # # don't clear attributes if we don't find a block because they may # # be trailing attributes that didn't get associated with a block end @@ -374,22 +388,21 @@ reader.skip_blank_lines || break end if part unless section.blocks? && section.blocks[-1].context == :section - warn %(asciidoctor: ERROR: #{reader.line_info}: invalid part, must have at least one section (e.g., chapter, appendix, etc.)) + logger.error message_with_context 'invalid part, must have at least one section (e.g., chapter, appendix, etc.)', :source_location => reader.cursor end # NOTE we could try to avoid creating a preamble in the first place, though # that would require reworking assumptions in next_section since the preamble # is treated like an untitled section elsif preamble # implies parent == document if preamble.blocks? - # unwrap standalone preamble (i.e., no sections), if permissible - if Compliance.unwrap_standalone_preamble && document.blocks.size == 1 && doctype != 'book' + # unwrap standalone preamble (i.e., document has no sections) except for books, if permissible + unless book || document.blocks[1] || !Compliance.unwrap_standalone_preamble document.blocks.shift while (child_block = preamble.blocks.shift) - child_block.parent = document document << child_block end end # drop the preamble if it has no content else @@ -451,14 +464,13 @@ if (extensions = document.extensions) block_extensions, block_macro_extensions = extensions.blocks?, extensions.block_macros? end # QUESTION should we introduce a parsing context object? - source_location = reader.cursor if document.sourcemap - this_path, this_lineno, this_line, in_list = reader.path, reader.lineno, reader.read_line, ListItem === parent + reader.mark + this_line, doc_attrs, style = reader.read_line, document.attributes, attributes[1] block = block_context = cloaked_context = terminator = nil - style = attributes[1] ? (parse_style_attribute attributes, reader) : nil if (delimited_block = is_delimited_block? this_line, true) block_context = cloaked_context = delimited_block.context terminator = delimited_block.terminator if !style @@ -469,11 +481,11 @@ elsif delimited_block.masq.include?('admonition') && ADMONITION_STYLES.include?(style) block_context = :admonition elsif block_extensions && extensions.registered_for_block?(style, block_context) block_context = style.to_sym else - warn %(asciidoctor: WARNING: #{this_path}: line #{this_lineno}: invalid style for #{block_context} block: #{style}) + logger.warn message_with_context %(invalid style for #{block_context} block: #{style}), :source_location => reader.cursor_at_mark style = block_context.to_s end end end @@ -515,37 +527,39 @@ # NOTE we're letting break lines (horizontal rule, page_break, etc) have attributes block = Block.new(parent, layout_break_chars[ch0], :content_model => :empty) break # NOTE very rare that a text-only line will end in ] (e.g., inline macro), so check that first elsif (this_line.end_with? ']') && (this_line.include? '::') - #if (this_line.start_with? 'image', 'video', 'audio') && (match = BlockMediaMacroRx.match(this_line)) - if (ch0 == 'i' || (this_line.start_with? 'video:', 'audio:')) && (match = BlockMediaMacroRx.match(this_line)) - blk_ctx, target = match[1].to_sym, match[2] - block = Block.new(parent, blk_ctx, :content_model => :empty) - case blk_ctx - when :video - posattrs = ['poster', 'width', 'height'] - when :audio - posattrs = [] - else # :image - posattrs = ['alt', 'width', 'height'] + #if (this_line.start_with? 'image', 'video', 'audio') && BlockMediaMacroRx =~ this_line + if (ch0 == 'i' || (this_line.start_with? 'video:', 'audio:')) && BlockMediaMacroRx =~ this_line + blk_ctx, target, blk_attrs = $1.to_sym, $2, $3 + block = Block.new parent, blk_ctx, :content_model => :empty + if blk_attrs + case blk_ctx + when :video + posattrs = ['poster', 'width', 'height'] + when :audio + posattrs = [] + else # :image + posattrs = ['alt', 'width', 'height'] + end + block.parse_attributes blk_attrs, posattrs, :sub_input => true, :sub_result => false, :into => attributes end - block.parse_attributes(match[3], posattrs, :sub_input => true, :sub_result => false, :into => attributes) # style doesn't have special meaning for media macros attributes.delete 'style' if attributes.key? 'style' if (target.include? ATTR_REF_HEAD) && (target = block.sub_attributes target, :attribute_missing => 'drop-line').empty? # retain as unparsed if attribute-missing is skip - if document.attributes.fetch('attribute-missing', Compliance.attribute_missing) == 'skip' + if (doc_attrs['attribute-missing'] || Compliance.attribute_missing) == 'skip' return Block.new(parent, :paragraph, :content_model => :simple, :source => [this_line]) # otherwise, drop the line else attributes.clear return end end if blk_ctx == :image - block.document.register :images, target + document.register :images, target # NOTE style is the value of the first positional attribute in the block attribute line attributes['alt'] ||= style || (attributes['default-alt'] = Helpers.basename(target, true).tr('_-', ' ')) unless (scaledwidth = attributes.delete 'scaledwidth').nil_or_empty? # NOTE assume % units if not specified attributes['scaledwidth'] = (TrailingDigitsRx.match? scaledwidth) ? %(#{scaledwidth}%) : scaledwidth @@ -554,26 +568,24 @@ block.assign_caption((attributes.delete 'caption'), 'figure') end attributes['target'] = target break - elsif ch0 == 't' && (this_line.start_with? 'toc:') && (match = BlockTocMacroRx.match(this_line)) - block = Block.new(parent, :toc, :content_model => :empty) - block.parse_attributes(match[1], [], :sub_result => false, :into => attributes) + elsif ch0 == 't' && (this_line.start_with? 'toc:') && BlockTocMacroRx =~ this_line + block = Block.new parent, :toc, :content_model => :empty + block.parse_attributes($1, [], :sub_result => false, :into => attributes) if $1 break - elsif block_macro_extensions && (match = CustomBlockMacroRx.match(this_line)) && - (extension = extensions.registered_for_block_macro?(match[1])) - target = match[2] - content = match[3] + elsif block_macro_extensions && CustomBlockMacroRx =~ this_line && + (extension = extensions.registered_for_block_macro? $1) + target, content = $2, $3 if extension.config[:content_model] == :attributes - unless content.empty? - document.parse_attributes(content, extension.config[:pos_attrs] || [], - :sub_input => true, :sub_result => false, :into => attributes) + if content + document.parse_attributes content, extension.config[:pos_attrs] || [], :sub_input => true, :sub_result => false, :into => attributes end else - attributes['text'] = content + attributes['text'] = content || '' end if (default_attrs = extension.config[:default_attrs]) attributes.update(default_attrs) {|_, old_v| old_v } end if (block = extension.process_method[parent, target, attributes]) @@ -592,46 +604,44 @@ if !indented && CALLOUT_LIST_HEADS.include?(ch0 ||= this_line.chr) && (CalloutListSniffRx.match? this_line) && (match = CalloutListRx.match this_line) block = List.new(parent, :colist) attributes['style'] = 'arabic' reader.unshift_line this_line - expected_index = 1 + next_index = 1 # NOTE skip the match on the first time through as we've already done it (emulates begin...while) - while match || (reader.has_more_lines? && (match = CalloutListRx.match(reader.peek_line))) - list_item_lineno = reader.lineno + while match || ((match = CalloutListRx.match reader.peek_line) && reader.mark) # might want to move this check to a validate method - unless match[1] == expected_index.to_s - warn %(asciidoctor: WARNING: #{reader.path}: line #{list_item_lineno}: callout list item index: expected #{expected_index} got #{match[1]}) + unless match[1] == next_index.to_s + logger.warn message_with_context %(callout list item index: expected #{next_index}, got #{match[1]}), :source_location => reader.cursor_at_mark end if (list_item = next_list_item reader, block, match) - block << list_item + block.items << list_item if (coids = document.callouts.callout_ids block.items.size).empty? - warn %(asciidoctor: WARNING: #{reader.path}: line #{list_item_lineno}: no callouts refer to list item #{block.items.size}) + logger.warn message_with_context %(no callout found for <#{block.items.size}>), :source_location => reader.cursor_at_mark else list_item.attributes['coids'] = coids end end - expected_index += 1 + next_index += 1 match = nil end document.callouts.next_list break elsif UnorderedListRx.match? this_line reader.unshift_line this_line - block = next_item_list(reader, :ulist, parent) - if (style || (Section === parent && parent.sectname)) == 'bibliography' - attributes['style'] = 'bibliography' unless style - block.items.each {|item| catalog_inline_biblio_anchor item.instance_variable_get(:@text), item, document } - end + if Section === parent && parent.sectname == 'bibliography' + style = attributes['style'] = 'bibliography' + end unless style + block = next_list(reader, :ulist, parent, style) break elsif (match = OrderedListRx.match(this_line)) reader.unshift_line this_line - block = next_item_list(reader, :olist, parent) - # FIXME move this logic into next_item_list + block = next_list(reader, :olist, parent) + # FIXME move this logic into next_list unless style marker = block.items[0].marker if marker.start_with? '.' # first one makes more sense, but second one is AsciiDoc-compliant # TODO control behavior using a compliance setting @@ -649,17 +659,16 @@ break elsif (style == 'float' || style == 'discrete') && (Compliance.underline_style_section_titles ? (is_section_title? this_line, reader.peek_line) : !indented && (atx_section_title? this_line)) reader.unshift_line this_line - float_id, float_reftext, float_title, float_level, _ = parse_section_title(reader, document) + float_id, float_reftext, float_title, float_level = parse_section_title reader, document, attributes['id'] attributes['reftext'] = float_reftext if float_reftext block = Block.new(parent, :floating_title, :content_model => :empty) block.title = float_title attributes.delete 'title' - block.id = float_id || attributes['id'] || - ((document.attributes.key? 'sectids') ? (Section.generate_id block.title, document) : nil) + block.id = float_id || ((doc_attrs.key? 'sectids') ? (Section.generate_id block.title, document) : nil) block.level = float_level break # FIXME create another set for "passthrough" styles # FIXME make this more DRY! @@ -681,70 +690,68 @@ cloaked_context = :paragraph reader.unshift_line this_line # advance to block parsing => break else - warn %(asciidoctor: WARNING: #{this_path}: line #{this_lineno}: invalid style for paragraph: #{style}) + logger.warn message_with_context %(invalid style for paragraph: #{style}), :source_location => reader.cursor_at_mark style = nil # continue to process paragraph end end - break_at_list = (skipped == 0 && in_list) reader.unshift_line this_line # a literal paragraph: contiguous lines starting with at least one whitespace character # NOTE style can only be nil or "normal" at this point if indented && !style - lines = read_paragraph_lines reader, break_at_list, :skip_line_comments => text_only - + lines = read_paragraph_lines reader, (in_list = ListItem === parent) && skipped == 0, :skip_line_comments => text_only adjust_indentation! lines - block = Block.new(parent, :literal, :content_model => :verbatim, :source => lines, :attributes => attributes) # a literal gets special meaning inside of a description list # TODO this feels hacky, better way to distinguish from explicit literal block? block.set_option('listparagraph') if in_list - # a normal paragraph: contiguous non-blank/non-continuation lines (left-indented or normal style) else - # NOTE we only get here if there's at least one line that's not a line comment - lines = read_paragraph_lines reader, break_at_list, :skip_line_comments => true - + lines = read_paragraph_lines reader, skipped == 0 && ListItem === parent, :skip_line_comments => true # NOTE don't check indented here since it's extremely rare #if text_only || indented if text_only # if [normal] is used over an indented paragraph, shift content to left margin # QUESTION do we even need to shift since whitespace is normalized by XML in this case? adjust_indentation! lines if indented && style == 'normal' block = Block.new(parent, :paragraph, :content_model => :simple, :source => lines, :attributes => attributes) elsif (ADMONITION_STYLE_HEADS.include? ch0) && (this_line.include? ':') && (AdmonitionParagraphRx =~ this_line) lines[0] = $' # string after match attributes['name'] = admonition_name = (attributes['style'] = $1).downcase - attributes['textlabel'] = (attributes.delete 'caption') || document.attributes[%(#{admonition_name}-caption)] + attributes['textlabel'] = (attributes.delete 'caption') || doc_attrs[%(#{admonition_name}-caption)] block = Block.new(parent, :admonition, :content_model => :simple, :source => lines, :attributes => attributes) elsif md_syntax && ch0 == '>' && this_line.start_with?('> ') lines.map! {|line| line == '>' ? line[1..-1] : ((line.start_with? '> ') ? line[2..-1] : line) } if lines[-1].start_with? '-- ' - attribution, citetitle = lines.pop[3..-1].split(', ', 2) - attributes['attribution'] = attribution if attribution - attributes['citetitle'] = citetitle if citetitle + credit_line = (credit_line = lines.pop[3..-1]) lines.pop while lines[-1].empty? end attributes['style'] = 'quote' # NOTE will only detect discrete (aka free-floating) headings # TODO could assume a discrete heading when inside a block context # FIXME Reader needs to be created w/ line info block = build_block(:quote, :compound, false, parent, Reader.new(lines), attributes) + if credit_line + attribution, citetitle = (block.apply_subs credit_line).split ', ', 2 + attributes['attribution'] = attribution if attribution + attributes['citetitle'] = citetitle if citetitle + end elsif ch0 == '"' && lines.size > 1 && (lines[-1].start_with? '-- ') && (lines[-2].end_with? '"') lines[0] = this_line[1..-1] # strip leading quote - attribution, citetitle = lines.pop[3..-1].split(', ', 2) - attributes['attribution'] = attribution if attribution - attributes['citetitle'] = citetitle if citetitle + credit_line = (credit_line = lines.pop).slice(3, credit_line.length) lines.pop while lines[-1].empty? lines[-1] = lines[-1].chop # strip trailing quote attributes['style'] = 'quote' block = Block.new(parent, :quote, :content_model => :simple, :source => lines, :attributes => attributes) + attribution, citetitle = (block.apply_subs credit_line).split ', ', 2 + attributes['attribution'] = attribution if attribution + attributes['citetitle'] = citetitle if citetitle else # if [normal] is used over an indented paragraph, shift content to left margin # QUESTION do we even need to shift since whitespace is normalized by XML in this case? adjust_indentation! lines if indented && style == 'normal' block = Block.new(parent, :paragraph, :content_model => :simple, :source => lines, :attributes => attributes) @@ -763,11 +770,11 @@ block_context = :open if block_context == :abstract || block_context == :partintro case block_context when :admonition attributes['name'] = admonition_name = style.downcase - attributes['textlabel'] = (attributes.delete 'caption') || document.attributes[%(#{admonition_name}-caption)] + attributes['textlabel'] = (attributes.delete 'caption') || doc_attrs[%(#{admonition_name}-caption)] block = build_block(block_context, :compound, terminator, parent, reader, attributes) when :comment build_block(block_context, :skip, terminator, parent, reader, attributes) attributes.clear @@ -779,18 +786,18 @@ when :listing, :literal block = build_block(block_context, :verbatim, terminator, parent, reader, attributes) when :source AttributeList.rekey attributes, [nil, 'language', 'linenums'] - if document.attributes.key? 'source-language' - attributes['language'] = document.attributes['source-language'] || 'text' + if doc_attrs.key? 'source-language' + attributes['language'] = doc_attrs['source-language'] || 'text' end unless attributes.key? 'language' - if (attributes.key? 'linenums-option') || (document.attributes.key? 'source-linenums-option') + if (attributes.key? 'linenums-option') || (doc_attrs.key? 'source-linenums-option') attributes['linenums'] = '' end unless attributes.key? 'linenums' - if document.attributes.key? 'source-indent' - attributes['indent'] = document.attributes['source-indent'] + if doc_attrs.key? 'source-indent' + attributes['indent'] = doc_attrs['source-indent'] end unless attributes.key? 'indent' block = build_block(:listing, :verbatim, terminator, parent, reader, attributes) when :fenced_code attributes['style'] = 'source' @@ -806,45 +813,38 @@ end else language = language.lstrip end if language.nil_or_empty? - if document.attributes.key? 'source-language' - attributes['language'] = document.attributes['source-language'] || 'text' + if doc_attrs.key? 'source-language' + attributes['language'] = doc_attrs['source-language'] || 'text' end else attributes['language'] = language end - if (attributes.key? 'linenums-option') || (document.attributes.key? 'source-linenums-option') + if (attributes.key? 'linenums-option') || (doc_attrs.key? 'source-linenums-option') attributes['linenums'] = '' end unless attributes.key? 'linenums' - if document.attributes.key? 'source-indent' - attributes['indent'] = document.attributes['source-indent'] + if doc_attrs.key? 'source-indent' + attributes['indent'] = doc_attrs['source-indent'] end unless attributes.key? 'indent' terminator = terminator.slice 0, 3 block = build_block(:listing, :verbatim, terminator, parent, reader, attributes) when :pass block = build_block(block_context, :raw, terminator, parent, reader, attributes) when :stem, :latexmath, :asciimath - if block_context == :stem - attributes['style'] = if (explicit_stem_syntax = attributes[2]) - explicit_stem_syntax.include?('tex') ? 'latexmath' : 'asciimath' - elsif (default_stem_syntax = document.attributes['stem']).nil_or_empty? - 'asciimath' - else - default_stem_syntax - end - end + attributes['style'] = STEM_TYPE_ALIASES[attributes[2] || doc_attrs['stem']] if block_context == :stem block = build_block(:stem, :raw, terminator, parent, reader, attributes) when :open, :sidebar block = build_block(block_context, :compound, terminator, parent, reader, attributes) when :table - block_reader = Reader.new reader.read_lines_until(:terminator => terminator, :skip_line_comments => true), reader.cursor + block_cursor = reader.cursor + block_reader = Reader.new reader.read_lines_until(:terminator => terminator, :skip_line_comments => true, :context => :table, :cursor => :at_mark), block_cursor # NOTE it's very rare that format is set when using a format hint char, so short-circuit unless terminator.start_with? '|', '!' # NOTE infer dsv once all other format hint chars are ruled out attributes['format'] ||= (terminator.start_with? ',') ? 'csv' : 'dsv' end @@ -871,36 +871,33 @@ attributes.clear return end else # this should only happen if there's a misconfiguration - raise %(Unsupported block type #{block_context} at #{reader.line_info}) + raise %(Unsupported block type #{block_context} at #{reader.cursor}) end end end # FIXME we've got to clean this up, it's horrible! - block.source_location = source_location if source_location + block.source_location = reader.cursor_at_mark if document.sourcemap # FIXME title should be assigned when block is constructed block.title = attributes.delete 'title' if attributes.key? 'title' - #unless attributes.key? 'reftext' - # attributes['reftext'] = document.attributes['reftext'] if document.attributes.key? 'reftext' - #end # TODO eventually remove the style attribute from the attributes hash #block.style = attributes.delete 'style' block.style = attributes['style'] if (block_id = (block.id ||= attributes['id'])) unless document.register :refs, [block_id, block, attributes['reftext'] || (block.title? ? block.title : nil)] - warn %(asciidoctor: WARNING: #{this_path}: line #{this_lineno}: id assigned to block already in use: #{block_id}) + logger.warn message_with_context %(id assigned to block already in use: #{block_id}), :source_location => reader.cursor_at_mark end end # FIXME remove the need for this update! block.attributes.update(attributes) unless attributes.empty? block.lock_in_subs - #if document.attributes.key? :pending_attribute_entries - # document.attributes.delete(:pending_attribute_entries).each do |entry| + #if doc_attrs.key? :pending_attribute_entries + # doc_attrs.delete(:pending_attribute_entries).each do |entry| # entry.save_to block.attributes # end #end if block.sub? :callouts @@ -1013,19 +1010,20 @@ lines = read_paragraph_lines reader, false, :skip_line_comments => true, :skip_processing => skip_processing # QUESTION check for empty lines after grabbing lines for simple content model? end block_reader = nil elsif parse_as_content_model != :compound - lines = reader.read_lines_until :terminator => terminator, :skip_processing => skip_processing + lines = reader.read_lines_until :terminator => terminator, :skip_processing => skip_processing, :context => block_context, :cursor => :at_mark block_reader = nil # terminator is false when reader has already been prepared elsif terminator == false lines = nil block_reader = reader else lines = nil - block_reader = Reader.new reader.read_lines_until(:terminator => terminator, :skip_processing => skip_processing), reader.cursor + block_cursor = reader.cursor + block_reader = Reader.new reader.read_lines_until(:terminator => terminator, :skip_processing => skip_processing, :context => block_context, :cursor => :at_mark), block_cursor end if content_model == :verbatim if (indent = attributes['indent']) adjust_indentation! lines, indent, (attributes['tabsize'] || parent.document.attributes['tabsize']) @@ -1079,31 +1077,28 @@ # reader - The Reader containing the lines to process # parent - The parent Block to which to attach the parsed blocks # # Returns nothing. def self.parse_blocks(reader, parent) - while ((block = next_block reader, parent) && parent << block) || reader.has_more_lines? + while ((block = next_block reader, parent) && parent.blocks << block) || reader.has_more_lines? end end - # Internal: Parse and construct an item list (ordered or unordered) from the current position of the Reader + # Internal: Parse and construct an ordered or unordered list at the current position of the Reader # - # reader - The Reader from which to retrieve the outline list + # reader - The Reader from which to retrieve the list # list_type - A Symbol representing the list type (:olist for ordered, :ulist for unordered) - # parent - The parent Block to which this outline list belongs + # parent - The parent Block to which this list belongs + # style - The block style assigned to this list (optional, default: nil) # - # Returns the Block encapsulating the parsed outline (unordered or ordered) list - def self.next_item_list(reader, list_type, parent) + # Returns the Block encapsulating the parsed unordered or ordered list + def self.next_list(reader, list_type, parent, style = nil) list_block = List.new(parent, list_type) - if parent.context == list_type - list_block.level = parent.level + 1 - else - list_block.level = 1 - end + list_block.level = parent.context == list_type ? (parent.level + 1) : 1 - while reader.has_more_lines? && (match = ListRxMap[list_type].match(reader.peek_line)) - marker = resolve_list_marker(list_type, match[1]) + while reader.has_more_lines? && ListRxMap[list_type] =~ reader.peek_line + match, marker = $~, resolve_list_marker(list_type, $1) # if we are moving to the next item, and the marker is different # determine if we are moving up or down in nesting if list_block.items? && marker != list_block.items[0].marker # assume list is nested by default, but then check to see if we are @@ -1120,21 +1115,21 @@ else this_item_level = list_block.level end if !list_block.items? || this_item_level == list_block.level - list_item = next_list_item(reader, list_block, match) + list_item = next_list_item(reader, list_block, match, nil, style) elsif this_item_level < list_block.level # leave this block break elsif this_item_level > list_block.level # If this next list level is down one from the # current Block's, append it to content of the current list item list_block.items[-1] << next_block(reader, list_block) end - list_block << list_item if list_item + list_block.items << list_item if list_item list_item = nil reader.skip_blank_lines || break end @@ -1157,10 +1152,29 @@ found = true } if text.include? '<' found end + # Internal: Catalog a matched inline anchor. + # + # id - The String id of the anchor + # reftext - The optional String reference text of the anchor + # node - The AbstractNode parent node of the anchor node + # location - The source location (file and line) where the anchor was found + # doc - The document to which the node belongs; computed from node if not specified + # + # Returns nothing + def self.catalog_inline_anchor id, reftext, node, location, doc = nil + doc ||= node.document + reftext = doc.sub_attributes reftext if reftext && (reftext.include? ATTR_REF_HEAD) + unless doc.register :refs, [id, (Inline.new node, :anchor, reftext, :type => :ref, :id => id), reftext] + location = location.cursor if Reader === location + logger.warn message_with_context %(id assigned to anchor already in use: #{id}), :source_location => location + end + nil + end + # Internal: Catalog any inline anchors found in the text (but don't convert) # # text - The String text in which to look for inline anchors # block - The block in which the references should be searched # document - The current Document on which the references are stored @@ -1178,29 +1192,28 @@ reftext = reftext.gsub '\]', ']' if reftext.include? ']' next if (reftext.include? ATTR_REF_HEAD) && (reftext = document.sub_attributes reftext).empty? end end unless document.register :refs, [id, (Inline.new block, :anchor, reftext, :type => :ref, :id => id), reftext] - warn %(asciidoctor: WARNING: #{document.reader.path}: id assigned to anchor already in use: #{id}) + logger.warn message_with_context %(id assigned to anchor already in use: #{id}), :source_location => document.reader.cursor_at_prev_line end end if (text.include? '[[') || (text.include? 'or:') nil end # Internal: Catalog the bibliography inline anchor found in the start of the list item (but don't convert) # - # text - The String text in which to look for an inline bibliography anchor - # block - The ListItem block in which the reference should be searched - # document - The current document in which the reference is stored + # id - The String id of the anchor + # reftext - The optional String reference text of the anchor + # node - The AbstractNode parent node of the anchor node + # reader - The source Reader for the current Document, positioned at the current list item # # Returns nothing - def self.catalog_inline_biblio_anchor text, block, document - if InlineBiblioAnchorRx =~ text - # QUESTION should we sub attributes in reftext (like with regular anchors)? - unless document.register :refs, [(id = $1), (Inline.new block, :anchor, (reftext = %([#{$2 || id}])), :type => :bibref, :id => id), reftext] - warn %(asciidoctor: WARNING: #{document.reader.path}: id assigned to bibliography anchor already in use: #{id}) - end + def self.catalog_inline_biblio_anchor id, reftext, node, reader + # QUESTION should we sub attributes in reftext (like with regular anchors)? + unless node.document.register :refs, [id, (Inline.new node, :anchor, (styled_reftext = %([#{reftext || id}])), :type => :bibref, :id => id), styled_reftext] + logger.warn message_with_context %(id assigned to bibliography anchor already in use: #{id}), :source_location => reader.cursor end nil end # Internal: Parse and construct a description list Block from the current position of the Reader @@ -1218,114 +1231,127 @@ sibling_pattern = DescriptionListSiblingRx[match[2]] # NOTE skip the match on the first time through as we've already done it (emulates begin...while) while match || (reader.has_more_lines? && (match = sibling_pattern.match(reader.peek_line))) term, item = next_list_item(reader, list_block, match, sibling_pattern) - if previous_pair && !previous_pair[-1] - previous_pair.pop + if previous_pair && !previous_pair[1] previous_pair[0] << term - previous_pair << item + previous_pair[1] = item else - # FIXME this misses the automatic parent assignment list_block.items << (previous_pair = [[term], item]) end match = nil end list_block end - # Internal: Parse and construct the next ListItem for the current bulleted - # (unordered or ordered) list Block, callout lists included, or the next - # term ListItem and description ListItem pair for the description list Block. + # Internal: Parse and construct the next ListItem for the current list Block + # (unordered, ordered, or callout list) or the term ListItem and description + # ListItem pair for the description list Block. # # First collect and process all the lines that constitute the next list # item for the parent list (according to its type). Next, parse those lines # into blocks and associate them with the ListItem (in the case of a # description list, the description ListItem). Finally, fold the first block # into the item's text attribute according to rules described in ListItem. # # reader - The Reader from which to retrieve the next list item # list_block - The parent list Block of this ListItem. Also provides access to the list type. # match - The match Array which contains the marker and text (first-line) of the ListItem - # sibling_trait - The list marker or the Regexp to match a sibling item + # sibling_trait - The list marker or the Regexp to match a sibling item (optional, default: nil) + # style - The block style assigned to this list (optional, default: nil) # # Returns the next ListItem or ListItem pair (depending on the list type) # for the parent list Block. - def self.next_list_item(reader, list_block, match, sibling_trait = nil) + def self.next_list_item(reader, list_block, match, sibling_trait = nil, style = nil) if (list_type = list_block.context) == :dlist - list_term = ListItem.new(list_block, match[1]) - list_item = ListItem.new(list_block, match[3]) - has_text = !match[3].nil_or_empty? - else - # Create list item using first line as the text of the list item - text = match[2] - checkbox = false - if list_type == :ulist && text.start_with?('[') - if text.start_with?('[ ] ') - checkbox = true - checked = false - text = text[3..-1].lstrip - elsif text.start_with?('[x] ', '[*] ') - checkbox = true - checked = true - text = text[3..-1].lstrip + dlist = true + list_term = ListItem.new(list_block, (term_text = match[1])) + if term_text.start_with?('[[') && LeadingInlineAnchorRx =~ term_text + catalog_inline_anchor $1, ($2 || $'.lstrip), list_term, reader + end + has_text = true if (item_text = match[3]) + list_item = ListItem.new(list_block, item_text) + if list_block.document.sourcemap + list_term.source_location = reader.cursor + if has_text + list_item.source_location = list_term.source_location + else + sourcemap_assignment_deferred = true end end - list_item = ListItem.new(list_block, text) - - if checkbox - # FIXME checklist never makes it into the options attribute - list_block.attributes['checklist-option'] = '' - list_item.attributes['checkbox'] = '' - list_item.attributes['checked'] = '' if checked - end - - sibling_trait ||= resolve_list_marker(list_type, match[1], list_block.items.size, true, reader) - list_item.marker = sibling_trait + else has_text = true + list_item = ListItem.new(list_block, (item_text = match[2])) + list_item.source_location = reader.cursor if list_block.document.sourcemap + if list_type == :ulist + list_item.marker = (sibling_trait ||= match[1]) + if item_text.start_with?('[') + if style && style == 'bibliography' + if InlineBiblioAnchorRx =~ item_text + catalog_inline_biblio_anchor $1, $2, list_item, reader + end + elsif item_text.start_with?('[[') + if LeadingInlineAnchorRx =~ item_text + catalog_inline_anchor $1, $2, list_item, reader + end + elsif item_text.start_with?('[ ] ', '[x] ', '[*] ') + # FIXME next_block wipes out update to options attribute + #list_block.set_option 'checklist' unless list_block.attributes['checklist-option'] + list_block.attributes['checklist-option'] = '' + list_item.attributes['checkbox'] = '' + list_item.attributes['checked'] = '' unless item_text.start_with? '[ ' + list_item.text = item_text.slice(4, item_text.length) + end + end + elsif list_type == :olist + list_item.marker = (sibling_trait ||= resolve_ordered_list_marker(match[1], list_block.items.size, true, reader)) + else # :colist + list_item.marker = (sibling_trait ||= '<1>') + end end - # first skip the line with the marker / term + # first skip the line with the marker / term (it gets put back onto the reader by next_block) reader.shift - list_item_reader = Reader.new read_lines_for_list_item(reader, list_type, sibling_trait, has_text), reader.cursor + block_cursor = reader.cursor + list_item_reader = Reader.new read_lines_for_list_item(reader, list_type, sibling_trait, has_text), block_cursor if list_item_reader.has_more_lines? + list_item.source_location = block_cursor if sourcemap_assignment_deferred # NOTE peek on the other side of any comment lines comment_lines = list_item_reader.skip_line_comments if (subsequent_line = list_item_reader.peek_line) list_item_reader.unshift_lines comment_lines unless comment_lines.empty? if (continuation_connects_first_block = subsequent_line.empty?) content_adjacent = false else content_adjacent = true - # treat lines as paragraph text if continuation does not connect first block (i.e., has_text = false) - has_text = false unless list_type == :dlist + # treat lines as paragraph text if continuation does not connect first block (i.e., has_text = nil) + has_text = nil unless dlist end else # NOTE we have no use for any trailing comment lines we might have found continuation_connects_first_block = false content_adjacent = false end - # only relevant for :dlist - options = {:text => !has_text} + # reader is confined to boundaries of list, which means only blocks will be found (no sections) + if (block = next_block(list_item_reader, list_item, (attrs = {}), :text => !has_text)) + list_item.blocks << block + end - # we can look for blocks until lines are exhausted without worrying about - # sections since reader is confined to boundaries of list - while ((block = next_block list_item_reader, list_item, {}, options) && list_item << block) || - list_item_reader.has_more_lines? + while list_item_reader.has_more_lines? + if (block = next_block(list_item_reader, list_item, attrs)) + list_item.blocks << block + end end list_item.fold_first(continuation_connects_first_block, content_adjacent) end - if list_type == :dlist - if list_item.text? || list_item.blocks? - [list_term, list_item] - else - [list_term, nil] - end + if dlist + list_item.text? || list_item.blocks? ? [list_term, list_item] : [list_term] else list_item end end @@ -1394,11 +1420,11 @@ if (match = is_delimited_block?(this_line, true)) if continuation == :active buffer << this_line # grab all the lines in the block, leaving the delimiters in place # we're being more strict here about the terminator, but I think that's a good thing - buffer.concat reader.read_lines_until(:terminator => match.terminator, :read_last_line => true) + buffer.concat reader.read_lines_until(:terminator => match.terminator, :read_last_line => true, :context => nil) continuation = :inactive else break end # technically BlockAttributeLineRx only breaks if ensuing line is not a list item @@ -1526,62 +1552,61 @@ # current position of the reader. # # reader - the source reader # parent - the parent Section or Document of this Section # attributes - a Hash of attributes to assign to this section (default: {}) + # + # Returns the section [Block] def self.initialize_section reader, parent, attributes = {} document = parent.document + book = (doctype = document.doctype) == 'book' source_location = reader.cursor if document.sourcemap - sect_id, sect_reftext, sect_title, sect_level, atx = parse_section_title reader, document + sect_style = attributes[1] + sect_id, sect_reftext, sect_title, sect_level, sect_atx = parse_section_title reader, document, attributes['id'] + if sect_reftext attributes['reftext'] = sect_reftext - elsif attributes.key? 'reftext' + else sect_reftext = attributes['reftext'] - #elsif document.attributes.key? 'reftext' - # sect_reftext = attributes['reftext'] = document.attributes['reftext'] end - # parse style, id, and role attributes from first positional attribute if present - style = attributes[1] ? (parse_style_attribute attributes, reader) : nil - if style - if style == 'abstract' && document.doctype == 'book' + if sect_style + if book && sect_style == 'abstract' sect_name, sect_level = 'chapter', 1 else - sect_name, sect_special = style, true + sect_name, sect_special = sect_style, true sect_level = 1 if sect_level == 0 - sect_numbered_force = style == 'appendix' + sect_numbered = sect_style == 'appendix' end + elsif book + sect_name = sect_level == 0 ? 'part' : (sect_level > 1 ? 'section' : 'chapter') + elsif doctype == 'manpage' && (sect_title.casecmp 'synopsis') == 0 + sect_name, sect_special = 'synopsis', true else - case document.doctype - when 'book' - sect_name = sect_level == 0 ? 'part' : (sect_level == 1 ? 'chapter' : 'section') - when 'manpage' - if (sect_title.casecmp 'synopsis') == 0 - sect_name, sect_special = 'synopsis', true - else - sect_name = 'section' - end - else - sect_name = 'section' - end + sect_name = 'section' end - section = Section.new parent, sect_level, false + section = Section.new parent, sect_level section.id, section.title, section.sectname, section.source_location = sect_id, sect_title, sect_name, source_location - # TODO honor special section numbering option (#661) if sect_special section.special = true - section.numbered = true if sect_numbered_force - elsif sect_level > 0 && (document.attributes.key? 'sectnums') - section.numbered = section.special ? (parent.context == :section && parent.numbered) : true + if sect_numbered + section.numbered = true + elsif document.attributes['sectnums'] == 'all' + section.numbered = book && sect_level == 1 ? :chapter : true + end + elsif document.attributes['sectnums'] && sect_level > 0 + # NOTE a special section here is guaranteed to be nested in another section + section.numbered = section.special ? parent.numbered && true : true + elsif book && sect_level == 0 && document.attributes['partnums'] + section.numbered = true end # generate an ID if one was not embedded or specified as anchor above section title - if (id = section.id ||= (attributes['id'] || - ((document.attributes.key? 'sectids') ? (Section.generate_id section.title, document) : nil))) + if (id = section.id ||= ((document.attributes.key? 'sectids') ? (Section.generate_id section.title, document) : nil)) unless document.register :refs, [id, section, sect_reftext || section.title] - warn %(asciidoctor: WARNING: #{reader.path}: line #{reader.lineno - (atx ? 1 : 2)}: id assigned to section already in use: #{id}) + logger.warn message_with_context %(id assigned to section already in use: #{id}), :source_location => (reader.cursor_at_line reader.lineno - (sect_atx ? 1 : 2)) end end section.update_attributes(attributes) reader.skip_blank_lines @@ -1594,16 +1619,17 @@ # reader - the source Reader # attributes - a Hash of attributes collected above the current line # # Returns the Integer section level if the Reader is positioned at a section title or nil otherwise def self.is_next_line_section?(reader, attributes) - if (style = attributes[1]) && (style.start_with? 'discrete', 'float') && (DiscreteHeadingStyleRx.match? style) + if (style = attributes[1]) && (style == 'discrete' || style == 'float') return - elsif reader.has_more_lines? - Compliance.underline_style_section_titles ? - is_section_title?(*reader.peek_lines(2, style && style == 'comment')) : - atx_section_title?(reader.peek_line) + elsif Compliance.underline_style_section_titles + next_lines = reader.peek_lines 2, style && style == 'comment' + is_section_title?(next_lines[0] || '', next_lines[1]) + else + atx_section_title?(reader.peek_line || '') end end # Internal: Convenience API for checking if the next line on the Reader is the document title # @@ -1699,32 +1725,32 @@ # # => true # # Returns an 5-element [Array] containing the id (String), reftext (String), # title (String), level (Integer), and flag (Boolean) indicating whether an # atx section title was matched, or nothing. - def self.parse_section_title(reader, document) - sect_id = sect_reftext = nil + def self.parse_section_title(reader, document, sect_id = nil) + sect_reftext = nil line1 = reader.read_line if Compliance.markdown_syntax ? ((line1.start_with? '=', '#') && ExtAtxSectionTitleRx =~ line1) : ((line1.start_with? '=') && AtxSectionTitleRx =~ line1) # NOTE level is 1 less than number of line markers sect_level, sect_title, atx = $1.length - 1, $2, true if sect_title.end_with?(']]') && InlineSectionAnchorRx =~ sect_title && !$1 # escaped sect_title, sect_id, sect_reftext = (sect_title.slice 0, sect_title.length - $&.length), $2, $3 - end + end unless sect_id elsif Compliance.underline_style_section_titles && (line2 = reader.peek_line(true)) && (sect_level = SETEXT_SECTION_LEVELS[line2_ch1 = line2.chr]) && line2_ch1 * (line2_len = line2.length) == line2 && (sect_title = SetextSectionTitleRx =~ line1 && $1) && (line_length(line1) - line2_len).abs < 2 atx = false if sect_title.end_with?(']]') && InlineSectionAnchorRx =~ sect_title && !$1 # escaped sect_title, sect_id, sect_reftext = (sect_title.slice 0, sect_title.length - $&.length), $2, $3 - end + end unless sect_id reader.shift else - raise %(Unrecognized section at #{reader.prev_line_info}) + raise %(Unrecognized section at #{reader.cursor_at_prev_line}) end sect_level += document.attr('leveloffset').to_i if document.attr?('leveloffset') [sect_id, sect_reftext, sect_title, sect_level, atx] end @@ -1756,11 +1782,11 @@ # data = ["Author Name <author@example.org>\n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"] # parse_header_metadata(Reader.new data, nil, :normalize => true) # # => {'author' => 'Author Name', 'firstname' => 'Author', 'lastname' => 'Name', 'email' => 'author@example.org', # # 'revnumber' => '1.0', 'revdate' => '2012-12-21', 'revremark' => 'Coincide w/ end of world.'} def self.parse_header_metadata(reader, document = nil) - # NOTE this will discard away any comment lines, but not skip blank lines + # NOTE this will discard any comment lines, but not skip blank lines process_attribute_entries reader, document metadata, implicit_author, implicit_authors = {}, nil, nil if reader.has_more_lines? && !reader.next_line_empty? @@ -1819,10 +1845,12 @@ # NOTE this will discard any comment lines, but not skip blank lines process_attribute_entries reader, document reader.skip_blank_lines + else + author_metadata = {} end # process author attribute entries that override (or stand in for) the implicit author line if document if document.attributes.key?('author') && (author_line = document.attributes['author']) != implicit_author @@ -1861,11 +1889,13 @@ else author_metadata = {} end end - unless author_metadata.empty? + if author_metadata.empty? + metadata['authorcount'] ||= (document.attributes['authorcount'] = 0) + else document.attributes.update author_metadata # special case if !document.attributes.key?('email') && document.attributes.key?('email_1') document.attributes['email'] = document.attributes['email_1'] @@ -1885,26 +1915,27 @@ # semicolon-separated entries in the author line (default: true) # # returns a Hash of author metadata def self.process_authors author_line, names_only = false, multiple = true author_metadata = {} + author_idx = 0 keys = ['author', 'authorinitials', 'firstname', 'middlename', 'lastname', 'email'] author_entries = multiple ? (author_line.split ';').map {|it| it.strip } : Array(author_line) - author_entries.each_with_index do |author_entry, idx| + author_entries.each do |author_entry| next if author_entry.empty? + author_idx += 1 key_map = {} - if idx == 0 + if author_idx == 1 keys.each do |key| key_map[key.to_sym] = key end else keys.each do |key| - key_map[key.to_sym] = %(#{key}_#{idx + 1}) + key_map[key.to_sym] = %(#{key}_#{author_idx}) end end - segments = nil if names_only # when parsing an attribute value # QUESTION should we rstrip author_entry? if author_entry.include? '<' author_metadata[key_map[:author]] = author_entry.tr('_', ' ') author_entry = author_entry.gsub XmlSanitizeRx, '' @@ -1937,24 +1968,24 @@ else author_metadata[key_map[:author]] = author_metadata[key_map[:firstname]] = fname = author_entry.squeeze(' ').strip author_metadata[key_map[:authorinitials]] = fname.chr end - author_metadata['authorcount'] = idx + 1 - # only assign the _1 attributes if there are multiple authors - if idx == 1 - keys.each do |key| - author_metadata[%(#{key}_1)] = author_metadata[key] if author_metadata.key? key - end - end - if idx == 0 + if author_idx == 1 author_metadata['authors'] = author_metadata[key_map[:author]] else + # only assign the _1 attributes once we see the second author + if author_idx == 2 + keys.each do |key| + author_metadata[%(#{key}_1)] = author_metadata[key] if author_metadata.key? key + end + end author_metadata['authors'] = %(#{author_metadata['authors']}, #{author_metadata[key_map[:author]]}) end end + author_metadata['authorcount'] = author_idx author_metadata end # Internal: Parse lines of metadata until a line of metadata is not found. # @@ -2010,11 +2041,15 @@ attributes['reftext'] = (reftext.include? ATTR_REF_HEAD) ? (document.sub_attributes reftext) : reftext end return true end elsif (next_line.end_with? ']') && BlockAttributeListRx =~ next_line - document.parse_attributes $1, [], :sub_input => true, :into => attributes + current_style = attributes[1] + # extract id, role, and options from first positional attribute and remove, if present + if (document.parse_attributes $1, [], :sub_input => true, :into => attributes)[1] + attributes[1] = (parse_style_attribute attributes, reader) || current_style + end return true end elsif normal && (next_line.start_with? '.') if BlockTitleRx =~ next_line # NOTE title doesn't apply to section, but we need to stash it for the first block @@ -2025,11 +2060,11 @@ elsif !normal || (next_line.start_with? '/') if next_line == '//' return true elsif normal && '/' * (ll = next_line.length) == next_line unless ll == 3 - reader.read_lines_until :skip_first_line => true, :preserve_last_line => true, :terminator => next_line, :skip_processing => true + reader.read_lines_until :terminator => next_line, :skip_first_line => true, :preserve_last_line => true, :skip_processing => true, :context => :comment return true end else return true unless next_line.start_with? '///' end if next_line.start_with? '//' @@ -2039,10 +2074,13 @@ return true end end end + # Process consecutive attribute entry lines, ignoring adjacent line comments and comment blocks. + # + # Returns nothing def self.process_attribute_entries reader, document, attributes = nil reader.skip_comment_lines while process_attribute_entry reader, document, attributes # discard line just processed reader.shift @@ -2133,16 +2171,16 @@ # ordinal - The position of this list item in the list # validate - Whether to validate the value of the marker # # Returns the String 0-index marker for this list item def self.resolve_list_marker(list_type, marker, ordinal = 0, validate = false, reader = nil) - if list_type == :olist - (marker.start_with? '.') ? marker : (resolve_ordered_list_marker marker, ordinal, validate, reader) - elsif list_type == :colist - '<1>' - else + if list_type == :ulist marker + elsif list_type == :olist + resolve_ordered_list_marker(marker, ordinal, validate, reader) + else # :colist + '<1>' end end # Internal: Resolve the 0-index marker for this ordered list item # @@ -2164,10 +2202,11 @@ # Parser.resolve_ordered_list_marker(marker, 1, true) # # => 'A.' # # Returns the String of the first marker in this number series def self.resolve_ordered_list_marker(marker, ordinal = 0, validate = false, reader = nil) + return marker if marker.start_with? '.' expected = actual = nil case ORDERED_LIST_STYLES.find {|s| OrderedListMarkerRxMap[s].match? marker } when :arabic if validate expected = ordinal + 1 @@ -2186,26 +2225,24 @@ actual = marker.chop # remove trailing . end marker = 'A.' when :lowerroman if validate - # TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb - expected = ordinal + 1 - actual = roman_numeral_to_int(marker.chop) # remove trailing ) and coerce to int + expected = Helpers.int_to_roman(ordinal + 1).downcase + actual = marker.chop # remove trailing ) end marker = 'i)' when :upperroman if validate - # TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb - expected = ordinal + 1 - actual = roman_numeral_to_int(marker.chop) # remove trailing ) and coerce to int + expected = Helpers.int_to_roman(ordinal + 1) + actual = marker.chop # remove trailing ) end marker = 'I)' end if validate && expected != actual - warn %(asciidoctor: WARNING: #{reader.line_info}: list item index: expected #{expected}, got #{actual}) + logger.warn message_with_context %(list item index: expected #{expected}, got #{actual}), :source_location => reader.cursor end marker end @@ -2219,22 +2256,17 @@ # Returns a Boolean indicating whether this line is a sibling list item given # the criteria provided def self.is_sibling_list_item?(line, list_type, sibling_trait) if ::Regexp === sibling_trait matcher = sibling_trait - expected_marker = false else matcher = ListRxMap[list_type] expected_marker = sibling_trait end - if (m = matcher.match(line)) - if expected_marker - expected_marker == resolve_list_marker(list_type, m[1]) - else - true - end + if matcher =~ line + expected_marker ? expected_marker == resolve_list_marker(list_type, $1) : true else false end end @@ -2261,11 +2293,11 @@ parser_ctx = Table::ParserContext.new table_reader, table, attributes format, loop_idx, implicit_header_boundary = parser_ctx.format, -1, nil implicit_header = true unless skipped > 0 || (attributes.key? 'header-option') || (attributes.key? 'noheader-option') while (line = table_reader.read_line) - if (loop_idx += 1) > 0 && line.empty? + if (beyond_first = (loop_idx += 1) > 0) && line.empty? line = nil implicit_header_boundary += 1 if implicit_header_boundary elsif format == 'psv' if parser_ctx.starts_with_delimiter? line line = line.slice 1, line.length @@ -2283,62 +2315,67 @@ implicit_header, implicit_header_boundary = false, nil end end end - # NOTE implicit header is offset by at least one blank line; implicit_header_boundary tracks size of gap - if loop_idx == 0 && implicit_header - if table_reader.has_more_lines? && table_reader.peek_line.empty? - implicit_header_boundary = 1 - else - implicit_header = false + unless beyond_first + table_reader.mark + # NOTE implicit header is offset by at least one blank line; implicit_header_boundary tracks size of gap + if implicit_header + if table_reader.has_more_lines? && table_reader.peek_line.empty? + implicit_header_boundary = 1 + else + implicit_header = false + end end end # this loop is used for flow control; internal logic controls how many times it executes while true if line && (m = parser_ctx.match_delimiter line) + pre_match, post_match = m.pre_match, m.post_match case format when 'csv' - if parser_ctx.buffer_has_unclosed_quotes? m.pre_match - break if (line = parser_ctx.skip_past_delimiter m).empty? + if parser_ctx.buffer_has_unclosed_quotes? pre_match + parser_ctx.skip_past_delimiter pre_match + break if (line = post_match).empty? redo end - parser_ctx.buffer = %(#{parser_ctx.buffer}#{m.pre_match}) + parser_ctx.buffer = %(#{parser_ctx.buffer}#{pre_match}) when 'dsv' - if m.pre_match.end_with? '\\' - if (line = parser_ctx.skip_past_escaped_delimiter m).empty? + if pre_match.end_with? '\\' + parser_ctx.skip_past_escaped_delimiter pre_match + if (line = post_match).empty? parser_ctx.buffer = %(#{parser_ctx.buffer}#{LF}) parser_ctx.keep_cell_open break end redo end - parser_ctx.buffer = %(#{parser_ctx.buffer}#{m.pre_match}) + parser_ctx.buffer = %(#{parser_ctx.buffer}#{pre_match}) else # psv - if m.pre_match.end_with? '\\' - if (line = parser_ctx.skip_past_escaped_delimiter m).empty? + if pre_match.end_with? '\\' + parser_ctx.skip_past_escaped_delimiter pre_match + if (line = post_match).empty? parser_ctx.buffer = %(#{parser_ctx.buffer}#{LF}) parser_ctx.keep_cell_open break end redo end - next_cellspec, cell_text = parse_cellspec m.pre_match + next_cellspec, cell_text = parse_cellspec pre_match parser_ctx.push_cellspec next_cellspec parser_ctx.buffer = %(#{parser_ctx.buffer}#{cell_text}) end # don't break if empty to preserve empty cell found at end of line (see issue #1106) - line = nil if (line = m.post_match).empty? + line = nil if (line = post_match).empty? parser_ctx.close_cell else # no other delimiters to see here; suck up this line into the buffer and move on parser_ctx.buffer = %(#{parser_ctx.buffer}#{line}#{LF}) case format when 'csv' - # QUESTION make stripping endlines in csv data an option? (unwrap-option?) - parser_ctx.buffer = %(#{parser_ctx.buffer.rstrip} ) if parser_ctx.buffer_has_unclosed_quotes? implicit_header, implicit_header_boundary = false, nil if implicit_header_boundary && loop_idx == 0 parser_ctx.keep_cell_open else parser_ctx.close_cell true @@ -2410,13 +2447,16 @@ if !rowspec.nil_or_empty? && TableCellVertAlignments.key?(rowspec) spec['valign'] = TableCellVertAlignments[rowspec] end end - # to_i permits us to support percentage width by stripping the % - # NOTE this is slightly out of compliance w/ AsciiDoc, but makes way more sense - spec['width'] = (m[3] ? m[3].to_i : 1) + if (width = m[3]) + # to_i will strip the optional % + spec['width'] = width == '~' ? -1 : width.to_i + else + spec['width'] = 1 + end # make this an operation if m[4] && TableCellStyles.key?(m[4]) spec['style'] = TableCellStyles[m[4]] end @@ -2525,19 +2565,27 @@ type, collector, parsed = :style, [], {} # QUESTION should this be a private method? (though, it's never called if shorthand isn't used) save_current = lambda { if collector.empty? unless type == :style - warn %(asciidoctor: WARNING:#{reader ? " #{reader.prev_line_info}:" : nil} invalid empty #{type} detected in style attribute) + if reader + logger.warn message_with_context %(invalid empty #{type} detected in style attribute), :source_location => reader.cursor_at_prev_line + else + logger.warn %(invalid empty #{type} detected in style attribute) + end end else case type when :role, :option (parsed[type] ||= []) << collector.join when :id if parsed.key? :id - warn %(asciidoctor: WARNING:#{reader ? " #{reader.prev_line_info}:" : nil} multiple ids detected in style attribute) + if reader + logger.warn message_with_context 'multiple ids detected in style attribute', :source_location => reader.cursor_at_prev_line + else + logger.warn 'multiple ids detected in style attribute' + end end parsed[type] = collector.join else parsed[type] = collector.join end @@ -2569,19 +2617,17 @@ parsed_style = attributes['style'] = parsed[:style] if parsed.key? :style attributes['id'] = parsed[:id] if parsed.key? :id - attributes['role'] = parsed[:role] * ' ' if parsed.key? :role + if parsed.key? :role + attributes['role'] = (existing_role = attributes['role']).nil_or_empty? ? (parsed[:role].join ' ') : %(#{existing_role} #{parsed[:role].join ' '}) + end if parsed.key? :option - (options = parsed[:option]).each {|option| attributes[%(#{option}-option)] = '' } - if (existing_opts = attributes['options']) - attributes['options'] = (options + existing_opts.split(',')) * ',' - else - attributes['options'] = options * ',' - end + (opts = parsed[:option]).each {|opt| attributes[%(#{opt}-option)] = '' } + attributes['options'] = (existing_opts = attributes['options']).nil_or_empty? ? (opts.join ',') : %(#{existing_opts},#{opts.join ','}) end parsed_style end else @@ -2707,30 +2753,8 @@ # # sanitize_attribute_name('Foo 3 #-Billy') # => 'foo3-billy' def self.sanitize_attribute_name(name) name.gsub(InvalidAttributeNameCharsRx, '').downcase - end - - # Internal: Converts a Roman numeral to an integer value. - # - # value - The String Roman numeral to convert - # - # Returns the Integer for this Roman numeral - def self.roman_numeral_to_int(value) - value = value.downcase - digits = { 'i' => 1, 'v' => 5, 'x' => 10 } - result = 0 - - (0..value.length - 1).each {|i| - digit = digits[value[i..i]] - if i + 1 < value.length && digits[value[i+1..i+1]] > digit - result -= digit - else - result += digit - end - } - - result end end end