# encoding: UTF-8 module Asciidoctor # Public: Methods to parse lines of AsciiDoc into an object hierarchy # representing the structure of the document. All methods are class methods and # should be invoked from the Parser class. The main entry point is ::next_block. # No Parser instances shall be discovered running around. (Any attempt to # instantiate a Parser will be futile). # # The object hierarchy created by the Parser consists of zero or more Section # and Block objects. Section objects may be nested and a Section object # contains zero or more Block objects. Block objects may be nested, but may # only contain other Block objects. Block objects which represent lists may # contain zero or more ListItem objects. # # Examples # # # Create a Reader for the AsciiDoc lines and retrieve the next block from it. # # Parser.next_block requires a parent, so we begin by instantiating an empty Document. # # doc = Document.new # reader = Reader.new lines # block = Parser.next_block(reader, doc) # block.class # # => Asciidoctor::Block class Parser BlockMatchData = Struct.new :context, :masq, :tip, :terminator # Public: Make sure the Parser object doesn't get initialized. # # Raises RuntimeError if this constructor is invoked. def initialize raise 'Au contraire, mon frere. No lexer instances will be running around.' end # Public: Parses AsciiDoc source read from the Reader into the Document # # This method is the main entry-point into the Parser when parsing a full document. # It first looks for and, if found, processes the document title. It then # proceeds to iterate through the lines in the Reader, parsing the document # into nested Sections and Blocks. # # reader - the Reader holding the source lines of the document # document - the empty Document into which the lines will be parsed # options - a Hash of options to control processing # # returns the Document object def self.parse(reader, document, options = {}) block_attributes = parse_document_header(reader, document) unless options[:header_only] while reader.has_more_lines? new_section, block_attributes = next_section(reader, document, block_attributes) document << new_section if new_section end end document end # Public: Parses the document header of the AsciiDoc source read from the Reader # # Reads the AsciiDoc source from the Reader until the end of the document # header is reached. The Document object is populated with information from # the header (document title, document attributes, etc). The document # attributes are then saved to establish a save point to which to rollback # after parsing is complete. # # This method assumes that there are no blank lines at the start of the document, # which are automatically removed by the reader. # # returns the Hash of orphan block attributes captured above the header def self.parse_document_header(reader, document) # capture any lines of block-level metadata and plow away any comment lines # that precede first block block_attributes = parse_block_metadata_lines(reader, document) # special case, block title is not allowed above document title, # carry attributes over to the document body if block_attributes.has_key?('title') return document.finalize_header block_attributes, false end # yep, document title logic in AsciiDoc is just insanity # definitely an area for spec refinement assigned_doctitle = nil unless (val = document.attributes['doctitle']).nil_or_empty? document.title = val assigned_doctitle = val end section_title = nil # check if the first line is the document title # if so, add a header to the document and parse the header metadata if is_next_line_document_title?(reader, block_attributes) source_location = reader.cursor if document.sourcemap document.id, _, doctitle, _, single_line = parse_section_title(reader, document) unless assigned_doctitle document.title = doctitle assigned_doctitle = doctitle end # default to compat-mode if document uses atx-style doctitle document.set_attribute 'compat-mode', '' unless single_line document.header.source_location = source_location if source_location document.attributes['doctitle'] = section_title = doctitle # QUESTION: should the id assignment on Document be encapsulated in the Document class? unless document.id document.id = block_attributes.delete('id') end parse_header_metadata(reader, document) end if !(val = document.attributes['doctitle']).nil_or_empty? && val != section_title document.title = val assigned_doctitle = val end # restore doctitle attribute to original assignment if assigned_doctitle document.attributes['doctitle'] = assigned_doctitle end # parse title and consume name section of manpage document parse_manpage_header(reader, document) if document.doctype == 'manpage' # NOTE block_attributes are the block-level attributes (not document attributes) that # precede the first line of content (document title, first section or first block) document.finalize_header block_attributes end # Public: Parses the manpage header of the AsciiDoc source read from the Reader # # returns Nothing def self.parse_manpage_header(reader, document) if (m = ManpageTitleVolnumRx.match(document.attributes['doctitle'])) document.attributes['mantitle'] = document.sub_attributes(m[1].rstrip.downcase) document.attributes['manvolnum'] = m[2].strip else warn %(asciidoctor: ERROR: #{reader.prev_line_info}: malformed manpage title) end reader.skip_blank_lines if is_next_line_section?(reader, {}) name_section = initialize_section(reader, document, {}) if name_section.level == 1 name_section_buffer = reader.read_lines_until(:break_on_blank_lines => true).join(' ').tr_s(' ', ' ') if (m = ManpageNamePurposeRx.match(name_section_buffer)) document.attributes['manname'] = document.sub_attributes m[1] document.attributes['manpurpose'] = m[2] # TODO parse multiple man names if document.backend == 'manpage' document.attributes['docname'] = document.attributes['manname'] document.attributes['outfilesuffix'] = %(.#{document.attributes['manvolnum']}) end else warn %(asciidoctor: ERROR: #{reader.prev_line_info}: malformed name section body) end else warn %(asciidoctor: ERROR: #{reader.prev_line_info}: name section title must be at level 1) end else warn %(asciidoctor: ERROR: #{reader.prev_line_info}: name section expected) end end # Public: Return the next section from the Reader. # # This method process block metadata, content and subsections for this # section and returns the Section object and any orphaned attributes. # # If the parent is a Document and has a header (document title), then # this method will put any non-section blocks at the start of document # into a preamble Block. If there are no such blocks, the preamble is # dropped. # # Since we are reading line-by-line, there's a chance that metadata # that should be associated with the following block gets consumed. # To deal with this case, the method returns a running Hash of # "orphaned" attributes that get passed to the next Section or Block. # # reader - the source Reader # parent - the parent Section or Document of this new section # attributes - a Hash of metadata that was left orphaned from the # previous Section. # # Examples # # source # # => "= Greetings\n\nThis is my doc.\n\n== Salutations\n\nIt is awesome." # # reader = Reader.new source, nil, :normalize => true # # create empty document to parent the section # # and hold attributes extracted from header # doc = Document.new # # Parser.next_section(reader, doc).first.title # # => "Greetings" # # Parser.next_section(reader, doc).first.title # # => "Salutations" # # returns a two-element Array containing the Section and Hash of orphaned attributes def self.next_section(reader, parent, attributes = {}) preamble = false part = false intro = false # FIXME if attributes[1] is a verbatim style, then don't check for section # check if we are at the start of processing the document # NOTE we could drop a hint in the attributes to indicate # that we are at a section title (so we don't have to check) if parent.context == :document && parent.blocks.empty? && ((has_header = parent.has_header?) || attributes.delete('invalid-header') || !is_next_line_section?(reader, attributes)) doctype = parent.doctype if has_header || (doctype == 'book' && attributes[1] != 'abstract') preamble = intro = Block.new(parent, :preamble, :content_model => :compound) if doctype == 'book' && (parent.attr? 'preface-title') preamble.title = parent.attr 'preface-title' end parent << preamble end section = parent current_level = 0 if parent.attributes.has_key? 'fragment' expected_next_levels = nil # small tweak to allow subsequent level-0 sections for book doctype elsif doctype == 'book' expected_next_levels = [0, 1] else expected_next_levels = [1] end else doctype = parent.document.doctype section = initialize_section(reader, parent, attributes) # clear attributes, except for title which carries over # section title to next block of content attributes = (title = attributes['title']) ? { 'title' => title } : {} current_level = section.level if current_level == 0 && doctype == 'book' part = !section.special # subsections in preface & appendix in multipart books start at level 2 if section.special && (['preface', 'appendix'].include? section.sectname) expected_next_levels = [current_level + 2] else expected_next_levels = [current_level + 1] end else expected_next_levels = [current_level + 1] end end reader.skip_blank_lines # Parse lines belonging to this section and its subsections until we # reach the end of this section level # # 1. first look for metadata thingies (anchor, attribute list, block title line, etc) # 2. then look for a section, recurse if found # 3. then process blocks # # We have to parse all the metadata lines before continuing with the loop, # otherwise subsequent metadata lines get interpreted as block content while reader.has_more_lines? parse_block_metadata_lines(reader, section, attributes) if (next_level = is_next_line_section? reader, attributes) next_level += section.document.attr('leveloffset', 0).to_i if next_level > current_level || (section.context == :document && next_level == 0) if next_level == 0 && doctype != 'book' warn %(asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections) elsif expected_next_levels && !expected_next_levels.include?(next_level) warn %(asciidoctor: WARNING: #{reader.line_info}: section title out of sequence: ) + %(expected #{expected_next_levels.size > 1 ? 'levels' : 'level'} #{expected_next_levels * ' or '}, ) + %(got level #{next_level}) end # the attributes returned are those that are orphaned new_section, attributes = next_section(reader, section, attributes) section << new_section else if next_level == 0 && doctype != 'book' warn %(asciidoctor: ERROR: #{reader.line_info}: only book doctypes can contain level 0 sections) end # close this section (and break out of the nesting) to begin a new one break end else # just take one block or else we run the risk of overrunning section boundaries block_line_info = reader.line_info if (new_block = next_block reader, (intro || section), attributes, :parse_metadata => false) # REVIEW this may be doing too much if part if !section.blocks? # if this block wasn't marked as [partintro], emulate behavior as if it had if new_block.style != 'partintro' # emulate [partintro] paragraph if new_block.context == :paragraph new_block.context = :open new_block.style = 'partintro' # emulate [partintro] open block else intro = Block.new section, :open, :content_model => :compound intro.style = 'partintro' new_block.parent = intro section << intro end end elsif section.blocks.size == 1 first_block = section.blocks[0] # open the [partintro] open block for appending if !intro && first_block.content_model == :compound #new_block.parent = (intro = first_block) warn %(asciidoctor: ERROR: #{block_line_info}: illegal block content outside of partintro block) # rebuild [partintro] paragraph as an open block elsif first_block.content_model != :compound intro = Block.new section, :open, :content_model => :compound intro.style = 'partintro' section.blocks.shift if first_block.style == 'partintro' first_block.context = :paragraph first_block.style = nil end first_block.parent = intro intro << first_block new_block.parent = intro section << intro end end end (intro || section) << new_block attributes = {} #else # # don't clear attributes if we don't find a block because they may # # be trailing attributes that didn't get associated with a block end end reader.skip_blank_lines end if part unless section.blocks? && section.blocks[-1].context == :section warn %(asciidoctor: ERROR: #{reader.line_info}: invalid part, must have at least one section (e.g., chapter, appendix, etc.)) end # NOTE we could try to avoid creating a preamble in the first place, though # that would require reworking assumptions in next_section since the preamble # is treated like an untitled section elsif preamble # implies parent == document document = parent if preamble.blocks? # unwrap standalone preamble (i.e., no sections), if permissible if Compliance.unwrap_standalone_preamble && document.blocks.size == 1 && doctype != 'book' document.blocks.shift while (child_block = preamble.blocks.shift) child_block.parent = document document << child_block end end # drop the preamble if it has no content else document.blocks.shift end end # The attributes returned here are orphaned attributes that fall at the end # of a section that need to get transfered to the next section # see "trailing block attributes transfer to the following section" in # test/attributes_test.rb for an example [section != parent ? section : nil, attributes.dup] end # Public: Return the next Section or Block object from the Reader. # # Begins by skipping over blank lines to find the start of the next Section # or Block. Processes each line of the reader in sequence until a Section or # Block is found or the reader has no more lines. # # Uses regular expressions from the Asciidoctor module to match Section # and Block delimiters. The ensuing lines are then processed according # to the type of content. # # reader - The Reader from which to retrieve the next block # parent - The Document, Section or Block to which the next block belongs # # Returns a Section or Block object holding the parsed content of the processed lines #-- # QUESTION should next_block have an option for whether it should keep looking until # a block is found? right now it bails when it encounters a line to be skipped def self.next_block(reader, parent, attributes = {}, options = {}) # Skip ahead to the block content skipped = reader.skip_blank_lines # bail if we've reached the end of the parent block or document return unless reader.has_more_lines? # check for option to find list item text only # if skipped a line, assume a list continuation was # used and block content is acceptable if (text_only = options[:text]) && skipped > 0 options.delete(:text) text_only = false end parse_metadata = options.fetch(:parse_metadata, true) #parse_sections = options.fetch(:parse_sections, false) document = parent.document if (extensions = document.extensions) block_extensions = extensions.blocks? block_macro_extensions = extensions.block_macros? else block_extensions = block_macro_extensions = false end #parent_context = parent.is_a?(Block) ? parent.context : nil in_list = (parent.is_a? List) block = nil style = nil explicit_style = nil sourcemap = document.sourcemap source_location = nil while !block && reader.has_more_lines? # if parsing metadata, read until there is no more to read if parse_metadata && parse_block_metadata_line(reader, document, attributes, options) reader.advance next #elsif parse_sections && !parent_context && is_next_line_section?(reader, attributes) # block, attributes = next_section(reader, parent, attributes) # break end # QUESTION should we introduce a parsing context object? source_location = reader.cursor if sourcemap this_line = reader.read_line delimited_block = false block_context = nil cloaked_context = nil terminator = nil # QUESTION put this inside call to rekey attributes? if attributes[1] style, explicit_style = parse_style_attribute(attributes, reader) end if (delimited_blk_match = is_delimited_block? this_line, true) delimited_block = true block_context = cloaked_context = delimited_blk_match.context terminator = delimited_blk_match.terminator if !style style = attributes['style'] = block_context.to_s elsif style != block_context.to_s if delimited_blk_match.masq.include? style block_context = style.to_sym elsif delimited_blk_match.masq.include?('admonition') && ADMONITION_STYLES.include?(style) block_context = :admonition elsif block_extensions && extensions.registered_for_block?(style, block_context) block_context = style.to_sym else warn %(asciidoctor: WARNING: #{reader.prev_line_info}: invalid style for #{block_context} block: #{style}) style = block_context.to_s end end end unless delimited_block # this loop only executes once; used for flow control # break once a block is found or at end of loop # returns nil if the line must be dropped # Implementation note - while(true) is twice as fast as loop while true # process lines verbatim if style && Compliance.strict_verbatim_paragraphs && VERBATIM_STYLES.include?(style) block_context = style.to_sym reader.unshift_line this_line # advance to block parsing => break end # process lines normally unless text_only first_char = Compliance.markdown_syntax ? this_line.lstrip.chr : this_line.chr # NOTE we're letting break lines (horizontal rule, page_break, etc) have attributes if (LAYOUT_BREAK_LINES.has_key? first_char) && this_line.length >= 3 && (Compliance.markdown_syntax ? LayoutBreakLinePlusRx : LayoutBreakLineRx) =~ this_line block = Block.new(parent, LAYOUT_BREAK_LINES[first_char], :content_model => :empty) break elsif this_line.end_with?(']') && (match = MediaBlockMacroRx.match(this_line)) blk_ctx = match[1].to_sym block = Block.new(parent, blk_ctx, :content_model => :empty) if blk_ctx == :image posattrs = ['alt', 'width', 'height'] elsif blk_ctx == :video posattrs = ['poster', 'width', 'height'] else posattrs = [] end unless !style || explicit_style attributes['alt'] = style if blk_ctx == :image attributes.delete('style') style = nil end block.parse_attributes(match[3], posattrs, :unescape_input => (blk_ctx == :image), :sub_input => true, :sub_result => false, :into => attributes) target = block.sub_attributes(match[2], :attribute_missing => 'drop-line') if target.empty? # retain as unparsed if attribute-missing is skip if document.attributes.fetch('attribute-missing', Compliance.attribute_missing) == 'skip' return Block.new(parent, :paragraph, :content_model => :simple, :source => [this_line]) # otherwise, drop the line else attributes.clear return end end attributes['target'] = target # now done down below #block.title = attributes.delete('title') if attributes.has_key?('title') #if blk_ctx == :image # if attributes.has_key? 'scaledwidth' # # append % to scaledwidth if ends in number (no units present) # if (48..57).include?((attributes['scaledwidth'][-1] || 0).ord) # attributes['scaledwidth'] = %(#{attributes['scaledwidth']}%) # end # end # document.register(:images, target) # attributes['alt'] ||= ::File.basename(target, ::File.extname(target)).tr('_-', ' ') # # QUESTION should video or audio have an auto-numbered caption? # block.assign_caption attributes.delete('caption'), 'figure' #end break # NOTE we're letting the toc macro have attributes elsif first_char == 't' && (match = TocBlockMacroRx.match(this_line)) block = Block.new(parent, :toc, :content_model => :empty) block.parse_attributes(match[1], [], :sub_result => false, :into => attributes) break elsif block_macro_extensions && (match = GenericBlockMacroRx.match(this_line)) && (extension = extensions.registered_for_block_macro?(match[1])) target = match[2] raw_attributes = match[3] if extension.config[:content_model] == :attributes unless raw_attributes.empty? document.parse_attributes(raw_attributes, (extension.config[:pos_attrs] || []), :sub_input => true, :sub_result => false, :into => attributes) end else attributes['text'] = raw_attributes end if (default_attrs = extension.config[:default_attrs]) default_attrs.each {|k, v| attributes[k] ||= v } end if (block = extension.process_method[parent, target, attributes.dup]) attributes.replace block.attributes else attributes.clear return end break end end # haven't found anything yet, continue if (match = CalloutListRx.match(this_line)) block = List.new(parent, :colist) attributes['style'] = 'arabic' reader.unshift_line this_line expected_index = 1 begin # might want to move this check to a validate method if match[1].to_i != expected_index # FIXME this lineno - 2 hack means we need a proper look-behind cursor warn %(asciidoctor: WARNING: #{reader.path}: line #{reader.lineno - 2}: callout list item index: expected #{expected_index} got #{match[1]}) end list_item = next_list_item(reader, block, match) expected_index += 1 if list_item block << list_item coids = document.callouts.callout_ids(block.items.size) if !coids.empty? list_item.attributes['coids'] = coids else # FIXME this lineno - 2 hack means we need a proper look-behind cursor warn %(asciidoctor: WARNING: #{reader.path}: line #{reader.lineno - 2}: no callouts refer to list item #{block.items.size}) end end end while reader.has_more_lines? && (match = CalloutListRx.match(reader.peek_line)) document.callouts.next_list break elsif UnorderedListRx =~ this_line reader.unshift_line this_line block = next_outline_list(reader, :ulist, parent) break elsif (match = OrderedListRx.match(this_line)) reader.unshift_line this_line block = next_outline_list(reader, :olist, parent) # QUESTION move this logic to next_outline_list? if !attributes['style'] && !block.attributes['style'] marker = block.items[0].marker if marker.start_with? '.' # first one makes more sense, but second one is AsciiDoc-compliant #attributes['style'] = (ORDERED_LIST_STYLES[block.level - 1] || ORDERED_LIST_STYLES[0]).to_s attributes['style'] = (ORDERED_LIST_STYLES[marker.length - 1] || ORDERED_LIST_STYLES[0]).to_s else style = ORDERED_LIST_STYLES.detect{|s| OrderedListMarkerRxMap[s] =~ marker } attributes['style'] = (style || ORDERED_LIST_STYLES[0]).to_s end end break elsif (match = DefinitionListRx.match(this_line)) reader.unshift_line this_line block = next_labeled_list(reader, match, parent) break elsif (style == 'float' || style == 'discrete') && is_section_title?(this_line, (Compliance.underline_style_section_titles ? reader.peek_line(true) : nil)) reader.unshift_line this_line float_id, float_reftext, float_title, float_level, _ = parse_section_title(reader, document) attributes['reftext'] = float_reftext if float_reftext float_id ||= attributes['id'] if attributes.has_key?('id') block = Block.new(parent, :floating_title, :content_model => :empty) if float_id.nil_or_empty? # FIXME remove hack of creating throwaway Section to get at the generate_id method tmp_sect = Section.new(parent) tmp_sect.title = float_title block.id = tmp_sect.generate_id else block.id = float_id end block.level = float_level block.title = float_title break # FIXME create another set for "passthrough" styles # FIXME make this more DRY! elsif style && style != 'normal' if PARAGRAPH_STYLES.include?(style) block_context = style.to_sym cloaked_context = :paragraph reader.unshift_line this_line # advance to block parsing => break elsif ADMONITION_STYLES.include?(style) block_context = :admonition cloaked_context = :paragraph reader.unshift_line this_line # advance to block parsing => break elsif block_extensions && extensions.registered_for_block?(style, :paragraph) block_context = style.to_sym cloaked_context = :paragraph reader.unshift_line this_line # advance to block parsing => break else warn %(asciidoctor: WARNING: #{reader.prev_line_info}: invalid style for paragraph: #{style}) style = nil # continue to process paragraph end end break_at_list = (skipped == 0 && in_list) # a literal paragraph is contiguous lines starting at least one space if style != 'normal' && LiteralParagraphRx =~ this_line # So we need to actually include this one in the read_lines group reader.unshift_line this_line lines = reader.read_lines_until( :break_on_blank_lines => true, :break_on_list_continuation => true, :preserve_last_line => true) {|line| # a preceding blank line (skipped > 0) indicates we are in a list continuation # and therefore we should not break at a list item # (this won't stop breaking on item of same level since we've already parsed them out) # QUESTION can we turn this block into a lambda or function call? (break_at_list && AnyListRx =~ line) || (Compliance.block_terminates_paragraph && (is_delimited_block?(line) || BlockAttributeLineRx =~ line)) } reset_block_indent! lines block = Block.new(parent, :literal, :content_model => :verbatim, :source => lines, :attributes => attributes) # a literal gets special meaning inside of a definition list # TODO this feels hacky, better way to distinguish from explicit literal block? block.set_option('listparagraph') if in_list # a paragraph is contiguous nonblank/noncontinuation lines else reader.unshift_line this_line lines = reader.read_lines_until( :break_on_blank_lines => true, :break_on_list_continuation => true, :preserve_last_line => true, :skip_line_comments => true) {|line| # a preceding blank line (skipped > 0) indicates we are in a list continuation # and therefore we should not break at a list item # (this won't stop breaking on item of same level since we've already parsed them out) # QUESTION can we turn this block into a lambda or function call? (break_at_list && AnyListRx =~ line) || (Compliance.block_terminates_paragraph && (is_delimited_block?(line) || BlockAttributeLineRx =~ line)) } # NOTE we need this logic because we've asked the reader to skip # line comments, which may leave us w/ an empty buffer if those # were the only lines found if lines.empty? # call advance since the reader preserved the last line reader.advance return end catalog_inline_anchors(lines.join(EOL), document) first_line = lines[0] if !text_only && (admonition_match = AdmonitionParagraphRx.match(first_line)) lines[0] = admonition_match.post_match.lstrip attributes['style'] = admonition_match[1] attributes['name'] = admonition_name = admonition_match[1].downcase attributes['caption'] ||= document.attributes[%(#{admonition_name}-caption)] block = Block.new(parent, :admonition, :content_model => :simple, :source => lines, :attributes => attributes) elsif !text_only && Compliance.markdown_syntax && first_line.start_with?('> ') lines.map! {|line| if line == '>' line[1..-1] elsif line.start_with? '> ' line[2..-1] else line end } if lines[-1].start_with? '-- ' attribution, citetitle = lines.pop[3..-1].split(', ', 2) lines.pop while lines[-1].empty? else attribution, citetitle = nil end attributes['style'] = 'quote' attributes['attribution'] = attribution if attribution attributes['citetitle'] = citetitle if citetitle # NOTE will only detect headings that are floating titles (not section titles) # TODO could assume a floating title when inside a block context # FIXME Reader needs to be created w/ line info block = build_block(:quote, :compound, false, parent, Reader.new(lines), attributes) elsif !text_only && lines.size > 1 && first_line.start_with?('"') && lines[-1].start_with?('-- ') && lines[-2].end_with?('"') lines[0] = first_line[1..-1] attribution, citetitle = lines.pop[3..-1].split(', ', 2) lines.pop while lines[-1].empty? # strip trailing quote lines[-1] = lines[-1].chop attributes['style'] = 'quote' attributes['attribution'] = attribution if attribution attributes['citetitle'] = citetitle if citetitle block = Block.new(parent, :quote, :content_model => :simple, :source => lines, :attributes => attributes) else # if [normal] is used over an indented paragraph, unindent it if style == 'normal' && ((first_char = lines[0].chr) == ' ' || first_char == TAB) first_line = lines[0] first_line_shifted = first_line.lstrip indent = line_length(first_line) - line_length(first_line_shifted) lines[0] = first_line_shifted # QUESTION should we fix the rest of the lines, since in XML output it's insignificant? lines.size.times do |i| lines[i] = lines[i][indent..-1] if i > 0 end end block = Block.new(parent, :paragraph, :content_model => :simple, :source => lines, :attributes => attributes) end end # forbid loop from executing more than once break end end # either delimited block or styled paragraph if !block && block_context # abstract and partintro should be handled by open block # FIXME kind of hackish...need to sort out how to generalize this block_context = :open if block_context == :abstract || block_context == :partintro case block_context when :admonition attributes['name'] = admonition_name = style.downcase attributes['caption'] ||= document.attributes[%(#{admonition_name}-caption)] block = build_block(block_context, :compound, terminator, parent, reader, attributes) when :comment build_block(block_context, :skip, terminator, parent, reader, attributes) return when :example block = build_block(block_context, :compound, terminator, parent, reader, attributes) when :listing, :fenced_code, :source if block_context == :fenced_code style = attributes['style'] = 'source' language, linenums = this_line[3..-1].split(',', 2) if language && !(language = language.strip).empty? attributes['language'] = language attributes['linenums'] = '' if linenums && !linenums.strip.empty? elsif (default_language = document.attributes['source-language']) attributes['language'] = default_language end terminator = terminator[0..2] elsif block_context == :source AttributeList.rekey(attributes, [nil, 'language', 'linenums']) unless attributes.has_key? 'language' if (default_language = document.attributes['source-language']) attributes['language'] = default_language end end end block = build_block(:listing, :verbatim, terminator, parent, reader, attributes) when :literal block = build_block(block_context, :verbatim, terminator, parent, reader, attributes) when :pass block = build_block(block_context, :raw, terminator, parent, reader, attributes) when :stem, :latexmath, :asciimath if block_context == :stem attributes['style'] = if (explicit_stem_syntax = attributes[2]) explicit_stem_syntax.include?('tex') ? 'latexmath' : 'asciimath' elsif (default_stem_syntax = document.attributes['stem']).nil_or_empty? 'asciimath' else default_stem_syntax end end block = build_block(:stem, :raw, terminator, parent, reader, attributes) when :open, :sidebar block = build_block(block_context, :compound, terminator, parent, reader, attributes) when :table cursor = reader.cursor block_reader = Reader.new reader.read_lines_until(:terminator => terminator, :skip_line_comments => true), cursor case terminator.chr when ',' attributes['format'] = 'csv' when ':' attributes['format'] = 'dsv' end block = next_table(block_reader, parent, attributes) when :quote, :verse AttributeList.rekey(attributes, [nil, 'attribution', 'citetitle']) block = build_block(block_context, (block_context == :verse ? :verbatim : :compound), terminator, parent, reader, attributes) else if block_extensions && (extension = extensions.registered_for_block?(block_context, cloaked_context)) # TODO pass cloaked_context to extension somehow (perhaps a new instance for each cloaked_context?) if (content_model = extension.config[:content_model]) != :skip if !(pos_attrs = extension.config[:pos_attrs] || []).empty? AttributeList.rekey(attributes, [nil].concat(pos_attrs)) end if (default_attrs = extension.config[:default_attrs]) default_attrs.each {|k, v| attributes[k] ||= v } end end block = build_block block_context, content_model, terminator, parent, reader, attributes, :extension => extension unless block && content_model != :skip attributes.clear return end else # this should only happen if there's a misconfiguration raise %(Unsupported block type #{block_context} at #{reader.line_info}) end end end end # when looking for nested content, one or more line comments, comment # blocks or trailing attribute lists could leave us without a block, # so handle accordingly # REVIEW we may no longer need this nil check # FIXME we've got to clean this up, it's horrible! if block block.source_location = source_location if source_location # REVIEW seems like there is a better way to organize this wrap-up block.title = attributes['title'] unless block.title? # FIXME HACK don't hardcode logic for alt, caption and scaledwidth on images down here if block.context == :image resolved_target = attributes['target'] block.document.register(:images, resolved_target) attributes['alt'] ||= ::File.basename(resolved_target, ::File.extname(resolved_target)).tr('_-', ' ') attributes['alt'] = block.sub_specialcharacters attributes['alt'] block.assign_caption attributes.delete('caption'), 'figure' if (scaledwidth = attributes['scaledwidth']) # append % to scaledwidth if ends in number (no units present) if (48..57).include?((scaledwidth[-1] || 0).ord) attributes['scaledwidth'] = %(#{scaledwidth}%) end end else block.caption ||= attributes.delete('caption') end # TODO eventualy remove the style attribute from the attributes hash #block.style = attributes.delete('style') block.style = attributes['style'] # AsciiDoc always use [id] as the reftext in HTML output, # but I'd like to do better in Asciidoctor if (block_id = (block.id ||= attributes['id'])) # TODO sub reftext document.register(:ids, [block_id, (attributes['reftext'] || (block.title? ? block.title : nil))]) end # FIXME remove the need for this update! block.attributes.update(attributes) unless attributes.empty? block.lock_in_subs #if document.attributes.has_key? :pending_attribute_entries # document.attributes.delete(:pending_attribute_entries).each do |entry| # entry.save_to block.attributes # end #end if block.sub? :callouts unless (catalog_callouts block.source, document) # No need to sub callouts if they aren't there block.remove_sub :callouts end end end block end # Public: Determines whether this line is the start of any of the delimited blocks # # returns the match data if this line is the first line of a delimited block or nil if not def self.is_delimited_block? line, return_match_data = false # highly optimized for best performance return unless (line_len = line.length) > 1 && (DELIMITED_BLOCK_LEADERS.include? line[0..1]) # catches open block if line_len == 2 tip = line tl = 2 else # catches all other delimited blocks, including fenced code if line_len <= 4 tip = line tl = line_len else tip = line[0..3] tl = 4 end # special case for fenced code blocks # REVIEW review this logic fenced_code = false if Compliance.markdown_syntax tip_3 = (tl == 4 ? tip.chop : tip) if tip_3 == '```' if tl == 4 && tip.end_with?('`') return end tip = tip_3 tl = 3 fenced_code = true end end # short circuit if not a fenced code block return if tl == 3 && !fenced_code end if DELIMITED_BLOCKS.has_key? tip # tip is the full line when delimiter is minimum length if tl < 4 || tl == line_len if return_match_data context, masq = *DELIMITED_BLOCKS[tip] BlockMatchData.new(context, masq, tip, tip) else true end elsif %(#{tip}#{tip[-1..-1] * (line_len - tl)}) == line if return_match_data context, masq = *DELIMITED_BLOCKS[tip] BlockMatchData.new(context, masq, tip, line) else true end # only enable if/when we decide to support non-congruent block delimiters #elsif (match = BlockDelimiterRx.match(line)) # if return_match_data # context, masq = *DELIMITED_BLOCKS[tip] # BlockMatchData.new(context, masq, tip, match[0]) # else # true # end else nil end else nil end end # whether a block supports complex content should be a config setting # if terminator is false, that means the all the lines in the reader should be parsed # NOTE could invoke filter in here, before and after parsing def self.build_block(block_context, content_model, terminator, parent, reader, attributes, options = {}) if content_model == :skip || content_model == :raw skip_processing = content_model == :skip parse_as_content_model = :simple else skip_processing = false parse_as_content_model = content_model end if terminator.nil? if parse_as_content_model == :verbatim lines = reader.read_lines_until(:break_on_blank_lines => true, :break_on_list_continuation => true) else content_model = :simple if content_model == :compound lines = reader.read_lines_until( :break_on_blank_lines => true, :break_on_list_continuation => true, :preserve_last_line => true, :skip_line_comments => true, :skip_processing => skip_processing) {|line| Compliance.block_terminates_paragraph && (is_delimited_block?(line) || BlockAttributeLineRx =~ line) } # QUESTION check for empty lines after grabbing lines for simple content model? end block_reader = nil elsif parse_as_content_model != :compound lines = reader.read_lines_until(:terminator => terminator, :skip_processing => skip_processing) block_reader = nil # terminator is false when reader has already been prepared elsif terminator == false lines = nil block_reader = reader else lines = nil cursor = reader.cursor block_reader = Reader.new reader.read_lines_until(:terminator => terminator, :skip_processing => skip_processing), cursor end if content_model == :skip attributes.clear # FIXME we shouldn't be mixing return types return lines end if content_model == :verbatim && (indent = attributes['indent']) reset_block_indent! lines, indent.to_i end if (extension = options[:extension]) # QUESTION do we want to delete the style? attributes.delete('style') if (block = extension.process_method[parent, block_reader || (Reader.new lines), attributes.dup]) attributes.replace block.attributes # FIXME if the content model is set to compound, but we only have simple in this context, then # forcefully set the content_model to simple to prevent parsing blocks from children # TODO document this behavior!! if block.content_model == :compound && !(lines = block.lines).nil_or_empty? content_model = :compound block_reader = Reader.new lines end else # FIXME need a test to verify this returns nil at the right time return end else block = Block.new(parent, block_context, :content_model => content_model, :source => lines, :attributes => attributes) end # QUESTION should we have an explicit map or can we rely on check for *-caption attribute? if (attributes.has_key? 'title') && (block.document.attr? %(#{block.context}-caption)) block.title = attributes.delete 'title' block.assign_caption attributes.delete('caption') end if content_model == :compound # we can look for blocks until there are no more lines (and not worry # about sections) since the reader is confined within the boundaries of a # delimited block parse_blocks block_reader, block end block end # Public: Parse blocks from this reader until there are no more lines. # # This method calls Parser#next_block until there are no more lines in the # Reader. It does not consider sections because it's assumed the Reader only # has lines which are within a delimited block region. # # reader - The Reader containing the lines to process # parent - The parent Block to which to attach the parsed blocks # # Returns nothing. def self.parse_blocks(reader, parent) while reader.has_more_lines? block = Parser.next_block(reader, parent) parent << block if block end end # Internal: Parse and construct an outline list Block from the current position of the Reader # # reader - The Reader from which to retrieve the outline list # list_type - A Symbol representing the list type (:olist for ordered, :ulist for unordered) # parent - The parent Block to which this outline list belongs # # Returns the Block encapsulating the parsed outline (unordered or ordered) list def self.next_outline_list(reader, list_type, parent) list_block = List.new(parent, list_type) if parent.context == list_type list_block.level = parent.level + 1 else list_block.level = 1 end while reader.has_more_lines? && (match = ListRxMap[list_type].match(reader.peek_line)) marker = resolve_list_marker(list_type, match[1]) # if we are moving to the next item, and the marker is different # determine if we are moving up or down in nesting if list_block.items? && marker != list_block.items[0].marker # assume list is nested by default, but then check to see if we are # popping out of a nested list by matching an ancestor's list marker this_item_level = list_block.level + 1 ancestor = parent while ancestor.context == list_type if marker == ancestor.items[0].marker this_item_level = ancestor.level break end ancestor = ancestor.parent end else this_item_level = list_block.level end if !list_block.items? || this_item_level == list_block.level list_item = next_list_item(reader, list_block, match) elsif this_item_level < list_block.level # leave this block break elsif this_item_level > list_block.level # If this next list level is down one from the # current Block's, append it to content of the current list item list_block.items[-1] << next_block(reader, list_block) end list_block << list_item if list_item list_item = nil reader.skip_blank_lines end list_block end # Internal: Catalog any callouts found in the text, but don't process them # # text - The String of text in which to look for callouts # document - The current document on which the callouts are stored # # Returns A Boolean indicating whether callouts were found def self.catalog_callouts(text, document) found = false if text.include? '<' text.scan(CalloutQuickScanRx) { # alias match for Ruby 1.8.7 compat m = $~ if m[0].chr != '\\' document.callouts.register(m[2]) end # we have to mark as found even if it's escaped so it can be unescaped found = true } end found end # Internal: Catalog any inline anchors found in the text, but don't process them # # text - The String text in which to look for inline anchors # document - The current document on which the references are stored # # Returns nothing def self.catalog_inline_anchors(text, document) if text.include? '[' text.scan(InlineAnchorRx) { # alias match for Ruby 1.8.7 compat m = $~ next if m[0].start_with? '\\' id = m[1] || m[3] reftext = m[2] || m[4] # enable if we want to allow double quoted values #id = id.sub(DoubleQuotedRx, '\2') #if reftext # reftext = reftext.sub(DoubleQuotedMultiRx, '\2') #end document.register(:ids, [id, reftext]) } end nil end # Internal: Parse and construct a labeled (e.g., definition) list Block from the current position of the Reader # # reader - The Reader from which to retrieve the labeled list # match - The Regexp match for the head of the list # parent - The parent Block to which this labeled list belongs # # Returns the Block encapsulating the parsed labeled list def self.next_labeled_list(reader, match, parent) list_block = List.new(parent, :dlist) previous_pair = nil # allows us to capture until we find a labeled item # that uses the same delimiter (::, :::, :::: or ;;) sibling_pattern = DefinitionListSiblingRx[match[2]] begin term, item = next_list_item(reader, list_block, match, sibling_pattern) if previous_pair && !previous_pair[-1] previous_pair.pop previous_pair[0] << term previous_pair << item else # FIXME this misses the automatic parent assignment list_block.items << (previous_pair = [[term], item]) end end while reader.has_more_lines? && (match = sibling_pattern.match(reader.peek_line)) list_block end # Internal: Parse and construct the next ListItem for the current bulleted # (unordered or ordered) list Block, callout lists included, or the next # term ListItem and definition ListItem pair for the labeled list Block. # # First collect and process all the lines that constitute the next list # item for the parent list (according to its type). Next, parse those lines # into blocks and associate them with the ListItem (in the case of a # labeled list, the definition ListItem). Finally, fold the first block # into the item's text attribute according to rules described in ListItem. # # reader - The Reader from which to retrieve the next list item # list_block - The parent list Block of this ListItem. Also provides access to the list type. # match - The match Array which contains the marker and text (first-line) of the ListItem # sibling_trait - The list marker or the Regexp to match a sibling item # # Returns the next ListItem or ListItem pair (depending on the list type) # for the parent list Block. def self.next_list_item(reader, list_block, match, sibling_trait = nil) if (list_type = list_block.context) == :dlist list_term = ListItem.new(list_block, match[1]) list_item = ListItem.new(list_block, match[3]) has_text = !match[3].nil_or_empty? else # Create list item using first line as the text of the list item text = match[2] checkbox = false if list_type == :ulist && text.start_with?('[') if text.start_with?('[ ] ') checkbox = true checked = false text = text[3..-1].lstrip elsif text.start_with?('[x] ') || text.start_with?('[*] ') checkbox = true checked = true text = text[3..-1].lstrip end end list_item = ListItem.new(list_block, text) if checkbox # FIXME checklist never makes it into the options attribute list_block.attributes['checklist-option'] = '' list_item.attributes['checkbox'] = '' list_item.attributes['checked'] = '' if checked end sibling_trait ||= resolve_list_marker(list_type, match[1], list_block.items.size, true, reader) list_item.marker = sibling_trait has_text = true end # first skip the line with the marker / term reader.advance cursor = reader.cursor list_item_reader = Reader.new read_lines_for_list_item(reader, list_type, sibling_trait, has_text), cursor if list_item_reader.has_more_lines? comment_lines = list_item_reader.skip_line_comments subsequent_line = list_item_reader.peek_line list_item_reader.unshift_lines comment_lines unless comment_lines.empty? if !subsequent_line.nil? continuation_connects_first_block = subsequent_line.empty? # if there's no continuation connecting the first block, then # treat the lines as paragraph text (activated when has_text = false) if !continuation_connects_first_block && list_type != :dlist has_text = false end content_adjacent = !continuation_connects_first_block && !subsequent_line.empty? else continuation_connects_first_block = false content_adjacent = false end # only relevant for :dlist options = {:text => !has_text} # we can look for blocks until there are no more lines (and not worry # about sections) since the reader is confined within the boundaries of a # list while list_item_reader.has_more_lines? new_block = next_block(list_item_reader, list_block, {}, options) list_item << new_block if new_block end list_item.fold_first(continuation_connects_first_block, content_adjacent) end if list_type == :dlist unless list_item.text? || list_item.blocks? list_item = nil end [list_term, list_item] else list_item end end # Internal: Collect the lines belonging to the current list item, navigating # through all the rules that determine what comprises a list item. # # Grab lines until a sibling list item is found, or the block is broken by a # terminator (such as a line comment). Definition lists are more greedy if # they don't have optional inline item text...they want that text # # reader - The Reader from which to retrieve the lines. # list_type - The Symbol context of the list (:ulist, :olist, :colist or :dlist) # sibling_trait - A Regexp that matches a sibling of this list item or String list marker # of the items in this list (default: nil) # has_text - Whether the list item has text defined inline (always true except for labeled lists) # # Returns an Array of lines belonging to the current list item. def self.read_lines_for_list_item(reader, list_type, sibling_trait = nil, has_text = true) buffer = [] # three states for continuation: :inactive, :active & :frozen # :frozen signifies we've detected sequential continuation lines & # continuation is not permitted until reset continuation = :inactive # if we are within a nested list, we don't throw away the list # continuation marks because they will be processed when grabbing # the lines for those nested lists within_nested_list = false # a detached continuation is a list continuation that follows a blank line # it gets associated with the outermost block detached_continuation = nil while reader.has_more_lines? this_line = reader.read_line # if we've arrived at a sibling item in this list, we've captured # the complete list item and can begin processing it # the remainder of the method determines whether we've reached # the termination of the list break if is_sibling_list_item?(this_line, list_type, sibling_trait) prev_line = buffer.empty? ? nil : buffer[-1] if prev_line == LIST_CONTINUATION if continuation == :inactive continuation = :active has_text = true buffer[-1] = '' unless within_nested_list end # dealing with adjacent list continuations (which is really a syntax error) if this_line == LIST_CONTINUATION if continuation != :frozen continuation = :frozen buffer << this_line end this_line = nil next end end # a delimited block immediately breaks the list unless preceded # by a list continuation (they are harsh like that ;0) if (match = is_delimited_block?(this_line, true)) if continuation == :active buffer << this_line # grab all the lines in the block, leaving the delimiters in place # we're being more strict here about the terminator, but I think that's a good thing buffer.concat reader.read_lines_until(:terminator => match.terminator, :read_last_line => true) continuation = :inactive else break end # technically BlockAttributeLineRx only breaks if ensuing line is not a list item # which really means BlockAttributeLineRx only breaks if it's acting as a block delimiter # FIXME to be AsciiDoc compliant, we shouldn't break if style in attribute line is "literal" (i.e., [literal]) elsif list_type == :dlist && continuation != :active && BlockAttributeLineRx =~ this_line break else if continuation == :active && !this_line.empty? # literal paragraphs have special considerations (and this is one of # two entry points into one) # if we don't process it as a whole, then a line in it that looks like a # list item will throw off the exit from it if LiteralParagraphRx =~ this_line reader.unshift_line this_line buffer.concat reader.read_lines_until( :preserve_last_line => true, :break_on_blank_lines => true, :break_on_list_continuation => true) {|line| # we may be in an indented list disguised as a literal paragraph # so we need to make sure we don't slurp up a legitimate sibling list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait) } continuation = :inactive # let block metadata play out until we find the block elsif BlockTitleRx =~ this_line || BlockAttributeLineRx =~ this_line || AttributeEntryRx =~ this_line buffer << this_line else if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| ListRxMap[ctx] =~ this_line } within_nested_list = true if nested_list_type == :dlist && $~[3].nil_or_empty? # get greedy again has_text = false end end buffer << this_line continuation = :inactive end elsif !prev_line.nil? && prev_line.empty? # advance to the next line of content if this_line.empty? reader.skip_blank_lines this_line = reader.read_line # if we hit eof or a sibling, stop reading break if this_line.nil? || is_sibling_list_item?(this_line, list_type, sibling_trait) end if this_line == LIST_CONTINUATION detached_continuation = buffer.size buffer << this_line else # has_text is only relevant for dlist, which is more greedy until it has text for an item # for all other lists, has_text is always true # in this block, we have to see whether we stay in the list if has_text # TODO any way to combine this with the check after skipping blank lines? if is_sibling_list_item?(this_line, list_type, sibling_trait) break elsif nested_list_type = NESTABLE_LIST_CONTEXTS.detect {|ctx| ListRxMap[ctx] =~ this_line } buffer << this_line within_nested_list = true if nested_list_type == :dlist && $~[3].nil_or_empty? # get greedy again has_text = false end # slurp up any literal paragraph offset by blank lines # NOTE we have to check for indented list items first elsif LiteralParagraphRx =~ this_line reader.unshift_line this_line buffer.concat reader.read_lines_until( :preserve_last_line => true, :break_on_blank_lines => true, :break_on_list_continuation => true) {|line| # we may be in an indented list disguised as a literal paragraph # so we need to make sure we don't slurp up a legitimate sibling list_type == :dlist && is_sibling_list_item?(line, list_type, sibling_trait) } else break end else # only dlist in need of item text, so slurp it up! # pop the blank line so it's not interpretted as a list continuation buffer.pop unless within_nested_list buffer << this_line has_text = true end end else has_text = true if !this_line.empty? if nested_list_type = (within_nested_list ? [:dlist] : NESTABLE_LIST_CONTEXTS).detect {|ctx| ListRxMap[ctx] =~ this_line } within_nested_list = true if nested_list_type == :dlist && $~[3].nil_or_empty? # get greedy again has_text = false end end buffer << this_line end end this_line = nil end reader.unshift_line this_line if this_line if detached_continuation buffer.delete_at detached_continuation end # strip trailing blank lines to prevent empty blocks buffer.pop while !buffer.empty? && buffer[-1].empty? # We do need to replace the optional trailing continuation # a blank line would have served the same purpose in the document buffer.pop if !buffer.empty? && buffer[-1] == LIST_CONTINUATION #warn "BUFFER[#{list_type},#{sibling_trait}]>#{buffer * EOL}#{buffer.inspect} ["Foo", "~~~"] # # id, reftext, title, level, single = parse_section_title(reader, document) # # title # # => "Foo" # level # # => 2 # id # # => nil # single # # => false # # line1 # # => "==== Foo" # # id, reftext, title, level, single = parse_section_title(reader, document) # # title # # => "Foo" # level # # => 3 # id # # => nil # single # # => true # # returns an Array of [String, String, Integer, String, Boolean], representing the # id, reftext, title, level and line count of the Section, or nil. # #-- # NOTE for efficiency, we don't reuse methods that check for a section title def self.parse_section_title(reader, document) line1 = reader.read_line sect_id = nil sect_title = nil sect_level = -1 sect_reftext = nil single_line = true first_char = line1.chr if (first_char == '=' || (Compliance.markdown_syntax && first_char == '#')) && (match = AtxSectionRx.match(line1)) sect_level = single_line_section_level match[1] sect_title = match[2] if sect_title.end_with?(']]') && (anchor_match = InlineSectionAnchorRx.match(sect_title)) if anchor_match[2].nil? sect_title = anchor_match[1] sect_id = anchor_match[3] sect_reftext = anchor_match[4] end end elsif Compliance.underline_style_section_titles if (line2 = reader.peek_line(true)) && SECTION_LEVELS.has_key?(line2.chr) && line2 =~ SetextSectionLineRx && (name_match = SetextSectionTitleRx.match(line1)) && # chomp so that a (non-visible) endline does not impact calculation (line_length(line1) - line_length(line2)).abs <= 1 sect_title = name_match[1] if sect_title.end_with?(']]') && (anchor_match = InlineSectionAnchorRx.match(sect_title)) if anchor_match[2].nil? sect_title = anchor_match[1] sect_id = anchor_match[3] sect_reftext = anchor_match[4] end end sect_level = section_level line2 single_line = false reader.advance end end if sect_level >= 0 sect_level += document.attr('leveloffset', 0).to_i end [sect_id, sect_reftext, sect_title, sect_level, single_line] end # Public: Calculate the number of unicode characters in the line, excluding the endline # # line - the String to calculate # # returns the number of unicode characters in the line def self.line_length(line) FORCE_UNICODE_LINE_LENGTH ? line.scan(UnicodeCharScanRx).length : line.length end # Public: Consume and parse the two header lines (line 1 = author info, line 2 = revision info). # # Returns the Hash of header metadata. If a Document object is supplied, the metadata # is applied directly to the attributes of the Document. # # reader - the Reader holding the source lines of the document # document - the Document we are building (default: nil) # # Examples # # data = ["Author Name \n", "v1.0, 2012-12-21: Coincide w/ end of world.\n"] # parse_header_metadata(Reader.new data, nil, :normalize => true) # # => {'author' => 'Author Name', 'firstname' => 'Author', 'lastname' => 'Name', 'email' => 'author@example.org', # # 'revnumber' => '1.0', 'revdate' => '2012-12-21', 'revremark' => 'Coincide w/ end of world.'} def self.parse_header_metadata(reader, document = nil) # NOTE this will discard away any comment lines, but not skip blank lines process_attribute_entries(reader, document) metadata = {} implicit_author = nil implicit_authors = nil if reader.has_more_lines? && !reader.next_line_empty? author_metadata = process_authors reader.read_line unless author_metadata.empty? if document # apply header subs and assign to document author_metadata.each do |key, val| unless document.attributes.has_key? key document.attributes[key] = ((val.is_a? ::String) ? document.apply_header_subs(val) : val) end end implicit_author = document.attributes['author'] implicit_authors = document.attributes['authors'] end metadata = author_metadata end # NOTE this will discard any comment lines, but not skip blank lines process_attribute_entries(reader, document) rev_metadata = {} if reader.has_more_lines? && !reader.next_line_empty? rev_line = reader.read_line if (match = RevisionInfoLineRx.match(rev_line)) rev_metadata['revdate'] = match[2].strip rev_metadata['revnumber'] = match[1].rstrip unless match[1].nil? rev_metadata['revremark'] = match[3].rstrip unless match[3].nil? else # throw it back reader.unshift_line rev_line end end unless rev_metadata.empty? if document # apply header subs and assign to document rev_metadata.each do |key, val| unless document.attributes.has_key? key document.attributes[key] = document.apply_header_subs(val) end end end metadata.update rev_metadata end # NOTE this will discard any comment lines, but not skip blank lines process_attribute_entries(reader, document) reader.skip_blank_lines end if document # process author attribute entries that override (or stand in for) the implicit author line author_metadata = nil if document.attributes.has_key?('author') && (author_line = document.attributes['author']) != implicit_author # do not allow multiple, process as names only author_metadata = process_authors author_line, true, false elsif document.attributes.has_key?('authors') && (author_line = document.attributes['authors']) != implicit_authors # allow multiple, process as names only author_metadata = process_authors author_line, true else authors = [] author_key = %(author_#{authors.size + 1}) while document.attributes.has_key? author_key authors << document.attributes[author_key] author_key = %(author_#{authors.size + 1}) end if authors.size == 1 # do not allow multiple, process as names only author_metadata = process_authors authors[0], true, false elsif authors.size > 1 # allow multiple, process as names only author_metadata = process_authors authors.join('; '), true end end if author_metadata document.attributes.update author_metadata # special case if !document.attributes.has_key?('email') && document.attributes.has_key?('email_1') document.attributes['email'] = document.attributes['email_1'] end end end metadata end # Internal: Parse the author line into a Hash of author metadata # # author_line - the String author line # names_only - a Boolean flag that indicates whether to process line as # names only or names with emails (default: false) # multiple - a Boolean flag that indicates whether to process multiple # semicolon-separated entries in the author line (default: true) # # returns a Hash of author metadata def self.process_authors(author_line, names_only = false, multiple = true) author_metadata = {} keys = ['author', 'authorinitials', 'firstname', 'middlename', 'lastname', 'email'] author_entries = multiple ? (author_line.split ';').map {|line| line.strip } : [author_line] author_entries.each_with_index do |author_entry, idx| next if author_entry.empty? key_map = {} if idx.zero? keys.each do |key| key_map[key.to_sym] = key end else keys.each do |key| key_map[key.to_sym] = %(#{key}_#{idx + 1}) end end segments = nil if names_only # splitting on ' ' will collapse repeating spaces segments = author_entry.split(' ', 3) elsif (match = AuthorInfoLineRx.match(author_entry)) segments = match.to_a segments.shift end unless segments.nil? author_metadata[key_map[:firstname]] = fname = segments[0].tr('_', ' ') author_metadata[key_map[:author]] = fname author_metadata[key_map[:authorinitials]] = fname[0, 1] if !segments[1].nil? && !segments[2].nil? author_metadata[key_map[:middlename]] = mname = segments[1].tr('_', ' ') author_metadata[key_map[:lastname]] = lname = segments[2].tr('_', ' ') author_metadata[key_map[:author]] = [fname, mname, lname].join ' ' author_metadata[key_map[:authorinitials]] = [fname[0, 1], mname[0, 1], lname[0, 1]].join elsif !segments[1].nil? author_metadata[key_map[:lastname]] = lname = segments[1].tr('_', ' ') author_metadata[key_map[:author]] = [fname, lname].join ' ' author_metadata[key_map[:authorinitials]] = [fname[0, 1], lname[0, 1]].join end author_metadata[key_map[:email]] = segments[3] unless names_only || segments[3].nil? else author_metadata[key_map[:author]] = author_metadata[key_map[:firstname]] = fname = author_entry.strip.tr_s(' ', ' ') author_metadata[key_map[:authorinitials]] = fname[0, 1] end author_metadata['authorcount'] = idx + 1 # only assign the _1 attributes if there are multiple authors if idx == 1 keys.each do |key| author_metadata[%(#{key}_1)] = author_metadata[key] if author_metadata.has_key? key end end if idx.zero? author_metadata['authors'] = author_metadata[key_map[:author]] else author_metadata['authors'] = %(#{author_metadata['authors']}, #{author_metadata[key_map[:author]]}) end end author_metadata end # Internal: Parse lines of metadata until a line of metadata is not found. # # This method processes sequential lines containing block metadata, ignoring # blank lines and comments. # # reader - the source reader # parent - the parent to which the lines belong # attributes - a Hash of attributes in which any metadata found will be stored (default: {}) # options - a Hash of options to control processing: (default: {}) # * :text indicates that lexer is only looking for text content # and thus the block title should not be captured # # returns the Hash of attributes including any metadata found def self.parse_block_metadata_lines(reader, parent, attributes = {}, options = {}) while parse_block_metadata_line(reader, parent, attributes, options) # discard the line just processed reader.advance reader.skip_blank_lines end attributes end # Internal: Parse the next line if it contains metadata for the following block # # This method handles lines with the following content: # # * line or block comment # * anchor # * attribute list # * block title # # Any attributes found will be inserted into the attributes argument. # If the line contains block metadata, the method returns true, otherwise false. # # reader - the source reader # parent - the parent of the current line # attributes - a Hash of attributes in which any metadata found will be stored # options - a Hash of options to control processing: (default: {}) # * :text indicates that lexer is only looking for text content # and thus the block title should not be captured # # returns true if the line contains metadata, otherwise false def self.parse_block_metadata_line(reader, parent, attributes, options = {}) return false unless reader.has_more_lines? next_line = reader.peek_line if (commentish = next_line.start_with?('//')) && (match = CommentBlockRx.match(next_line)) terminator = match[0] reader.read_lines_until(:skip_first_line => true, :preserve_last_line => true, :terminator => terminator, :skip_processing => true) elsif commentish && CommentLineRx =~ next_line # do nothing, we'll skip it elsif !options[:text] && next_line.start_with?(':') && (match = AttributeEntryRx.match(next_line)) process_attribute_entry(reader, parent, attributes, match) elsif (in_square_brackets = next_line.start_with?('[') && next_line.end_with?(']')) && (match = BlockAnchorRx.match(next_line)) unless match[1].nil_or_empty? attributes['id'] = match[1] # AsciiDoc always uses [id] as the reftext in HTML output, # but I'd like to do better in Asciidoctor # registration is deferred until the block or section is processed attributes['reftext'] = match[2] unless match[2].nil? end elsif in_square_brackets && (match = BlockAttributeListRx.match(next_line)) parent.document.parse_attributes(match[1], [], :sub_input => true, :into => attributes) # NOTE title doesn't apply to section, but we need to stash it for the first block # TODO should issue an error if this is found above the document title elsif !options[:text] && (match = BlockTitleRx.match(next_line)) attributes['title'] = match[1] else return false end true end def self.process_attribute_entries(reader, parent, attributes = nil) reader.skip_comment_lines while process_attribute_entry(reader, parent, attributes) # discard line just processed reader.advance reader.skip_comment_lines end end def self.process_attribute_entry(reader, parent, attributes = nil, match = nil) match ||= (reader.has_more_lines? ? AttributeEntryRx.match(reader.peek_line) : nil) if match name = match[1] unless (value = match[2] || '').empty? if value.end_with?(line_continuation = LINE_CONTINUATION) || value.end_with?(line_continuation = LINE_CONTINUATION_LEGACY) value = value.chop.rstrip while reader.advance break if (next_line = reader.peek_line.strip).empty? if (keep_open = next_line.end_with? line_continuation) next_line = next_line.chop.rstrip end separator = (value.end_with? LINE_BREAK) ? EOL : ' ' value = %(#{value}#{separator}#{next_line}) break unless keep_open end end end store_attribute(name, value, (parent ? parent.document : nil), attributes) true else false end end # Public: Store the attribute in the document and register attribute entry if accessible # # name - the String name of the attribute to store # value - the String value of the attribute to store # doc - the Document being parsed # attrs - the attributes for the current context # # returns a 2-element array containing the attribute name and value def self.store_attribute(name, value, doc = nil, attrs = nil) # TODO move processing of attribute value to utility method if name.end_with?('!') # a nil value signals the attribute should be deleted (undefined) value = nil name = name.chop elsif name.start_with?('!') # a nil value signals the attribute should be deleted (undefined) value = nil name = name[1..-1] end name = sanitize_attribute_name(name) accessible = true if doc # alias numbered attribute to sectnums if name == 'numbered' name = 'sectnums' # support relative leveloffset values elsif name == 'leveloffset' if value case value.chr when '+' value = ((doc.attr 'leveloffset', 0).to_i + (value[1..-1] || 0).to_i).to_s when '-' value = ((doc.attr 'leveloffset', 0).to_i - (value[1..-1] || 0).to_i).to_s end end end accessible = value ? doc.set_attribute(name, value) : doc.delete_attribute(name) end if accessible && attrs # NOTE lookup resolved value (resolution occurs inside set_attribute) value = doc.attributes[name] if value Document::AttributeEntry.new(name, value).save_to(attrs) end [name, value] end # Internal: Resolve the 0-index marker for this list item # # For ordered lists, match the marker used for this list item against the # known list markers and determine which marker is the first (0-index) marker # in its number series. # # For callout lists, return <1>. # # For bulleted lists, return the marker as passed to this method. # # list_type - The Symbol context of the list # marker - The String marker for this list item # ordinal - The position of this list item in the list # validate - Whether to validate the value of the marker # # Returns the String 0-index marker for this list item def self.resolve_list_marker(list_type, marker, ordinal = 0, validate = false, reader = nil) if list_type == :olist && !marker.start_with?('.') resolve_ordered_list_marker(marker, ordinal, validate, reader) elsif list_type == :colist '<1>' else marker end end # Internal: Resolve the 0-index marker for this ordered list item # # Match the marker used for this ordered list item against the # known ordered list markers and determine which marker is # the first (0-index) marker in its number series. # # The purpose of this method is to normalize the implicit numbered markers # so that they can be compared against other list items. # # marker - The marker used for this list item # ordinal - The 0-based index of the list item (default: 0) # validate - Perform validation that the marker provided is the proper # marker in the sequence (default: false) # # Examples # # marker = 'B.' # Parser.resolve_ordered_list_marker(marker, 1, true) # # => 'A.' # # Returns the String of the first marker in this number series def self.resolve_ordered_list_marker(marker, ordinal = 0, validate = false, reader = nil) number_style = ORDERED_LIST_STYLES.detect {|s| OrderedListMarkerRxMap[s] =~ marker } expected = actual = nil case number_style when :arabic if validate expected = ordinal + 1 actual = marker.to_i end marker = '1.' when :loweralpha if validate expected = ('a'[0].ord + ordinal).chr actual = marker.chomp('.') end marker = 'a.' when :upperalpha if validate expected = ('A'[0].ord + ordinal).chr actual = marker.chomp('.') end marker = 'A.' when :lowerroman if validate # TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb expected = ordinal + 1 actual = roman_numeral_to_int(marker.chomp(')')) end marker = 'i)' when :upperroman if validate # TODO report this in roman numerals; see https://github.com/jamesshipton/roman-numeral/blob/master/lib/roman_numeral.rb expected = ordinal + 1 actual = roman_numeral_to_int(marker.chomp(')')) end marker = 'I)' end if validate && expected != actual warn %(asciidoctor: WARNING: #{reader.line_info}: list item index: expected #{expected}, got #{actual}) end marker end # Internal: Determine whether the this line is a sibling list item # according to the list type and trait (marker) provided. # # line - The String line to check # list_type - The context of the list (:olist, :ulist, :colist, :dlist) # sibling_trait - The String marker for the list or the Regexp to match a sibling # # Returns a Boolean indicating whether this line is a sibling list item given # the criteria provided def self.is_sibling_list_item?(line, list_type, sibling_trait) if sibling_trait.is_a? ::Regexp matcher = sibling_trait expected_marker = false else matcher = ListRxMap[list_type] expected_marker = sibling_trait end if (m = matcher.match(line)) if expected_marker expected_marker == resolve_list_marker(list_type, m[1]) else true end else false end end # Internal: Parse the table contained in the provided Reader # # table_reader - a Reader containing the source lines of an AsciiDoc table # parent - the parent Block of this Asciidoctor::Table # attributes - attributes captured from above this Block # # returns an instance of Asciidoctor::Table parsed from the provided reader def self.next_table(table_reader, parent, attributes) table = Table.new(parent, attributes) if (attributes.has_key? 'title') table.title = attributes.delete 'title' table.assign_caption attributes.delete('caption') end if attributes.has_key? 'cols' table.create_columns(parse_col_specs(attributes['cols'])) explicit_col_specs = true else explicit_col_specs = false end skipped = table_reader.skip_blank_lines parser_ctx = Table::ParserContext.new(table_reader, table, attributes) loop_idx = -1 while table_reader.has_more_lines? loop_idx += 1 line = table_reader.read_line if skipped == 0 && loop_idx.zero? && !attributes.has_key?('options') && !(next_line = table_reader.peek_line).nil? && next_line.empty? table.has_header_option = true table.set_option 'header' end if parser_ctx.format == 'psv' if parser_ctx.starts_with_delimiter? line line = line[1..-1] # push an empty cell spec if boundary at start of line parser_ctx.close_open_cell else next_cell_spec, line = parse_cell_spec(line, :start, parser_ctx.delimiter) # if the cell spec is not null, then we're at a cell boundary if !next_cell_spec.nil? parser_ctx.close_open_cell next_cell_spec else # QUESTION do we not advance to next line? if so, when will we if we came into this block? end end end seen = false while !seen || !line.empty? seen = true if (m = parser_ctx.match_delimiter(line)) if parser_ctx.format == 'csv' if parser_ctx.buffer_has_unclosed_quotes?(m.pre_match) # throw it back, it's too small line = parser_ctx.skip_matched_delimiter(m) next end else if m.pre_match.end_with? '\\' line = parser_ctx.skip_matched_delimiter(m, true) next end end if parser_ctx.format == 'psv' next_cell_spec, cell_text = parse_cell_spec(m.pre_match, :end) parser_ctx.push_cell_spec next_cell_spec parser_ctx.buffer = %(#{parser_ctx.buffer}#{cell_text}) else parser_ctx.buffer = %(#{parser_ctx.buffer}#{m.pre_match}) end if (line = m.post_match) == '' # hack to prevent dropping empty cell found at end of line (see issue #1106) seen = false end parser_ctx.close_cell else # no other delimiters to see here # suck up this line into the buffer and move on parser_ctx.buffer = %(#{parser_ctx.buffer}#{line}#{EOL}) # QUESTION make stripping endlines in csv data an option? (unwrap-option?) if parser_ctx.format == 'csv' parser_ctx.buffer = %(#{parser_ctx.buffer.rstrip} ) end line = '' if parser_ctx.format == 'psv' || (parser_ctx.format == 'csv' && parser_ctx.buffer_has_unclosed_quotes?) parser_ctx.keep_cell_open else parser_ctx.close_cell true end end end skipped = table_reader.skip_blank_lines unless parser_ctx.cell_open? unless table_reader.has_more_lines? # NOTE may have already closed cell in csv or dsv table (see previous call to parser_ctx.close_cell(true)) parser_ctx.close_cell true if parser_ctx.cell_open? end end table.attributes['colcount'] ||= parser_ctx.col_count if !explicit_col_specs # TODO further encapsulate this logic (into table perhaps?) even_width = (100.0 / parser_ctx.col_count).floor table.columns.each {|c| c.assign_width(0, even_width) } end table.partition_header_footer attributes table end # Internal: Parse the column specs for this table. # # The column specs dictate the number of columns, relative # width of columns, default alignments for cells in each # column, and/or default styles or filters applied to the cells in # the column. # # Every column spec is guaranteed to have a width # # returns a Hash of attributes that specify how to format # and layout the cells in the table. def self.parse_col_specs(records) # check for deprecated syntax: single number, equal column spread # REVIEW could use records == records.to_i.to_s instead of regexp if DigitsRx =~ records return ::Array.new(records.to_i) { { 'width' => 1 } } end specs = [] records.split(',').each {|record| # TODO might want to use scan rather than this mega-regexp if (m = ColumnSpecRx.match(record)) spec = {} if m[2] # make this an operation colspec, rowspec = m[2].split '.' if !colspec.nil_or_empty? && Table::ALIGNMENTS[:h].has_key?(colspec) spec['halign'] = Table::ALIGNMENTS[:h][colspec] end if !rowspec.nil_or_empty? && Table::ALIGNMENTS[:v].has_key?(rowspec) spec['valign'] = Table::ALIGNMENTS[:v][rowspec] end end # to_i permits us to support percentage width by stripping the % # NOTE this is slightly out of compliance w/ AsciiDoc, but makes way more sense spec['width'] = !m[3].nil? ? m[3].to_i : 1 # make this an operation if m[4] && Table::TEXT_STYLES.has_key?(m[4]) spec['style'] = Table::TEXT_STYLES[m[4]] end repeat = !m[1].nil? ? m[1].to_i : 1 1.upto(repeat) { specs << spec.dup } end } specs end # Internal: Parse the cell specs for the current cell. # # The cell specs dictate the cell's alignments, styles or filters, # colspan, rowspan and/or repeating content. # # The default spec when pos == :end is {} since we already know we're at a # delimiter. When pos == :start, we *may* be at a delimiter, nil indicates # we're not. # # returns the Hash of attributes that indicate how to layout # and style this cell in the table. def self.parse_cell_spec(line, pos = :start, delimiter = nil) m = nil rest = '' case pos when :start if line.include? delimiter spec_part, rest = line.split delimiter, 2 if (m = CellSpecStartRx.match spec_part) return [{}, rest] if m[0].empty? else return [nil, line] end else return [nil, line] end when :end if (m = CellSpecEndRx.match line) # NOTE return the line stripped of trailing whitespace if no cellspec is found in this case return [{}, line.rstrip] if m[0].lstrip.empty? rest = m.pre_match else return [{}, line] end end spec = {} if m[1] colspec, rowspec = m[1].split '.' colspec = colspec.nil_or_empty? ? 1 : colspec.to_i rowspec = rowspec.nil_or_empty? ? 1 : rowspec.to_i if m[2] == '+' spec['colspan'] = colspec unless colspec == 1 spec['rowspan'] = rowspec unless rowspec == 1 elsif m[2] == '*' spec['repeatcol'] = colspec unless colspec == 1 end end if m[3] colspec, rowspec = m[3].split '.' if !colspec.nil_or_empty? && Table::ALIGNMENTS[:h].has_key?(colspec) spec['halign'] = Table::ALIGNMENTS[:h][colspec] end if !rowspec.nil_or_empty? && Table::ALIGNMENTS[:v].has_key?(rowspec) spec['valign'] = Table::ALIGNMENTS[:v][rowspec] end end if m[4] && Table::TEXT_STYLES.has_key?(m[4]) spec['style'] = Table::TEXT_STYLES[m[4]] end [spec, rest] end # Public: Parse the first positional attribute and assign named attributes # # Parse the first positional attribute to extract the style, role and id # parts, assign the values to their cooresponding attribute keys and return # both the original style attribute and the parsed value from the first # positional attribute. # # attributes - The Hash of attributes to process and update # # Examples # # puts attributes # => {1 => "abstract#intro.lead%fragment", "style" => "preamble"} # # parse_style_attribute(attributes) # => ["abstract", "preamble"] # # puts attributes # => {1 => "abstract#intro.lead", "style" => "abstract", "id" => "intro", # "role" => "lead", "options" => ["fragment"], "fragment-option" => ''} # # Returns a two-element Array of the parsed style from the # first positional attribute and the original style that was # replaced def self.parse_style_attribute(attributes, reader = nil) original_style = attributes['style'] raw_style = attributes[1] # NOTE spaces are not allowed in shorthand, so if we find one, this ain't shorthand if raw_style && !raw_style.include?(' ') && Compliance.shorthand_property_syntax type = :style collector = [] parsed = {} # QUESTION should this be a private method? (though, it's never called if shorthand isn't used) save_current = lambda { if collector.empty? if type != :style warn %(asciidoctor: WARNING:#{reader.nil? ? nil : " #{reader.prev_line_info}:"} invalid empty #{type} detected in style attribute) end else case type when :role, :option parsed[type] ||= [] parsed[type].push collector.join when :id if parsed.has_key? :id warn %(asciidoctor: WARNING:#{reader.nil? ? nil : " #{reader.prev_line_info}:"} multiple ids detected in style attribute) end parsed[type] = collector.join else parsed[type] = collector.join end collector = [] end } raw_style.each_char do |c| if c == '.' || c == '#' || c == '%' save_current.call case c when '.' type = :role when '#' type = :id when '%' type = :option end else collector.push c end end # small optimization if no shorthand is found if type == :style parsed_style = attributes['style'] = raw_style else save_current.call if parsed.has_key? :style parsed_style = attributes['style'] = parsed[:style] else parsed_style = nil end if parsed.has_key? :id attributes['id'] = parsed[:id] end if parsed.has_key? :role attributes['role'] = parsed[:role] * ' ' end if parsed.has_key? :option (options = parsed[:option]).each do |option| attributes[%(#{option}-option)] = '' end if (existing_opts = attributes['options']) attributes['options'] = (options + existing_opts.split(',')) * ',' else attributes['options'] = options * ',' end end end [parsed_style, original_style] else attributes['style'] = raw_style [raw_style, original_style] end end # Remove the indentation (block offset) shared by all the lines, then # indent the lines by the specified amount if specified # # Trim the leading whitespace (indentation) equivalent to the length # of the indent on the least indented line. If the indent argument # is specified, indent the lines by this many spaces (columns). # # The purpose of this method is to shift a block of text to # align to the left margin, while still preserving the relative # indentation between lines # # lines - the Array of String lines to process # indent - the integer number of spaces to add to the beginning # of each line; if this value is nil, the existing # space is preserved (optional, default: 0) # # Examples # # source = < [" def names", " @names.split ' '", " end"] # # Parser.reset_block_indent(source.split "\n") # # => ["def names", " @names.split ' '", "end"] # # puts Parser.reset_block_indent(source.split "\n") * "\n" # # => def names # # => @names.split ' ' # # => end # # returns the Array of String lines with block offset removed #-- # FIXME refactor gsub matchers into compiled regex def self.reset_block_indent!(lines, indent = 0) return if !indent || lines.empty? tab_detected = false # TODO make tab size configurable tab_expansion = ' ' # strip leading block indent offsets = lines.map do |line| # break if the first char is non-whitespace break [] unless line.chr.lstrip.empty? if line.include? TAB tab_detected = true line = line.gsub(TAB_PATTERN, tab_expansion) end if (flush_line = line.lstrip).empty? nil elsif (offset = line.length - flush_line.length) == 0 break [] else offset end end unless offsets.empty? || (offsets = offsets.compact).empty? if (offset = offsets.min) > 0 lines.map! {|line| line = line.gsub(TAB_PATTERN, tab_expansion) if tab_detected line[offset..-1].to_s } end end if indent > 0 padding = ' ' * indent lines.map! {|line| %(#{padding}#{line}) } end nil end # Public: Convert a string to a legal attribute name. # # name - the String name of the attribute # # Returns a String with the legal AsciiDoc attribute name. # # Examples # # sanitize_attribute_name('Foo Bar') # => 'foobar' # # sanitize_attribute_name('foo') # => 'foo' # # sanitize_attribute_name('Foo 3 #-Billy') # => 'foo3-billy' def self.sanitize_attribute_name(name) name.gsub(InvalidAttributeNameCharsRx, '').downcase end # Internal: Converts a Roman numeral to an integer value. # # value - The String Roman numeral to convert # # Returns the Integer for this Roman numeral def self.roman_numeral_to_int(value) value = value.downcase digits = { 'i' => 1, 'v' => 5, 'x' => 10 } result = 0 (0..value.length - 1).each {|i| digit = digits[value[i..i]] if i + 1 < value.length && digits[value[i+1..i+1]] > digit result -= digit else result += digit end } result end end end