parser.rb in kramdown-0.3.0

- old
+ new
@@ -18,1207 +18,16 @@
 # You should have received a copy of the GNU General Public License
 # along with this program.  If not, see <http://www.gnu.org/licenses/>.
 #++
 #
 
-require 'strscan'
-require 'stringio'
-require 'kramdown/parser/registry'
-
-#TODO: use [[:alpha:]] in all regexp to allow parsing of international values in 1.9.1
-#NOTE: use @src.pre_match only before other check/match?/... operations, otherwise the content is changed
-
 module Kramdown
 
   # This module contains all available parsers. Currently, there is only one parser for parsing
   # documents in kramdown format.
   module Parser
 
-    # Parser for documents in kramdown format: block level elements are parsed first, span level ones afterwards.
-    class Kramdown
-
-      include ::Kramdown
-
-      attr_reader :tree
-      attr_reader :doc
-
-      # Create a new Kramdown parser object for the Kramdown::Document +doc+ and reset its parse infos.
-      def initialize(doc)
-        @doc = doc
-        @src = nil
-        @tree = nil
-        @unclosed_html_tags = []
-        @stack = []
-        @used_ids = {} # IDs handed out so far (read and updated by generate_id)
-        @doc.parse_infos[:ald] = {}
-        @doc.parse_infos[:link_defs] = {}
-        @doc.parse_infos[:footnotes] = {}
-      end
-      private_class_method(:new, :allocate) # parser objects are only created through Kramdown.parse
-
-
-      # Parse the string +source+ using the Kramdown::Document +doc+ and return the parse tree.
-      def self.parse(source, doc)
-        new(doc).parse(source)
-      end
-
-      # Parse the string +source+ (block level first, then span level) and return the created tree.
-      def parse(source)
-        configure_parser
-        tree = Element.new(:root)
-        parse_blocks(tree, adapt_source(source))
-        update_tree(tree)
-        @doc.parse_infos[:footnotes].each do |name, data| # footnote contents are stored outside of the tree
-          update_tree(data[:content])
-        end
-        tree
-      end
-
-      # Add the given warning +text+ to the warning array of the Kramdown document.
-      def warning(text)
-        @doc.warnings << text
-        #TODO: add position information
-      end
-
-      #######
-      private
-      #######
-
-      BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :atx_header,
-                       :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
-                       :footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph]
-      SPAN_PARSERS =  [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
-                       :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
-
-      # Extend the parser with the modules of all block/span parsers from the Registry and build the span start regexps.
-      def configure_parser
-        @parsers = {}
-        BLOCK_PARSERS.each do |name|
-          if Registry.has_parser?(name, :block)
-            extend(Registry.parser(name).module)
-            @parsers[name] = Registry.parser(name)
-          else
-            raise Kramdown::Error, "Unknown block parser: #{name}"
-          end
-        end
-        SPAN_PARSERS.each do |name|
-          if Registry.has_parser?(name, :span)
-            extend(Registry.parser(name).module)
-            @parsers[name] = Registry.parser(name)
-          else
-            raise Kramdown::Error, "Unknown span parser: #{name}"
-          end
-        end
-        @span_start = Regexp.union(*SPAN_PARSERS.map {|name| @parsers[name].start_re}) # start of any span level element
-        @span_start_re = /(?=#{@span_start})/ # lookahead, so scanning stops in front of the span start
-      end
-
-      # Parse all block level elements in the string +text+ into the element +el+. The current parser
-      # state is saved on +@stack+ and restored afterwards, so this method can be called recursively.
-      def parse_blocks(el, text)
-        @stack.push([@tree, @src, @unclosed_html_tags])
-        @tree, @src, @unclosed_html_tags = el, StringScanner.new(text), []
-
-        while !@src.eos?
-          BLOCK_PARSERS.any? do |name| # the first parser whose start matches and whose method returns a true value wins
-            if @src.check(@parsers[name].start_re)
-              send(@parsers[name].method)
-            else
-              false
-            end
-          end || begin # fallback: no parser accepted the line, so it is added as plain text
-            warning('Warning: this should not occur - no block parser handled the line')
-            add_text(@src.scan(/.*\n/))
-          end
-        end
-
-        @unclosed_html_tags.reverse.each do |tag|
-          warning("Automatically closing unclosed html tag '#{tag.value}'")
-        end
-
-        @tree, @src, @unclosed_html_tags = *@stack.pop
-      end
-
-      # Update the tree by parsing all <tt>:text</tt> elements with the span level parser (resets
-      # +@tree+, +@src+ and the +@stack+) and by updating the attributes from the IALs.
-      def update_tree(element)
-        element.children.map! do |child|
-          if child.type == :text
-            @stack, @tree = [], nil
-            @src = StringScanner.new(child.value)
-            parse_spans(child)
-            child.children
-          else
-            update_tree(child)
-            update_attr_with_ial(child.options[:attr] ||= {}, child.options[:ial]) if child.options[:ial]
-            child
-          end
-        end.flatten! # text elements were replaced by the arrays of their span level children
-      end
-
-      # Parse span level elements of +@src+ into +el+ until +stop_re+ (if given) ends it; returns whether it was found.
-      def parse_spans(el, stop_re = nil)
-        @stack.push(@tree)
-        @tree = el
-
-        used_re = (stop_re.nil? ? @span_start_re : /(?=#{Regexp.union(stop_re, @span_start)})/) # also stop in front of stop_re
-        stop_re_found = false
-        while !@src.eos? && !stop_re_found
-          if result = @src.scan_until(used_re)
-            add_text(result)
-            if stop_re && (stop_re_matched = @src.check(stop_re))
-              stop_re_found = (block_given? ? yield : true) # a given block decides whether the match really is the end
-            end
-            processed = SPAN_PARSERS.any? do |name|
-              if @src.check(@parsers[name].start_re)
-                send(@parsers[name].method)
-                true
-              else
-                false
-              end
-            end unless stop_re_found
-            if !processed && !stop_re_found
-              if stop_re_matched
-                add_text(@src.scan(/./)) # rejected stop_re match that is no span start: consume one character as text
-              else
-                raise Kramdown::Error, 'Bug: please report!'
-              end
-            end
-          else
-            add_text(@src.scan_until(/.*/m)) unless stop_re # no further span start: the rest is plain text
-            break
-          end
-        end
-
-        @tree = @stack.pop
-
-        stop_re_found
-      end
-
-      # Modify the string +source+ to be usable by the parser.
-      def adapt_source(source)
-        source.gsub(/\r\n?/, "\n").chomp + "\n" # unify line endings to \n and make sure the text ends with a newline
-      end
-
-      # This helper method adds the given +text+ either to the last element in the +tree+ if it is a
-      # text element or creates a new text element (no element is created for an empty +text+).
-      def add_text(text, tree = @tree)
-        if tree.children.last && tree.children.last.type == :text
-          tree.children.last.value << text
-        elsif !text.empty?
-          tree.children << Element.new(:text, text)
-        end
-      end
-
-    end
-
-
-    module ParserMethods
-
-      INDENT = /^(?:\t| {4})/ # one indentation level: a tab or four spaces at the start of a line
-      OPT_SPACE = / {0,3}/ # zero to three spaces
-
-
-      # Parse the string +str+ and extract all attributes and add all found attributes to the hash
-      # +opts+.
-      def parse_attribute_list(str, opts)
-        str.scan(ALD_TYPE_ANY).each do |key, sep, val, id_attr, class_attr, ref|
-          if ref
-            (opts[:refs] ||= []) << ref
-          elsif class_attr
-            opts['class'] = ((opts['class'] || '') + " #{class_attr}").lstrip
-          elsif id_attr
-            opts['id'] = id_attr
-          else
-            opts[key] = val.gsub(/\\(\}|#{sep})/, "\\1")
-          end
-        end
-      end
-
-      # Update the +ial+ with the information from the inline attribute list +opts+.
-      def update_ial_with_ial(ial, opts)
-        (ial[:refs] ||= []) << opts[:refs]
-        ial['class'] = ((ial['class'] || '') + " #{opts['class']}").lstrip if opts['class']
-        opts.each {|k,v| ial[k] = v if k != :refs && k != 'class' }
-      end
-
-      # Update the attributes with the information from the inline attribute list and all referenced ALDs.
-      def update_attr_with_ial(attr, ial)
-        ial[:refs].each do |ref|
-          update_attr_with_ial(attr, ref) if ref = @doc.parse_infos[:ald][ref]
-        end if ial[:refs]
-        attr['class'] = ((attr['class'] || '') + " #{ial['class']}").lstrip if ial['class']
-        ial.each {|k,v| attr[k] = v if k.kind_of?(String) && k != 'class' }
-      end
-
-      # Generate an alpha-numeric ID from the the string +str+.
-      def generate_id(str)
-        gen_id = str.gsub(/[^a-zA-Z0-9 -]/, '').gsub(/^[^a-zA-Z]*/, '').gsub(' ', '-').downcase
-        gen_id = 'section' if gen_id.length == 0
-        if @used_ids.has_key?(gen_id)
-          gen_id += '-' + (@used_ids[gen_id] += 1).to_s
-        else
-          @used_ids[gen_id] = 0
-        end
-        gen_id
-      end
-
-      # Helper method for obfuscating the +email+ address by using HTML entities.
-      def obfuscate_email(email)
-        result = ""
-        email.each_byte do |b|
-          result += (b > 128 ? b.chr : "&#%03d;" % b)
-        end
-        result
-      end
-
-
-      BLANK_LINE = /(?:^\s*\n)+/
-
-      # Parse the blank line at the current postition.
-      def parse_blank_line
-        @src.pos += @src.matched_size
-        if @tree.children.last && @tree.children.last.type == :blank
-          @tree.children.last.value += @src.matched
-        else
-          @tree.children << Element.new(:blank, @src.matched)
-        end
-        true
-      end
-      Registry.define_parser(:block, :blank_line, BLANK_LINE, self)
-
-
-      EOB_MARKER = /^\^\s*?\n/
-
-      # Parse the EOB marker at the current location.
-      def parse_eob_marker
-        @src.pos += @src.matched_size
-        @tree.children << Element.new(:eob)
-        true
-      end
-      Registry.define_parser(:block, :eob_marker, EOB_MARKER, self)
-
-
-      PARAGRAPH_START = /^#{OPT_SPACE}[^ \t].*?\n/
-
-      # Parse the paragraph at the current location.
-      def parse_paragraph
-        @src.pos += @src.matched_size
-        if @tree.children.last && @tree.children.last.type == :p
-          @tree.children.last.children.first.value << "\n" << @src.matched.chomp
-        else
-          @tree.children << Element.new(:p)
-          add_text(@src.matched.lstrip.chomp, @tree.children.last)
-        end
-        true
-      end
-      Registry.define_parser(:block, :paragraph, PARAGRAPH_START, self)
-
-      HEADER_ID=/(?:[ \t]\{#((?:\w|\d)[\w\d-]*)\})?/
-      SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
-
-      # Parse the Setext header at the current location.
-      def parse_setext_header
-        if @tree.children.last && @tree.children.last.type != :blank
-          return false
-        end
-        @src.pos += @src.matched_size
-        text, id, level = @src[1].strip, @src[2], @src[3]
-        el = Element.new(:header, nil, :level => (level == '-' ? 2 : 1))
-        add_text(text, el)
-        el.options[:attr] = {'id' => id} if id
-        el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
-        @tree.children << el
-        true
-      end
-      Registry.define_parser(:block, :setext_header, SETEXT_HEADER_START, self)
-
-
-      ATX_HEADER_START = /^\#{1,6}/
-      ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
-
-      # Parse the Atx header at the current location.
-      def parse_atx_header
-        if @tree.children.last && @tree.children.last.type != :blank
-          return false
-        end
-        result = @src.scan(ATX_HEADER_MATCH)
-        level, text, id = @src[1], @src[2].strip, @src[3]
-        el = Element.new(:header, nil, :level => level.length)
-        add_text(text, el)
-        el.options[:attr] = {'id' => id} if id
-        el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
-        @tree.children << el
-        true
-      end
-      Registry.define_parser(:block, :atx_header, ATX_HEADER_START, self)
-
-
-      BLOCKQUOTE_START = /^#{OPT_SPACE}> ?/
-      BLOCKQUOTE_MATCH = /(^#{OPT_SPACE}>.*?\n)+/
-
-      # Parse the blockquote at the current location.
-      def parse_blockquote
-        result = @src.scan(BLOCKQUOTE_MATCH).gsub(BLOCKQUOTE_START, '')
-        el = Element.new(:blockquote)
-        @tree.children << el
-        parse_blocks(el, result)
-        true
-      end
-      Registry.define_parser(:block, :blockquote, BLOCKQUOTE_START, self)
-
-
-      CODEBLOCK_START = INDENT
-      CODEBLOCK_MATCH = /(?:#{INDENT}.*?\S.*?\n)+/
-
-      # Parse the indented codeblock at the current location.
-      def parse_codeblock
-        result = @src.scan(CODEBLOCK_MATCH).gsub(INDENT, '')
-        children = @tree.children
-        if children.length >= 2 && children[-1].type == :blank && children[-2].type == :codeblock
-          children[-2].value << children[-1].value.gsub(INDENT, '') << result
-          children.pop
-        else
-          @tree.children << Element.new(:codeblock, result)
-        end
-        true
-      end
-      Registry.define_parser(:block, :codeblock, CODEBLOCK_START, self)
-
-
-      FENCED_CODEBLOCK_START = /^~{3,}/
-      FENCED_CODEBLOCK_MATCH = /^(~{3,})\s*?\n(.*?)^\1~*\s*?\n/m
-
-      # Parse the fenced codeblock at the current location.
-      def parse_codeblock_fenced
-        if @src.check(FENCED_CODEBLOCK_MATCH)
-          @src.pos += @src.matched_size
-          @tree.children << Element.new(:codeblock, @src[2])
-          true
-        else
-          false
-        end
-      end
-      Registry.define_parser(:block, :codeblock_fenced, FENCED_CODEBLOCK_START, self)
-
-
-      HR_START = /^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
-
-      # Parse the horizontal rule at the current location.
-      def parse_horizontal_rule
-        @src.pos += @src.matched_size
-        @tree.children << Element.new(:hr)
-        true
-      end
-      Registry.define_parser(:block, :horizontal_rule, HR_START, self)
-
-
-      LIST_START_UL = /^(#{OPT_SPACE}[+*-])([\t| ].*?\n)/ # NOTE(review): '|' inside [...] is a literal pipe, so "*|" also starts an item - confirm intent
-      LIST_START_OL = /^(#{OPT_SPACE}\d+\.)([\t| ].*?\n)/
-      LIST_START = /#{LIST_START_UL}|#{LIST_START_OL}/
-
-      # Parse the ordered or unordered list at the current location: the text of all items is collected first, then parsed.
-      def parse_list
-        if @tree.children.last && @tree.children.last.type == :p # last element must not be a paragraph
-          return false
-        end
-
-        type, list_start_re = (@src.check(LIST_START_UL) ? [:ul, LIST_START_UL] : [:ol, LIST_START_OL])
-        list = Element.new(type)
-
-        item = nil
-        indent_re = nil
-        content_re = nil
-        eob_found = false
-        nested_list_found = false
-        while !@src.eos?
-          if @src.check(HR_START) # a horizontal rule ends the list
-            break
-          elsif @src.scan(list_start_re) # start of a new list item
-            item = Element.new(:li)
-            item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
-            list.children << item
-
-            list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
-                             /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
-            nested_list_found = false
-          elsif result = @src.scan(content_re) # indented continuation line of the current item
-            result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
-            result.sub!(indent_re, '')
-            if !nested_list_found && result =~ LIST_START # first line in this item that starts a nested list
-              parse_blocks(item, item.value)
-              if item.children.length == 1 && item.children.first.type == :p
-                item.value = ''
-              else
-                item.children.clear
-              end
-              nested_list_found = true
-            end
-            item.value << result
-          elsif result = @src.scan(BLANK_LINE) # blank lines are kept as part of the current item
-            nested_list_found = true
-            item.value << result
-          elsif @src.scan(EOB_MARKER) # an EOB marker explicitly ends the list
-            eob_found = true
-            break
-          else
-            break
-          end
-        end
-
-        @tree.children << list
-
-        last = nil
-        list.children.each do |item| # second pass: parse the collected text of every item
-          temp = Element.new(:temp)
-          parse_blocks(temp, item.value)
-          item.children += temp.children
-          item.value = nil
-          next if item.children.size == 0
-
-          if item.children.first.type == :p && (item.children.length < 2 || item.children[1].type != :blank ||
-                                                (item == list.children.last && item.children.length == 2 && !eob_found))
-            text = item.children.shift.children.first # unwrap the leading paragraph and use its text element directly
-            text.value += "\n" if !item.children.empty? && item.children[0].type != :blank
-            item.children.unshift(text)
-          else
-            item.options[:first_is_block] = true
-          end
-
-          if item.children.last.type == :blank
-            last = item.children.pop
-          else
-            last = nil
-          end
-        end
-
-        @tree.children << last if !last.nil? && !eob_found # re-add the blank removed from the last processed item after the list
-
-        true
-      end
-      Registry.define_parser(:block, :list, LIST_START, self)
-
-      def parse_first_list_line(indentation, content)
-        if content =~ /^\s*\n/
-          indentation = 4
-        else
-          while content =~ /^ *\t/
-            temp = content.scan(/^ */).first.length + indentation
-            content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
-          end
-          indentation += content.scan(/^ */).first.length
-        end
-        content.sub!(/^\s*/, '')
-
-        indent_re = /^ {#{indentation}}/
-        content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
-        [content, indentation, content_re, indent_re]
-      end
-
-
-      DEFINITION_LIST_START = /^(#{OPT_SPACE}:)([\t| ].*?\n)/
-
-      # Parse the definition list at the current location; the lines of the preceding paragraph are used as the terms.
-      def parse_definition_list
-        children = @tree.children
-        if !children.last || (children.length == 1 && children.last.type != :p ) ||
-            (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
-          return false # a definition must follow a paragraph, optionally separated by a single blank line
-        end
-
-        first_as_para = false
-        deflist = Element.new(:dl)
-        para = @tree.children.pop
-        if para.type == :blank
-          para = @tree.children.pop
-          first_as_para = true
-        end
-        para.children.first.value.split("\n").each do |term| # every line of the paragraph is one term
-          el = Element.new(:dt)
-          el.children << Element.new(:text, term)
-          deflist.children << el
-        end
-
-        item = nil
-        indent_re = nil
-        content_re = nil
-        def_start_re = DEFINITION_LIST_START
-        while !@src.eos?
-          if @src.scan(def_start_re) # start of a new definition
-            item = Element.new(:dd)
-            item.options[:first_as_para] = first_as_para
-            item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
-            deflist.children << item
-
-            def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
-            first_as_para = false
-          elsif result = @src.scan(content_re) # indented continuation line of the current definition
-            result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
-            result.sub!(indent_re, '')
-            item.value << result
-            first_as_para = false
-          elsif result = @src.scan(BLANK_LINE) # blank lines are kept as part of the current definition
-            first_as_para = true
-            item.value << result
-          else
-            break
-          end
-        end
-
-        last = nil
-        deflist.children.each do |item| # second pass: parse the collected text of every definition
-          next if item.type == :dt
-
-          parse_blocks(item, item.value)
-          item.value = nil
-          next if item.children.size == 0
-
-          if item.children.last.type == :blank
-            last = item.children.pop
-          else
-            last = nil
-          end
-          if item.children.first.type == :p && !item.options.delete(:first_as_para)
-            text = item.children.shift.children.first # unwrap the leading paragraph and use its text element directly
-            text.value += "\n" if !item.children.empty?
-            item.children.unshift(text)
-          else
-            item.options[:first_is_block] = true
-          end
-        end
-
-        if @tree.children.length >= 1 && @tree.children.last.type == :dl # continue a directly preceding definition list
-          @tree.children[-1].children += deflist.children
-        elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
-          @tree.children.pop
-          @tree.children[-1].children += deflist.children
-        else
-          @tree.children << deflist
-        end
-
-        @tree.children << last if !last.nil? # re-add the blank removed from the last processed definition
-
-        true
-      end
-      Registry.define_parser(:block, :definition_list, DEFINITION_LIST_START, self)
-
-
-      PUNCTUATION_CHARS = "_.:,;!?-"
-      LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
-      LINK_ID_NON_CHARS = /[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
-      LINK_DEFINITION_START = /^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^\s]+))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/
-
-      # Parse the link definition at the current location.
-      def parse_link_definition
-        @src.pos += @src.matched_size
-        link_id, link_url, link_title = @src[1].downcase, @src[2] || @src[3], @src[5]
-        warning("Duplicate link ID '#{link_id}' - overwriting") if @doc.parse_infos[:link_defs][link_id]
-        @doc.parse_infos[:link_defs][link_id] = [link_url, link_title]
-        true
-      end
-      Registry.define_parser(:block, :link_definition, LINK_DEFINITION_START, self)
-
-
-      ALD_ID_CHARS = /[\w\d-]/
-      ALD_ANY_CHARS = /\\\}|[^\}]/
-      ALD_ID_NAME = /(?:\w|\d)#{ALD_ID_CHARS}*/
-      ALD_TYPE_KEY_VALUE_PAIR = /(#{ALD_ID_NAME})=("|')((?:\\\}|\\\2|[^\}\2])+?)\2/
-      ALD_TYPE_CLASS_NAME = /\.(#{ALD_ID_NAME})/
-      ALD_TYPE_ID_NAME = /#(#{ALD_ID_NAME})/
-      ALD_TYPE_REF = /(#{ALD_ID_NAME})/
-      ALD_TYPE_ANY = /(?:\A|\s)(?:#{ALD_TYPE_KEY_VALUE_PAIR}|#{ALD_TYPE_ID_NAME}|#{ALD_TYPE_CLASS_NAME}|#{ALD_TYPE_REF})(?=\s|\Z)/
-      ALD_START = /^#{OPT_SPACE}\{:(#{ALD_ID_NAME}):(#{ALD_ANY_CHARS}+)\}\s*?\n/
-
-      # Parse the attribute list definition at the current location.
-      def parse_ald
-        @src.pos += @src.matched_size
-        parse_attribute_list(@src[2], @doc.parse_infos[:ald][@src[1]] ||= {})
-        true
-      end
-      Registry.define_parser(:block, :ald, ALD_START, self)
-
-
-      IAL_BLOCK_START = /^#{OPT_SPACE}\{:(?!:)(#{ALD_ANY_CHARS}+)\}\s*?\n/
-
-      # Parse the inline attribute list at the current location.
-      def parse_block_ial
-        @src.pos += @src.matched_size
-        if @tree.children.last && @tree.children.last.type != :blank
-          parse_attribute_list(@src[1], @tree.children.last.options[:ial] ||= {})
-        end
-        true
-      end
-      Registry.define_parser(:block, :block_ial, IAL_BLOCK_START, self)
-
-
-      EXT_BLOCK_START_STR = "^#{OPT_SPACE}\\{::(%s):(:)?(#{ALD_ANY_CHARS}*)\\}\s*?\n"
-      EXT_BLOCK_START = /#{EXT_BLOCK_START_STR % ALD_ID_NAME}/
-
-      # Parse the extension block at the current location.
-      def parse_extension_block
-        @src.pos += @src.matched_size
-
-        ext = @src[1]
-        opts = {}
-        body = nil
-        parse_attribute_list(@src[3], opts)
-
-        if !@doc.extension.public_methods.map {|m| m.to_s}.include?("parse_#{ext}")
-          warning("No extension named '#{ext}' found - ignoring extension block")
-          body = :invalid
-        end
-
-        if !@src[2]
-          stop_re = /#{EXT_BLOCK_START_STR % ext}/
-          if result = @src.scan_until(stop_re)
-            parse_attribute_list(@src[3], opts)
-            body = result.sub!(stop_re, '') if body != :invalid
-          else
-            body = :invalid
-            warning("No ending line for extension block '#{ext}' found - ignoring extension block")
-          end
-        end
-
-        @doc.extension.send("parse_#{ext}", self, opts, body) if body != :invalid
-
-        true
-      end
-      Registry.define_parser(:block, :extension_block, EXT_BLOCK_START, self)
-
-
-      FOOTNOTE_DEFINITION_START = /^#{OPT_SPACE}\[\^(#{ALD_ID_NAME})\]:\s*?(.*?\n(?:#{BLANK_LINE}?#{CODEBLOCK_MATCH})*)/
-
-      # Parse the foot note definition at the current location.
-      def parse_footnote_definition
-        @src.pos += @src.matched_size
-
-        el = Element.new(:footnote_def)
-        parse_blocks(el, @src[2].gsub(INDENT, ''))
-        warning("Duplicate footnote name '#{@src[1]}' - overwriting") if @doc.parse_infos[:footnotes][@src[1]]
-        (@doc.parse_infos[:footnotes][@src[1]] = {})[:content] = el
-      end
-      Registry.define_parser(:block, :footnote_definition, FOOTNOTE_DEFINITION_START, self)
-
-
-      require 'rexml/parsers/baseparser' # provides the UNAME_STR, NAME_STR and REFERENCE_RE constants used below
-
-      #:stopdoc:
-      # The following regexps are based on the ones used by REXML, with some slight modifications.
-      #:startdoc:
-      HTML_COMMENT_RE = /<!--(.*?)-->/m
-      HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
-      HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
-      HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
-      HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/
-
-
-      HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
-                               map noscript object ol table tbody td th thead tfoot tr ul}
-      HTML_PARSE_AS_SPAN  = %w{a abbr acronym address b bdo big cite caption code del dfn dt em
-                               h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p pre q rb rbc
-                               rp rt rtc ruby samp select small span strong sub sup tt var}
-      HTML_PARSE_AS_RAW   = %w{script math option textarea}
-
-      HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw} # maps a tag name to its parse type; unlisted tags are parsed as raw
-      HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
-      HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
-      HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
-
-      #:stopdoc:
-      # Some HTML elements like script belong to both categories (i.e. are valid in block and
-      # span HTML) and don't appear therefore!
-      #:startdoc:
-      HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
-                              ins kbd label option q rb rbc rp rt rtc ruby samp select small span
-                              strong sub sup textarea tt var}
-      HTML_BLOCK_ELEMENTS = %w{address applet button blockquote caption col colgroup dd div dl dt fieldset
-                               form h1 h2 h3 h4 h5 h6 hr iframe legend li map ol optgroup p pre table tbody
-                               td th thead tfoot tr ul}
-      HTML_ELEMENTS_WITHOUT_BODY = %w{area br col hr img input} # these tags are auto-closed by the block HTML parser
-
-      HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
-
-      # Parse the HTML at the current position as block level HTML (comments, processing instructions, opening/closing tags).
-      def parse_block_html
-        if result = @src.scan(HTML_COMMENT_RE) # HTML comment: stored as raw HTML
-          @tree.children << Element.new(:html_raw, result, :type => :block)
-          @src.scan(/.*?\n/) # the rest of the line is skipped
-          true
-        elsif result = @src.scan(HTML_INSTRUCTION_RE) # processing instruction: stored as raw HTML
-          @tree.children << Element.new(:html_raw, result, :type => :block)
-          @src.scan(/.*?\n/)
-          true
-        else
-          if (!@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !@src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/)) ||
-              HTML_SPAN_ELEMENTS.include?(@src[1])
-            if @tree.type == :html_element && @tree.options[:parse_type] != :block # inside a span/raw HTML element the text is kept as content
-              add_html_text(@src.scan(/.*?\n/), @tree)
-              add_html_text(@src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/), @tree)
-              return true
-            else
-              return false # let the other block parsers handle the line
-            end
-          end
-
-          current_el = (@tree.type == :html_element ? @tree : nil)
-          @src.scan(/^(#{OPT_SPACE})(.*?)\n/)
-          if current_el && current_el.options[:parse_type] == :raw
-            add_html_text(@src[1], current_el)
-          end
-          line = @src[2]
-          stack = []
-
-          while line.size > 0 # process the line tag by tag
-            index_start_tag, index_close_tag = line.index(HTML_TAG_RE), line.index(HTML_TAG_CLOSE_RE)
-            if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag) # an opening tag comes first
-              md = line.match(HTML_TAG_RE)
-              line = md.post_match
-              add_html_text(md.pre_match, current_el) if current_el
-              if HTML_SPAN_ELEMENTS.include?(md[1]) || (current_el && current_el.options[:parse_type] == :span)
-                add_html_text(md.to_s, current_el) if current_el
-                next
-              end
-
-              attrs = {}
-              md[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
-
-              parse_type = if !current_el || current_el.options[:parse_type] != :raw
-                             (@doc.options[:parse_block_html] ? HTML_PARSE_AS[md[1]] : :raw)
-                           else
-                             :raw
-                           end
-              if val = get_parse_type(attrs.delete('markdown')) # a 'markdown' attribute overrides the parse type
-                parse_type = (val == :default ? HTML_PARSE_AS[md[1]] : val)
-              end
-              el = Element.new(:html_element, md[1], :attr => attrs, :type => :block, :parse_type => parse_type)
-              el.options[:no_start_indent] = true if !stack.empty?
-              el.options[:outer_element] = true if !current_el
-              el.options[:parent_is_raw] = true if current_el && current_el.options[:parse_type] == :raw
-
-              @tree.children << el
-              if !md[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
-                warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
-              elsif !md[4] # not self-closed: the element becomes the current tree until its closing tag
-                @unclosed_html_tags.push(el)
-                @stack.push(@tree)
-                stack.push(current_el)
-                @tree = current_el = el
-              end
-            elsif index_close_tag # a closing tag comes first
-              md = line.match(HTML_TAG_CLOSE_RE)
-              line = md.post_match
-              add_html_text(md.pre_match, current_el) if current_el
-
-              if @unclosed_html_tags.size > 0 && md[1] == @unclosed_html_tags.last.value
-                el = @unclosed_html_tags.pop
-                @tree = @stack.pop
-                current_el.options[:compact] = true if stack.size > 0
-                current_el = stack.pop || (@tree.type == :html_element ? @tree : nil)
-              else
-                if !HTML_SPAN_ELEMENTS.include?(md[1]) && @tree.options[:parse_type] != :span
-                  warning("Found invalidly used HTML closing tag for '#{md[1]}'")
-                elsif current_el
-                  add_html_text(md.to_s, current_el)
-                end
-              end
-            else # no more tags on this line
-              if current_el
-                line.rstrip! if current_el.options[:parse_type] == :block
-                add_html_text(line + "\n", current_el)
-              else
-                add_text(line + "\n")
-              end
-              line = ''
-            end
-          end
-          if current_el && (current_el.options[:parse_type] == :span || current_el.options[:parse_type] == :raw)
-            result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/) # span/raw content runs up to the next line starting block HTML
-            last = current_el.children.last
-            result = "\n" + result if last.nil? || (last.type != :text && last.type != :raw) || last.value !~ /\n\Z/
-            add_html_text(result, current_el)
-          end
-          true
-        end
-      end
-      Registry.define_parser(:block, :block_html, HTML_BLOCK_START, self)
-
-      # Return the HTML parse type defined by the string +val+, i.e. raw when "0", default parsing
-      # (return value +nil+) when "1", span parsing when "span" and block parsing when "block". If
-      # +val+ is nil, then the default parsing mode is used.
-      def get_parse_type(val)
-        case val
-        when "0" then :raw
-        when "1" then :default
-        when "span" then :span
-        when "block" then :block
-        when NilClass then nil
-        else
-          warning("Invalid markdown attribute val '#{val}', using default")
-          nil
-        end
-      end
-
-      # Special version of #add_text which either creates a :text element or a :raw element,
-      # depending on the HTML element type.
-      def add_html_text(text, tree)
-        type = (tree.options[:parse_type] == :raw ? :raw : :text)
-        if tree.children.last && tree.children.last.type == type
-          tree.children.last.value << text
-        elsif !text.empty?
-          tree.children << Element.new(type, text)
-        end
-      end
-
-
-      ESCAPED_CHARS = /\\([\\.*_+-`()\[\]{}#!])/
-
-      # Parse the backslash-escaped character at the current location.
-      def parse_escaped_chars
-        @src.pos += @src.matched_size
-        add_text(@src[1])
-      end
-      Registry.define_parser(:span, :escaped_chars, ESCAPED_CHARS, self)
-
-
-      # Parse the HTML entity at the current location.
-      def parse_html_entity
-        @src.pos += @src.matched_size
-        @tree.children << Element.new(:entity, @src.matched)
-      end
-      Registry.define_parser(:span, :html_entity, REXML::Parsers::BaseParser::REFERENCE_RE, self)
-
-
-      LINE_BREAK = /(  |\\\\)(?=\n)/
-
-      # Parse the line break at the current location.
-      def parse_line_break
-        @src.pos += @src.matched_size
-        @tree.children << Element.new(:br)
-      end
-      Registry.define_parser(:span, :line_break, LINE_BREAK, self)
-
-
-      TYPOGRAPHIC_SYMS = [['---', :mdash], ['--', :ndash], ['...', :ellipsis],
-                          ['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
-                          ['<< ', :laquo_space], [' >>', :raquo_space],
-                          ['<<', :laquo], ['>>', :raquo]]
-      TYPOGRAPHIC_SYMS_SUBST = Hash[*TYPOGRAPHIC_SYMS.flatten]
-      TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/
-
-      # Parse the typographic symbols at the current location.
-      def parse_typographic_syms
-        @src.pos += @src.matched_size
-        val = TYPOGRAPHIC_SYMS_SUBST[@src.matched]
-        if val.kind_of?(Symbol)
-          @tree.children << Element.new(:typographic_sym, val)
-        else
-          add_text(val.dup)
-        end
-      end
-      Registry.define_parser(:span, :typographic_syms, TYPOGRAPHIC_SYMS_RE, self)
-
-
-      AUTOLINK_START = /<((mailto|https?|ftps?):.*?|\S*?@\S*?)>/
-
-      # Parse the autolink at the current location.
-      def parse_autolink
-        @src.pos += @src.matched_size
-
-        text = href = @src[1]
-        if @src[2].nil? || @src[2] == 'mailto'
-          text = obfuscate_email(@src[2] ? @src[1].sub(/^mailto:/, '') : @src[1])
-          mailto = obfuscate_email('mailto')
-          href = "#{mailto}:#{text}"
-        end
-        el = Element.new(:a, nil, {:attr => {'href' => href}})
-        add_text(text, el)
-        @tree.children << el
-      end
-      Registry.define_parser(:span, :autolink, AUTOLINK_START, self)
-
-
-      CODESPAN_DELIMITER = /`+/
-
-      # Parse the codespan at the current scanner location.
-      def parse_codespan
-        result = @src.scan(CODESPAN_DELIMITER)
-        simple = (result.length == 1)
-        reset_pos = @src.pos
-
-        if simple && @src.pre_match =~ /\s\Z/ && @src.match?(/\s/)
-          add_text(result)
-          return
-        end
-
-        text = @src.scan_until(/#{result}/)
-        if text
-          text.sub!(/#{result}\Z/, '')
-          if !simple
-            text = text[1..-1] if text[0..0] == ' '
-            text = text[0..-2] if text[-1..-1] == ' '
-          end
-          @tree.children << Element.new(:codespan, text)
-        else
-          @src.pos = reset_pos
-          add_text(result)
-        end
-      end
-      Registry.define_parser(:span, :codespan, CODESPAN_DELIMITER, self)
-
-
-      IAL_SPAN_START = /\{:(#{ALD_ANY_CHARS}+)\}/
-
-      # Parse the inline attribute list at the current location.
-      def parse_span_ial
-        @src.pos += @src.matched_size
-        if @tree.children.last && @tree.children.last.type != :text
-          attr = {}
-          parse_attribute_list(@src[1], attr)
-          update_ial_with_ial(@tree.children.last.options[:ial] ||= {}, attr)
-          update_attr_with_ial(@tree.children.last.options[:attr] ||= {}, attr)
-        else
-          warning("Ignoring span IAL because preceding element is just text")
-          add_text(@src.matched)
-        end
-      end
-      Registry.define_parser(:span, :span_ial, IAL_SPAN_START, self)
-
-
-      FOOTNOTE_MARKER_START = /\[\^(#{ALD_ID_NAME})\]/
-
-      # Parse the footnote marker at the current location.
-      def parse_footnote_marker
-        @src.pos += @src.matched_size
-        fn_def = @doc.parse_infos[:footnotes][@src[1]]
-        if fn_def
-          valid = fn_def[:marker] && fn_def[:marker].options[:stack][0..-2].zip(fn_def[:marker].options[:stack][1..-1]).all? do |par, child|
-            par.children.include?(child)
-          end
-          if !fn_def[:marker] || !valid
-            fn_def[:marker] = Element.new(:footnote, nil, :name => @src[1])
-            fn_def[:marker].options[:stack] = [@stack, @tree, fn_def[:marker]].flatten.compact
-            @tree.children << fn_def[:marker]
-          else
-            warning("Footnote marker '#{@src[1]}' already appeared in document, ignoring newly found marker")
-            add_text(@src.matched)
-          end
-        else
-          warning("Footnote definition for '#{@src[1]}' not found")
-          add_text(@src.matched)
-        end
-      end
-      Registry.define_parser(:span, :footnote_marker, FOOTNOTE_MARKER_START, self)
-
-
-      EMPHASIS_START = /(?:\*\*?|__?)/
-
-      # Parse the emphasis at the current location.
-      def parse_emphasis
-        result = @src.scan(EMPHASIS_START)
-        element = (result.length == 2 ? :strong : :em)
-        type = (result =~ /_/ ? '_' : '*')
-        reset_pos = @src.pos
-
-        if (type == '_' && @src.pre_match =~ /[[:alpha:]]\Z/ && @src.check(/[[:alpha:]]/)) || @src.check(/\s/)
-          add_text(result)
-          return
-        end
-
-        sub_parse = lambda do |delim, elem|
-          el = Element.new(elem)
-          stop_re = /#{Regexp.escape(delim)}/
-          found = parse_spans(el, stop_re) do
-            (@src.string[@src.pos-1, 1] !~ /\s/) &&
-              (elem != :em || !@src.match?(/#{Regexp.escape(delim*2)}(?!#{Regexp.escape(delim)})/)) &&
-              (type != '_' || !@src.match?(/#{Regexp.escape(delim)}[[:alpha:]]/)) && el.children.size > 0
-          end
-          [found, el, stop_re]
-        end
-
-        found, el, stop_re = sub_parse.call(result, element)
-        if !found && element == :strong
-          @src.pos = reset_pos - 1
-          found, el, stop_re = sub_parse.call(type, :em)
-        end
-        if found
-          @src.scan(stop_re)
-          @tree.children << el
-        else
-          @src.pos = reset_pos
-          add_text(result)
-        end
-      end
-      Registry.define_parser(:span, :emphasis, EMPHASIS_START, self)
-
-
-      HTML_SPAN_START = /<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--)/
-
-      # Parse the HTML at the current position as span level HTML.
-      def parse_span_html
-        if result = @src.scan(HTML_COMMENT_RE)
-          @tree.children << Element.new(:html_raw, result, :type => :span)
-        elsif result = @src.scan(HTML_INSTRUCTION_RE)
-          @tree.children << Element.new(:html_raw, result, :type => :span)
-        elsif result = @src.scan(HTML_TAG_RE)
-          if HTML_BLOCK_ELEMENTS.include?(@src[1])
-            add_text(result)
-            return
-          end
-          reset_pos = @src.pos
-          attrs = {}
-          @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}
-
-          do_parsing = @doc.options[:parse_span_html]
-          if val = get_parse_type(attrs.delete('markdown'))
-            if val == :block
-              warning("Cannot use block level parsing in span level HTML tag - using default mode")
-            elsif val == :span || val == :default
-              do_parsing = true
-            elsif val == :raw
-              do_parsing = false
-            end
-          end
-          do_parsing = false if HTML_PARSE_AS_RAW.include?(@src[1])
-
-          el = Element.new(:html_element, @src[1], :attr => attrs, :type => :span)
-          stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
-          if @src[4]
-            @tree.children << el
-          elsif HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
-            warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
-            @tree.children << el
-          else
-            if parse_spans(el, stop_re)
-              end_pos = @src.pos
-              @src.scan(stop_re)
-              @tree.children << el
-              if !do_parsing
-                el.children.clear
-                el.children << Element.new(:raw, @src.string[reset_pos...end_pos])
-              end
-            else
-              @src.pos = reset_pos
-              add_text(result)
-            end
-          end
-        else
-          add_text(@src.scan(/./))
-        end
-      end
-      Registry.define_parser(:span, :span_html, HTML_SPAN_START, self)
-
-
-      LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/
-      LINK_INLINE_ID_RE = /\s*?\[(#{LINK_ID_CHARS}+)?\]/
-      LINK_INLINE_TITLE_RE = /\s*?(["'])(.+?)\1\s*?\)/
-
-      LINK_START = /!?\[(?=[^^])/
-
-      # Parse the link at the current scanner position. This method is used to parse normal links as
-      # well as image links.
-      def parse_link
-        result = @src.scan(LINK_START)
-        reset_pos = @src.pos
-
-        link_type = (result =~ /^!/ ? :img : :a)
-
-        # no nested links allowed
-        if link_type == :a && (@tree.type == :img || @tree.type == :a || @stack.any? {|t,s| t && (t.type == :img || t.type == :a)})
-          add_text(result)
-          return
-        end
-        el = Element.new(link_type)
-
-        stop_re = /\]|!?\[/
-        count = 1
-        found = parse_spans(el, stop_re) do
-          case @src.matched
-          when "[", "!["
-            count += 1
-          when "]"
-            count -= 1
-          end
-          count - el.children.select {|c| c.type == :img}.size == 0
-        end
-        if !found || el.children.empty?
-          @src.pos = reset_pos
-          add_text(result)
-          return
-        end
-        alt_text = @src.string[reset_pos...@src.pos]
-        conv_link_id = alt_text.gsub(/(\s|\n)+/m, ' ').gsub(LINK_ID_NON_CHARS, '').downcase
-        @src.scan(stop_re)
-
-        # reference style link or no link url
-        if @src.scan(LINK_INLINE_ID_RE) || !@src.check(/\(/)
-          link_id = (@src[1] || conv_link_id).downcase
-          if @doc.parse_infos[:link_defs].has_key?(link_id)
-            add_link(el, @doc.parse_infos[:link_defs][link_id].first, @doc.parse_infos[:link_defs][link_id].last, alt_text)
-          else
-            warning("No link definition for link ID '#{link_id}' found")
-            @src.pos = reset_pos
-            add_text(result)
-          end
-          return
-        end
-
-        # link url in parentheses
-        if @src.scan(/\(<(.*?)>/)
-          link_url = @src[1]
-          if @src.scan(/\)/)
-            add_link(el, link_url, nil, alt_text)
-            return
-          end
-        else
-          link_url = ''
-          re = /\(|\)|\s/
-          nr_of_brackets = 0
-          while temp = @src.scan_until(re)
-            link_url += temp
-            case @src.matched
-            when /\s/
-              break
-            when '('
-              nr_of_brackets += 1
-            when ')'
-              nr_of_brackets -= 1
-              break if nr_of_brackets == 0
-            end
-          end
-          link_url = link_url[1..-2]
-
-          if nr_of_brackets == 0
-            add_link(el, link_url, nil, alt_text)
-            return
-          end
-        end
-
-        if @src.scan(LINK_INLINE_TITLE_RE)
-          add_link(el, link_url, @src[2], alt_text)
-        else
-          @src.pos = reset_pos
-          add_text(result)
-        end
-      end
-      Registry.define_parser(:span, :link, LINK_START, self)
-
-
-      # This helper methods adds the approriate attributes to the element +el+ of type +a+ or +img+
-      # and the element itself to the <tt>@tree</tt>.
-      def add_link(el, href, title, alt_text = nil)
-        el.options[:attr] ||= {}
-        el.options[:attr]['title'] = title if title
-        if el.type == :a
-          el.options[:attr]['href'] = href
-        else
-          el.options[:attr]['src'] = href
-          el.options[:attr]['alt'] = alt_text
-          el.children.clear
-        end
-        @tree.children << el
-      end
-
-    end
+    autoload :Kramdown, 'kramdown/parser/kramdown'
 
   end
 
 end