parser.rb in kramdown-0.2.0

- old
+ new

@@ -82,14 +82,14 @@
       #######
       private
       #######
 
       BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :atx_header,
-                       :setext_header, :horizontal_rule, :list, :link_definition, :block_html,
+                       :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
                        :footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph]
       SPAN_PARSERS =  [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
-                       :span_ial, :html_entity, :typographic_syms, :special_html_chars, :line_break, :escaped_chars,]
+                       :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
 
       # Adapt the object to allow parsing like specified in the options.
       def configure_parser
         @parsers = {}
         BLOCK_PARSERS.each do |name|
@@ -290,10 +290,11 @@
       EOB_MARKER = /^\^\s*?\n/
 
       # Parse the EOB marker at the current location.
       def parse_eob_marker
         @src.pos += @src.matched_size
+        @tree.children << Element.new(:eob)
         true
       end
       Registry.define_parser(:block, :eob_marker, EOB_MARKER, self)
 
 
@@ -310,42 +311,44 @@
         end
         true
       end
       Registry.define_parser(:block, :paragraph, PARAGRAPH_START, self)
 
+      HEADER_ID=/(?:[ \t]\{#((?:\w|\d)[\w\d-]*)\})?/
+      SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
 
-      SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)\n(-|=)+\s*?\n/
-
       # Parse the Setext header at the current location.
       def parse_setext_header
         if @tree.children.last && @tree.children.last.type != :blank
           return false
         end
         @src.pos += @src.matched_size
-        text, level = @src[1].strip, @src[2]
+        text, id, level = @src[1].strip, @src[2], @src[3]
         el = Element.new(:header, nil, :level => (level == '-' ? 2 : 1))
         add_text(text, el)
-        el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
+        el.options[:attr] = {'id' => id} if id
+        el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
         @tree.children << el
         true
       end
       Registry.define_parser(:block, :setext_header, SETEXT_HEADER_START, self)
 
 
       ATX_HEADER_START = /^\#{1,6}/
-      ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*\s*?\n/
+      ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
 
       # Parse the Atx header at the current location.
       def parse_atx_header
         if @tree.children.last && @tree.children.last.type != :blank
           return false
         end
         result = @src.scan(ATX_HEADER_MATCH)
-        level, text = @src[1], @src[2].strip
+        level, text, id = @src[1], @src[2].strip, @src[3]
         el = Element.new(:header, nil, :level => level.length)
         add_text(text, el)
-        el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
+        el.options[:attr] = {'id' => id} if id
+        el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id
         @tree.children << el
         true
       end
       Registry.define_parser(:block, :atx_header, ATX_HEADER_START, self)
 
@@ -396,11 +399,11 @@
         end
       end
       Registry.define_parser(:block, :codeblock_fenced, FENCED_CODEBLOCK_START, self)
 
 
-      HR_START = /^#{OPT_SPACE}(\*|-|_) *\1 *\1 *(\1| )*\n/
+      HR_START = /^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
 
       # Parse the horizontal rule at the current location.
       def parse_horizontal_rule
         @src.pos += @src.matched_size
         @tree.children << Element.new(:hr)
@@ -429,27 +432,14 @@
         nested_list_found = false
         while !@src.eos?
           if @src.check(HR_START)
             break
           elsif @src.scan(list_start_re)
-            indentation, content = @src[1].length, @src[2]
             item = Element.new(:li)
+            item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
             list.children << item
-            if content =~ /^\s*\n/
-              indentation = 4
-            else
-              while content =~ /^ *\t/
-                temp = content.scan(/^ */).first.length + indentation
-                content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
-              end
-              indentation += content.scan(/^ */).first.length
-            end
-            content.sub!(/^\s*/, '')
-            item.value = content
 
-            indent_re = /^ {#{indentation}}/
-            content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
             list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
                              /^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
             nested_list_found = false
           elsif result = @src.scan(content_re)
             result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
@@ -489,11 +479,11 @@
                                                 (item == list.children.last && item.children.length == 2 && !eob_found))
             text = item.children.shift.children.first
             text.value += "\n" if !item.children.empty? && item.children[0].type != :blank
             item.children.unshift(text)
           else
-            item.options[:first_as_block] = true
+            item.options[:first_is_block] = true
           end
 
           if item.children.last.type == :blank
             last = item.children.pop
           else
@@ -505,11 +495,115 @@
 
         true
       end
       Registry.define_parser(:block, :list, LIST_START, self)
 
+      def parse_first_list_line(indentation, content)
+        if content =~ /^\s*\n/
+          indentation = 4
+        else
+          while content =~ /^ *\t/
+            temp = content.scan(/^ */).first.length + indentation
+            content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
+          end
+          indentation += content.scan(/^ */).first.length
+        end
+        content.sub!(/^\s*/, '')
 
+        indent_re = /^ {#{indentation}}/
+        content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
+        [content, indentation, content_re, indent_re]
+      end
+
+
+      DEFINITION_LIST_START = /^(#{OPT_SPACE}:)([\t| ].*?\n)/
+
+      # Parse the definition list at the current location.
+      def parse_definition_list
+        children = @tree.children
+        if !children.last || (children.length == 1 && children.last.type != :p ) ||
+            (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
+          return false
+        end
+
+        first_as_para = false
+        deflist = Element.new(:dl)
+        para = @tree.children.pop
+        if para.type == :blank
+          para = @tree.children.pop
+          first_as_para = true
+        end
+        para.children.first.value.split("\n").each do |term|
+          el = Element.new(:dt)
+          el.children << Element.new(:text, term)
+          deflist.children << el
+        end
+
+        item = nil
+        indent_re = nil
+        content_re = nil
+        def_start_re = DEFINITION_LIST_START
+        while !@src.eos?
+          if @src.scan(def_start_re)
+            item = Element.new(:dd)
+            item.options[:first_as_para] = first_as_para
+            item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
+            deflist.children << item
+
+            def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/
+            first_as_para = false
+          elsif result = @src.scan(content_re)
+            result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
+            result.sub!(indent_re, '')
+            item.value << result
+            first_as_para = false
+          elsif result = @src.scan(BLANK_LINE)
+            first_as_para = true
+            item.value << result
+          else
+            break
+          end
+        end
+
+        last = nil
+        deflist.children.each do |item|
+          next if item.type == :dt
+
+          parse_blocks(item, item.value)
+          item.value = nil
+          next if item.children.size == 0
+
+          if item.children.last.type == :blank
+            last = item.children.pop
+          else
+            last = nil
+          end
+          if item.children.first.type == :p && !item.options.delete(:first_as_para)
+            text = item.children.shift.children.first
+            text.value += "\n" if !item.children.empty?
+            item.children.unshift(text)
+          else
+            item.options[:first_is_block] = true
+          end
+        end
+
+        if @tree.children.length >= 1 && @tree.children.last.type == :dl
+          @tree.children[-1].children += deflist.children
+        elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl
+          @tree.children.pop
+          @tree.children[-1].children += deflist.children
+        else
+          @tree.children << deflist
+        end
+
+        @tree.children << last if !last.nil?
+
+        true
+      end
+      Registry.define_parser(:block, :definition_list, DEFINITION_LIST_START, self)
+
+
       PUNCTUATION_CHARS = "_.:,;!?-"
       LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
       LINK_ID_NON_CHARS = /[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
       LINK_DEFINITION_START = /^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^\s]+))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/
 
@@ -610,24 +704,38 @@
       #:stopdoc:
       # The following regexps are based on the ones used by REXML, with some slight modifications.
       #:startdoc:
       HTML_COMMENT_RE = /<!--(.*?)-->/m
       HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
-      HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/
-      HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/
+      HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
+      HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
       HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/
 
 
-      HTML_PARSE_AS_BLOCK = %w{div blockquote table dl ol ul form fieldset}
-      HTML_PARSE_AS_SPAN  = %w{a address b dd dt em h1 h2 h3 h4 h5 h6 legend li p pre span td th}
-      HTML_PARSE_AS_RAW   = %w{script math}
-      HTML_PARSE_AS = Hash.new {|h,k| h[k] = :span}
+      HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
+                               map noscript object ol table tbody td th thead tfoot tr ul}
+      HTML_PARSE_AS_SPAN  = %w{a abbr acronym address b bdo big cite caption code del dfn dt em
+                               h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p pre q rb rbc
+                               rp rt rtc ruby samp select small span strong sub sup tt var}
+      HTML_PARSE_AS_RAW   = %w{script math option textarea}
+
+      HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
       HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
       HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
       HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
 
-      HTML_BLOCK_ELEMENTS = %w[div p pre h1 h2 h3 h4 h5 h6 hr form fieldset iframe legend script dl ul ol table ins del blockquote address]
+      #:stopdoc:
+      # Some HTML elements like script belong to both categories (i.e. are valid in block and
+      # span HTML) and therefore don't appear in either list!
+      #:startdoc:
+      HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
+                              ins kbd label option q rb rbc rp rt rtc ruby samp select small span
+                              strong sub sup textarea tt var}
+      HTML_BLOCK_ELEMENTS = %w{address applet button blockquote caption col colgroup dd div dl dt fieldset
+                               form h1 h2 h3 h4 h5 h6 hr iframe legend li map ol optgroup p pre table tbody
+                               td th thead tfoot tr ul}
+      HTML_ELEMENTS_WITHOUT_BODY = %w{area br col hr img input}
 
       HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
 
       # Parse the HTML at the current position as block level HTML.
       def parse_block_html
@@ -638,89 +746,129 @@
         elsif result = @src.scan(HTML_INSTRUCTION_RE)
           @tree.children << Element.new(:html_raw, result, :type => :block)
           @src.scan(/.*?\n/)
           true
         else
-          if !((@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && (HTML_BLOCK_ELEMENTS.include?(@src[1]) || @src[1] =~ /:/)) ||
-               @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/))
-            return false
+          if (!@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !@src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/)) ||
+              HTML_SPAN_ELEMENTS.include?(@src[1])
+            if @tree.type == :html_element && @tree.options[:parse_type] != :block
+              add_html_text(@src.scan(/.*?\n/), @tree)
+              add_html_text(@src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/), @tree)
+              return true
+            else
+              return false
+            end
           end
 
-          @src.scan(/^(.*?)\n/)
-          line = @src[1]
-          temp = nil
+          current_el = (@tree.type == :html_element ? @tree : nil)
+          @src.scan(/^(#{OPT_SPACE})(.*?)\n/)
+          if current_el && current_el.options[:parse_type] == :raw
+            add_html_text(@src[1], current_el)
+          end
+          line = @src[2]
           stack = []
 
           while line.size > 0
             index_start_tag, index_close_tag = line.index(HTML_TAG_RE), line.index(HTML_TAG_CLOSE_RE)
-            if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag) && (!temp || temp.options[:parse_type] == :block)
+            if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag)
               md = line.match(HTML_TAG_RE)
-              break if !(HTML_BLOCK_ELEMENTS.include?(md[1]) || md[1] =~ /:/)
-
-              add_text(md.pre_match + "\n", temp) if temp
               line = md.post_match
+              add_html_text(md.pre_match, current_el) if current_el
+              if HTML_SPAN_ELEMENTS.include?(md[1]) || (current_el && current_el.options[:parse_type] == :span)
+                add_html_text(md.to_s, current_el) if current_el
+                next
+              end
 
               attrs = {}
               md[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
-              el = Element.new(:html_element, md[1], :attr => attrs, :type => :block,
-                               :parse_type => HTML_PARSE_AS[md[1]])
 
-              (temp || @tree).children << el
-              if !md[4]
+              parse_type = if !current_el || current_el.options[:parse_type] != :raw
+                             (@doc.options[:parse_block_html] ? HTML_PARSE_AS[md[1]] : :raw)
+                           else
+                             :raw
+                           end
+              if val = get_parse_type(attrs.delete('markdown'))
+                parse_type = (val == :default ? HTML_PARSE_AS[md[1]] : val)
+              end
+              el = Element.new(:html_element, md[1], :attr => attrs, :type => :block, :parse_type => parse_type)
+              el.options[:no_start_indent] = true if !stack.empty?
+              el.options[:outer_element] = true if !current_el
+              el.options[:parent_is_raw] = true if current_el && current_el.options[:parse_type] == :raw
+
+              @tree.children << el
+              if !md[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
+                warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
+              elsif !md[4]
                 @unclosed_html_tags.push(el)
-                stack << temp
-                temp = el
+                @stack.push(@tree)
+                stack.push(current_el)
+                @tree = current_el = el
               end
             elsif index_close_tag
               md = line.match(HTML_TAG_CLOSE_RE)
-              add_text(md.pre_match, temp) if temp
-
               line = md.post_match
+              add_html_text(md.pre_match, current_el) if current_el
+
               if @unclosed_html_tags.size > 0 && md[1] == @unclosed_html_tags.last.value
                 el = @unclosed_html_tags.pop
-                @tree = @stack.pop unless temp
-                temp = stack.pop
-                if el.options[:parse_type] == :raw
-                  raise Kramdown::Error, "Bug: please report!" if el.children.size > 1
-                  el.children.first.type = :raw if el.children.first
-                end
+                @tree = @stack.pop
+                current_el.options[:compact] = true if stack.size > 0
+                current_el = stack.pop || (@tree.type == :html_element ? @tree : nil)
               else
-                if HTML_BLOCK_ELEMENTS.include?(md[1]) && (temp || @tree).options[:parse_type] == :block
-                  warning("Found invalidly nested HTML closing tag for '#{md[1]}'")
+                if !HTML_SPAN_ELEMENTS.include?(md[1]) && @tree.options[:parse_type] != :span
+                  warning("Found invalidly used HTML closing tag for '#{md[1]}'")
+                elsif current_el
+                  add_html_text(md.to_s, current_el)
                 end
-                if temp
-                  add_text(md.to_s, temp)
-                else
-                  add_text(md.to_s + "\n")
-                end
               end
             else
-              if temp
-                add_text(line, temp)
+              if current_el
+                line.rstrip! if current_el.options[:parse_type] == :block
+                add_html_text(line + "\n", current_el)
               else
-                warning("Ignoring characters at the end of an HTML block line")
+                add_text(line + "\n")
               end
               line = ''
             end
           end
-          if temp && temp.children.last && temp.children.last.type == :text
-            temp.children.last.value << "\n"
+          if current_el && (current_el.options[:parse_type] == :span || current_el.options[:parse_type] == :raw)
+            result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
+            last = current_el.children.last
+            result = "\n" + result if last.nil? || (last.type != :text && last.type != :raw) || last.value !~ /\n\Z/
+            add_html_text(result, current_el)
           end
-          if temp
-            if temp.options[:parse_type] == :span || temp.options[:parse_type] == :raw
-              result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
-              add_text(result, temp)
-            end
-            @stack.push(@tree)
-            @tree = temp
-          end
           true
         end
       end
       Registry.define_parser(:block, :block_html, HTML_BLOCK_START, self)
 
+      # Return the HTML parse type defined by the string +val+, i.e. +:raw+ when "0", +:default+
+      # when "1", +:span+ when "span" and +:block+ when "block". If +val+ is nil, +nil+ is returned;
+      # any other value triggers a warning and also returns +nil+.
+      def get_parse_type(val)
+        case val
+        when "0" then :raw
+        when "1" then :default
+        when "span" then :span
+        when "block" then :block
+        when NilClass then nil
+        else
+          warning("Invalid markdown attribute val '#{val}', using default")
+          nil
+        end
+      end
 
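+      # Special version of #add_text which adds +text+ to +tree+ as a :raw element if the parse
+      # type of +tree+ is :raw and as a :text element otherwise. The text is appended to the last
+      # child of +tree+ if that child already has the matching type.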
+      def add_html_text(text, tree)
+        type = (tree.options[:parse_type] == :raw ? :raw : :text)
+        if tree.children.last && tree.children.last.type == type
+          tree.children.last.value << text
+        elsif !text.empty?
+          tree.children << Element.new(type, text)
+        end
+      end
 
 
       ESCAPED_CHARS = /\\([\\.*_+-`()\[\]{}#!])/
 
       # Parse the backslash-escaped character at the current location.
@@ -732,51 +880,46 @@
 
 
       # Parse the HTML entity at the current location.
       def parse_html_entity
         @src.pos += @src.matched_size
-        add_text(@src.matched)
+        @tree.children << Element.new(:entity, @src.matched)
       end
       Registry.define_parser(:span, :html_entity, REXML::Parsers::BaseParser::REFERENCE_RE, self)
 
 
-      SPECIAL_HTML_CHARS = /&|>|</
-
-      # Parse the special HTML characters at the current location.
-      def parse_special_html_chars
-        @src.pos += @src.matched_size
-        add_text(@src.matched)
-      end
-      Registry.define_parser(:span, :special_html_chars, SPECIAL_HTML_CHARS, self)
-
-
       LINE_BREAK = /(  |\\\\)(?=\n)/
 
       # Parse the line break at the current location.
       def parse_line_break
         @src.pos += @src.matched_size
         @tree.children << Element.new(:br)
       end
       Registry.define_parser(:span, :line_break, LINE_BREAK, self)
 
 
-      TYPOGRAPHIC_SYMS = [['---', '&mdash;'], ['--', '&ndash;'], ['...', '&hellip;'],
+      TYPOGRAPHIC_SYMS = [['---', :mdash], ['--', :ndash], ['...', :ellipsis],
                           ['\\<<', '&lt;&lt;'], ['\\>>', '&gt;&gt;'],
-                          ['<< ', '&laquo;&nbsp;'], [' >>', '&nbsp;&raquo;'],
-                          ['<<', '&laquo;'], ['>>', '&raquo;']]
+                          ['<< ', :laquo_space], [' >>', :raquo_space],
+                          ['<<', :laquo], ['>>', :raquo]]
       TYPOGRAPHIC_SYMS_SUBST = Hash[*TYPOGRAPHIC_SYMS.flatten]
       TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/
 
       # Parse the typographic symbols at the current location.
       def parse_typographic_syms
         @src.pos += @src.matched_size
-        add_text(TYPOGRAPHIC_SYMS_SUBST[@src.matched].dup)
+        val = TYPOGRAPHIC_SYMS_SUBST[@src.matched]
+        if val.kind_of?(Symbol)
+          @tree.children << Element.new(:typographic_sym, val)
+        else
+          add_text(val.dup)
+        end
       end
       Registry.define_parser(:span, :typographic_syms, TYPOGRAPHIC_SYMS_RE, self)
 
 
-      AUTOLINK_START = /<((mailto|https?|ftps?):.*?|.*?@.*?)>/
+      AUTOLINK_START = /<((mailto|https?|ftps?):.*?|\S*?@\S*?)>/
 
       # Parse the autolink at the current location.
       def parse_autolink
         @src.pos += @src.matched_size
 
@@ -914,30 +1057,55 @@
         if result = @src.scan(HTML_COMMENT_RE)
           @tree.children << Element.new(:html_raw, result, :type => :span)
         elsif result = @src.scan(HTML_INSTRUCTION_RE)
           @tree.children << Element.new(:html_raw, result, :type => :span)
         elsif result = @src.scan(HTML_TAG_RE)
+          if HTML_BLOCK_ELEMENTS.include?(@src[1])
+            add_text(result)
+            return
+          end
           reset_pos = @src.pos
           attrs = {}
-          @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
+          @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}
+
+          do_parsing = @doc.options[:parse_span_html]
+          if val = get_parse_type(attrs.delete('markdown'))
+            if val == :block
+              warning("Cannot use block level parsing in span level HTML tag - using default mode")
+            elsif val == :span || val == :default
+              do_parsing = true
+            elsif val == :raw
+              do_parsing = false
+            end
+          end
+          do_parsing = false if HTML_PARSE_AS_RAW.include?(@src[1])
+
           el = Element.new(:html_element, @src[1], :attr => attrs, :type => :span)
+          stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
           if @src[4]
             @tree.children << el
+          elsif HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
+            warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
+            @tree.children << el
           else
-            stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
             if parse_spans(el, stop_re)
+              end_pos = @src.pos
               @src.scan(stop_re)
               @tree.children << el
+              if !do_parsing
+                el.children.clear
+                el.children << Element.new(:raw, @src.string[reset_pos...end_pos])
+              end
             else
               @src.pos = reset_pos
               add_text(result)
             end
           end
         else
           add_text(@src.scan(/./))
         end
       end
-      Registry.define_parser(:span, :span_html, HTML_BLOCK_START, self)
+      Registry.define_parser(:span, :span_html, HTML_SPAN_START, self)
 
 
       LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/
       LINK_INLINE_ID_RE = /\s*?\[(#{LINK_ID_CHARS}+)?\]/
       LINK_INLINE_TITLE_RE = /\s*?(["'])(.+?)\1\s*?\)/