lib/kramdown/parser.rb in kramdown-0.1.0 vs lib/kramdown/parser.rb in kramdown-0.2.0
- old
+ new
@@ -82,14 +82,14 @@
#######
private
#######
BLOCK_PARSERS = [:blank_line, :codeblock, :codeblock_fenced, :blockquote, :atx_header,
- :setext_header, :horizontal_rule, :list, :link_definition, :block_html,
+ :setext_header, :horizontal_rule, :list, :definition_list, :link_definition, :block_html,
:footnote_definition, :ald, :block_ial, :extension_block, :eob_marker, :paragraph]
SPAN_PARSERS = [:emphasis, :codespan, :autolink, :span_html, :footnote_marker, :link,
- :span_ial, :html_entity, :typographic_syms, :special_html_chars, :line_break, :escaped_chars,]
+ :span_ial, :html_entity, :typographic_syms, :line_break, :escaped_chars]
# Adapt the object to allow parsing like specified in the options.
def configure_parser
@parsers = {}
BLOCK_PARSERS.each do |name|
@@ -290,10 +290,11 @@
EOB_MARKER = /^\^\s*?\n/
# Parse the EOB marker at the current location.
def parse_eob_marker
@src.pos += @src.matched_size # step over the most recent match (presumably EOB_MARKER, matched by the caller - confirm)
+ @tree.children << Element.new(:eob) # the marker is recorded in the tree as an explicit :eob element
true # always reports the marker as handled
end
Registry.define_parser(:block, :eob_marker, EOB_MARKER, self)
@@ -310,42 +311,44 @@
end
true
end
Registry.define_parser(:block, :paragraph, PARAGRAPH_START, self)
+ HEADER_ID=/(?:[ \t]\{#((?:\w|\d)[\w\d-]*)\})?/
+ SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)#{HEADER_ID}[ \t]*?\n(-|=)+\s*?\n/
- SETEXT_HEADER_START = /^(#{OPT_SPACE}[^ \t].*?)\n(-|=)+\s*?\n/
-
# Parse the Setext header at the current location.
def parse_setext_header
if @tree.children.last && @tree.children.last.type != :blank # only accepted as first element or directly after a :blank element
return false
end
@src.pos += @src.matched_size
- text, level = @src[1].strip, @src[2]
+ text, id, level = @src[1].strip, @src[2], @src[3] # groups: header text, optional {#id}, last underline character
el = Element.new(:header, nil, :level => (level == '-' ? 2 : 1)) # '-' underline => level 2, otherwise ('=') level 1
add_text(text, el)
- el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
+ el.options[:attr] = {'id' => id} if id # an explicitly given id is used as-is
+ el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id # otherwise generate one when :auto_ids is set
@tree.children << el
true
end
Registry.define_parser(:block, :setext_header, SETEXT_HEADER_START, self)
ATX_HEADER_START = /^\#{1,6}/
- ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*\s*?\n/
+ ATX_HEADER_MATCH = /^(\#{1,6})(.+?)\s*?#*#{HEADER_ID}\s*?\n/
# Parse the Atx header at the current location.
def parse_atx_header
if @tree.children.last && @tree.children.last.type != :blank # only accepted as first element or directly after a :blank element
return false
end
result = @src.scan(ATX_HEADER_MATCH) # NOTE(review): result is never checked; if this scan fails the captures read below are nil - confirm
- level, text = @src[1], @src[2].strip
+ level, text, id = @src[1], @src[2].strip, @src[3] # groups: leading '#' run, header text, optional {#id}
el = Element.new(:header, nil, :level => level.length) # header level = number of leading '#' characters
add_text(text, el)
- el.options[:attr] = {:id => generate_id(text)} if @doc.options[:auto_ids]
+ el.options[:attr] = {'id' => id} if id # an explicitly given id is used as-is
+ el.options[:attr] = {'id' => generate_id(text)} if @doc.options[:auto_ids] && !id # otherwise generate one when :auto_ids is set
@tree.children << el
true
end
Registry.define_parser(:block, :atx_header, ATX_HEADER_START, self)
@@ -396,11 +399,11 @@
end
end
Registry.define_parser(:block, :codeblock_fenced, FENCED_CODEBLOCK_START, self)
- HR_START = /^#{OPT_SPACE}(\*|-|_) *\1 *\1 *(\1| )*\n/
+ HR_START = /^#{OPT_SPACE}(\*|-|_)[ \t]*\1[ \t]*\1[ \t]*(\1|[ \t])*\n/
# Parse the horizontal rule at the current location.
def parse_horizontal_rule
@src.pos += @src.matched_size
@tree.children << Element.new(:hr)
@@ -429,27 +432,14 @@
nested_list_found = false
while !@src.eos?
if @src.check(HR_START)
break
elsif @src.scan(list_start_re)
- indentation, content = @src[1].length, @src[2]
item = Element.new(:li)
+ item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
list.children << item
- if content =~ /^\s*\n/
- indentation = 4
- else
- while content =~ /^ *\t/
- temp = content.scan(/^ */).first.length + indentation
- content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
- end
- indentation += content.scan(/^ */).first.length
- end
- content.sub!(/^\s*/, '')
- item.value = content
- indent_re = /^ {#{indentation}}/
- content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/
list_start_re = (type == :ul ? /^( {0,#{[3, indentation - 1].min}}[+*-])([\t| ].*?\n)/ :
/^( {0,#{[3, indentation - 1].min}}\d+\.)([\t| ].*?\n)/)
nested_list_found = false
elsif result = @src.scan(content_re)
result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) }
@@ -489,11 +479,11 @@
(item == list.children.last && item.children.length == 2 && !eob_found))
text = item.children.shift.children.first
text.value += "\n" if !item.children.empty? && item.children[0].type != :blank
item.children.unshift(text)
else
- item.options[:first_as_block] = true
+ item.options[:first_is_block] = true
end
if item.children.last.type == :blank
last = item.children.pop
else
@@ -505,11 +495,115 @@
true
end
Registry.define_parser(:block, :list, LIST_START, self)
+ def parse_first_list_line(indentation, content) # returns [content, indentation, content_re, indent_re] for the first line of an item
+ if content =~ /^\s*\n/ # nothing but whitespace after the marker: use a fixed indentation of 4
+ indentation = 4
+ else
+ while content =~ /^ *\t/ # expand tabs that follow the marker, aligning to 4-column tab stops
+ temp = content.scan(/^ */).first.length + indentation # column reached before the first tab
+ content.sub!(/^( *)(\t+)/) {$1 + " "*(4 - (temp % 4)) + " "*($2.length - 1)*4}
+ end
+ indentation += content.scan(/^ */).first.length # content column = marker width plus the spaces after it
+ end
+ content.sub!(/^\s*/, '') # strips leading whitespace in place, i.e. the passed-in string is modified
+ indent_re = /^ {#{indentation}}/ # exactly +indentation+ leading spaces
+ content_re = /^(?:(?:\t| {4}){#{indentation / 4}} {#{indentation % 4}}|(?:\t| {4}){#{indentation / 4 + 1}}).*?\n/ # a line indented to the content column; a tab counts like 4 spaces
+ [content, indentation, content_re, indent_re]
+ end
+
+
+ DEFINITION_LIST_START = /^(#{OPT_SPACE}:)([\t| ].*?\n)/
+
+ # Parse the definition list at the current location.
+ def parse_definition_list # returns false unless a paragraph precedes, optionally separated by a single "\n" blank
+ children = @tree.children
+ if !children.last || (children.length == 1 && children.last.type != :p ) ||
+ (children.length >= 2 && children[-1].type != :p && (children[-1].type != :blank || children[-1].value != "\n" || children[-2].type != :p))
+ return false
+ end
+
+ first_as_para = false
+ deflist = Element.new(:dl)
+ para = @tree.children.pop # the preceding paragraph supplies the terms
+ if para.type == :blank
+ para = @tree.children.pop # drop the separating blank and take the paragraph before it
+ first_as_para = true # a blank before the first definition marks it as first_as_para
+ end
+ para.children.first.value.split("\n").each do |term| # one :dt per line of the paragraph's first child
+ el = Element.new(:dt)
+ el.children << Element.new(:text, term)
+ deflist.children << el
+ end
+
+ item = nil
+ indent_re = nil
+ content_re = nil # NOTE(review): stays nil until a marker matched; assumes the first loop pass matches def_start_re - confirm
+ def_start_re = DEFINITION_LIST_START
+ while !@src.eos?
+ if @src.scan(def_start_re) # a ':' marker starts a new definition
+ item = Element.new(:dd)
+ item.options[:first_as_para] = first_as_para
+ item.value, indentation, content_re, indent_re = parse_first_list_line(@src[1].length, @src[2])
+ deflist.children << item
+
+ def_start_re = /^( {0,#{[3, indentation - 1].min}}:)([\t| ].*?\n)/ # later markers: at most 3 spaces and fewer than +indentation+
+ first_as_para = false
+ elsif result = @src.scan(content_re) # continuation line belonging to the current definition
+ result.sub!(/^(\t+)/) { " "*4*($1 ? $1.length : 0) } # leading tabs become 4 spaces each
+ result.sub!(indent_re, '') # remove the item's own indentation
+ item.value << result
+ first_as_para = false
+ elsif result = @src.scan(BLANK_LINE)
+ first_as_para = true # a definition starting right after a blank line gets first_as_para set
+ item.value << result
+ else
+ break
+ end
+ end
+
+ last = nil
+ deflist.children.each do |item| # second pass: parse the text collected for every :dd
+ next if item.type == :dt
+
+ parse_blocks(item, item.value)
+ item.value = nil
+ next if item.children.size == 0
+
+ if item.children.last.type == :blank
+ last = item.children.pop # remember a trailing blank; it is re-added to @tree at the end
+ else
+ last = nil
+ end
+ if item.children.first.type == :p && !item.options.delete(:first_as_para) # the :first_as_para option is removed here in any case
+ text = item.children.shift.children.first # replace the leading paragraph by its first child
+ text.value += "\n" if !item.children.empty?
+ item.children.unshift(text)
+ else
+ item.options[:first_is_block] = true
+ end
+ end
+
+ if @tree.children.length >= 1 && @tree.children.last.type == :dl # merge into a directly preceding :dl
+ @tree.children[-1].children += deflist.children
+ elsif @tree.children.length >= 2 && @tree.children[-1].type == :blank && @tree.children[-2].type == :dl # same, with a blank in between
+ @tree.children.pop
+ @tree.children[-1].children += deflist.children
+ else
+ @tree.children << deflist
+ end
+
+ @tree.children << last if !last.nil? # trailing blank taken from the last processed definition, if any
+
+ true
+ end
+ Registry.define_parser(:block, :definition_list, DEFINITION_LIST_START, self)
+
+
PUNCTUATION_CHARS = "_.:,;!?-"
LINK_ID_CHARS = /[a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
LINK_ID_NON_CHARS = /[^a-zA-Z0-9 #{PUNCTUATION_CHARS}]/
LINK_DEFINITION_START = /^#{OPT_SPACE}\[(#{LINK_ID_CHARS}+)\]:[ \t]*(?:<(.*?)>|([^\s]+))[ \t]*?(?:\n?[ \t]*?(["'])(.+?)\4[ \t]*?)?\n/
@@ -610,24 +704,38 @@
#:stopdoc:
# The following regexps are based on the ones used by REXML, with some slight modifications.
#:startdoc:
HTML_COMMENT_RE = /<!--(.*?)-->/m
HTML_INSTRUCTION_RE = /<\?(.*?)\?>/m
- HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/
- HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/
+ HTML_ATTRIBUTE_RE = /\s*(#{REXML::Parsers::BaseParser::UNAME_STR})\s*=\s*(["'])(.*?)\2/m
+ HTML_TAG_RE = /<((?>#{REXML::Parsers::BaseParser::UNAME_STR}))\s*((?>\s+#{REXML::Parsers::BaseParser::UNAME_STR}\s*=\s*(["']).*?\3)*)\s*(\/)?>/m
HTML_TAG_CLOSE_RE = /<\/(#{REXML::Parsers::BaseParser::NAME_STR})\s*>/
- HTML_PARSE_AS_BLOCK = %w{div blockquote table dl ol ul form fieldset}
- HTML_PARSE_AS_SPAN = %w{a address b dd dt em h1 h2 h3 h4 h5 h6 legend li p pre span td th}
- HTML_PARSE_AS_RAW = %w{script math}
- HTML_PARSE_AS = Hash.new {|h,k| h[k] = :span}
+ HTML_PARSE_AS_BLOCK = %w{applet button blockquote colgroup dd div dl fieldset form iframe li
+ map noscript object ol table tbody td th thead tfoot tr ul}
+ HTML_PARSE_AS_SPAN = %w{a abbr acronym address b bdo big cite caption code del dfn dt em
+ h1 h2 h3 h4 h5 h6 i ins kbd label legend optgroup p pre q rb rbc
+ rp rt rtc ruby samp select small span strong sub sup tt var}
+ HTML_PARSE_AS_RAW = %w{script math option textarea}
+
+ HTML_PARSE_AS = Hash.new {|h,k| h[k] = :raw}
HTML_PARSE_AS_BLOCK.each {|i| HTML_PARSE_AS[i] = :block}
HTML_PARSE_AS_SPAN.each {|i| HTML_PARSE_AS[i] = :span}
HTML_PARSE_AS_RAW.each {|i| HTML_PARSE_AS[i] = :raw}
- HTML_BLOCK_ELEMENTS = %w[div p pre h1 h2 h3 h4 h5 h6 hr form fieldset iframe legend script dl ul ol table ins del blockquote address]
+ #:stopdoc:
+ # Some HTML elements like script belong to both categories (i.e. are valid in block and
+ # span HTML) and don't appear therefore!
+ #:startdoc:
+ HTML_SPAN_ELEMENTS = %w{a abbr acronym b big bdo br button cite code del dfn em i img input
+ ins kbd label option q rb rbc rp rt rtc ruby samp select small span
+ strong sub sup textarea tt var}
+ HTML_BLOCK_ELEMENTS = %w{address applet button blockquote caption col colgroup dd div dl dt fieldset
+ form h1 h2 h3 h4 h5 h6 hr iframe legend li map ol optgroup p pre table tbody
+ td th thead tfoot tr ul}
+ HTML_ELEMENTS_WITHOUT_BODY = %w{area br col hr img input}
HTML_BLOCK_START = /^#{OPT_SPACE}<(#{REXML::Parsers::BaseParser::UNAME_STR}|\?|!--|\/)/
# Parse the HTML at the current position as block level HTML.
def parse_block_html
@@ -638,89 +746,129 @@
elsif result = @src.scan(HTML_INSTRUCTION_RE)
@tree.children << Element.new(:html_raw, result, :type => :block)
@src.scan(/.*?\n/)
true
else
- if !((@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && (HTML_BLOCK_ELEMENTS.include?(@src[1]) || @src[1] =~ /:/)) ||
- @src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/))
- return false
+ if (!@src.check(/^#{OPT_SPACE}#{HTML_TAG_RE}/) && !@src.check(/^#{OPT_SPACE}#{HTML_TAG_CLOSE_RE}/)) ||
+ HTML_SPAN_ELEMENTS.include?(@src[1])
+ if @tree.type == :html_element && @tree.options[:parse_type] != :block
+ add_html_text(@src.scan(/.*?\n/), @tree)
+ add_html_text(@src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/), @tree)
+ return true
+ else
+ return false
+ end
end
- @src.scan(/^(.*?)\n/)
- line = @src[1]
- temp = nil
+ current_el = (@tree.type == :html_element ? @tree : nil)
+ @src.scan(/^(#{OPT_SPACE})(.*?)\n/)
+ if current_el && current_el.options[:parse_type] == :raw
+ add_html_text(@src[1], current_el)
+ end
+ line = @src[2]
stack = []
while line.size > 0
index_start_tag, index_close_tag = line.index(HTML_TAG_RE), line.index(HTML_TAG_CLOSE_RE)
- if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag) && (!temp || temp.options[:parse_type] == :block)
+ if index_start_tag && (!index_close_tag || index_start_tag < index_close_tag)
md = line.match(HTML_TAG_RE)
- break if !(HTML_BLOCK_ELEMENTS.include?(md[1]) || md[1] =~ /:/)
-
- add_text(md.pre_match + "\n", temp) if temp
line = md.post_match
+ add_html_text(md.pre_match, current_el) if current_el
+ if HTML_SPAN_ELEMENTS.include?(md[1]) || (current_el && current_el.options[:parse_type] == :span)
+ add_html_text(md.to_s, current_el) if current_el
+ next
+ end
attrs = {}
md[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
- el = Element.new(:html_element, md[1], :attr => attrs, :type => :block,
- :parse_type => HTML_PARSE_AS[md[1]])
- (temp || @tree).children << el
- if !md[4]
+ parse_type = if !current_el || current_el.options[:parse_type] != :raw
+ (@doc.options[:parse_block_html] ? HTML_PARSE_AS[md[1]] : :raw)
+ else
+ :raw
+ end
+ if val = get_parse_type(attrs.delete('markdown'))
+ parse_type = (val == :default ? HTML_PARSE_AS[md[1]] : val)
+ end
+ el = Element.new(:html_element, md[1], :attr => attrs, :type => :block, :parse_type => parse_type)
+ el.options[:no_start_indent] = true if !stack.empty?
+ el.options[:outer_element] = true if !current_el
+ el.options[:parent_is_raw] = true if current_el && current_el.options[:parse_type] == :raw
+
+ @tree.children << el
+ if !md[4] && HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
+ warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
+ elsif !md[4]
@unclosed_html_tags.push(el)
- stack << temp
- temp = el
+ @stack.push(@tree)
+ stack.push(current_el)
+ @tree = current_el = el
end
elsif index_close_tag
md = line.match(HTML_TAG_CLOSE_RE)
- add_text(md.pre_match, temp) if temp
-
line = md.post_match
+ add_html_text(md.pre_match, current_el) if current_el
+
if @unclosed_html_tags.size > 0 && md[1] == @unclosed_html_tags.last.value
el = @unclosed_html_tags.pop
- @tree = @stack.pop unless temp
- temp = stack.pop
- if el.options[:parse_type] == :raw
- raise Kramdown::Error, "Bug: please report!" if el.children.size > 1
- el.children.first.type = :raw if el.children.first
- end
+ @tree = @stack.pop
+ current_el.options[:compact] = true if stack.size > 0
+ current_el = stack.pop || (@tree.type == :html_element ? @tree : nil)
else
- if HTML_BLOCK_ELEMENTS.include?(md[1]) && (temp || @tree).options[:parse_type] == :block
- warning("Found invalidly nested HTML closing tag for '#{md[1]}'")
+ if !HTML_SPAN_ELEMENTS.include?(md[1]) && @tree.options[:parse_type] != :span
+ warning("Found invalidly used HTML closing tag for '#{md[1]}'")
+ elsif current_el
+ add_html_text(md.to_s, current_el)
end
- if temp
- add_text(md.to_s, temp)
- else
- add_text(md.to_s + "\n")
- end
end
else
- if temp
- add_text(line, temp)
+ if current_el
+ line.rstrip! if current_el.options[:parse_type] == :block
+ add_html_text(line + "\n", current_el)
else
- warning("Ignoring characters at the end of an HTML block line")
+ add_text(line + "\n")
end
line = ''
end
end
- if temp && temp.children.last && temp.children.last.type == :text
- temp.children.last.value << "\n"
+ if current_el && (current_el.options[:parse_type] == :span || current_el.options[:parse_type] == :raw)
+ result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
+ last = current_el.children.last
+ result = "\n" + result if last.nil? || (last.type != :text && last.type != :raw) || last.value !~ /\n\Z/
+ add_html_text(result, current_el)
end
- if temp
- if temp.options[:parse_type] == :span || temp.options[:parse_type] == :raw
- result = @src.scan_until(/(?=#{HTML_BLOCK_START})|\Z/)
- add_text(result, temp)
- end
- @stack.push(@tree)
- @tree = temp
- end
true
end
end
Registry.define_parser(:block, :block_html, HTML_BLOCK_START, self)
+ # Return the HTML parse type defined by the string +val+, i.e. :raw when "0", :default when "1",
+ # :span when "span" and :block when "block". If +val+ is nil, +nil+ is returned; any other value
+ # triggers a warning and also returns +nil+.
+ def get_parse_type(val) # maps the value of a 'markdown' attribute to a parse type symbol or nil
+ case val
+ when "0" then :raw
+ when "1" then :default # callers translate :default into their own default handling
+ when "span" then :span
+ when "block" then :block
+ when NilClass then nil # no value given: nil, without a warning
+ else
+ warning("Invalid markdown attribute val '#{val}', using default")
+ nil # unknown values are reported and then treated like a missing value
+ end
+ end
+ # Special version of #add_text which appends +text+ to +tree+ as a :raw element when the parse
+ # type of +tree+ is :raw and as a :text element otherwise.
+ def add_html_text(text, tree)
+ type = (tree.options[:parse_type] == :raw ? :raw : :text) # :raw parse type => :raw node, anything else => :text node
+ if tree.children.last && tree.children.last.type == type # extend the previous node of the same type instead of adding a sibling
+ tree.children.last.value << text # NOTE(review): assumes text is a String; a nil (e.g. from a failed scan) would raise here - confirm callers
+ elsif !text.empty? # never create an empty node
+ tree.children << Element.new(type, text)
+ end
+ end
ESCAPED_CHARS = /\\([\\.*_+-`()\[\]{}#!])/
# Parse the backslash-escaped character at the current location.
@@ -732,51 +880,46 @@
# Parse the HTML entity at the current location.
def parse_html_entity
@src.pos += @src.matched_size # step over the most recent match
- add_text(@src.matched)
+ @tree.children << Element.new(:entity, @src.matched) # the matched text becomes its own :entity element
end
Registry.define_parser(:span, :html_entity, REXML::Parsers::BaseParser::REFERENCE_RE, self)
- SPECIAL_HTML_CHARS = /&|>|</
-
- # Parse the special HTML characters at the current location.
- def parse_special_html_chars
- @src.pos += @src.matched_size
- add_text(@src.matched)
- end
- Registry.define_parser(:span, :special_html_chars, SPECIAL_HTML_CHARS, self)
-
-
LINE_BREAK = /( |\\\\)(?=\n)/
# Parse the line break at the current location.
def parse_line_break
@src.pos += @src.matched_size # consume the marker only; LINE_BREAK checks the newline via lookahead, so it is not consumed
@tree.children << Element.new(:br) # the break is represented by a :br element
end
Registry.define_parser(:span, :line_break, LINE_BREAK, self)
- TYPOGRAPHIC_SYMS = [['---', '—'], ['--', '–'], ['...', '…'],
+ TYPOGRAPHIC_SYMS = [['---', :mdash], ['--', :ndash], ['...', :ellipsis],
['\\<<', '<<'], ['\\>>', '>>'],
- ['<< ', '« '], [' >>', ' »'],
- ['<<', '«'], ['>>', '»']]
+ ['<< ', :laquo_space], [' >>', :raquo_space],
+ ['<<', :laquo], ['>>', :raquo]]
TYPOGRAPHIC_SYMS_SUBST = Hash[*TYPOGRAPHIC_SYMS.flatten]
TYPOGRAPHIC_SYMS_RE = /#{TYPOGRAPHIC_SYMS.map {|k,v| Regexp.escape(k)}.join('|')}/
# Parse the typographic symbols at the current location.
def parse_typographic_syms
@src.pos += @src.matched_size # step over the most recent match
- add_text(TYPOGRAPHIC_SYMS_SUBST[@src.matched].dup)
+ val = TYPOGRAPHIC_SYMS_SUBST[@src.matched] # a Symbol for typographic symbols, a String for the escaped '\<<' and '\>>' forms
+ if val.kind_of?(Symbol)
+ @tree.children << Element.new(:typographic_sym, val) # symbols are stored as :typographic_sym elements
+ else
+ add_text(val.dup) # strings are added as text; dup presumably protects the shared constant from in-place changes - confirm in add_text
+ end
end
Registry.define_parser(:span, :typographic_syms, TYPOGRAPHIC_SYMS_RE, self)
- AUTOLINK_START = /<((mailto|https?|ftps?):.*?|.*?@.*?)>/
+ AUTOLINK_START = /<((mailto|https?|ftps?):.*?|\S*?@\S*?)>/
# Parse the autolink at the current location.
def parse_autolink
@src.pos += @src.matched_size
@@ -914,30 +1057,55 @@
if result = @src.scan(HTML_COMMENT_RE)
@tree.children << Element.new(:html_raw, result, :type => :span)
elsif result = @src.scan(HTML_INSTRUCTION_RE)
@tree.children << Element.new(:html_raw, result, :type => :span)
elsif result = @src.scan(HTML_TAG_RE)
+ if HTML_BLOCK_ELEMENTS.include?(@src[1])
+ add_text(result)
+ return
+ end
reset_pos = @src.pos
attrs = {}
- @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val}
+ @src[2].scan(HTML_ATTRIBUTE_RE).each {|name,sep,val| attrs[name] = val.gsub(/\n+/, ' ')}
+
+ do_parsing = @doc.options[:parse_span_html]
+ if val = get_parse_type(attrs.delete('markdown'))
+ if val == :block
+ warning("Cannot use block level parsing in span level HTML tag - using default mode")
+ elsif val == :span || val == :default
+ do_parsing = true
+ elsif val == :raw
+ do_parsing = false
+ end
+ end
+ do_parsing = false if HTML_PARSE_AS_RAW.include?(@src[1])
+
el = Element.new(:html_element, @src[1], :attr => attrs, :type => :span)
+ stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
if @src[4]
@tree.children << el
+ elsif HTML_ELEMENTS_WITHOUT_BODY.include?(el.value)
+ warning("The HTML tag '#{el.value}' cannot have any content - auto-closing it")
+ @tree.children << el
else
- stop_re = /<\/#{Regexp.escape(@src[1])}\s*>/
if parse_spans(el, stop_re)
+ end_pos = @src.pos
@src.scan(stop_re)
@tree.children << el
+ if !do_parsing
+ el.children.clear
+ el.children << Element.new(:raw, @src.string[reset_pos...end_pos])
+ end
else
@src.pos = reset_pos
add_text(result)
end
end
else
add_text(@src.scan(/./))
end
end
- Registry.define_parser(:span, :span_html, HTML_BLOCK_START, self)
+ Registry.define_parser(:span, :span_html, HTML_SPAN_START, self)
LINK_TEXT_BRACKET_RE = /\\\[|\\\]|\[|\]/
LINK_INLINE_ID_RE = /\s*?\[(#{LINK_ID_CHARS}+)?\]/
LINK_INLINE_TITLE_RE = /\s*?(["'])(.+?)\1\s*?\)/