converter.rb in asciidoctor-doctest-2.0.0.beta.4

- old
+ new

@@ -1,5 +1,6 @@
+# frozen_string_literal: false
 require 'asciidoctor/doctest/html_normalizer'
 require 'corefines'
 require 'htmlbeautifier'
 require 'nokogiri'
 
@@ -13,34 +14,45 @@
         @paragraph_xpath = paragraph_xpath
         super opts
       end
 
       def convert_examples(input_exmpl, output_exmpl)
-        opts = output_exmpl.opts.dup
+        opts = input_exmpl.opts.merge(output_exmpl.opts)
 
         # The header & footer are excluded by default; always enable for document examples.
         opts[:header_footer] ||= input_exmpl.name.start_with?('document')
 
         # When asserting inline examples, defaults to ignore paragraph "wrapper".
         opts[:include] ||= (@paragraph_xpath if input_exmpl.name.start_with? 'inline_')
 
         actual = convert(input_exmpl.content, header_footer: opts[:header_footer])
-          .then { |s| parse_html s, !opts[:header_footer] }
+          .then { |s| parse_html s }
           .then { |h| find_nodes h, opts[:include] }
           .then { |h| remove_nodes h, opts[:exclude] }
-          .then { |h| normalize(h) }
+          .then { |h| normalize h }
 
         expected = normalize(output_exmpl.content)
 
         [actual, expected]
       end
 
       protected
 
       def normalize(content)
         content = parse_html(content) if content.is_a? String
-        HtmlBeautifier.beautify(content.normalize!)
+
+        has_content_type = !!meta_content_type(content)
+        result = HtmlBeautifier.beautify(content.normalize!)
+
+        # XXX: Nokogiri injects meta tag with Content-Type into rendered HTML
+        # document. This nasty hack removes that tag from the result if not
+        # present in the original HTML.
+        if !has_content_type && content.is_a?(Nokogiri::HTML::Document)
+          result.sub!(/^\s*<meta http-equiv="Content-Type" content="[^"]+"\s*\/?>\n/i, '')
+        end
+
+        result
       end
 
       def find_nodes(html, xpaths)
         Array(xpaths).reduce(html) do |htm, xpath|
           # XPath returns NodeSet, but we need DocumentFragment, so convert it again.
@@ -54,11 +66,29 @@
         Array(xpaths).each_with_object(html.clone) do |xpath, htm|
           htm.xpath(xpath).remove
         end
       end
 
-      def parse_html(str, fragment = true)
-        fragment ? ::Nokogiri::HTML.fragment(str) : ::Nokogiri::HTML.parse(str)
+      def parse_html(str)
+        if str =~ /^\s*<!DOCTYPE\s/
+          ::Nokogiri::HTML.parse(str)
+        else
+          ::Nokogiri::HTML.fragment(str)
+        end
+      end
+
+      private
+
+      ##
+      # Searches <tt><meta http-equiv="Content-Type" content="..."></tt>
+      # element in the given HTML document.
+      #
+      # @param html [Nokogiri::HTML::Document, Nokogiri::HTML::DocumentFragment]
+      # @return [Nokogiri::XML::Element, nil]
+      def meta_content_type(html)
+        html.xpath('//meta[@http-equiv and boolean(@content)]').find do |node|
+          node['http-equiv'] =~ /\AContent-Type\z/i
+        end
       end
     end
   end
 end