require 'corefines'
require 'nokogiri'

using Corefines::Object::try

module Asciidoctor::DocTest
  module HTML
    ##
    # Module to be included into +Nokogiri::HTML::Document+
    # or +DocumentFragment+ to add {#normalize!} feature.
    #
    # @example
    #   Nokogiri::HTML.parse(str).normalize!
    #   Nokogiri::HTML.fragment(str).normalize!
    module Normalizer

      ##
      # Normalizes the HTML document or fragment so it can be easily compared
      # with another HTML.
      #
      # What does it actually do?
      #
      # * sorts element attributes by name
      # * sorts inline CSS declarations inside a +style+ attribute by name
      # * removes all blank text nodes (i.e. node that contain just whitespaces)
      # * strips nonsignificant leading and trailing whitespaces around text
      # * strips nonsignificant repeated whitespaces
      #
      # Text nodes nested inside a +<pre>+ element keep their whitespace
      # untouched (blank ones are still removed).
      #
      # @return [Object] self
      #
      def normalize!
        traverse do |node|
          case node.type
          when Nokogiri::XML::Node::ELEMENT_NODE
            sort_element_attrs! node
            sort_element_style_attr! node

          when Nokogiri::XML::Node::TEXT_NODE
            # Remove text node that contains whitespaces only.
            if node.blank?
              node.remove
            elsif !preformatted_block? node
              strip_redundant_spaces! node
              strip_spaces_around_text! node
            end
          end
        end
        self
      end

      private

      # Sorts attributes of the element +node+ by name.
      #
      # Every attribute is deleted and set again in sorted order, so the
      # element ends up with its attributes ordered by name.
      def sort_element_attrs!(node)
        node.attributes.sort_by(&:first).each do |name, value|
          node.delete(name)
          node[name] = value
        end
      end

      # Sorts CSS declarations in style attribute of the element +node+ by name.
      #
      # The attribute is rewritten as +name: value;+ pairs joined by a single
      # space. Elements without a +style+ attribute are left alone.
      def sort_element_style_attr!(node)
        return unless node.has_attribute? 'style'

        decls = node['style'].scan(/([\w-]+):\s*([^;]+);?/).sort_by(&:first)
        node['style'] = decls.map { |name, val| "#{name}: #{val};" }.join(' ')
      end

      # Note: mutable methods like +gsub!+ don't work on node content.

      # Strips repeated whitespaces in the text +node+.
      #
      # Newlines are turned into spaces first, then every run of whitespace
      # characters is collapsed into a single whitespace character.
      def strip_redundant_spaces!(node)
        node.content = node.content.gsub("\n", ' ').gsub(/(\s)+/, '\1')
      end

      # Strips nonsignificant leading and trailing whitespaces in the text +node+.
      def strip_spaces_around_text!(node)
        node.content = node.content.lstrip if text_block_boundary? node, :left
        node.content = node.content.rstrip if text_block_boundary? node, :right
      end

      ##
      # Returns +true+ if the text +node+ is the first (+:left+), or the last
      # (+:right+) inline element of the nearest block element ancestor or
      # direct sibling of +<br>+ element.
      #
      # @param node the text node to examine.
      # @param side [Symbol] +:left+ or +:right+.
      # @return [Boolean]
      #
      def text_block_boundary?(node, side)
        method = { left: :previous_sibling, right: :next_sibling }[side]

        # +try+ guards against a missing sibling (nil).
        return true if node.send(method).try(:name) == 'br'

        # Climb through the enclosing inline elements; a text or inline
        # sibling on the given side at any level means this is not a boundary.
        loop do
          if (sibling = node.send(method))
            return false if sibling.text? || inline_element?(sibling)
          end
          node = node.parent
          return true unless inline_element? node
        end
      end

      HTML_INLINE_ELEMENTS = Nokogiri::HTML::ElementDescription::HTML_INLINE.flatten

      # @return [Boolean] true if the +node+ represents an inline HTML element.
      def inline_element?(node)
        node.element? && HTML_INLINE_ELEMENTS.include?(node.name)
      end

      # Checks the ancestor chain instead of matching the node's path against
      # +/pre/+: a path step gets a positional predicate (e.g. +/pre[2]/+) when
      # the element has same-named siblings, which a literal +/pre/+ match
      # would miss.
      #
      # @return [Boolean] true if the +node+ is descendant of +<pre>+ node.
      def preformatted_block?(node)
        node.ancestors.any? { |ancestor| ancestor.name == 'pre' }
      end
    end
  end
end

# Mix the normalizer into both Nokogiri HTML container classes so that parsed
# documents and fragments alike respond to +normalize!+.
normalizer = Asciidoctor::DocTest::HTML::Normalizer
html_containers = [Nokogiri::HTML::Document, Nokogiri::HTML::DocumentFragment]
html_containers.each { |container| container.send(:include, normalizer) }