require 'corefines'
require 'nokogiri'
using Corefines::Object::try
module Asciidoctor::DocTest
module HTML
##
# Module to be included into +Nokogiri::HTML::Document+
# or +DocumentFragment+ to add {#normalize!} feature.
#
# @example
# Nokogiri::HTML.parse(str).normalize!
# Nokogiri::HTML.fragment(str).normalize!
module Normalizer
##
# Normalizes the HTML document or fragment so it can be easily compared
# with another HTML.
#
# What does it actually do?
#
# * sorts element attributes by name
# * sorts inline CSS declarations inside a +style+ attribute by name
# * removes all blank text nodes (i.e. nodes that contain only whitespace)
# * strips nonsignificant leading and trailing whitespaces around text
# * strips nonsignificant repeated whitespaces
#
# @return [Object] self
#
def normalize!
  # Visit every node of this document/fragment and normalize it in place
  # according to its node type; other node types are left untouched.
  traverse do |current|
    kind = current.type

    if kind == Nokogiri::XML::Node::ELEMENT_NODE
      sort_element_attrs!(current)
      sort_element_style_attr!(current)
    elsif kind == Nokogiri::XML::Node::TEXT_NODE
      if current.blank?
        # Whitespace-only text node: drop it entirely.
        current.remove
      elsif !preformatted_block?(current)
        # Collapse inner whitespace first, then trim the outer edges.
        strip_redundant_spaces!(current)
        strip_spaces_around_text!(current)
      end
    end
  end

  self
end
private
# Sorts attributes of the element +node+ by name.
def sort_element_attrs!(node)
  # Take a snapshot of the attributes ordered by name, then re-insert each one
  # (delete + set) so the element ends up with its attributes in that order.
  ordered = node.attributes.sort_by { |attr_name, _attr| attr_name }

  ordered.each do |attr_name, attr|
    node.delete(attr_name)
    node[attr_name] = attr
  end
end
# Sorts CSS declarations in style attribute of the element +node+ by name.
def sort_element_style_attr!(node)
  # Rewrites the +style+ attribute of +node+ as "name: value;" declarations
  # ordered by property name. Does nothing when the attribute is absent.
  return unless node.has_attribute? 'style'

  decls = node['style'].scan(/([\w-]+):\s*([^;]+);?/)

  # Ruby's sort_by is not stable, so the original position is used as a
  # tie-breaker: repeated declarations of one property keep their relative
  # order (the last one wins in CSS, so the order is significant).
  ordered = decls.each_with_index.sort_by { |(name, _val), idx| [name, idx] }

  # The value capture runs up to the next ';' and may therefore end with
  # whitespace ("10px ;"); trim it so equivalent styles normalize identically.
  node['style'] = ordered.map { |(name, val), _idx| "#{name}: #{val.rstrip};" }.join(' ')
end
# Note: mutating methods like +gsub!+ don't work on node content.
# Strips repeated whitespaces in the text +node+.
def strip_redundant_spaces!(node)
  # Collapse every run of whitespace (spaces, tabs, line breaks) into a single
  # space. The previous pattern /(\s)+/ with '\1' kept the *last* character of
  # each run, so a run ending in a tab or CR left that character behind and
  # otherwise equal texts compared as different.
  # The content is reassigned because in-place String mutation is not
  # reflected in the node.
  node.content = node.content.gsub(/\s+/, ' ')
end
# Strips nonsignificant leading and trailing whitespaces in the text +node+.
def strip_spaces_around_text!(node)
  # Leading whitespace is dropped only when the text sits at the left edge
  # of its block, as decided by text_block_boundary?.
  if text_block_boundary?(node, :left)
    node.content = node.content.lstrip
  end

  # Likewise for trailing whitespace at the right edge.
  if text_block_boundary?(node, :right)
    node.content = node.content.rstrip
  end
end
##
# Returns +true+ if the text +node+ is the first (+:left+), or the last
# (+:right+) inline element of the nearest block element ancestor or
# direct sibling of +<br>+ element.
#
# @return [Boolean]
#
def text_block_boundary?(node, side)
  # Pick the sibling accessor that walks in the requested direction.
  step = { left: :previous_sibling, right: :next_sibling }[side]

  # A text directly next to a <br> always counts as being at a boundary.
  return true if node.send(step).try(:name) == 'br'

  # Climb through the enclosing inline elements: a text or inline neighbour
  # on the way means the text is not at the edge; reaching a non-inline
  # ancestor first means it is.
  loop do
    neighbour = node.send(step)
    return false if neighbour && (neighbour.text? || inline_element?(neighbour))

    node = node.parent
    return true unless inline_element?(node)
  end
end
# Flat list of the names in Nokogiri's HTML_INLINE element description;
# frozen so this shared constant cannot be mutated by accident.
HTML_INLINE_ELEMENTS = Nokogiri::HTML::ElementDescription::HTML_INLINE.flatten.freeze
# @return [Boolean] true if the +node+ represents an inline HTML element.
def inline_element?(node)
  # Only element nodes can be inline elements; anything else is rejected
  # before the name lookup.
  return false unless node.element?

  HTML_INLINE_ELEMENTS.include?(node.name)
end
# @return [Boolean] true if the +node+ is descendant of +
+ node. def preformatted_block?(node) node.path =~ %r{/pre/} end end end end [Nokogiri::HTML::Document, Nokogiri::HTML::DocumentFragment].each do |klass| klass.send :include, Asciidoctor::DocTest::HTML::Normalizer end