lib/compare-xml.rb in compare-xml-0.5.2 vs lib/compare-xml.rb in compare-xml-0.6
- old
+ new
@@ -3,153 +3,161 @@
module CompareXML
# default options used by the module; all of these can be overridden
DEFAULTS_OPTS = {
+ # when true, trims and collapses whitespace in text nodes and comments to a single space
+ # when false, all whitespace is preserved as it is without any changes
+ collapse_whitespace: true,
+
# when true, attribute order is not important (all attributes are sorted before comparison)
# when false, attributes are compared in order and comparison stops on the first mismatch
ignore_attr_order: true,
+ # contains an array of user specified strings that is used to ignore any attributes
+ # whose content contains a string from this array (e.g. "good automobile" contains "mobile")
+ ignore_attr_content: [],
+
# contains an array of user-specified CSS rules used to perform attribute exclusions
# for this to work, a CSS rule MUST contain the attribute to be excluded,
# i.e. a[href] will exclude all "href" attributes contained in <a> tags.
- ignore_attrs: {},
+ ignore_attrs: [],
# when true ignores XML and HTML comments
# when false, all comments are compared to their counterparts
ignore_comments: true,
# contains an array of user-specified CSS rules used to perform node exclusions
- ignore_nodes: {},
+ ignore_nodes: [],
# when true, ignores all text nodes (although blank text nodes are always ignored)
# when false, all text nodes are compared to their counterparts (except the empty ones)
ignore_text_nodes: false,
- # when true, trims and collapses whitespace in text nodes and comments to a single space
- # when false, all whitespace is preserved as it is without any changes
- collapse_whitespace: true,
-
# when true, provides a list of all error messages encountered in comparisons
# when false, execution stops when the first error is encountered with no error messages
verbose: false
}
- # used internally only in order to differentiate equivalence for inequivalence
- EQUIVALENT = 1
- # a list of all possible inequivalence types for nodes
- # these are returned in the errors array to differentiate error types.
- MISSING_ATTRIBUTE = 2 # attribute is missing its counterpart
- MISSING_NODE = 3 # node is missing its counterpart
- UNEQUAL_ATTRIBUTES = 4 # attributes are not equal
- UNEQUAL_COMMENTS = 5 # comment contents are not equal
- UNEQUAL_DOCUMENTS = 6 # document types are not equal
- UNEQUAL_ELEMENTS = 7 # nodes have the same type but are not equal
- UNEQUAL_NODES_TYPES = 8 # nodes do not have the same type
- UNEQUAL_TEXT_CONTENTS = 9 # text contents are not equal
+ class << self
+ # used internally only in order to differentiate equivalence from inequivalence
+ EQUIVALENT = 1
- class << self
+ # a list of all possible inequivalence types for nodes
+ # these are returned in the differences array to differentiate error types.
+ MISSING_ATTRIBUTE = 2 # attribute is missing its counterpart
+ MISSING_NODE = 3 # node is missing its counterpart
+ UNEQUAL_ATTRIBUTES = 4 # attributes are not equal
+ UNEQUAL_COMMENTS = 5 # comment contents are not equal
+ UNEQUAL_DOCUMENTS = 6 # document types are not equal
+ UNEQUAL_ELEMENTS = 7 # nodes have the same type but are not equal
+ UNEQUAL_NODES_TYPES = 8 # nodes do not have the same type
+ UNEQUAL_TEXT_CONTENTS = 9 # text node contents are not equal
##
# Determines whether two XML documents or fragments are equal to each other.
# The two parameters could be any type of XML documents, or fragments
# or node sets or even text nodes - any subclass of Nokogiri::XML::Node.
#
- # @param [Nokogiri::XML::Node] n1 left attribute
- # @param [Nokogiri::XML::Node] n2 right attribute
+ # @param [Nokogiri::XML::Node, Nokogiri::XML::NodeSet] n1 left node or node set
+ # @param [Nokogiri::XML::Node, Nokogiri::XML::NodeSet] n2 right node or node set
# @param [Hash] opts user-overridden options
#
- # @return true if equal, [Array] errors otherwise
+ # @return true if equal, [Array] differences otherwise
#
def equivalent?(n1, n2, opts = {})
- opts, errors = DEFAULTS_OPTS.merge(opts), []
- result = compareNodes(n1, n2, opts, errors)
- opts[:verbose] ? errors : result == EQUIVALENT
+ opts, differences = DEFAULTS_OPTS.merge(opts), []
+ result = compareNodes(n1, n2, opts, differences)
+ opts[:verbose] ? differences : result == EQUIVALENT
end
private
##
# Compares two nodes for equivalence. The nodes could be any subclass
# of Nokogiri::XML::Node including node sets and document fragments.
#
- # @param [Nokogiri::XML::Node] n1 left attribute
- # @param [Nokogiri::XML::Node] n2 right attribute
+ # @param [Nokogiri::XML::Node] n1 left node
+ # @param [Nokogiri::XML::Node] n2 right node
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareNodes(n1, n2, opts, errors, status = EQUIVALENT)
+ def compareNodes(n1, n2, opts, differences, status = EQUIVALENT)
if n1.class == n2.class
case n1
when Nokogiri::XML::Comment
- compareCommentNodes(n1, n2, opts, errors)
+ compareCommentNodes(n1, n2, opts, differences)
when Nokogiri::HTML::Document
- compareDocumentNodes(n1, n2, opts, errors)
+ compareDocumentNodes(n1, n2, opts, differences)
when Nokogiri::XML::Element
- status = compareElementNodes(n1, n2, opts, errors)
+ status = compareElementNodes(n1, n2, opts, differences)
when Nokogiri::XML::Text
- status = compareTextNodes(n1, n2, opts, errors)
+ status = compareTextNodes(n1, n2, opts, differences)
else
- status = compareChildren(n1.children, n2.children, opts, errors)
+ if n1.is_a?(Nokogiri::XML::Node) || n1.is_a?(Nokogiri::XML::NodeSet)
+ status = compareChildren(n1.children, n2.children, opts, differences)
+ else
+ raise 'Comparison only allowed between objects of type Nokogiri::XML::Node and Nokogiri::XML::NodeSet.'
+ end
end
- elsif n1.nil?
+ elsif n1.nil? || n2.nil?
status = MISSING_NODE
- errors << [nodePath(n2), nil, status, n2.name, nodePath(n2)] if opts[:verbose]
- elsif n2.nil?
- status = MISSING_NODE
- errors << [nodePath(n1), n1.name, status, nil, nodePath(n1)] if opts[:verbose]
+ addDifference(n1, n2, n1, n2, opts, differences)
else
status = UNEQUAL_NODES_TYPES
- errors << [nodePath(n1), n1.class, status, n2.class, nodePath(n2)] if opts[:verbose]
+ addDifference(n1, n2, n1, n2, opts, differences)
end
status
end
##
# Compares two nodes of type Nokogiri::XML::Comment.
#
- # @param [Nokogiri::XML::Comment] n1 left attribute
- # @param [Nokogiri::XML::Comment] n2 right attribute
+ # @param [Nokogiri::XML::Comment] n1 left comment
+ # @param [Nokogiri::XML::Comment] n2 right comment
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareCommentNodes(n1, n2, opts, errors, status = EQUIVALENT)
+ def compareCommentNodes(n1, n2, opts, differences, status = EQUIVALENT)
return true if opts[:ignore_comments]
t1, t2 = n1.content, n2.content
t1, t2 = collapse(t1), collapse(t2) if opts[:collapse_whitespace]
unless t1 == t2
status = UNEQUAL_COMMENTS
- errors << [nodePath(n1.parent), t1, status, t2, nodePath(n2.parent)] if opts[:verbose]
+ addDifference(n1, n2, t1, t2, opts, differences)
end
status
end
##
# Compares two nodes of type Nokogiri::HTML::Document.
#
- # @param [Nokogiri::XML::Document] n1 left attribute
- # @param [Nokogiri::XML::Document] n2 right attribute
+ # @param [Nokogiri::XML::Document] n1 left document
+ # @param [Nokogiri::XML::Document] n2 right document
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareDocumentNodes(n1, n2, opts, errors, status = EQUIVALENT)
+ def compareDocumentNodes(n1, n2, opts, differences, status = EQUIVALENT)
if n1.name == n2.name
- status = compareChildren(n1.children, n2.children, opts, errors)
+ status = compareChildren(n1.children, n2.children, opts, differences)
else
status == UNEQUAL_DOCUMENTS
- errors << [nodePath(n1), n1, status, n2, nodePath(n2)] if opts[:verbose]
+ addDifference(n1, n2, n1, n2, opts, differences)
end
status
end
@@ -157,23 +165,24 @@
# Compares two sets of Nokogiri::XML::NodeSet elements.
#
# @param [Nokogiri::XML::NodeSet] n1_set left set of Nokogiri::XML::Node elements
# @param [Nokogiri::XML::NodeSet] n2_set right set of Nokogiri::XML::Node elements
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareChildren(n1_set, n2_set, opts, errors, status = EQUIVALENT)
+ def compareChildren(n1_set, n2_set, opts, differences, status = EQUIVALENT)
i = 0; j = 0
while i < n1_set.length || j < n2_set.length
if !n1_set[i].nil? && nodeExcluded?(n1_set[i], opts)
i += 1 # increment counter if left node is excluded
elsif !n2_set[j].nil? && nodeExcluded?(n2_set[j], opts)
j += 1 # increment counter if right node is excluded
else
- result = compareNodes(n1_set[i], n2_set[j], opts, errors)
+ result = compareNodes(n1_set[i], n2_set[j], opts, differences)
status = result unless result == EQUIVALENT
# return false so that this subtree could halt comparison on error
# but neighbours of parents' subtrees could still be compared (in verbose mode)
return false if status == UNEQUAL_NODES_TYPES || status == UNEQUAL_ELEMENTS
@@ -192,157 +201,173 @@
##
# Compares two nodes of type Nokogiri::XML::Element.
# - compares element attributes
# - recursively compares element children
#
- # @param [Nokogiri::XML::Element] n1 left attribute
- # @param [Nokogiri::XML::Element] n2 right attribute
+ # @param [Nokogiri::XML::Element] n1 left node element
+ # @param [Nokogiri::XML::Element] n2 right node element
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareElementNodes(n1, n2, opts, errors, status = EQUIVALENT)
+ def compareElementNodes(n1, n2, opts, differences, status = EQUIVALENT)
if n1.name == n2.name
- result = compareAttributeSets(n1.attribute_nodes, n2.attribute_nodes, opts, errors)
+ result = compareAttributeSets(n1, n2, n1.attribute_nodes, n2.attribute_nodes, opts, differences)
+ return result unless result == EQUIVALENT
+ result = compareChildren(n1.children, n2.children, opts, differences)
status = result unless result == EQUIVALENT
- result = compareChildren(n1.children, n2.children, opts, errors)
- status = result unless result == EQUIVALENT
else
status = UNEQUAL_ELEMENTS
- errors << [nodePath(n1), n1.name, status, n2.name, nodePath(n2)] if opts[:verbose]
+ addDifference(n1, n2, n1.name, n2.name, opts, differences)
end
status
end
##
# Compares two nodes of type Nokogiri::XML::Text.
#
- # @param [Nokogiri::XML::Text] n1 left attribute
- # @param [Nokogiri::XML::Text] n2 right attribute
+ # @param [Nokogiri::XML::Text] n1 left text node
+ # @param [Nokogiri::XML::Text] n2 right text node
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareTextNodes(n1, n2, opts, errors, status = EQUIVALENT)
+ def compareTextNodes(n1, n2, opts, differences, status = EQUIVALENT)
return true if opts[:ignore_text_nodes]
t1, t2 = n1.content, n2.content
t1, t2 = collapse(t1), collapse(t2) if opts[:collapse_whitespace]
unless t1 == t2
status = UNEQUAL_TEXT_CONTENTS
- errors << [nodePath(n1.parent), t1, status, t2, nodePath(n2.parent)] if opts[:verbose]
+ addDifference(n1.parent, n2.parent, t1, t2, opts, differences)
end
status
end
##
- # Compares two sets of Nokogiri::XML::Node attributes.
+ # Compares two sets of Nokogiri::XML::Element attributes.
#
+ # @param [Nokogiri::XML::Element] n1 left node element
+ # @param [Nokogiri::XML::Element] n2 right node element
# @param [Array] a1_set left attribute set
# @param [Array] a2_set right attribute set
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
#
# @return type of equivalence (from equivalence constants)
#
- def compareAttributeSets(a1_set, a2_set, opts, errors)
+ def compareAttributeSets(n1, n2, a1_set, a2_set, opts, differences)
return false unless a1_set.length == a2_set.length || opts[:verbose]
if opts[:ignore_attr_order]
- compareSortedAttributeSets(a1_set, a2_set, opts, errors)
+ compareSortedAttributeSets(n1, n2, a1_set, a2_set, opts, differences)
else
- compareUnsortedAttributeSets(a1_set, a2_set, opts, errors)
+ compareUnsortedAttributeSets(n1, n2, a1_set, a2_set, opts, differences)
end
end
##
# Compares two sets of Nokogiri::XML::Element attributes by sorting them first.
# When the attributes are sorted, only attributes of the same type are compared
# to each other, and missing attributes can be easily detected.
#
+ # @param [Nokogiri::XML::Element] n1 left node element
+ # @param [Nokogiri::XML::Element] n2 right node element
# @param [Array] a1_set left attribute set
# @param [Array] a2_set right attribute set
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareSortedAttributeSets(a1_set, a2_set, opts, errors, status = EQUIVALENT)
+ def compareSortedAttributeSets(n1, n2, a1_set, a2_set, opts, differences, status = EQUIVALENT)
a1_set, a2_set = a1_set.sort_by { |a| a.name }, a2_set.sort_by { |a| a.name }
i = j = 0
while i < a1_set.length || j < a2_set.length
+
if a1_set[i].nil?
- result = compareAttributes(nil, a2_set[j], opts, errors); j += 1
+ result = compareAttributes(n1, n2, nil, a2_set[j], opts, differences); j += 1
elsif a2_set[j].nil?
- result = compareAttributes(a1_set[i], nil, opts, errors); i += 1
+ result = compareAttributes(n1, n2, a1_set[i], nil, opts, differences); i += 1
elsif a1_set[i].name < a2_set[j].name
- result = compareAttributes(a1_set[i], nil, opts, errors); i += 1
+ result = compareAttributes(n1, n2, a1_set[i], nil, opts, differences); i += 1
elsif a1_set[i].name > a2_set[j].name
- result = compareAttributes(nil, a2_set[j], opts, errors); j += 1
+ result = compareAttributes(n1, n2, nil, a2_set[j], opts, differences); j += 1
else
- result = compareAttributes(a1_set[i], a2_set[j], opts, errors); i += 1; j += 1
+ result = compareAttributes(n1, n2, a1_set[i], a2_set[j], opts, differences); i += 1; j += 1
end
+
status = result unless result == EQUIVALENT
break unless status == EQUIVALENT || opts[:verbose]
end
status
end
##
- # Compares two sets of Nokogiri::XML::Node attributes without sorting them.
+ # Compares two sets of Nokogiri::XML::Element attributes without sorting them.
# As a result attributes of different types may be compared, and even if all
# attributes are identical in both sets, if their order is different,
# the comparison will stop as soon as two unequal attributes are found.
#
+ # @param [Nokogiri::XML::Element] n1 left node element
+ # @param [Nokogiri::XML::Element] n2 right node element
# @param [Array] a1_set left attribute set
# @param [Array] a2_set right attribute set
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareUnsortedAttributeSets(a1_set, a2_set, opts, errors, status = EQUIVALENT)
+ def compareUnsortedAttributeSets(n1, n2, a1_set, a2_set, opts, differences, status = EQUIVALENT)
[a1_set.length, a2_set.length].max.times do |i|
- result = compareAttributes(a1_set[i], a2_set[i], opts, errors)
+ result = compareAttributes(n1, n2, a1_set[i], a2_set[i], opts, differences)
status = result unless result == EQUIVALENT
break unless status == EQUIVALENT
end
status
end
##
# Compares two attributes by name and value.
#
+ # @param [Nokogiri::XML::Element] n1 left node element
+ # @param [Nokogiri::XML::Element] n2 right node element
# @param [Nokogiri::XML::Attr] a1 left attribute
# @param [Nokogiri::XML::Attr] a2 right attribute
# @param [Hash] opts user-overridden options
- # @param [Array] errors inequivalence messages
+ # @param [Array] differences inequivalence messages
+ # @param [int] status comparison status code (EQUIVALENT by default)
#
# @return type of equivalence (from equivalence constants)
#
- def compareAttributes(a1, a2, opts, errors, status = EQUIVALENT)
+ def compareAttributes(n1, n2, a1, a2, opts, differences, status = EQUIVALENT)
if a1.nil?
status = MISSING_ATTRIBUTE
- errors << [nodePath(a2.parent), nil, status, "#{a2.name}=\"#{a2.value}\"", nodePath(a2.parent)] if opts[:verbose]
+ addDifference(n1, n2, nil, "#{a2.name}=\"#{a2.value}\"", opts, differences)
elsif a2.nil?
status = MISSING_ATTRIBUTE
- errors << [nodePath(a1.parent), "#{a1.name}=\"#{a1.value}\"", status, nil, nodePath(a1.parent)] if opts[:verbose]
+ addDifference(n1, n2, "#{a1.name}=\"#{a1.value}\"", nil, opts, differences)
elsif a1.name == a2.name
return status if attrsExcluded?(a1, a2, opts)
+ return status if attrContentExcluded?(a1, a2, opts)
if a1.value != a2.value
status = UNEQUAL_ATTRIBUTES
- errors << [nodePath(a1.parent), "#{a1.name}=\"#{a1.value}\"", status, "#{a2.name}=\"#{a2.value}\"", nodePath(a2.parent)] if opts[:verbose]
+ addDifference(n1, n2, "#{a1.name}=\"#{a1.value}\"", "#{a2.name}=\"#{a2.value}\"", opts, differences)
end
else
status = UNEQUAL_ATTRIBUTES
- errors << [nodePath(a1.parent), a1.name, status, a2.name, nodePath(a2.parent)] if opts[:verbose]
+ addDifference(n1, n2, "#{a1.name}=\"#{a1.value}\"", "#{a2.name}=\"#{a2.value}\"", opts, differences)
end
status
end
@@ -351,23 +376,22 @@
# it is completely ignored, as if it did not exist.
#
# Several types of nodes are considered ignored:
# - comments (only in +ignore_comments+ mode)
# - text nodes (only in +ignore_text_nodes+ mode OR when a text node is empty)
- # - node matches a user-specified css rule from +ignore_comments+
+ # - node matches a user-specified css rule from +ignore_nodes+
#
# @param [Nokogiri::XML::Node] n node being tested for exclusion
# @param [Hash] opts user-overridden options
#
# @return true if excluded, false otherwise
#
def nodeExcluded?(n, opts)
+ return true if n.is_a?(Nokogiri::XML::DTD)
return true if n.is_a?(Nokogiri::XML::Comment) && opts[:ignore_comments]
return true if n.is_a?(Nokogiri::XML::Text) && (opts[:ignore_text_nodes] || collapse(n.content).empty?)
- opts[:ignore_nodes].each do |css|
- return true if n.xpath('../*').css(css).include?(n)
- end
+ opts[:ignore_nodes].each { |css| return true if n.parent.css(css).include? n }
false
end
##
@@ -391,46 +415,46 @@
false
end
##
- # Produces the hierarchical ancestral path of a node in the following format: <html:body:div(3):h2:b(2)>.
- # This means that the element is located in:
+ # Checks whether two given attributes should be excluded, based on their content.
+ # Checks whether both attributes contain content that should be excluded, and
+ # returns true only if an excluded string is contained in both attribute values.
#
- # <html>
- # <body>
- # <div>...</div>
- # <div>...</div>
- # <div>
- # <h2>
- # <b>...</b>
- # <b>TARGET</b>
- # </h2>
- # </div>
- # </body>
- # </html>
+ # @param [Nokogiri::XML::Attr] a1 left attribute
+ # @param [Nokogiri::XML::Attr] a2 right attribute
+ # @param [Hash] opts user-overridden options
#
- # Note that the counts of element locations only apply to elements of the same type. For example, div(3) means
- # that it is the 3rd <div> element in the <body>, but there could be many other elements in between the three
- # <div> elements.
- #
- # When +ignore_comments+ mode is disabled, mismatching comments will show up as <...:comment>.
- #
- # @param [Nokogiri::XML::Node] n node for which to determine a hierarchical path
- #
# @return true if excluded, false otherwise
#
- def nodePath(n)
- name = n.name
+ def attrContentExcluded?(a1, a2, opts)
+ a1_excluded, a2_excluded = false, false
+ opts[:ignore_attr_content].each do |content|
+ a1_excluded = a1_excluded || a1.value.include?(content)
+ a2_excluded = a2_excluded || a2.value.include?(content)
+ return true if a1_excluded && a2_excluded
+ end
+ false
+ end
- # find the index of the node if there are several of the same type
- siblings = n.xpath("../#{name}")
- name += "(#{siblings.index(n) + 1})" if siblings.length > 1
- if defined? n.parent
- status = "#{nodePath(n.parent)}:#{name}"
- status = status[1..-1] if status[0] == ':'
- status
+ ##
+ # Records a single difference between two nodes in the differences array.
+ # Each entry is a hash with the keys node1, node2, diff1 and diff2.
+ # A difference is only recorded in verbose mode; otherwise nothing is added.
+ #
+ # @param [Nokogiri::XML::Node] node1 left node
+ # @param [Nokogiri::XML::Node] node2 right node
+ # @param [String] diff1 left diffing value
+ # @param [String] diff2 right diffing value
+ # @param [Hash] opts user-overridden options
+ # @param [Array] differences inequivalence messages
+ #
+ # @return the differences array in verbose mode, nil otherwise
+ #
+ def addDifference(node1, node2, diff1, diff2, opts, differences)
+ if opts[:verbose]
+ differences << {node1: node1, node2: node2, diff1: diff1, diff2: diff2}
end
end
##
\ No newline at end of file