lib/rdf/isomorphic.rb in rdf-isomorphic-0.2.0 vs lib/rdf/isomorphic.rb in rdf-isomorphic-0.3.0

- old
+ new

@@ -1,7 +1,8 @@ require 'digest/sha1' require 'rdf' +require 'rdf/ntriples' module RDF ## # Isomorphism for rdf.rb Enumerables @@ -73,34 +74,44 @@ # @param [Hash] other_grounded_hashes # @return [nil,Hash] # @private def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, these_grounded_hashes = {}, other_grounded_hashes = {}) - # Create a hash signature of every node, based on the signature of # statements it exists in. # We also save hashes of nodes that cannot be reliably known; we will use # that information to eliminate possible recursion combinations. # # Any mappings given in the method parameters are considered grounded. these_hashes, these_ungrounded_hashes = RDF::Isomorphic.hash_nodes(anon_stmts, nodes, these_grounded_hashes) other_hashes, other_ungrounded_hashes = RDF::Isomorphic.hash_nodes(other_anon_stmts, other_nodes, other_grounded_hashes) + # Grounded hashes are built at the same rate between the two graphs (if + # they are isomorphic). If there exists a grounded node in one that is + # not in the other, we can just return. Ungrounded nodes might still + # conflict, so we don't check them. This is a little bit messy in the + # middle of the method, and probably slows down isomorphic checks, but + # prevents almost-isomorphic cases from getting nutty. + return nil if these_hashes.values.any? { |hash| + !(other_hashes.values.member?(hash)) } + return nil if other_hashes.values.any? { |hash| !(these_hashes.values.member?(hash)) } # Using the created hashes, map nodes to other_nodes + # Ungrounded hashes will also be equal, but we keep the distinction + # around for when we recurse later (we only recurse on ungrounded nodes) bijection = {} nodes.each do | node | - other_node, other_hash = other_hashes.find do | other_node, other_hash | + other_node, other_hash = other_ungrounded_hashes.find do | other_node, other_hash | # we need to use eql?, as coincedentally-named bnode identifiers are == in rdf.rb - these_hashes[node].eql? other_hash + these_ungrounded_hashes[node].eql? other_hash end next unless other_node bijection[node] = other_node - - # we need to delete , as we don't want two nodes with the same hash - # to be mapped to the same other_node. - other_hashes.delete other_node + + # Deletion is required to keep counts even; two nodes with identical + # signatures can biject to each other at random. + other_ungrounded_hashes.delete other_node end # bijection is now a mapping of nodes to other_nodes. If all are # accounted for on both sides, we have a bijection. # @@ -164,21 +175,30 @@ # We may have to go over the list multiple times. If a node is marked as # grounded, other nodes can then use it to decide their own state of # grounded. while hash_needed - hash_needed = false + starting_grounded_nodes = hashes.size nodes.each do | node | unless hashes.member? node grounded, hash = node_hash_for(node, statements, hashes) if grounded - hash_needed = true hashes[node] = hash end ungrounded_hashes[node] = hash end end + # after going over the list, any nodes with a unique hash can be marked + # as grounded, even if we have not tied them back to a root yet. + uniques = {} + ungrounded_hashes.each do |node, hash| + uniques[hash] = uniques[hash].is_a?(RDF::Node) ? false : node + end + uniques.each do |hash, node| + hashes[node] = hash unless node == false + end + hash_needed = starting_grounded_nodes != hashes.size end [hashes,ungrounded_hashes] end # Generate a hash for a node based on the signature of the statements it @@ -243,9 +263,13 @@ "itself" when node.node? && hashes.member?(node) hashes[node] when node.node? "a blank node" + # RDF.rb auto-boxing magic makes some literals the same when they + # should not be; the ntriples serializer will take care of us + when node.literal? + node.class.name + RDF::NTriples.serialize(node) else node.to_s end end end