lib/rdf/isomorphic.rb in rdf-isomorphic-0.3.0 vs lib/rdf/isomorphic.rb in rdf-isomorphic-0.3.1

- old
+ new

@@ -12,29 +12,44 @@ # @see http://rdf.rubyforge.org # @see http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf module Isomorphic # Returns `true` if this RDF::Enumerable is isomorphic with another. + # + # Takes a :canonicalize => true argument. If true, RDF::Literals will be + # canonicalized while producing a bijection. This results in broader + # matches for isomorphism in the case of equivalent literals with different + # representations. + # + # @param opts [Hash<Symbol => Any>] options + # @param other [RDF::Enumerable] # @return [Boolean] # @example # repository_a.isomorphic_with repository_b #=> true - def isomorphic_with?(other) - !(bijection_to(other).nil?) + def isomorphic_with?(other, opts = {}) + !(bijection_to(other, opts).nil?) end alias_method :isomorphic?, :isomorphic_with? # Returns a hash of RDF::Nodes => RDF::Nodes representing an isomorphic # bijection of this RDF::Enumerable's to another RDF::Enumerable's blank # nodes, or nil if a bijection cannot be found. + # + # Takes a :canonicalize => true argument. If true, RDF::Literals will be + # canonicalized while producing a bijection. This results in broader + # matches for isomorphism in the case of equivalent literals with different + # representations. + # # @example # repository_a.bijection_to repository_b # @param other [RDF::Enumerable] + # @param opts [Hash<Symbol => Any>] options # @return [Hash, nil] - def bijection_to(other) - + def bijection_to(other, opts = {}) + grounded_stmts_match = (count == other.count) grounded_stmts_match &&= each_statement.all? do | stmt | stmt.has_blank_nodes? || other.has_statement?(stmt) end @@ -47,11 +62,11 @@ blank_stmts = find_all { |statement| statement.has_blank_nodes? } other_blank_stmts = other.find_all { |statement| statement.has_blank_nodes? 
} nodes = RDF::Isomorphic.blank_nodes_in(blank_stmts) other_nodes = RDF::Isomorphic.blank_nodes_in(other_blank_stmts) - build_bijection_to blank_stmts, nodes, other_blank_stmts, other_nodes + build_bijection_to blank_stmts, nodes, other_blank_stmts, other_nodes, {}, {}, opts else nil end end @@ -70,22 +85,23 @@ # @param [Array] nodes # @param [RDF::Enumerable] other_anon_stmts # @param [Array] other_nodes # @param [Hash] these_grounded_hashes # @param [Hash] other_grounded_hashes + # @param [Hash] options # @return [nil,Hash] # @private - def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, these_grounded_hashes = {}, other_grounded_hashes = {}) + def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, these_grounded_hashes = {}, other_grounded_hashes = {}, opts = {}) # Create a hash signature of every node, based on the signature of # statements it exists in. # We also save hashes of nodes that cannot be reliably known; we will use # that information to eliminate possible recursion combinations. # # Any mappings given in the method parameters are considered grounded. - these_hashes, these_ungrounded_hashes = RDF::Isomorphic.hash_nodes(anon_stmts, nodes, these_grounded_hashes) - other_hashes, other_ungrounded_hashes = RDF::Isomorphic.hash_nodes(other_anon_stmts, other_nodes, other_grounded_hashes) + these_hashes, these_ungrounded_hashes = RDF::Isomorphic.hash_nodes(anon_stmts, nodes, these_grounded_hashes, opts[:canonicalize]) + other_hashes, other_ungrounded_hashes = RDF::Isomorphic.hash_nodes(other_anon_stmts, other_nodes, other_grounded_hashes, opts[:canonicalize]) # Grounded hashes are built at the same rate between the two graphs (if # they are isomorphic). If there exists a grounded node in one that is # not in the other, we can just return. Ungrounded nodes might still # conflict, so we don't check them. 
This is a little bit messy in the @@ -166,11 +182,11 @@ # @param [Array] statements # @param [Array] nodes # @param [Hash] grounded_hashes # @private # @return [Hash, Hash] - def self.hash_nodes(statements, nodes, grounded_hashes) + def self.hash_nodes(statements, nodes, grounded_hashes, canonicalize = false) hashes = grounded_hashes.dup ungrounded_hashes = {} hash_needed = true # We may have to go over the list multiple times. If a node is marked as @@ -178,11 +194,11 @@ # grounded. while hash_needed starting_grounded_nodes = hashes.size nodes.each do | node | unless hashes.member? node - grounded, hash = node_hash_for(node, statements, hashes) + grounded, hash = node_hash_for(node, statements, hashes, canonicalize) if grounded hashes[node] = hash end ungrounded_hashes[node] = hash end @@ -213,16 +229,16 @@ # # Returns a tuple consisting of grounded being true or false and the String # for the hash # @private # @return [Boolean, String] - def self.node_hash_for(node,statements,hashes) + def self.node_hash_for(node, statements, hashes, canonicalize) statement_signatures = [] grounded = true statements.each do | statement | if (statement.object == node) || (statement.subject == node) - statement_signatures << hash_string_for(statement,hashes,node) + statement_signatures << hash_string_for(statement, hashes, node, canonicalize) [statement.subject, statement.object].each do | resource | grounded = false unless grounded(resource, hashes) || resource == node end end end @@ -233,15 +249,15 @@ # Provide a string signature for the given statement, collecting # string signatures for grounded node elements. 
# @return [String] # @private - def self.hash_string_for(statement,hashes,node) + def self.hash_string_for(statement, hashes, node, canonicalize) string = "" - string << string_for_node(statement.subject,hashes,node) + string << string_for_node(statement.subject, hashes, node, canonicalize) string << statement.predicate.to_s - string << string_for_node(statement.object,hashes,node) + string << string_for_node(statement.object, hashes, node, canonicalize) string end # Returns true if a given node is grounded # A node is grounded if it is not a blank node or it is included @@ -255,21 +271,21 @@ # Provides a string for the given node for use in a string signature # Non-anonymous nodes will return their string form. Grounded anonymous # nodes will return their hashed form. # @return [String] # @private - def self.string_for_node(node, hashes,target) + def self.string_for_node(node, hashes,target, canonicalize) case when node == target "itself" when node.node? && hashes.member?(node) hashes[node] when node.node? "a blank node" # RDF.rb auto-boxing magic makes some literals the same when they # should not be; the ntriples serializer will take care of us when node.literal? - node.class.name + RDF::NTriples.serialize(node) + node.class.name + RDF::NTriples.serialize(canonicalize ? node.canonicalize : node) else node.to_s end end end