lib/rdf/isomorphic.rb in rdf-isomorphic-0.3.0 vs lib/rdf/isomorphic.rb in rdf-isomorphic-0.3.1
- old
+ new
@@ -12,29 +12,44 @@
# @see http://rdf.rubyforge.org
# @see http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf
module Isomorphic
# Returns `true` if this RDF::Enumerable is isomorphic with another.
+ #
+ # Takes a :canonicalize => true argument. If true, RDF::Literals will be
+ # canonicalized while producing a bijection. This results in broader
+ # matches for isomorphism in the case of equivalent literals with different
+ # representations.
+ #
+ # @param opts [Hash<Symbol => Any>] options
+ # @param other [RDF::Enumerable]
# @return [Boolean]
# @example
# repository_a.isomorphic_with? repository_b #=> true
- def isomorphic_with?(other)
- !(bijection_to(other).nil?)
+ def isomorphic_with?(other, opts = {})
+ !(bijection_to(other, opts).nil?)
end
alias_method :isomorphic?, :isomorphic_with?
# Returns a hash of RDF::Nodes => RDF::Nodes representing an isomorphic
# bijection of this RDF::Enumerable's to another RDF::Enumerable's blank
# nodes, or nil if a bijection cannot be found.
+ #
+ # Takes a :canonicalize => true argument. If true, RDF::Literals will be
+ # canonicalized while producing a bijection. This results in broader
+ # matches for isomorphism in the case of equivalent literals with different
+ # representations.
+ #
# @example
# repository_a.bijection_to repository_b
# @param other [RDF::Enumerable]
+ # @param opts [Hash<Symbol => Any>] options
# @return [Hash, nil]
- def bijection_to(other)
-
+ def bijection_to(other, opts = {})
+
grounded_stmts_match = (count == other.count)
grounded_stmts_match &&= each_statement.all? do | stmt |
stmt.has_blank_nodes? || other.has_statement?(stmt)
end
@@ -47,11 +62,11 @@
blank_stmts = find_all { |statement| statement.has_blank_nodes? }
other_blank_stmts = other.find_all { |statement| statement.has_blank_nodes? }
nodes = RDF::Isomorphic.blank_nodes_in(blank_stmts)
other_nodes = RDF::Isomorphic.blank_nodes_in(other_blank_stmts)
- build_bijection_to blank_stmts, nodes, other_blank_stmts, other_nodes
+ build_bijection_to blank_stmts, nodes, other_blank_stmts, other_nodes, {}, {}, opts
else
nil
end
end
@@ -70,22 +85,23 @@
# @param [Array] nodes
# @param [RDF::Enumerable] other_anon_stmts
# @param [Array] other_nodes
# @param [Hash] these_grounded_hashes
# @param [Hash] other_grounded_hashes
+ # @param [Hash] options
# @return [nil,Hash]
# @private
- def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, these_grounded_hashes = {}, other_grounded_hashes = {})
+ def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, these_grounded_hashes = {}, other_grounded_hashes = {}, opts = {})
# Create a hash signature of every node, based on the signature of
# statements it exists in.
# We also save hashes of nodes that cannot be reliably known; we will use
# that information to eliminate possible recursion combinations.
#
# Any mappings given in the method parameters are considered grounded.
- these_hashes, these_ungrounded_hashes = RDF::Isomorphic.hash_nodes(anon_stmts, nodes, these_grounded_hashes)
- other_hashes, other_ungrounded_hashes = RDF::Isomorphic.hash_nodes(other_anon_stmts, other_nodes, other_grounded_hashes)
+ these_hashes, these_ungrounded_hashes = RDF::Isomorphic.hash_nodes(anon_stmts, nodes, these_grounded_hashes, opts[:canonicalize])
+ other_hashes, other_ungrounded_hashes = RDF::Isomorphic.hash_nodes(other_anon_stmts, other_nodes, other_grounded_hashes, opts[:canonicalize])
# Grounded hashes are built at the same rate between the two graphs (if
# they are isomorphic). If there exists a grounded node in one that is
# not in the other, we can just return. Ungrounded nodes might still
# conflict, so we don't check them. This is a little bit messy in the
@@ -166,11 +182,11 @@
# @param [Array] statements
# @param [Array] nodes
# @param [Hash] grounded_hashes
# @private
# @return [Hash, Hash]
- def self.hash_nodes(statements, nodes, grounded_hashes)
+ def self.hash_nodes(statements, nodes, grounded_hashes, canonicalize = false)
hashes = grounded_hashes.dup
ungrounded_hashes = {}
hash_needed = true
# We may have to go over the list multiple times. If a node is marked as
@@ -178,11 +194,11 @@
# grounded.
while hash_needed
starting_grounded_nodes = hashes.size
nodes.each do | node |
unless hashes.member? node
- grounded, hash = node_hash_for(node, statements, hashes)
+ grounded, hash = node_hash_for(node, statements, hashes, canonicalize)
if grounded
hashes[node] = hash
end
ungrounded_hashes[node] = hash
end
@@ -213,16 +229,16 @@
#
# Returns a tuple consisting of grounded being true or false and the String
# for the hash
# @private
# @return [Boolean, String]
- def self.node_hash_for(node,statements,hashes)
+ def self.node_hash_for(node, statements, hashes, canonicalize)
statement_signatures = []
grounded = true
statements.each do | statement |
if (statement.object == node) || (statement.subject == node)
- statement_signatures << hash_string_for(statement,hashes,node)
+ statement_signatures << hash_string_for(statement, hashes, node, canonicalize)
[statement.subject, statement.object].each do | resource |
grounded = false unless grounded(resource, hashes) || resource == node
end
end
end
@@ -233,15 +249,15 @@
# Provide a string signature for the given statement, collecting
# string signatures for grounded node elements.
# @return [String]
# @private
- def self.hash_string_for(statement,hashes,node)
+ def self.hash_string_for(statement, hashes, node, canonicalize)
string = ""
- string << string_for_node(statement.subject,hashes,node)
+ string << string_for_node(statement.subject, hashes, node, canonicalize)
string << statement.predicate.to_s
- string << string_for_node(statement.object,hashes,node)
+ string << string_for_node(statement.object, hashes, node, canonicalize)
string
end
# Returns true if a given node is grounded
# A node is grounded if it is not a blank node or it is included
@@ -255,21 +271,21 @@
# Provides a string for the given node for use in a string signature
# Non-anonymous nodes will return their string form. Grounded anonymous
# nodes will return their hashed form.
# @return [String]
# @private
- def self.string_for_node(node, hashes,target)
+ def self.string_for_node(node, hashes,target, canonicalize)
case
when node == target
"itself"
when node.node? && hashes.member?(node)
hashes[node]
when node.node?
"a blank node"
# RDF.rb auto-boxing magic makes some literals the same when they
# should not be; the ntriples serializer will take care of us
when node.literal?
- node.class.name + RDF::NTriples.serialize(node)
+ node.class.name + RDF::NTriples.serialize(canonicalize ? node.canonicalize : node)
else
node.to_s
end
end
end