lib/rdf/isomorphic.rb in rdf-isomorphic-3.1.0 vs lib/rdf/isomorphic.rb in rdf-isomorphic-3.1.1

- old
+ new

@@ -7,29 +7,26 @@ ## # Isomorphism for rdf.rb Enumerables # # RDF::Isomorphic provides the functions isomorphic_with and bijection_to for RDF::Enumerable. # - # @see http://www.rubydoc.info/github/ruby-rdf/rdf/ - # @see http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf + # @see https://www.rubydoc.info/github/ruby-rdf/rdf/ + # @see https://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf module Isomorphic autoload :VERSION, 'rdf/isomorphic/version' # Returns `true` if this RDF::Enumerable is isomorphic with another. # - # Takes a canonicalize: true argument. If true, RDF::Literals will be - # canonicalized while producing a bijection. This results in broader - # matches for isomorphism in the case of equivalent literals with different - # representations. - # - # @param opts [Hash<Symbol => Any>] options + # @param canonicalize [Boolean] (false) + # If `true`, RDF::Literals will be canonicalized while producing a bijection. This results in broader matches for isomorphism in the case of equivalent literals with different representations. + # @param opts [Hash<Symbol => Any>] other options ignored # @param other [RDF::Enumerable] # @return [Boolean] # @example # repository_a.isomorphic_with repository_b #=> true - def isomorphic_with?(other, **opts) - !(bijection_to(other, **opts).nil?) + def isomorphic_with?(other, canonicalize: false, **opts) + !(bijection_to(other, canonicalize: false, **opts).nil?) end alias_method :isomorphic?, :isomorphic_with? @@ -43,13 +40,15 @@ # representations. # # @example # repository_a.bijection_to repository_b # @param other [RDF::Enumerable] - # @param opts [Hash<Symbol => Any>] options + # @param canonicalize [Boolean] (false) + # If true, RDF::Literals will be canonicalized while producing a bijection. This results in broader matches for isomorphism in the case of equivalent literals with different representations. + # @param opts [Hash<Symbol => Any>] other options ignored # @return [Hash, nil] - def bijection_to(other, **opts) + def bijection_to(other, canonicalize: false, **opts) grounded_stmts_match = (count == other.count) grounded_stmts_match &&= each_statement.all? do | stmt | stmt.node? || other.has_statement?(stmt) @@ -63,11 +62,14 @@ blank_stmts = find_all { |statement| statement.node? } other_blank_stmts = other.find_all { |statement| statement.node? } nodes = RDF::Isomorphic.blank_nodes_in(blank_stmts) other_nodes = RDF::Isomorphic.blank_nodes_in(other_blank_stmts) - build_bijection_to blank_stmts, nodes, other_blank_stmts, other_nodes, {}, {}, **opts + build_bijection_to blank_stmts, nodes, other_blank_stmts, other_nodes, + these_grounded_hashes: {}, + other_grounded_hashes: {}, + canonicalize: false else nil end end @@ -75,34 +77,38 @@ private # The main recursive bijection algorithm. # # This algorithm is very similar to the one explained by Jeremy Carroll in - # http://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf. Page 12 has the + # https://www.hpl.hp.com/techreports/2001/HPL-2001-293.pdf. Page 12 has the # relevant pseudocode. # # Many more comments are in the method itself. # # @param [RDF::Enumerable] anon_stmts # @param [Array] nodes # @param [RDF::Enumerable] other_anon_stmts # @param [Array] other_nodes # @param [Hash] these_grounded_hashes # @param [Hash] other_grounded_hashes - # @param [Hash] opts + # @param canonicalize [Boolean] (false) + # If true, RDF::Literals will be canonicalized while producing a bijection. This results in broader matches for isomorphism in the case of equivalent literals with different representations. # @return [nil,Hash] # @private - def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, these_grounded_hashes = {}, other_grounded_hashes = {}, **opts) + def build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, + these_grounded_hashes: {}, + other_grounded_hashes: {}, + canonicalize: false) # Create a hash signature of every node, based on the signature of # statements it exists in. # We also save hashes of nodes that cannot be reliably known; we will use # that information to eliminate possible recursion combinations. # # Any mappings given in the method parameters are considered grounded. - these_hashes, these_ungrounded_hashes = RDF::Isomorphic.hash_nodes(anon_stmts, nodes, these_grounded_hashes, opts[:canonicalize]) - other_hashes, other_ungrounded_hashes = RDF::Isomorphic.hash_nodes(other_anon_stmts, other_nodes, other_grounded_hashes, opts[:canonicalize]) + these_hashes, these_ungrounded_hashes = RDF::Isomorphic.hash_nodes(anon_stmts, nodes, these_grounded_hashes, canonicalize: canonicalize) + other_hashes, other_ungrounded_hashes = RDF::Isomorphic.hash_nodes(other_anon_stmts, other_nodes, other_grounded_hashes, canonicalize: canonicalize) # Grounded hashes are built at the same rate between the two graphs (if # they are isomorphic). If there exists a grounded node in one that is # not in the other, we can just return. Ungrounded nodes might still # conflict, so we don't check them. This is a little bit messy in the @@ -114,11 +120,11 @@ # Using the created hashes, map nodes to other_nodes # Ungrounded hashes will also be equal, but we keep the distinction # around for when we recurse later (we only recurse on ungrounded nodes) bijection = {} nodes.each do | node | - other_node, other_hash = other_ungrounded_hashes.find do | other_node, other_hash | + other_node, _ = other_ungrounded_hashes.find do | other_node, other_hash | # we need to use eql?, as coincedentally-named bnode identifiers are == in rdf.rb these_ungrounded_hashes[node].eql? other_hash end next unless other_node bijection[node] = other_node @@ -146,11 +152,15 @@ # The ungrounded signature must match for this to potentially work next unless these_ungrounded_hashes[node] == other_ungrounded_hashes[other_node] hash = Digest::SHA1.hexdigest(node.to_s) - bijection = build_bijection_to(anon_stmts, nodes, other_anon_stmts, other_nodes, these_hashes.merge( node => hash), other_hashes.merge(other_node => hash)) + bijection = build_bijection_to(anon_stmts, nodes, + other_anon_stmts, other_nodes, + these_grounded_hashes: these_hashes.merge( node => hash), + other_grounded_hashes: other_hashes.merge(other_node => hash), + canonicalize: canonicalize) end bijection end end @@ -160,11 +170,11 @@ # Blank nodes appearing in given list of statements # @private # @param [Array<RDF::Statement>] blank_stmt_list # @return [Array<RDF::Node>] def self.blank_nodes_in(blank_stmt_list) - blank_stmt_list.map {|statement | statement.to_quad.compact.select(&:node?)}.flatten.uniq + blank_stmt_list.map {|statement | statement.terms.select(&:node?)}.flatten.uniq end # Given a set of statements, create a mapping of node => SHA1 for a given # set of blank nodes. grounded_hashes is a mapping of node => SHA1 pairs # that we will take as a given, and use those to make more specific @@ -178,11 +188,11 @@ # @param [Array] statements # @param [Array] nodes # @param [Hash] grounded_hashes # @private # @return [Hash, Hash] - def self.hash_nodes(statements, nodes, grounded_hashes, canonicalize = false) + def self.hash_nodes(statements, nodes, grounded_hashes, canonicalize: false) hashes = grounded_hashes.dup ungrounded_hashes = {} hash_needed = true # We may have to go over the list multiple times. If a node is marked as @@ -190,11 +200,11 @@ # grounded. while hash_needed starting_grounded_nodes = hashes.size nodes.each do | node | unless hashes.member? node - grounded, hash = node_hash_for(node, statements, hashes, canonicalize) + grounded, hash = node_hash_for(node, statements, hashes, canonicalize: canonicalize) if grounded hashes[node] = hash end ungrounded_hashes[node] = hash end @@ -229,17 +239,17 @@ # @param [RDF::Node] node # @param [Array<RDF::Statement>] statements # @param [Hash] hashes # @param [Boolean] canonicalize # @return [Boolean, String] - def self.node_hash_for(node, statements, hashes, canonicalize) + def self.node_hash_for(node, statements, hashes, canonicalize:) statement_signatures = [] grounded = true statements.each do | statement | - if statement.to_quad.include?(node) - statement_signatures << hash_string_for(statement, hashes, node, canonicalize) - statement.to_quad.compact.each do | resource | + if statement.terms.include?(node) + statement_signatures << hash_string_for(statement, hashes, node, canonicalize: canonicalize) + statement.terms.each do | resource | grounded = false unless grounded?(resource, hashes) || resource == node end end end # Note that we sort the signatures--without a canonical ordering, @@ -249,12 +259,12 @@ # Provide a string signature for the given statement, collecting # string signatures for grounded node elements. # return [String] # @private - def self.hash_string_for(statement, hashes, node, canonicalize) - statement.to_quad.map {|r| string_for_node(r, hashes, node, canonicalize)}.join("") + def self.hash_string_for(statement, hashes, node, canonicalize:) + statement.terms.map {|r| string_for_node(r, hashes, node, canonicalize: canonicalize)}.join("") end # Returns true if a given node is grounded # A node is groundd if it is not a blank node or it is included # in the given mapping of grounded nodes. @@ -267,10 +277,10 @@ # Provides a string for the given node for use in a string signature # Non-anonymous nodes will return their string form. Grounded anonymous # nodes will return their hashed form. # @return [String] # @private - def self.string_for_node(node, hashes,target, canonicalize) + def self.string_for_node(node, hashes,target, canonicalize:) case when node.nil? "" when node == target "itself"