lib/rdf/rdfa/reader.rb in rdf-rdfa-2.2.2 vs lib/rdf/rdfa/reader.rb in rdf-rdfa-2.2.3
- old
+ new
@@ -29,11 +29,11 @@
format Format
include Expansion
include RDF::Util::Logger
XHTML = "http://www.w3.org/1999/xhtml"
-
+
# Content model for @about and @resource, keyed by RDFa version.
# In RDFa 1.0, this was URIorSafeCURIE
SafeCURIEorCURIEorIRI = {
  "rdfa1.0": %i[safe_curie uri bnode],
  "rdfa1.1": %i[safe_curie curie uri bnode],
}
@@ -81,21 +81,21 @@
# Host language
# @!attribute [r] host_language
# @return [:xml, :xhtml1, :xhtml5, :html4, :html5, :svg]
attr_reader :host_language
-
+
# Version
# @!attribute [r] version
# @return [:"rdfa1.0", :"rdfa1.1"]
attr_reader :version
-
+
# Repository used for collecting triples.
# @!attribute [r] repository
# @return [RDF::Repository]
attr_reader :repository
-
+
# Returns the XML implementation module for this reader instance.
#
# @!attribute [rw] implementation
# @return [Module]
attr_reader :implementation
@@ -122,11 +122,11 @@
# but it will usually change during the course of processing.
#
# @!attribute [rw] parent_subject
# @return [RDF::URI]
attr_accessor :parent_subject
-
+
##
# The parent object.
#
# In some situations the object of a statement becomes the subject of any nested statements,
# and this property is used to convey this value.
@@ -136,26 +136,26 @@
# and this property is used to convey this value.
#
# @!attribute [rw] parent_object
# @return [RDF::URI]
attr_accessor :parent_object
-
+
##
# A list of current, in-scope URI mappings.
#
# @!attribute [rw] uri_mappings
# @return [Hash{Symbol => String}]
attr_accessor :uri_mappings
-
+
##
# A list of current, in-scope Namespaces. This is the subset of uri_mappings
# which are defined using xmlns.
#
# @!attribute [rw] namespaces
# @return [Hash{String => Namespace}]
attr_accessor :namespaces
-
+
##
# A list of incomplete triples.
#
# A triple can be incomplete when no object resource
# is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
@@ -163,29 +163,29 @@
# which will be when the next subject is specified (part of the process called chaining).
#
# @!attribute [rw] incomplete_triples
# @return [Array<Array<RDF::URI, RDF::Resource>>]
attr_accessor :incomplete_triples
-
+
##
# The language. Note that there is no default language.
#
# @!attribute [rw] language
# @return [Symbol]
attr_accessor :language
-
+
##
# The term mappings, a list of terms and their associated URIs.
#
# This specification does not define an initial list.
# Host Languages may define an initial list.
# If a Host Language provides an initial list, it should do so via an RDFa Context document.
#
# @!attribute [rw] term_mappings
# @return [Hash{Symbol => RDF::URI}]
attr_accessor :term_mappings
-
+
##
# The default vocabulary
#
# A value to use as the prefix URI when a term is used.
# This specification does not define an initial setting for the default vocabulary.
@@ -228,11 +228,11 @@
@uri_mappings = from.uri_mappings.clone
@incomplete_triples = from.incomplete_triples.clone
@namespaces = from.namespaces.clone
@list_mapping = from.list_mapping # Don't clone
end
-
+
def inspect
v = ['base', 'parent_subject', 'parent_object', 'language', 'default_vocabulary'].map do |a|
"#{a}=#{o = self.send(a); o.respond_to?(:to_ntriples) ? o.to_ntriples : o.inspect}"
end
v << "uri_mappings[#{uri_mappings.keys.length}]"
@@ -327,12 +327,24 @@
end
self.extend(@implementation)
detect_host_language_version(input, options)
- add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{@library}, rdfagraph = #{@options[:rdfagraph].inspect}, expand = #{@options[:vocab_expansion]}")
+ parse_lib = if @library == :nokogiri && @host_language == :html5
+ begin
+ require 'nokogumbo' unless defined?(::Nokogumbo)
+ :nokobumbo
+ rescue LoadError
+ :nokogiri
+ end
+ else
+ @library
+ end
+ parse_lib = @library == :nokogiri && defined?(::Nokogumbo) ? :nokogumbo : @library
+ add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{parse_lib}, rdfagraph = #{@options[:rdfagraph].inspect}, expand = #{@options[:vocab_expansion]}")
+
begin
initialize_xml(input, options)
rescue
add_error(nil, "Malformed document: #{$!.message}")
end
@@ -417,11 +429,11 @@
reader.new(doc, options).each(&block)
else
add_debug(el, "=> no reader found")
end
end
-
+
# Look for Embedded RDF/XML
unless @root.xpath("//rdf:RDF", "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#").empty?
extract_script(@root, @doc, "application/rdf+xml", @options) do |statement|
@repository << statement
end
@@ -435,11 +447,11 @@
extract_script(el, text, type, @options) do |statement|
@repository << statement
end
end
-
+
# Look for Embedded microdata
unless @root.xpath("//@itemscope").empty?
begin
require 'rdf/microdata'
add_debug(@doc, "process microdata")
@@ -452,11 +464,11 @@
# Perform property copying
copy_properties(@repository) if @options[:reference_folding]
# Perform vocabulary expansion
expand(@repository) if @options[:vocab_expansion]
-
+
@processed = true
end
# Return statements in the default graph for
# statements in the associated named or default graph from the
@@ -491,24 +503,24 @@
block.call(*statement.to_triple)
end
end
enum_for(:each_triple)
end
-
+
private
# Return the blank node cached for the string form of +value+, allocating
# and remembering a new RDF::Node on first use. Calls without an argument
# (or with nil) all share the entry keyed by the empty string.
def bnode(value = nil)
  @bnode_cache ||= {}
  key = value.to_s
  @bnode_cache[key] || (@bnode_cache[key] = RDF::Node.new(value))
end
-
+
# Build a location string for a node: the base URI wrapped in angle brackets,
# followed by the node's display_path when it responds to that (an Element
# or Attribute), or by the node's own string form otherwise.
def node_path(node)
  prefix = "<#{base_uri}>"
  location = if node.respond_to?(:display_path)
    node.display_path
  else
    node
  end
  "#{prefix}#{location}"
end
-
+
# Add debug event to debug array, if specified
#
# @param [#display_path, #to_s] node XML Node or string for showing context
# @param [String] message
# @yieldreturn [String] appended to message, to allow for lazy-evaluation of message
@@ -517,19 +529,19 @@
end
# Record an informational processor message.
#
# @param [#display_path, #to_s] node XML Node or string for showing context
# @param [String] message
# @param [Object] process_class message class, RDF::RDFA.Info unless given
# @yieldreturn [String] optional block, handed on unchanged to add_processor_message
def add_info(node, message, process_class = RDF::RDFA.Info, &lazy_message)
  add_processor_message(node, message, process_class, &lazy_message)
end
-
+
# Record a warning-level processor message.
#
# Accepts an optional block and forwards it, matching add_info, so callers
# can supply lazily evaluated message text.
#
# @param [#display_path, #to_s] node XML Node or string for showing context
# @param [String] message
# @param [Object] process_class message class, RDF::RDFA.Warning unless given
# @yieldreturn [String] optional block, handed on unchanged to add_processor_message
def add_warning(node, message, process_class = RDF::RDFA.Warning, &block)
  add_processor_message(node, message, process_class, &block)
end
-
+
# Record an error-level processor message.
#
# Accepts an optional block and forwards it, matching add_info, so callers
# can supply lazily evaluated message text.
#
# @param [#display_path, #to_s] node XML Node or string for showing context
# @param [String] message
# @param [Object] process_class message class, RDF::RDFA.Error unless given
# @yieldreturn [String] optional block, handed on unchanged to add_processor_message
def add_error(node, message, process_class = RDF::RDFA.Error, &block)
  add_processor_message(node, message, process_class, &block)
end
-
+
def add_processor_message(node, message, process_class, &block)
case process_class
when RDF::RDFA.Error then log_error(node_path(node), message, &block)
when RDF::RDFA.Warning then log_warn(node_path(node), message, &block)
when RDF::RDFA.Info then log_info(node_path(node), message, &block)
@@ -550,11 +562,11 @@
RDF::Statement.new(n, RDF::RDFA.context, nc, graph_name: RDF::RDFA.ProcessorGraph),
RDF::Statement.new(nc, RDF["type"], RDF::PTR.XPathPointer, graph_name: RDF::RDFA.ProcessorGraph),
RDF::Statement.new(nc, RDF::PTR.expression, node.path, graph_name: RDF::RDFA.ProcessorGraph)
]
end
-
+
@repository.insert(*processor_statements)
if cb = @options[:processor_callback]
processor_statements.each {|s| cb.call(s)}
end
end
@@ -589,11 +601,11 @@
add_debug("") {"parse_whole_doc: base='#{base}'"}
end
# initialize the evaluation context with the appropriate base
evaluation_context = EvaluationContext.new(base, @host_defaults)
-
+
if @version != :"rdfa1.0"
# Process default vocabularies
load_initial_contexts(@host_defaults[:initial_contexts]) do |which, value|
add_debug(root) { "parse_whole_document, #{which}: #{value.inspect}"}
case which
@@ -601,15 +613,15 @@
when :term_mappings then evaluation_context.term_mappings.merge!(value)
when :default_vocabulary then evaluation_context.default_vocabulary = value
end
end
end
-
+
traverse(root, evaluation_context)
add_debug("", "parse_whole_doc: traversal complete'")
end
-
+
# Parse and process URI mappings, Term mappings and a default vocabulary from @context
#
# Yields each mapping
def load_initial_contexts(initial_contexts)
initial_contexts.
@@ -723,11 +735,11 @@
def traverse(element, evaluation_context)
if element.nil?
add_error(element, "Can't parse nil element")
return nil
end
-
+
add_debug(element) { "ec: #{evaluation_context.inspect}" }
# local variables [7.5 Step 1]
recurse = true
skip = false
@@ -805,16 +817,16 @@
end
add_debug(element) {
"[Step 2] default_vocaulary: #{default_vocabulary.inspect}"
}
end
-
+
# Local term mappings [7.5 Step 3]
# Next, the current element is then examined for URI mappings and these are added to the local list of URI mappings.
# Note that a URI mapping will simply overwrite any current mapping in the list that has the same name
extract_mappings(element, uri_mappings, namespaces)
-
+
# Language information [7.5 Step 4]
language = element.language || language
language = nil if language.to_s.empty?
add_debug(element) {"HTML5 [3.2.3.3] lang: #{language.inspect}"} if language
@@ -842,18 +854,18 @@
revs = process_uris(element, attrs[:rev], evaluation_context, base,
uri_mappings: uri_mappings,
term_mappings: term_mappings,
vocab: default_vocabulary,
restrictions: TERMorCURIEorAbsIRI.fetch(@version, []))
-
+
add_debug(element) do
"rels: #{rels.join(" ")}, revs: #{revs.join(" ")}"
end unless (rels + revs).empty?
if !(attrs[:rel] || attrs[:rev])
# Establishing a new subject if no rel/rev [7.5 Step 5]
-
+
if @version == :"rdfa1.0"
new_subject = if attrs[:about]
process_uri(element, attrs[:about], evaluation_context, base,
uri_mappings: uri_mappings,
restrictions: SafeCURIEorCURIEorIRI.fetch(@version, []))
@@ -987,11 +999,11 @@
uri_mappings: uri_mappings,
restrictions: SafeCURIEorCURIEorIRI.fetch(@version, []))
new_subject ||= process_uri(element, attrs[:src], evaluation_context, base,
uri_mappings: uri_mappings,
restrictions: [:uri]) if @version == :"rdfa1.0"
-
+
# if the @typeof attribute is present, set typed resource to new subject
typed_resource = new_subject if attrs[:typeof]
# If no URI is provided then the first match from the following rules will apply
new_subject ||= if element == root && base
@@ -1006,11 +1018,11 @@
else
# if it's null, it's null and nothing changes
evaluation_context.parent_object
# no skip flag set this time
end
-
+
# Then the current object resource is set to the URI obtained from the first match from the following rules:
current_object_resource = process_uri(element, attrs[:resource], evaluation_context, base,
uri_mappings: uri_mappings,
restrictions: SafeCURIEorCURIEorIRI.fetch(@version, [])) if attrs[:resource]
current_object_resource ||= process_uri(element, attrs[:href], evaluation_context, base,
@@ -1032,11 +1044,11 @@
"[Step 6] new_subject: #{new_subject}, " +
"current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource} " +
"typed_resource: #{typed_resource.to_ntriples rescue 'nil'}, "
}
end
-
+
# [Step 7] If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values;
if typed_resource
# Typeof is TERMorCURIEorAbsIRIs
types = process_uris(element, attrs[:typeof], evaluation_context, base,
uri_mappings: uri_mappings,
@@ -1080,19 +1092,19 @@
else
# Predicates for the current object resource can be set by using one or both of the @rel and the @rev attributes but, in case of the @rel attribute, only if the @inlist is not present:
add_triple(element, new_subject, r, current_object_resource)
end
end
-
+
revs.each do |r|
add_triple(element, current_object_resource, r, new_subject)
end
elsif attrs[:rel] || attrs[:rev]
# Incomplete triples and bnode creation [Step 10]
add_debug(element) {"[Step 10] incompletes: rels: #{rels}, revs: #{revs}"}
current_object_resource = RDF::Node.new
-
+
# predicate: full IRI
# direction: forward/reverse
# lists: Save into list, don't generate triple
rels.each do |r|
@@ -1106,16 +1118,16 @@
incomplete_triples << {list: list_mapping[r], direction: :none}
else
incomplete_triples << {predicate: r, direction: :forward}
end
end
-
+
revs.each do |r|
incomplete_triples << {predicate: r, direction: :reverse}
end
end
-
+
# Establish current object literal [Step 11]
#
# If the current element has a @inlist attribute, add the property to the
# list associated with that property, creating a new list if necessary.
if attrs[:property]
@@ -1265,15 +1277,15 @@
add_debug(element) {"[Step 11] lists(#{p}): create #{list_mapping[p].inspect}"}
end
add_debug(element) {"[Step 11] add #{current_property_value.to_ntriples} to #{p.to_ntriples} #{list_mapping[p].inspect}"}
list_mapping[p] << current_property_value
elsif new_subject
- add_triple(element, new_subject, p, current_property_value)
+ add_triple(element, new_subject, p, current_property_value)
end
end
end
-
+
if !skip and new_subject && !evaluation_context.incomplete_triples.empty?
# Complete the incomplete triples from the evaluation context [Step 12]
add_debug(element) do
"[Step 12] complete incomplete triples: " +
"new_subject=#{new_subject.to_ntriples}, " +
@@ -1327,16 +1339,16 @@
new_ec.term_mappings = term_mappings
new_ec.default_vocabulary = default_vocabulary
new_ec.list_mapping = list_mapping
add_debug(element, "[Step 13] new ec")
end
-
+
element.children.each do |child|
# recurse only if it's an element
traverse(child, new_ec) if child.element?
end
-
+
# Step 14: after traversing through child elements, for each list associated with
# a property
(list_mapping || {}).each do |p, l|
# if that list is different from the evaluation context
ec_list = evaluation_context.list_mapping[p] if evaluation_context.list_mapping
@@ -1442,19 +1454,19 @@
add_warning(element, "Undefined prefix #{$1}")
else
add_warning(element, "Relative URI #{value}")
end
end
-
+
# [7.4.3] General Use of Terms in Attributes
def process_term(element, value, options)
if options[:vocab]
# If there is a local default vocabulary, the IRI is obtained by concatenating that value and the term
return uri(options[:vocab] + value)
elsif options[:term_mappings].is_a?(Hash)
# If the term is in the local term mappings, use the associated URI (case sensitive).
return uri(options[:term_mappings][value.to_s.to_sym]) if options[:term_mappings].has_key?(value.to_s.to_sym)
-
+
# Otherwise, check for case-insensitive match
options[:term_mappings].each_pair do |term, uri|
return uri(uri) if term.to_s.downcase == value.to_s.downcase
end
end