lib/rdf/rdfa/reader.rb in rdf-rdfa-0.2.1 vs lib/rdf/rdfa/reader.rb in rdf-rdfa-0.2.2
- old
+ new
@@ -2,14 +2,31 @@
module RDF::RDFa
##
# An RDFa parser in Ruby
#
+ # Based on processing rules described here:
+ # @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0
+ # @see http://www.w3.org/2010/02/rdfa/drafts/2010/ED-rdfa-core-20100803/ RDFa 1.1
+ #
# @author [Gregg Kellogg](http://kellogg-assoc.com/)
class Reader < RDF::Reader
format Format
# Value forms accepted for an attribute, keyed by RDFa version (the keys are
# the values @version can take). Each table is looked up with @version and
# passed as the :restrictions option of process_uri / process_uris.
#
# Restrictions used for @about and @resource.
+ SafeCURIEorCURIEorURI = {
+ :rdfa_1_0 => [:term, :safe_curie, :uri, :bnode],
+ :rdfa_1_1 => [:safe_curie, :curie, :term, :uri, :bnode],
+ }
# Restrictions used for @rel, @rev, @typeof and @datatype.
+ TERMorCURIEorAbsURI = {
+ :rdfa_1_0 => [:term, :curie],
+ :rdfa_1_1 => [:term, :curie, :absuri],
+ }
# Restrictions used for @property (no :term form under RDFa 1.0).
+ TERMorCURIEorAbsURIprop = {
+ :rdfa_1_0 => [:curie],
+ :rdfa_1_1 => [:term, :curie, :absuri],
+ }
+
NC_REGEXP = Regexp.new(
%{^
(?!\\\\u0301) # ́ is a non-spacing acute accent.
# It is legal within an XML Name, but not as the first character.
( [a-zA-Z_]
@@ -19,66 +36,97 @@
| \\\\u([0-9a-fA-F]{4})
)*
$},
Regexp::EXTENDED)
- # Host language, One of:
- # :xhtml_rdfa_1_0
- # :xhtml_rdfa_1_1
+ # Host language
+ # @return [:xhtml]
attr_reader :host_language
# The Recursive Baggage
+ # @private
class EvaluationContext # :nodoc:
- # The base. This will usually be the URL of the document being processed,
+ # The base.
+ #
+ # This will usually be the URL of the document being processed,
# but it could be some other URL, set by some other mechanism,
# such as the (X)HTML base element. The important thing is that it establishes
# a URL against which relative paths can be resolved.
+ #
+ # @return [URI]
attr :base, true
# The parent subject.
+ #
# The initial value will be the same as the initial value of base,
# but it will usually change during the course of processing.
+ #
+ # @return [URI]
attr :parent_subject, true
# The parent object.
+ #
# In some situations the object of a statement becomes the subject of any nested statements,
# and this property is used to convey this value.
# Note that this value may be a bnode, since in some situations a number of nested statements
# are grouped together on one bnode.
# This means that the bnode must be set in the containing statement and passed down,
# and this property is used to convey this value.
+ #
+ # @return [URI]
attr :parent_object, true
# A list of current, in-scope URI mappings.
+ #
+ # @return [Hash{Symbol => String}]
attr :uri_mappings, true
- # A list of incomplete triples. A triple can be incomplete when no object resource
+ # A list of incomplete triples.
+ #
+ # A triple can be incomplete when no object resource
# is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
# The triples can be completed when a resource becomes available,
# which will be when the next subject is specified (part of the process called chaining).
+ #
+ # @return [Array<Array<URI, Resource>>]
attr :incomplete_triples, true
# The language. Note that there is no default language.
+ #
+ # @return [Symbol]
attr :language, true
# The term mappings, a list of terms and their associated URIs.
+ #
# This specification does not define an initial list.
# Host Languages may define an initial list.
# If a Host Language provides an initial list, it should do so via an RDFa Profile document.
+ #
+ # @return [Hash{Symbol => URI}]
attr :term_mappings, true
- # The default vocabulary, a value to use as the prefix URI when a term is used.
+ # The default vocabulary
+ #
+ # A value to use as the prefix URI when a term is used.
# This specification does not define an initial setting for the default vocabulary.
# Host Languages may define an initial setting.
+ #
+ # @return [URI]
attr :default_vocabulary, true
+ # @param [RDF::URI] base
+ # @param [Hash] host_defaults
+ # @option host_defaults [Hash{String => URI}] :term_mappings Hash of NCName => URI
+ # @option host_defaults [Hash{String => URI}] :uri_mappings Hash of prefix => URI
+ # @option host_defaults [String] :vocabulary Default vocabulary URI
def initialize(base, host_defaults)
# Initialize the evaluation context, [5.1]
@base = base
@parent_subject = @base
@parent_object = nil
@incomplete_triples = []
@language = nil
@uri_mappings = host_defaults.fetch(:uri_mappings, {})
@term_mappings = host_defaults.fetch(:term_mappings, {})
- @default_voabulary = host_defaults.fetch(:voabulary, nil)
+ @default_vocabulary = host_defaults.fetch(:vocabulary, nil)
end
# Copy this Evaluation Context
+ #
+ # @param [EvaluationContext] from
def initialize_copy(from)
  # A copied context must not share its mutable containers with the context
  # it was copied from, so take separate (shallow) clones of both of them.
  own_mappings = from.uri_mappings.clone
  @uri_mappings = own_mappings
  own_triples = from.incomplete_triples.clone
  @incomplete_triples = own_triples
end
@@ -93,33 +141,40 @@
end
##
# Initializes the RDFa reader instance.
#
- # @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, IO, File, String] input
+ # @param [Nokogiri::HTML::Document, Nokogiri::XML::Document, #read, #to_s] input
# @option options [Array] :debug (nil) Array to place debug messages
+ # @option options [Graph] :processor_graph (nil) Graph to record information, warnings and errors.
# @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
# @option options [#to_s] :base_uri (nil) Base URI to use for relative URIs.
+ # @option options [:rdfa_1_0, :rdfa_1_1] :version (:rdfa_1_1) Parser version information
+ # @option options [:xhtml] :host_language (:xhtml) Host Language
# @return [reader]
# @yield [reader]
- # @yieldparam [Reader] reader
+ # @yieldparam [RDF::Reader] reader
# @raise [RDF::ReaderError]:: Raises RDF::ReaderError if _strict_
def initialize(input = $stdin, options = {}, &block)
super do
@debug = options[:debug]
@strict = options[:strict]
@base_uri = RDF::URI.intern(options[:base_uri])
@@vocabulary_cache ||= {}
+ @version = options[:version] ? options[:version].to_sym : :rdfa_1_1
+ @host_language = options[:host_language] || :xhtml
+
@doc = case input
when Nokogiri::HTML::Document then input
when Nokogiri::XML::Document then input
else Nokogiri::XML.parse(input, @base_uri.to_s)
end
- raise RDF::ReaderError, "Synax errors:\n#{@doc.errors}" if !@doc.errors.empty? && @strict
- raise RDF::ReaderError, "Empty document" if (@doc.nil? || @doc.root.nil?) && @strict
+ add_error(nil, "Empty document", RDF::RDFA.HostLanguageMarkupError) if (@doc.nil? || @doc.root.nil?)
+ add_warning(nil, "Synax errors:\n#{@doc.errors}", RDF::RDFA.HostLanguageMarkupError) unless @doc.errors.empty?
+
block.call(self) if block_given?
end
end
##
@@ -129,20 +184,15 @@
# @yieldparam [RDF::Statement] statement
# @return [void]
def each_statement(&block)
@callback = block
- # Determine host language
- # XXX - right now only XHTML defined
- @host_language = case @doc.root.attributes["version"].to_s
- when /XHTML+RDFa/ then :xhtml
- end
-
- # If none found, assume xhtml
- @host_language ||= :xhtml
-
- @host_defaults = {}
+ # Section 4.2 RDFa Host Language Conformance
+ #
+ # The Host Language may define a default RDFa Profile. If it does, the RDFa Profile triples that establish term or
+ # URI mappings associated with that profile must not change without changing the profile URI. RDFa Processors may
+ # embed, cache, or retrieve the RDFa Profile triples associated with that profile.
@host_defaults = case @host_language
when :xhtml
{
:vocabulary => RDF::XHV.to_s,
:prefix => "xhv",
@@ -154,10 +204,14 @@
}
else
{}
end
+ @host_defaults.delete(:vocabulary) if @version == :rdfa_1_0
+
+ add_debug(@doc, "version = #{@version}, host_language = #{@host_language}")
+
# parse
parse_whole_document(@doc, @base_uri)
end
##
@@ -183,36 +237,61 @@
end
# Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
def node_path(node)
case node
- when Nokogiri::XML::Element, Nokogiri::XML::Attr then "#{node_path(node.parent)}/#{node.name}"
- when String then node
- else ""
+ when Nokogiri::XML::Node then node.display_path
+ else node.to_s
end
end
# Add debug event to debug array, if specified
#
# @param [XML Node, any] node:: XML Node or string for showing context
# @param [String] message::
def add_debug(node, message)
- puts "#{node_path(node)}: #{message}" if $DEBUG
- @debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
+ add_processor_message(node, message, RDF::RDFA.InformationalMessage)
end
# Report an informational processor message by delegating to
# add_processor_message.
#
# @param [XML Node, any] node:: XML Node or string for showing context
# @param [String] message::
# @param process_class:: message class passed through to add_processor_message
#   (defaults to RDF::RDFA.InformationalMessage)
+ def add_info(node, message, process_class = RDF::RDFA.InformationalMessage)
+ add_processor_message(node, message, process_class)
+ end
+
# Report a processor warning by delegating to add_processor_message.
# Never raises by itself, regardless of @strict.
#
# @param [XML Node, any] node:: XML Node or string for showing context
# @param [String] message::
# @param process_class:: message class passed through to add_processor_message
#   (defaults to RDF::RDFA.MiscellaneousWarning)
+ def add_warning(node, message, process_class = RDF::RDFA.MiscellaneousWarning)
+ add_processor_message(node, message, process_class)
+ end
+
# Report a processor error by delegating to add_processor_message, then
# raise when @strict is set. The message is recorded before the raise, so it
# is reported in both strict and lax mode.
#
# @param [XML Node, any] node:: XML Node or string for showing context
# @param [String] message::
# @param process_class:: message class passed through to add_processor_message
#   (defaults to RDF::RDFA.MiscellaneousError)
# @raise [ParserException]:: if parsing mode is _strict_
# NOTE(review): ParserException is not defined in the visible source, and the
# other raise sites here use RDF::ReaderError -- confirm the constant resolves.
+ def add_error(node, message, process_class = RDF::RDFA.MiscellaneousError)
+ add_processor_message(node, message, process_class)
+ raise ParserException, message if @strict
+ end
+
# Deliver one processor message to every configured sink.
#
# The message, prefixed with the path of +node+, is printed to standard
# output when RDF::RDFa.debug? is true and appended to @debug when that is an
# Array. When @processor_graph is set, five statements describing the message
# are added to it, all sharing one new blank node as subject.
#
# @param [XML Node, any] node:: XML Node or string for showing context
# @param [String] message::
# @param process_class:: object of the rdf:type statement recorded for the message
#
# NOTE(review): the visible Reader#initialize does not assign
# @processor_graph from options[:processor_graph] -- confirm where it is set.
+ def add_processor_message(node, message, process_class)
+ puts "#{node_path(node)}: #{message}" if ::RDF::RDFa::debug?
+ @debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array)
+ if @processor_graph
# Counter is created on first use; the first recorded message gets sequence 1.
+ @processor_sequence ||= 0
+ n = RDF::Node.new
+ @processor_graph << RDF::Statement.new(n, RDF["type"], process_class)
+ @processor_graph << RDF::Statement.new(n, RDF::DC.description, message)
+ @processor_graph << RDF::Statement.new(n, RDF::DC.date, RDF::Literal::Date.new(DateTime.now.to_date))
+ @processor_graph << RDF::Statement.new(n, RDF::RDFA.sequence, RDF::Literal::Integer.new(@processor_sequence += 1))
+ @processor_graph << RDF::Statement.new(n, RDF::RDFA.source, node_path(node))
+ end
+ end
+
# add a statement, object can be literal or URI or bnode
#
# @param [Nokogiri::XML::Node, any] node:: XML Node or string for showing context
# @param [URI, BNode] subject:: the subject of the statement
# @param [URI] predicate:: the predicate of the statement
# @param [URI, BNode, Literal] object:: the object of the statement
# @return [Statement]:: Added statement
# @raise [ReaderError]:: Checks parameter types and raises if they are incorrect if parsing mode is _strict_.
def add_triple(node, subject, predicate, object)
statement = RDF::Statement.new(subject, predicate, object)
- add_debug(node, "statement: #{statement}")
+ add_debug(node, "statement: #{statement.to_ntriples}")
@callback.call(statement)
end
# Parsing an RDFa document (this is *not* the recursive method)
@@ -227,103 +306,100 @@
@base_uri = RDF::URI.intern(base)
add_debug(base_el, "parse_whole_doc: base='#{base}'")
end
# initialize the evaluation context with the appropriate base
- evaluation_context = EvaluationContext.new(base, @host_defaults)
+ evaluation_context = EvaluationContext.new(@base_uri, @host_defaults)
traverse(doc.root, evaluation_context)
end
- # Extract the XMLNS mappings from an element
- def extract_mappings(element, uri_mappings, term_mappings)
- # Process @profile
- # Next the current element is parsed for any updates to the local term mappings and
- # local list of URI mappings via @profile.
- # If @profile is present, its value is processed as defined in RDFa Profiles.
- element.attributes['profile'].to_s.split(/\s/).each do |profile|
- if node_path(element) == "/html/head"
- # Don't try to open ourselves!
- add_debug(element, "extract_mappings: skip head profile <#{profile}>")
- next
- elsif @@vocabulary_cache[profile]
- add_debug(element, "extract_mappings: cached profile <#{profile}>")
- @@vocabulary_cache[profile]
- elsif @base_uri.to_s == profile
- # Don't try to open ourselves!
- add_debug(element, "extract_mappings: skip recursive profile <#{profile}>")
- next
+ # Parse and process URI mappings, Term mappings and a default vocabulary from @profile
+ #
+ # Yields each mapping
+ def process_profile(element)
+ element.attributes['profile'].to_s.split(/\s/).reverse.each do |profile|
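+ # Don't try to open ourselves!
+ # NOTE(review): @uri is not assigned anywhere in the visible source (the removed code compared against @base_uri.to_s) -- confirm this guard can ever match.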
+ if @uri == profile
+ add_debug(element, "process_profile: skip recursive profile <#{profile}>")
elsif @@vocabulary_cache.has_key?(profile)
- add_debug(element, "extract_mappings: skip previously parsed profile <#{profile}>")
+ add_debug(element, "process_profile: skip previously parsed profile <#{profile}>")
else
begin
- add_debug(element, "extract_mappings: parse profile <#{profile}>")
@@vocabulary_cache[profile] = {
:uri_mappings => {},
- :term_mappings => {}
+ :term_mappings => {},
+ :default_vocabulary => nil
}
um = @@vocabulary_cache[profile][:uri_mappings]
tm = @@vocabulary_cache[profile][:term_mappings]
- add_debug(element, "extract_mappings: profile open <#{profile}>")
-
- old_debug, old_verbose, = $DEBUG, $verbose
- $DEBUG, $verbose = false, false
- # FIXME: format shouldn't need to be specified here
- p_graph = RDF::Graph.load(profile, :base_uri => profile, :format => RDF::Format.for(profile) || :rdfa)
- puts p_graph.inspect if old_debug
- $DEBUG, $verbose = old_debug, old_verbose
- p_graph.each_subject do |subject|
- # If one of the objects is not a Literal no mapping is created.
+ add_debug(element, "process_profile: parse profile <#{profile}>")
+
+ # Parse profile, and extract mappings from graph
+ old_debug, old_verbose, = ::RDF::RDFa::debug?, $verbose
+ ::RDF::RDFa::debug, $verbose = false, false
+ # FIXME: RDF isn't smart enough to figure this out from the MIME type
+ load_opts = {:base_uri => profile}
+ load_opts[:format] = :rdfa unless RDF::Format.for(:file_name => profile)
+ p_graph = RDF::Graph.load(profile, load_opts)
+ ::RDF::RDFa::debug, $verbose = old_debug, old_verbose
+ p_graph.subjects.each do |subject|
+ # If one of the objects is not a Literal or if there are additional rdfa:uri or rdfa:term
+ # predicates sharing the same subject, no mapping is created.
uri = p_graph.first_object([subject, RDF::RDFA['uri'], nil])
term = p_graph.first_object([subject, RDF::RDFA['term'], nil])
prefix = p_graph.first_object([subject, RDF::RDFA['prefix'], nil])
- add_debug(element, "extract_mappings: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}")
+ vocab = p_graph.first_object([subject, RDF::RDFA['vocabulary'], nil])
+ add_debug(element, "process_profile: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}, vocabulary=#{vocab.inspect}")
- next if !uri || (!term && !prefix)
- raise RDF::ReaderError, "rdf:uri must be a Literal" unless uri.is_a?(RDF::Literal)
- raise RDF::ReaderError, "rdf:term must be a Literal" unless term.nil? || term.is_a?(RDF::Literal)
- raise RDF::ReaderError, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(RDF::Literal)
-
+ raise RDF::ReaderError, "rdf:uri #{uri.inspect} must be a Literal" unless uri.nil? || uri.is_a?(RDF::Literal)
+ raise RDF::ReaderError, "rdf:term #{term.inspect} must be a Literal" unless term.nil? || term.is_a?(RDF::Literal)
+ raise RDF::ReaderError, "rdf:prefix #{prefix.inspect} must be a Literal" unless prefix.nil? || prefix.is_a?(RDF::Literal)
+ raise RDF::ReaderError, "rdf:vocabulary #{vocab.inspect} must be a Literal" unless vocab.nil? || vocab.is_a?(RDF::Literal)
+
+ @@vocabulary_cache[profile][:default_vocabulary] = vocab.value if vocab
+
# For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
# predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
# object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
# URI mappings after transforming the 'prefix' component to lower-case.
# For every extracted
- um[prefix.value.downcase] = uri.value if prefix
+ um[prefix.value.downcase] = uri.value if prefix && prefix.value != "_"
# triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
# mapping from the object literal of the rdfa:term predicate to the object literal of the
# rdfa:uri predicate. Add or update this mapping in the local term mappings.
- tm[term.value] = RDF::URI.intern(uri.value) if term
+ tm[term.value.downcase] = RDF::URI.intern(uri.value) if term
end
- # FIXME: subject isn't in scope here
- #rescue RDF::ReaderError
- # add_debug(element, "extract_mappings: profile subject #{subject.to_s}: #{e.message}")
- # raise if @strict
- rescue RuntimeError => e
- add_debug(element, "extract_mappings: profile: #{e.message}")
- raise if @strict
+ rescue RDF::ReaderError => e
+ add_error(element, e.message, RDF::RDFA.ProfileReferenceError)
+ raise # In case we're not in strict mode, we need to be sure processing stops
end
end
-
- # Merge mappings from this vocabulary
- uri_mappings.merge!(@@vocabulary_cache[profile][:uri_mappings])
- term_mappings.merge!(@@vocabulary_cache[profile][:term_mappings])
+ profile_mappings = @@vocabulary_cache[profile]
+ yield :uri_mappings, profile_mappings[:uri_mappings] unless profile_mappings[:uri_mappings].empty?
+ yield :term_mappings, profile_mappings[:term_mappings] unless profile_mappings[:term_mappings].empty?
+ yield :default_vocabulary, profile_mappings[:default_vocabulary] if profile_mappings[:default_vocabulary]
end
-
+ end
+
+ # Extract the XMLNS mappings from an element
+ def extract_mappings(element, uri_mappings, term_mappings)
# look for xmlns
# (note, this may be dependent on @host_language)
# Regardless of how the mapping is declared, the value to be mapped must be converted to lower case,
# and the URI is not processed in any way; in particular if it is a relative path it is
# not resolved against the current base.
- element.namespaces.each do |attr_name, attr_value|
- begin
- abbr, prefix = attr_name.split(":")
- uri_mappings[prefix.to_s.downcase] = attr_value.to_s if abbr.downcase == "xmlns" && prefix
- rescue ReaderError => e
- add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
- raise if @strict
+ element.namespace_definitions.each do |ns|
+ # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
+ next if ns.prefix == "_"
+
+ # Downcase prefix for RDFa 1.1
+ pfx_lc = (@version == :rdfa_1_0 || ns.prefix.nil?) ? ns.prefix : ns.prefix.to_s.downcase
+ if ns.prefix
+ uri_mappings[pfx_lc] = ns.href
+ add_debug(element, "extract_mappings: xmlns:#{ns.prefix} => <#{ns.href}>")
end
end
# Set mappings from @prefix
# prefix is a whitespace separated list of prefix-name URI pairs of the form
@@ -333,15 +409,16 @@
prefix, uri = mappings.shift.downcase, mappings.shift
#puts "uri_mappings prefix #{prefix} <#{uri}>"
next unless prefix.match(/:$/)
prefix.chop!
+ # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix.
+ next if prefix == "_"
+
uri_mappings[prefix] = uri
- end
-
- add_debug(element, "uri_mappings: #{uri_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
- add_debug(element, "term_mappings: #{term_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
+ add_debug(element, "extract_mappings: prefix #{prefix} => <#{uri}>")
+ end unless @version == :rdfa_1_0
end
# The recursive helper function
def traverse(element, evaluation_context)
if element.nil?
@@ -350,11 +427,11 @@
return nil
end
add_debug(element, "traverse, ec: #{evaluation_context.inspect}")
- # local variables [5.5 Step 1]
+ # local variables [7.5 Step 1]
recurse = true
skip = false
new_subject = nil
current_object_resource = nil
uri_mappings = evaluation_context.uri_mappings.clone
@@ -373,34 +450,57 @@
resource = attrs['resource']
href = attrs['href']
vocab = attrs['vocab']
# Pull out the attributes needed for the skip test.
- property = attrs['property'].to_s if attrs['property']
- typeof = attrs['typeof'].to_s if attrs['typeof']
+ property = attrs['property'].to_s.strip if attrs['property']
+ typeof = attrs['typeof'].to_s.strip if attrs['typeof']
datatype = attrs['datatype'].to_s if attrs['datatype']
content = attrs['content'].to_s if attrs['content']
- rel = attrs['rel'].to_s if attrs['rel']
- rev = attrs['rev'].to_s if attrs['rev']
+ rel = attrs['rel'].to_s.strip if attrs['rel']
+ rev = attrs['rev'].to_s.strip if attrs['rev']
- # Default vocabulary [7.5 Step 2]
- # First the current element is examined for any change to the default vocabulary via @vocab.
+ # Local term mappings [7.5 Step 2]
+ # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
+ # If @profile is present, its value is processed as defined in RDFa Profiles.
+ unless @version == :rdfa_1_0
+ begin
+ process_profile(element) do |which, value|
+ add_debug(element, "[Step 2] traverse, #{which}: #{value.inspect}")
+ case which
+ when :uri_mappings then uri_mappings.merge!(value)
+ when :term_mappings then term_mappings.merge!(value)
+ when :default_vocabulary then default_vocabulary = value
+ end
+ end
+ rescue
+ # Skip this element and all sub-elements
+ # If any referenced RDFa Profile is not available, then the current element and its children must not place any
+ # triples in the default graph.
+ raise if @strict
+ return
+ end
+ end
+
+ # Default vocabulary [7.5 Step 3]
+ # Next the current element is examined for any change to the default vocabulary via @vocab.
# If @vocab is present and contains a value, its value updates the local default vocabulary.
# If the value is empty, then the local default vocabulary must be reset to the Host Language defined default.
unless vocab.nil?
default_vocabulary = if vocab.to_s.empty?
# Set default_vocabulary to host language default
- @host_defaults.fetch(:voabulary, nil)
+ add_debug(element, "[Step 2] traverse, reset default_vocaulary to #{@host_defaults.fetch(:vocabulary, nil).inspect}")
+ @host_defaults.fetch(:vocabulary, nil)
else
- vocab.to_s
+ RDF::URI.intern(vocab)
end
add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
end
- # Local term mappings [7.5 Steps 3 & 4]
- # Next the current element is parsed for any updates to the local term mappings and local list of URI mappings via @profile.
- # If @profile is present, its value is processed as defined in RDFa Profiles.
+ # Local URI mappings [7.5 Step 4]
+ # Next, the current element is examined for URI mappings and these are added to the local list of URI mappings.
+ # Note that a URI mapping will simply overwrite any current mapping in the list that has the same name.
extract_mappings(element, uri_mappings, term_mappings)
# Language information [7.5 Step 5]
# From HTML5 [3.2.3.3]
# If both the lang attribute in no namespace and the lang attribute in the XML namespace are set
@@ -417,94 +517,110 @@
end
language = nil if language.to_s.empty?
add_debug(element, "HTML5 [3.2.3.3] traverse, lang: #{language || 'nil'}") if attrs['lang']
# rels and revs
- rels = process_uris(element, rel, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
- revs = process_uris(element, rev, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
+ rels = process_uris(element, rel, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :term_mappings => term_mappings,
+ :vocab => default_vocabulary,
+ :restrictions => TERMorCURIEorAbsURI[@version])
+ revs = process_uris(element, rev, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :term_mappings => term_mappings,
+ :vocab => default_vocabulary,
+ :restrictions => TERMorCURIEorAbsURI[@version])
add_debug(element, "traverse, about: #{about.nil? ? 'nil' : about}, src: #{src.nil? ? 'nil' : src}, resource: #{resource.nil? ? 'nil' : resource}, href: #{href.nil? ? 'nil' : href}")
add_debug(element, "traverse, property: #{property.nil? ? 'nil' : property}, typeof: #{typeof.nil? ? 'nil' : typeof}, datatype: #{datatype.nil? ? 'nil' : datatype}, content: #{content.nil? ? 'nil' : content}")
add_debug(element, "traverse, rels: #{rels.join(" ")}, revs: #{revs.join(" ")}")
if !(rel || rev)
# Establishing a new subject if no rel/rev [7.5 Step 6]
# May not be valid, but can exist
- if about
- new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
+ new_subject = if about
+ process_uri(element, about, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :restrictions => SafeCURIEorCURIEorURI[@version])
elsif src
- new_subject = process_uri(element, src, evaluation_context)
+ process_uri(element, src, evaluation_context, :restrictions => [:uri])
elsif resource
- new_subject = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
+ process_uri(element, resource, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :restrictions => SafeCURIEorCURIEorURI[@version])
elsif href
- new_subject = process_uri(element, href, evaluation_context)
+ process_uri(element, href, evaluation_context, :restrictions => [:uri])
end
# If no URI is provided by a resource attribute, then the first match from the following rules
# will apply:
# if @typeof is present, then new subject is set to be a newly created bnode.
# otherwise,
# if parent object is present, new subject is set to the value of parent object.
# Additionally, if @property is not present then the skip element flag is set to 'true';
- if new_subject.nil?
- if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
- # From XHTML+RDFa 1.1:
- # if no URI is provided, then first check to see if the element is the head or body element.
- # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
- new_subject = RDF::URI.intern(evaluation_context.base)
- elsif element.attributes['typeof']
- new_subject = RDF::Node.new
- else
- # if it's null, it's null and nothing changes
- new_subject = evaluation_context.parent_object
- skip = true unless property
- end
+ new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/ && evaluation_context.base
+ # From XHTML+RDFa 1.1:
+ # if no URI is provided, then first check to see if the element is the head or body element.
+ # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
+ evaluation_context.base
+ elsif element.attributes['typeof']
+ RDF::Node.new
+ else
+ # if it's null, it's null and nothing changes
+ skip = true unless property
+ evaluation_context.parent_object
end
add_debug(element, "[Step 6] new_subject: #{new_subject}, skip = #{skip}")
else
# [7.5 Step 7]
# If the current element does contain a @rel or @rev attribute, then the next step is to
# establish both a value for new subject and a value for current object resource:
- if about
- new_subject = process_uri(element, about, evaluation_context, :uri_mappings => uri_mappings)
- elsif src
- new_subject = process_uri(element, src, evaluation_context, :uri_mappings => uri_mappings)
- end
+ new_subject = process_uri(element, about, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :restrictions => SafeCURIEorCURIEorURI[@version]) ||
+ process_uri(element, src, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :restrictions => [:uri])
# If no URI is provided then the first match from the following rules will apply
- if new_subject.nil?
- if @host_language == :xhtml && element.name =~ /^(head|body)$/
- # From XHTML+RDFa 1.1:
- # if no URI is provided, then first check to see if the element is the head or body element.
- # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
- new_subject = RDF::URI.intern(evaluation_context.base)
- elsif element.attributes['typeof']
- new_subject = RDF::Node.new
- else
- # if it's null, it's null and nothing changes
- new_subject = evaluation_context.parent_object
- # no skip flag set this time
- end
+ new_subject ||= if @host_language == :xhtml && element.name =~ /^(head|body)$/
+ # From XHTML+RDFa 1.1:
+ # if no URI is provided, then first check to see if the element is the head or body element.
+ # If it is, then act as if there is an empty @about present, and process it according to the rule for @about.
+ evaluation_context.base
+ elsif element.attributes['typeof']
+ RDF::Node.new
+ else
+ # if it's null, it's null and nothing changes
+ evaluation_context.parent_object
+ # no skip flag set this time
end
# Then the current object resource is set to the URI obtained from the first match from the following rules:
- if resource
- current_object_resource = process_uri(element, resource, evaluation_context, :uri_mappings => uri_mappings)
+ current_object_resource = if resource
+ process_uri(element, resource, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :restrictions => SafeCURIEorCURIEorURI[@version])
elsif href
- current_object_resource = process_uri(element, href, evaluation_context)
+ process_uri(element, href, evaluation_context,
+ :restrictions => [:uri])
end
add_debug(element, "[Step 7] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}")
end
# Process @typeof if there is a subject [Step 8]
if new_subject and typeof
- # Typeof is TERMorCURIEorURIs
- types = process_uris(element, typeof, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
+ # Typeof is TERMorCURIEorAbsURIs
+ types = process_uris(element, typeof, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :term_mappings => term_mappings,
+ :vocab => default_vocabulary,
+ :restrictions => TERMorCURIEorAbsURI[@version])
add_debug(element, "typeof: #{typeof}")
types.each do |one_type|
- add_triple(element, new_subject, RDF.type, one_type)
+ add_triple(element, new_subject, RDF["type"], one_type)
end
end
# Generate triples with given object [Step 9]
if current_object_resource
@@ -529,34 +645,64 @@
end
end
# Establish current object literal [Step 11]
if property
- properties = process_uris(element, property, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
+ properties = process_uris(element, property, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :term_mappings => term_mappings,
+ :vocab => default_vocabulary,
+ :restrictions => TERMorCURIEorAbsURIprop[@version])
+ properties.reject! do |p|
+ if p.is_a?(RDF::URI)
+ false
+ else
+ add_debug(element, "Illegal predicate: #{p.inspect}")
+ raise RDF::ReaderError, "predicate #{p.inspect} must be a URI" if @strict
+ true
+ end
+ end
+
# get the literal datatype
- type = datatype
children_node_types = element.children.collect{|c| c.class}.uniq
# the following 3 IF clauses should be mutually exclusive. Written as is to prevent extensive indentation.
- type_resource = process_uri(element, type, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary) if type
- if type and !type.empty? and (type_resource.to_s != RDF.XMLLiteral.to_s)
+ datatype = process_uri(element, datatype, evaluation_context,
+ :uri_mappings => uri_mappings,
+ :term_mappings => term_mappings,
+ :vocab => default_vocabulary,
+ :restrictions => TERMorCURIEorAbsURI[@version]) unless datatype.to_s.empty?
+ current_object_literal = if !datatype.to_s.empty? && datatype.to_s != RDF.XMLLiteral.to_s
# typed literal
- add_debug(element, "[Step 11] typed literal")
- current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :datatype => type_resource, :language => language)
- elsif content or (children_node_types == [Nokogiri::XML::Text]) or (element.children.length == 0) or (type == '')
- # plain literal
- add_debug(element, "[Step 11] plain literal")
- current_object_literal = RDF::Literal.new(content || element.inner_text.to_s, :language => language)
- elsif children_node_types != [Nokogiri::XML::Text] and (type == nil or type_resource.to_s == RDF.XMLLiteral.to_s)
- # XML Literal
- add_debug(element, "[Step 11] XML Literal: #{element.inner_html}")
- current_object_literal = RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
- recurse = false
+ add_debug(element, "[Step 11] typed literal (#{datatype})")
+ RDF::Literal.new(content || element.inner_text.to_s, :datatype => datatype, :language => language)
+ elsif @version == :rdfa_1_1
+ if datatype.to_s == RDF.XMLLiteral.to_s
+ # XML Literal
+ add_debug(element, "[Step 11(1.1)] XML Literal: #{element.inner_html}")
+ recurse = false
+ RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
+ else
+ # plain literal
+ add_debug(element, "[Step 11(1.1)] plain literal")
+ RDF::Literal.new(content || element.inner_text.to_s, :language => language)
+ end
+ else
+ if content || (children_node_types == [Nokogiri::XML::Text]) || (element.children.length == 0) || datatype == ""
+ # plain literal
+ add_debug(element, "[Step 11 (1.0)] plain literal")
+ RDF::Literal.new(content || element.inner_text.to_s, :language => language)
+ elsif children_node_types != [Nokogiri::XML::Text] and (datatype == nil or datatype.to_s == RDF.XMLLiteral.to_s)
+ # XML Literal
+ add_debug(element, "[Step 11 (1.0)] XML Literal: #{element.inner_html}")
+ recurse = false
+ RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => uri_mappings.merge("" => "http://www.w3.org/1999/xhtml"))
+ end
end
-
- # add each property
+
+ # add each property
properties.each do |p|
add_triple(element, new_subject, p, current_object_literal)
end
# SPEC CONFUSION: "the triple has been created" ==> there may be more than one
# set the recurse flag above in the IF about xmlliteral, as it is the only place that can happen
@@ -609,46 +755,68 @@
traverse(child, new_ec) if child.class == Nokogiri::XML::Element
end
end
end
- # space-separated TERMorCURIEorURI
+ # space-separated TERMorCURIEorAbsURI or SafeCURIEorCURIEorURI
# Resolve every whitespace-separated token of an attribute value.
#
# Tokens are collected by scanning for runs of non-whitespace, so leading or
# trailing whitespace in the attribute never produces an empty token for
# process_uri to resolve.
#
# @param element:: passed through to add_debug and process_uri
# @param value:: attribute value; nil or an empty string yields []
# @param evaluation_context:: passed through to process_uri
# @param [Hash] options:: passed through to process_uri
# @return [Array] the non-nil results of process_uri, in token order
def process_uris(element, value, evaluation_context, options)
  return [] if value.to_s.empty?
  add_debug(element, "process_uris: #{value}")
  value.to_s.scan(/\S+/).map { |v| process_uri(element, v, evaluation_context, options) }.compact
end
# Resolve one attribute token to a URI or blank node.
#
# The token is tried, in order, as a safe CURIE ("[...]"), as a term, as a
# CURIE, and finally as a URI; each form is only attempted when it is
# listed in options[:restrictions].
#
# @param element:: element passed to add_debug / add_warning
# @param value:: the token; nil returns nil immediately
# @param evaluation_context:: supplies parent_subject (for CURIE resolution) and base (for relative URIs)
# @param [Hash] options:: reads :restrictions, :uri_mappings and :term_mappings here; the whole hash is handed to process_term
# @return the resolved URI or blank node, or nil when the token is ignored
def process_uri(element, value, evaluation_context, options = {})
- #return if value.to_s.empty?
- #add_debug(element, "process_uri: #{value}")
+ return if value.nil?
# Default to an empty list so a caller that omits :restrictions gets nil back
# instead of a NoMethodError from the include? checks below.
+ restrictions = options[:restrictions] || []
+ add_debug(element, "process_uri: #{value}, restrictions = #{restrictions.inspect}")
options = {:uri_mappings => {}}.merge(options)
- if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/)
+ if !options[:term_mappings] && options[:uri_mappings] && value.to_s.match(/^\[(.*)\]$/) && restrictions.include?(:safe_curie)
# SafeCURIEorCURIEorURI
# When the value is surrounded by square brackets, then the content within the brackets is
# evaluated as a CURIE according to the CURIE Syntax definition. If it is not a valid CURIE, the
# value must be ignored.
- uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject)
+ uri = curie_to_resource_or_bnode(element, $1, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
add_debug(element, "process_uri: #{value} => safeCURIE => <#{uri}>")
uri
- elsif options[:term_mappings] && NC_REGEXP.match(value.to_s)
- # TERMorCURIEorURI
+ elsif options[:term_mappings] && NC_REGEXP.match(value.to_s) && restrictions.include?(:term)
+ # TERMorCURIEorAbsURI
# If the value is an NCName, then it is evaluated as a term according to General Use of Terms in
# Attributes. Note that this step may mean that the value is to be ignored.
- uri = process_term(value.to_s, options)
+ uri = process_term(element, value.to_s, options)
add_debug(element, "process_uri: #{value} => term => <#{uri}>")
uri
else
- # SafeCURIEorCURIEorURI or TERMorCURIEorURI
+ # SafeCURIEorCURIEorURI or TERMorCURIEorAbsURI
# Otherwise, the value is evaluated as a CURIE.
# If it is a valid CURIE, the resulting URI is used; otherwise, the value will be processed as a URI.
- uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject)
+ uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject, restrictions)
if uri
add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
- else
- ## FIXME: throw exception if there is no base uri set?
- uri = RDF::URI.intern(RDF::URI.intern(evaluation_context.base).join(value))
+ elsif @version == :rdfa_1_0 && value.to_s.match(/^xml/i)
+ # Special case to not allow anything starting with XML to be treated as a URI
+ elsif restrictions.include?(:absuri) || restrictions.include?(:uri)
+ begin
+ # AbsURI does not use xml:base
+ if restrictions.include?(:absuri)
+ uri = RDF::URI.intern(value)
+ unless uri.absolute?
+ uri = nil
+ raise RDF::ReaderError, "Relative URI #{value}"
+ end
+ else
+ uri = evaluation_context.base.join(Addressable::URI.parse(value))
+ end
+ rescue Addressable::URI::InvalidURIError => e
+ add_warning(element, "Malformed prefix #{value}", RDF::RDFA.UndefinedPrefixError)
+ rescue RDF::ReaderError => e
+ add_debug(element, e.message)
# Capture the would-be prefix (text before the first colon) so the warning
# below can name it through $1.
+ if value.to_s =~ /\A(\w+):/
+ add_warning(element, "Undefined prefix #{$1}", RDF::RDFA.UndefinedPrefixError)
+ else
+ add_warning(element, "Relative URI #{value}")
+ end
+ end
add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
end
uri
end
end
@@ -657,49 +825,57 @@
#
# Look a term up in the local term mappings, falling back to the default vocabulary.
#
# @param element:: element reported with the warning when the term has no URI
# @param [String] value:: the term; matched against the term mappings after to_s.downcase
# @param [Hash] options:: Parser options, one of
# <em>options[:term_mappings]</em>:: Term mappings
# <em>options[:vocab]</em>:: Default vocabulary
# @return the mapped value, an RDF::URI built from vocab + term, or nil when neither applies
- def process_term(value, options)
+ def process_term(element, value, options)
case
when options[:term_mappings].is_a?(Hash) && options[:term_mappings].has_key?(value.to_s.downcase)
# If the term is in the local term mappings, use the associated URI.
# XXX Spec Confusion: are terms always downcased? Or only for XHTML Vocab?
options[:term_mappings][value.to_s.downcase]
when options[:vocab]
# Otherwise, if there is a local default vocabulary the URI is obtained by concatenating that value and the term.
# Note: the term is appended as given here; only the mapping lookup above downcases it.
RDF::URI.intern(options[:vocab] + value)
else
# Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
+ add_warning(element, "Term #{value} is not defined", RDF::RDFA.UndefinedTermError)
nil
end
end
# From section 6. CURIE Syntax Definition
#
# Resolve a CURIE to a URI or a blank node.
#
# @param element:: element passed to add_debug
# @param curie:: CURIE text; converted with to_s before parsing
# @param [Hash] uri_mappings:: prefix => namespace; the "" key holds the default-prefix mapping
# @param subject:: not read by this method
# @param [Array] restrictions:: a blank node is only produced when this includes :bnode
# @return the resolved RDF::URI or blank node, or nil when the CURIE has no prefix or no usable mapping
- def curie_to_resource_or_bnode(element, curie, uri_mappings, subject)
+ def curie_to_resource_or_bnode(element, curie, uri_mappings, subject, restrictions)
# URI mappings for CURIEs default to XHV, rather than the default doc namespace
# Split on the first colon only: the reference part may itself contain ":",
# and splitting on every colon would silently drop everything after the second one.
prefix, reference = curie.to_s.split(":", 2)
# With a limit, split keeps a trailing empty field; map it back to nil so
# "_:" still reaches bnode with a nil reference.
reference = nil if reference.to_s.empty?
# consider the bnode situation
- if prefix == "_"
+ if prefix == "_" && restrictions.include?(:bnode)
+ # we force a non-nil name, otherwise it generates a new name
+ # As a special case, _: is also a valid reference for one specific bnode.
bnode(reference)
elsif curie.to_s.match(/^:/)
+ add_debug(element, "curie_to_resource_or_bnode: default prefix: defined? #{!!uri_mappings[""]}, defaults: #{@host_defaults[:prefix]}")
# Default prefix
if uri_mappings[""]
RDF::URI.intern(uri_mappings[""] + reference.to_s)
elsif @host_defaults[:prefix]
RDF::URI.intern(uri_mappings[@host_defaults[:prefix]] + reference.to_s)
+ else
+ #add_warning(element, "Default namespace prefix is not defined", RDF::RDFA.UndefinedPrefixError)
+ nil
end
elsif !curie.to_s.match(/:/)
# No prefix, undefined (in this context, it is evaluated as a term elsewhere)
nil
else
# Prefixes are downcased before lookup, except when @version is :rdfa_1_0
- ns = uri_mappings[prefix.to_s.downcase]
+ prefix = prefix.to_s.downcase unless @version == :rdfa_1_0
+ ns = uri_mappings[prefix.to_s]
if ns
RDF::URI.intern(ns + reference.to_s)
else
- add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix.downcase}")
+ #add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix}")
nil
end
end
end
end
\ No newline at end of file