begin raise LoadError, "not with java" if RUBY_PLATFORM == "java" require 'nokogiri' rescue LoadError => e :rexml end require 'rdf/ntriples' require 'rdf/xsd' module RDF::RDFa ## # An RDFa parser in Ruby # # This class supports [Nokogiri][] for HTML # processing, and will automatically select the most performant # implementation (Nokogiri or LibXML) that is available. If need be, you # can explicitly override the used implementation by passing in a # `:library` option to `Reader.new` or `Reader.open`. # # [Nokogiri]: http://nokogiri.org/ # # Based on processing rules described here: # @see http://www.w3.org/TR/rdfa-syntax/#s_model RDFa 1.0 # @see http://www.w3.org/TR/2012/CR-rdfa-core-20120313/ # @see http://www.w3.org/TR/2012/CR-xhtml-rdfa-20120313/ # @see http://dev.w3.org/html5/rdfa/ # # @author [Gregg Kellogg](http://kellogg-assoc.com/) class Reader < RDF::Reader format Format include Expansion XHTML = "http://www.w3.org/1999/xhtml" # Content model for @about and @resource. In RDFa 1.0, this was URIorSafeCURIE SafeCURIEorCURIEorIRI = { :"rdfa1.0" => [:safe_curie, :uri, :bnode], :"rdfa1.1" => [:safe_curie, :curie, :uri, :bnode], } # Content model for @datatype. In RDFa 1.0, this was CURIE # Also plural TERMorCURIEorAbsIRIs, content model for @rel, @rev, @property and @typeof TERMorCURIEorAbsIRI = { :"rdfa1.0" => [:term, :curie], :"rdfa1.1" => [:term, :curie, :absuri], } # This expression matches an NCName as defined in # [XML-NAMES](http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName) # # @see http://www.w3.org/TR/2009/REC-xml-names-20091208/#NT-NCName NC_REGEXP = Regexp.new( %{^ ( [a-zA-Z_] | \\\\u[0-9a-fA-F]{4} ) ( [0-9a-zA-Z_\.-/] | \\\\u([0-9a-fA-F]{4}) )* $}, Regexp::EXTENDED) # This expression matches an term as defined in # [RDFA-CORE](# @see http://www.w3.org/TR/2012/CR-rdfa-core-20120313/#s_terms) # # For the avoidance of doubt, this definition means a 'term' # in RDFa is an XML NCName that also permits slash as a non-leading character. 
# @see http://www.w3.org/TR/2012/CR-rdfa-core-20120313/#s_terms
    #
    # The second character class lists '.', '/' and '-' as literals (the
    # hyphen is last so that it cannot form a range).
    TERM_REGEXP = Regexp.new(
      %{^
        (?!\\\\u0301)             # U+0301 is a non-spacing acute accent.
                                  # It is legal within an XML Name, but not as the first character.
        (  [a-zA-Z_]
         | \\\\u[0-9a-fA-F]{4}
        )
        (  [0-9a-zA-Z_./-]
         | \\\\u([0-9a-fA-F]{4})
        )*
      $},
      Regexp::EXTENDED)

    # Host language
    # @attr [:xml, :xhtml1, :xhtml5, :html4, :html5, :svg]
    attr_reader :host_language

    # Version
    # @attr [:"rdfa1.0", :"rdfa1.1"]
    attr_reader :version

    # The Recursive Baggage
    # @private
    class EvaluationContext # :nodoc:
      ##
      # The base.
      #
      # This will usually be the URL of the document being processed,
      # but it could be some other URL, set by some other mechanism,
      # such as the (X)HTML base element. The important thing is that it establishes
      # a URL against which relative paths can be resolved.
      #
      # @attr [RDF::URI]
      attr_accessor :base

      ##
      # The parent subject.
      #
      # The initial value will be the same as the initial value of base,
      # but it will usually change during the course of processing.
      #
      # @attr [RDF::URI]
      attr_accessor :parent_subject

      ##
      # The parent object.
      #
      # In some situations the object of a statement becomes the subject of any nested statements,
      # and this property is used to convey this value.
      # Note that this value may be a bnode, since in some situations a number of nested statements
      # are grouped together on one bnode.
      # This means that the bnode must be set in the containing statement and passed down,
      # and this property is used to convey this value.
      #
      # @attr [RDF::URI]
      attr_accessor :parent_object

      ##
      # A list of current, in-scope URI mappings.
      #
      # @attr [Hash{Symbol => String}]
      attr_accessor :uri_mappings

      ##
      # A list of current, in-scope Namespaces. This is the subset of uri_mappings
      # which are defined using xmlns.
      #
      # @attr [Hash{String => Namespace}]
      attr_accessor :namespaces

      ##
      # A list of incomplete triples.
      #
      # A triple can be incomplete when no object resource
      # is provided alongside a predicate that requires a resource (i.e., @rel or @rev).
      # The triples can be completed when a resource becomes available,
      # which will be when the next subject is specified (part of the process called chaining).
      #
      # @attr [Array<Hash>]
      attr_accessor :incomplete_triples

      ##
      # The language. Note that there is no default language.
      #
      # @attr [Symbol]
      attr_accessor :language

      ##
      # The term mappings, a list of terms and their associated URIs.
      #
      # This specification does not define an initial list.
      # Host Languages may define an initial list.
      # If a Host Language provides an initial list, it should do so via an RDFa Context document.
      #
      # @attr [Hash{Symbol => RDF::URI}]
      attr_accessor :term_mappings

      ##
      # The default vocabulary
      #
      # A value to use as the prefix URI when a term is used.
      # This specification does not define an initial setting for the default vocabulary.
      # Host Languages may define an initial setting.
      #
      # @attr [RDF::URI]
      attr_accessor :default_vocabulary

      ##
      # lists
      #
      # A hash associating lists with properties.
      # @attr [Hash{RDF::URI => Array}]
      attr_accessor :list_mapping

      # Builds the initial evaluation context for a document.
      #
      # @param [RDF::URI] base
      # @param [Hash] host_defaults
      # @option host_defaults [Hash] :uri_mappings prefix => URI mappings to start with
      # @option host_defaults [Hash{String => RDF::URI}] :term_mappings Hash of NCName => URI
      # @option host_defaults [RDF::URI] :vocabulary initial default vocabulary, if any
      def initialize(base, host_defaults)
        # Initialize the evaluation context, [5.1]
        @base = base
        @parent_subject = @base
        @parent_object = nil
        @namespaces = {}
        @incomplete_triples = []
        @language = nil
        @list_mapping = nil
        # Work on copies: these hashes are merged into in place later on, and
        # that must not write through to the caller's host defaults.
        @uri_mappings = host_defaults.fetch(:uri_mappings, {}).dup
        @term_mappings = host_defaults.fetch(:term_mappings, {}).dup
        @default_vocabulary = host_defaults.fetch(:vocabulary, nil)
      end

      # Copy this Evaluation Context
      #
      # URI mappings, incomplete triples and namespaces are cloned so that
      # the copy can change them independently; the list mapping is shared
      # with the original on purpose.
      #
      # @param [EvaluationContext] from
      def initialize_copy(from)
        # clone the evaluation context correctly
        @uri_mappings = from.uri_mappings.clone
        @incomplete_triples = from.incomplete_triples.clone
        @namespaces = from.namespaces.clone
        @list_mapping = from.list_mapping # Don't clone
      end

      # Compact, single-line summary of the context for debug output.
      #
      # @return [String]
      def inspect
        v = %w(base parent_subject parent_object language default_vocabulary).map do |a|
          "#{a}=#{send(a).inspect}"
        end
        v << "uri_mappings[#{uri_mappings.keys.length}]"
        v << "incomplete_triples[#{incomplete_triples.length}]"
        v << "term_mappings[#{term_mappings.keys.length}]"
        v << "lists[#{list_mapping.keys.length}]" if list_mapping
        v.join(", ")
      end
    end

    # Returns the XML implementation module for this reader instance.
    #
    # @attr_reader [Module]
    attr_reader :implementation

    ##
    # Initializes the RDFa reader instance.
    #
    # @param  [IO, File, String] input
    #   the input stream to read
    # @param  [Hash{Symbol => Object}] options
    #   any additional options (see `RDF::Reader#initialize`)
    # @option options [Symbol] :library
    #   One of :nokogiri or :rexml. If nil/unspecified uses :nokogiri if available, :rexml otherwise.
    # @option options [Boolean] :vocab_expansion (false)
    #   whether to perform RDFS expansion on the resulting graph
    # @option options [:xml, :xhtml1, :xhtml5, :html4, :html5, :svg] :host_language (:html5)
    #   Host Language
    # @option options [:"rdfa1.0", :"rdfa1.1"] :version (:"rdfa1.1")
    #   Parser version information
    # @option options [Proc] :processor_callback (nil)
    #   Callback used to provide processor graph triples.
    # @option options [Array] :rdfagraph ([:output])
    #   Used to indicate if either or both of the :output or :processor graphs are output.
    #   Value is an array containing one or both of :output or :processor.
    # @option options [Repository] :vocab_repository (nil)
    #   Repository to save loaded vocabularies.
# @option options [Array] :debug
    #   Array to place debug messages
    # @return [reader]
    # @yield  [reader] `self`
    # @yieldparam  [RDF::Reader] reader
    # @yieldreturn [void] ignored
    # @raise [Error]:: Raises RDF::ReaderError if _validate_
    def initialize(input = $stdin, options = {}, &block)
      super do
        @debug = options[:debug]

        # Normalize :rdfagraph to an array holding only :output and/or
        # :processor, defaulting to [:output].
        requested = @options[:rdfagraph]
        graphs =
          case requested
          when String, Symbol then requested.to_s.split(',').map { |name| name.strip.to_sym }
          when Array then requested.map { |name| name.to_s.to_sym }
          else []
          end
        graphs = graphs.select { |name| [:output, :processor].include?(name) }
        @options[:rdfagraph] = graphs
        @options[:rdfagraph] << :output if @options[:rdfagraph].empty?

        @library =
          case options[:library]
          when nil
            # Use Nokogiri when available, and REXML otherwise:
            if defined?(::Nokogiri) && RUBY_PLATFORM != 'java'
              :nokogiri
            else
              :rexml
            end
          when :nokogiri, :rexml
            options[:library]
          else
            raise ArgumentError, "expected :rexml or :nokogiri, but got #{options[:library].inspect}"
          end

        # Load the implementation module for the chosen library and mix it
        # into this instance only.
        require "rdf/rdfa/reader/#{@library}"
        @implementation =
          case @library
          when :nokogiri then Nokogiri
          when :rexml then REXML
          end
        extend(@implementation)

        detect_host_language_version(input, options)

        add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{@library}, rdfagraph = #{@options[:rdfagraph].inspect}, expand = #{@options[:vocab_expansion]}")

        begin
          initialize_xml(input, options)
        rescue => e
          add_error(nil, "Malformed document: #{e.message}")
        end
        add_error(nil, "Empty document") if root.nil?
        add_error(nil, "Syntax errors:\n#{doc_errors}") unless doc_errors.empty?

        # Section 4.2 RDFa Host Language Conformance
        #
        # The Host Language may require the automatic inclusion of one or more Initial Contexts
        @host_defaults = {
          :vocabulary       => nil,
          :uri_mappings     => {},
          :initial_contexts => [],
        }

        if @version == :"rdfa1.0"
          # Add default term mappings
          xhv_terms = %w(
            alternate appendix bookmark cite chapter contents copyright first glossary help icon index
            last license meta next p3pv1 prev role section stylesheet subsection start top up
          )
          @host_defaults[:term_mappings] = xhv_terms.each_with_object({}) do |term, mappings|
            mappings[term] = RDF::XHV[term]
          end
        end

        # Initial contexts depend on the host language; an unrecognised host
        # language keeps the empty list set above.
        host_contexts =
          case @host_language
          when :xml, :svg then [XML_RDFA_CONTEXT]
          when :xhtml1 then [XML_RDFA_CONTEXT, XHTML_RDFA_CONTEXT]
          when :xhtml5, :html4, :html5 then [XML_RDFA_CONTEXT, HTML_RDFA_CONTEXT]
          end
        @host_defaults[:initial_contexts] = host_contexts if host_contexts

        block.call(self) if block_given?
      end
    end

    ##
    # Iterates the given block for each RDF statement in the input.
    #
    # Reads to graph and performs expansion if required.
#
    # When the :vocab_expansion option is set, the statements produced by
    # `expand` are yielded instead. Otherwise the document is parsed, and any
    # embedded RDF/XML, typed script elements and microdata are handed to
    # their own readers when those are available.
    #
    # @yield  [statement]
    # @yieldparam [RDF::Statement] statement
    # @return [void]
    def each_statement(&block)
      if @options[:vocab_expansion]
        # Expansion is switched off while `expand` runs and is always
        # restored afterwards, even if expansion raises.
        @options[:vocab_expansion] = false
        begin
          expand.each_statement(&block)
        ensure
          @options[:vocab_expansion] = true
        end
      else
        @callback = block

        # Process any saved callbacks (processor graph issues)
        @saved_callbacks.each {|s| @callback.call(s) } if @saved_callbacks

        # Add prefix definitions from host defaults
        @host_defaults[:uri_mappings].each_pair do |prefix, value|
          prefix(prefix, value)
        end

        # parse
        return unless @root
        parse_whole_document(@doc, RDF::URI(base_uri))

        # Look for Embedded Turtle and RDF/XML
        unless @root.xpath("//rdf:RDF", "xmlns:rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#").empty?
          extract_script(@root, @doc, "application/rdf+xml", @options, &block)
        end

        # Look for Embedded scripts
        @root.css("script[type]").each do |el|
          type = el.attribute("type")
          extract_script(el, el.inner_text, type, @options, &block)
        end

        # Look for Embedded microdata
        unless @root.xpath("//@itemscope").empty?
          begin
            require 'rdf/microdata'
            add_debug(@doc, "process microdata")
            RDF::Microdata::Reader.new(@doc, options).each(&block)
          rescue LoadError, StandardError
            # LoadError is not a StandardError, so it is named explicitly:
            # a missing rdf-microdata gem must not abort RDFa processing.
            add_debug(@doc, "microdata detected, not processed")
          end
        end
      end
    end

    ##
    # Parses embedded content with the reader registered for its content
    # type and yields each resulting statement. Content for which no reader
    # is registered is ignored.
    #
    # @param [#display_path, #to_s] el element used for debug context
    # @param [Object] input content handed to the reader
    # @param [#to_s] type content type, e.g. "text/turtle"
    # @param [Hash] options options passed on to the reader
    # @yield [statement]
    # @yieldparam [RDF::Statement] statement
    # @return [void]
    def extract_script(el, input, type, options, &block)
      add_debug(el, "script element of type #{type}")
      # The type may arrive as an attribute object rather than a String;
      # compare and look up by its string value.
      content_type = type.to_s
      begin
        # Formats don't exist unless they've been required
        case content_type
        when 'application/rdf+xml' then require 'rdf/rdfxml'
        when 'text/ntriples' then require 'rdf/ntriples'
        when 'text/turtle' then require 'rdf/turtle'
        end
      rescue LoadError
        # Best effort: without the gem no reader is registered below and the
        # content is skipped.
      end

      if reader = RDF::Reader.for(:content_type => content_type)
        add_debug(el, "=> reader #{reader.to_sym}")
        reader.new(input, options).each(&block)
      end
    end

    ##
    # Iterates the given block for each RDF triple in the input.
# # @yield [subject, predicate, object] # @yieldparam [RDF::Resource] subject # @yieldparam [RDF::URI] predicate # @yieldparam [RDF::Value] object # @return [void] def each_triple(&block) each_statement do |statement| block.call(*statement.to_triple) end end private # Keep track of allocated BNodes def bnode(value = nil) @bnode_cache ||= {} @bnode_cache[value.to_s] ||= RDF::Node.new(value) end # Figure out the document path, if it is an Element or Attribute def node_path(node) "<#{base_uri}>#{node.respond_to?(:display_path) ? node.display_path : node}" end # Add debug event to debug array, if specified # # @param [#display_path, #to_s] node XML Node or string for showing context # @param [String] message # @yieldreturn [String] appended to message, to allow for lazy-evaulation of message def add_debug(node, message = "") return unless ::RDF::RDFa.debug? || @debug message = message + yield if block_given? add_processor_message(node, message, RDF::RDFA.Info) end def add_info(node, message, process_class = RDF::RDFA.Info) add_processor_message(node, message, process_class) end def add_warning(node, message, process_class = RDF::RDFA.Warning) add_processor_message(node, message, process_class) end def add_error(node, message, process_class = RDF::RDFA.Error) add_processor_message(node, message, process_class) raise RDF::ReaderError, message if validate? end def add_processor_message(node, message, process_class) puts "#{node_path(node)}: #{message}" if ::RDF::RDFa.debug? 
@debug << "#{node_path(node)}: #{message}" if @debug.is_a?(Array) if @options[:processor_callback] || @options[:rdfagraph].include?(:processor) g = RDF::Graph.new n = RDF::Node.new g << RDF::Statement.new(n, RDF["type"], process_class) g << RDF::Statement.new(n, RDF::DC.description, message) g << RDF::Statement.new(n, RDF::DC.date, RDF::Literal::Date.new(DateTime.now)) g << RDF::Statement.new(n, RDF::RDFA.context, base_uri) if base_uri if node.respond_to?(:path) nc = RDF::Node.new g << RDF::Statement.new(n, RDF::RDFA.context, nc) g << RDF::Statement.new(nc, RDF["type"], RDF::PTR.XPathPointer) g << RDF::Statement.new(nc, RDF::PTR.expression, node.path) end g.each do |s| # Provide as callback @options[:processor_callback].call(s) if @options[:processor_callback] # Yield as result if @options[:rdfagraph].include?(:processor) if @callback @callback.call(s) else # Save messages for later callback @saved_callbacks ||= [] @saved_callbacks << s end end end end end ## # add a statement, object can be literal or URI or bnode # Yields {RDF::Statement} to the saved callback # # @param [#display_path, #to_s] node XML Node or string for showing context # @param [RDF::Resource] subject the subject of the statement # @param [RDF::URI] predicate the predicate of the statement # @param [RDF::Value] object the object of the statement # @param [RDF::Value] context the context of the statement # @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_. def add_triple(node, subject, predicate, object, context = nil) statement = RDF::Statement.new(subject, predicate, object) add_info(node, "statement: #{RDF::NTriples.serialize(statement)}") @callback.call(statement) if @options[:rdfagraph].include?(:output) && statement.valid? 
end # Parsing an RDFa document (this is *not* the recursive method) def parse_whole_document(doc, base) base = doc_base(base) if (base) # Strip any fragment from base base = base.to_s.split("#").first base = uri(base) add_debug("") {"parse_whole_doc: base='#{base}'"} end # initialize the evaluation context with the appropriate base evaluation_context = EvaluationContext.new(base, @host_defaults) if @version != :"rdfa1.0" # Process default vocabularies load_initial_contexts(@host_defaults[:initial_contexts]) do |which, value| add_debug(root) { "parse_whole_document, #{which}: #{value.inspect}"} case which when :uri_mappings then evaluation_context.uri_mappings.merge!(value) when :term_mappings then evaluation_context.term_mappings.merge!(value) when :default_vocabulary then evaluation_context.default_vocabulary = value end end end traverse(root, evaluation_context) add_debug("", "parse_whole_doc: traversal complete'") end # Parse and process URI mappings, Term mappings and a default vocabulary from @context # # Yields each mapping def load_initial_contexts(initial_contexts) initial_contexts. map {|uri| uri(uri).normalize}. each do |uri| # Don't try to open ourselves! if base_uri == uri add_debug(root) {"load_initial_contexts: skip recursive context <#{uri}>"} next end old_debug = RDF::RDFa.debug? begin add_info(root, "load_initial_contexts: load <#{uri}>") RDF::RDFa.debug = false context = Context.find(uri) rescue Exception => e RDF::RDFa.debug = old_debug add_error(root, e.message) raise # In case we're not in strict mode, we need to be sure processing stops ensure RDF::RDFa.debug = old_debug end # Add URI Mappings to prefixes context.prefixes.each_pair do |prefix, value| prefix(prefix, value) end yield :uri_mappings, context.prefixes unless context.prefixes.empty? yield :term_mappings, context.terms unless context.terms.empty? 
yield :default_vocabulary, context.vocabulary if context.vocabulary end end # Extract the XMLNS mappings from an element def extract_mappings(element, uri_mappings, namespaces) # look for xmlns # (note, this may be dependent on @host_language) # Regardless of how the mapping is declared, the value to be mapped must be converted to lower case, # and the URI is not processed in any way; in particular if it is a relative path it is # not resolved against the current base. ns_defs = {} element.namespaces.each do |prefix, href| prefix = nil if prefix == "xmlns" add_debug("extract_mappings") { "ns: #{prefix}: #{href}"} ns_defs[prefix] = href end # HTML parsing doesn't create namespace_definitions if ns_defs.empty? ns_defs = {} element.attributes.each do |attr, href| next unless attr =~ /^xmlns(?:\:(.+))?/ prefix = $1 add_debug("extract_mappings") { "ns(attr): #{prefix}: #{href}"} ns_defs[prefix] = href.to_s end end ns_defs.each do |prefix, href| # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix. next if prefix == "_" # Downcase prefix for RDFa 1.1 pfx_lc = (@version == :"rdfa1.0" || prefix.nil?) ? prefix : prefix.downcase if prefix uri_mappings[pfx_lc.to_sym] = href namespaces[pfx_lc] ||= href prefix(pfx_lc, href) add_info(element, "extract_mappings: #{prefix} => <#{href}>") else add_info(element, "extract_mappings: nil => <#{href}>") namespaces[""] ||= href end end # Set mappings from @prefix # prefix is a whitespace separated list of prefix-name URI pairs of the form # NCName ':' ' '+ xs:anyURI mappings = element.attribute("prefix").to_s.strip.split(/\s+/) while mappings.length > 0 do prefix, uri = mappings.shift.downcase, mappings.shift #puts "uri_mappings prefix #{prefix} <#{uri}>" next unless prefix.match(/:$/) prefix.chop! 
unless prefix.match(NC_REGEXP) add_error(element, "extract_mappings: Prefix #{prefix.inspect} does not match NCName production") next end # A Conforming RDFa Processor must ignore any definition of a mapping for the '_' prefix. next if prefix == "_" uri_mappings[prefix.to_s.empty? ? nil : prefix.to_s.to_sym] = uri prefix(prefix, uri) add_info(element, "extract_mappings: prefix #{prefix} => <#{uri}>") end unless @version == :"rdfa1.0" end # The recursive helper function def traverse(element, evaluation_context) if element.nil? add_error(element, "Can't parse nil element") return nil end add_debug(element) { "ec: #{evaluation_context.inspect}" } # local variables [7.5 Step 1] recurse = true skip = false new_subject = nil typed_resource = nil current_object_resource = nil uri_mappings = evaluation_context.uri_mappings.clone namespaces = evaluation_context.namespaces.clone incomplete_triples = [] language = evaluation_context.language term_mappings = evaluation_context.term_mappings.clone default_vocabulary = evaluation_context.default_vocabulary list_mapping = evaluation_context.list_mapping xml_base = element.base base = xml_base.to_s if xml_base && ![:xhtml1, :html4, :html5].include?(@host_language) add_debug(element) {"base: #{base.inspect}"} if base base ||= evaluation_context.base # Pull out the attributes needed for the skip test. attrs = {} %w( about content datatype datetime href id inlist property rel resource rev role src typeof value vocab ).each do |a| attrs[a.to_sym] = element.attributes[a].to_s.strip if element.attributes[a] end add_debug(element) {"attrs " + attrs.inspect} unless attrs.empty? # If @property and @rel/@rev are on the same elements, the non-CURIE and non-URI @rel/@rev values are ignored. If, after this, the value of @rel/@rev becomes empty, then the then the processor must act as if the attribute is not present. 
if attrs.has_key?(:property) && @version == :"rdfa1.1" && (@host_language == :html5 || @host_language == :xhtml5 || @host_language == :html4) [:rel, :rev].each do |attr| next unless attrs.has_key?(attr) add_debug(element) {"Remove non-CURIE/non-IRI @#{attr} values from #{attrs[attr].inspect}"} attrs[attr] = attrs[attr]. split(/\s+/). select {|a| a.index(':')}. join(" ") add_debug(element) {" => #{attrs[attr].inspect}"} attrs.delete(attr) if attrs[attr].empty? end end # Default vocabulary [7.5 Step 2] # Next the current element is examined for any change to the default vocabulary via @vocab. # If @vocab is present and contains a value, its value updates the local default vocabulary. # If the value is empty, then the local default vocabulary must be reset to the Host Language defined default. if attrs[:vocab] default_vocabulary = if attrs[:vocab].empty? # Set default_vocabulary to host language default add_debug(element) { "[Step 3] reset default_vocaulary to #{@host_defaults.fetch(:vocabulary, nil).inspect}" } @host_defaults.fetch(:vocabulary, nil) else # Generate a triple indicating that the vocabulary is used add_triple(element, base, RDF::RDFA.usesVocabulary, uri(attrs[:vocab])) uri(attrs[:vocab]) end add_debug(element) { "[Step 2] default_vocaulary: #{default_vocabulary.inspect}" } end # Local term mappings [7.5 Step 3] # Next, the current element is then examined for URI mapping s and these are added to the local list of URI mappings. # Note that a URI mapping will simply overwrite any current mapping in the list that has the same name extract_mappings(element, uri_mappings, namespaces) # Language information [7.5 Step 4] language = element.language || language language = nil if language.to_s.empty? 
add_debug(element) {"HTML5 [3.2.3.3] lang: #{language.inspect}"} if language # rels and revs rels = process_uris(element, attrs[:rel], evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, :restrictions => TERMorCURIEorAbsIRI.fetch(@version, [])) revs = process_uris(element, attrs[:rev], evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, :restrictions => TERMorCURIEorAbsIRI.fetch(@version, [])) add_debug(element) do "rels: #{rels.join(" ")}, revs: #{revs.join(" ")}" end unless (rels + revs).empty? if !(attrs[:rel] || attrs[:rev]) # Establishing a new subject if no rel/rev [7.5 Step 5] if @version == :"rdfa1.0" new_subject = if attrs[:about] process_uri(element, attrs[:about], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) elsif attrs[:resource] process_uri(element, attrs[:resource], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) elsif attrs[:href] || attrs[:src] process_uri(element, (attrs[:href] || attrs[:src]), evaluation_context, base, :restrictions => [:uri]) end # If no URI is provided by a resource attribute, then the first match from the following rules # will apply: new_subject ||= if [:xhtml1, :xhtml5, :html4, :html5].include?(@host_language) && element.name =~ /^(head|body)$/ # From XHTML+RDFa 1.1: # if no URI is provided, then first check to see if the element is the head or body element. # If it is, then act as if the new subject is set to the parent object. 
uri(base) elsif element == root && base # if the element is the root element of the document, then act as if there is an empty @about present, # and process it according to the rule for @about, above; uri(base) elsif attrs[:typeof] RDF::Node.new else # otherwise, if parent object is present, new subject is set to the value of parent object. skip = true unless attrs[:property] evaluation_context.parent_object end # if the @typeof attribute is present, set typed resource to new subject typed_resource = new_subject if attrs[:typeof] else # If the current element contains no @rel or @rev attribute, then the next step is to establish a value for new subject. # This step has two possible alternatives. # 1. If the current element contains the @property attribute, but does not contain the @content # or the @datatype attribute if attrs[:property] && !(attrs[:content] || attrs[:datatype]) new_subject = process_uri(element, attrs[:about], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) if attrs[:about] # if the @typeof attribute is present, set typed resource to new subject typed_resource = new_subject if attrs[:typeof] add_debug(element) { "[Step 5] new_subject: #{new_subject.to_ntriples rescue 'nil'}, " + "typed_resource: #{typed_resource.to_ntriples rescue 'nil'}" } # If no URI is provided by a resource attribute, then the first match from the following rules # will apply: new_subject ||= if [:xhtml1, :xhtml5, :html4, :html5].include?(@host_language) && element.name =~ /^(head|body)$/ # From XHTML+RDFa 1.1: # if no URI is provided, then first check to see if the element is the head or body element. # If it is, then act as if the new subject is set to the parent object. 
evaluation_context.parent_object elsif element == root && base # if the element is the root element of the document, then act as if there is an empty @about present, # and process it according to the rule for @about, above; uri(base) else # otherwise, if parent object is present, new subject is set to the value of parent object. evaluation_context.parent_object end if attrs[:typeof] typed_resource ||= process_uri(element, attrs[:resource], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) if attrs[:resource] typed_resource ||= process_uri(element, (attrs[:href] || attrs[:src]), evaluation_context, base, :restrictions => [:uri]) if attrs[:href] || attrs[:src] typed_resource ||= RDF::Node.new # The value of the current object resource is set to the value of typed resource. current_object_resource = typed_resource end else # otherwise (ie, the @content or @datatype) new_subject = process_uri(element, attrs[:about], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) if attrs[:about] new_subject ||= process_uri(element, attrs[:resource], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) if attrs[:resource] new_subject ||= process_uri(element, (attrs[:href] || attrs[:src]), evaluation_context, base, :restrictions => [:uri]) if attrs[:href] || attrs[:src] # If no URI is provided by a resource attribute, then the first match from the following rules # will apply: new_subject ||= if [:xhtml1, :xhtml5, :html4, :html5].include?(@host_language) && element.name =~ /^(head|body)$/ # From XHTML+RDFa 1.1: # if no URI is provided, then first check to see if the element is the head or body element. # If it is, then act as if the new subject is set to the parent object. 
evaluation_context.parent_object elsif element == root && base # if the element is the root element of the document, then act as if there is an empty @about present, # and process it according to the rule for @about, above; uri(base) elsif attrs[:typeof] RDF::Node.new else # otherwise, if parent object is present, new subject is set to the value of parent object. # Additionally, if @property is not present then the skip element flag is set to 'true'. skip = true unless attrs[:property] evaluation_context.parent_object end # if @typeof is present, set the typed resource to the value of new subject typed_resource ||= new_subject if attrs[:typeof] end end add_debug(element) { "[Step 5] new_subject: #{new_subject.to_ntriples rescue 'nil'}, " + "typed_resource: #{typed_resource.to_ntriples rescue 'nil'}, " + "skip = #{skip}" } else # [7.5 Step 6] # If the current element does contain a @rel or @rev attribute, then the next step is to # establish both a value for new subject and a value for current object resource: new_subject = process_uri(element, attrs[:about], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) new_subject ||= process_uri(element, attrs[:src], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => [:uri]) if @version == :"rdfa1.0" # if the @typeof attribute is present, set typed resource to new subject typed_resource = new_subject if attrs[:typeof] # If no URI is provided then the first match from the following rules will apply new_subject ||= if element == root && base uri(base) elsif [:xhtml1, :xhtml5, :html4, :html5].include?(@host_language) && element.name =~ /^(head|body)$/ # From XHTML+RDFa 1.1: # if no URI is provided, then first check to see if the element is the head or body element. # If it is, then act as if the new subject is set to the parent object. 
evaluation_context.parent_object elsif attrs[:typeof] && @version == :"rdfa1.0" RDF::Node.new else # if it's null, it's null and nothing changes evaluation_context.parent_object # no skip flag set this time end # Then the current object resource is set to the URI obtained from the first match from the following rules: current_object_resource = process_uri(element, attrs[:resource], evaluation_context, base, :uri_mappings => uri_mappings, :restrictions => SafeCURIEorCURIEorIRI.fetch(@version, [])) if attrs[:resource] current_object_resource ||= process_uri(element, attrs[:href], evaluation_context, base, :restrictions => [:uri]) if attrs[:href] current_object_resource ||= process_uri(element, attrs[:src], evaluation_context, base, :restrictions => [:uri]) if attrs[:src] && @version != :"rdfa1.0" current_object_resource ||= RDF::Node.new if attrs[:typeof] && !attrs[:about] && @version != :"rdfa1.0" # and also set the value typed resource to this bnode if attrs[:typeof] if @version == :"rdfa1.0" typed_resource = new_subject else typed_resource = current_object_resource if !attrs[:about] end end add_debug(element) { "[Step 6] new_subject: #{new_subject}, " + "current_object_resource = #{current_object_resource.nil? ? 
'nil' : current_object_resource} " + "typed_resource: #{typed_resource.to_ntriples rescue 'nil'}, " } end # Process @typeof if there is a subject [Step 7] if typed_resource # Typeof is TERMorCURIEorAbsIRIs types = process_uris(element, attrs[:typeof], evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, :restrictions => TERMorCURIEorAbsIRI.fetch(@version, [])) add_debug(element, "[Step 7] typeof: #{attrs[:typeof]}") types.each do |one_type| add_triple(element, typed_resource, RDF["type"], one_type) end end # Create new List mapping [step 8] # # If in any of the previous steps a new subject was set to a non-null value different from the parent object; # The list mapping taken from the evaluation context is set to a new, empty mapping. if (new_subject && (new_subject != evaluation_context.parent_subject || list_mapping.nil?)) list_mapping = {} add_debug(element) do "[Step 8]: create new list mapping(#{list_mapping.object_id}) " + "ns: #{new_subject.to_ntriples}, " + "ps: #{evaluation_context.parent_subject.to_ntriples rescue 'nil'}" end end # Generate triples with given object [Step 9] # # If the current element has a @inlist attribute, add the property to the # list associated with that property, creating a new list if necessary. 
if new_subject && current_object_resource && (attrs[:rel] || attrs[:rev]) add_debug(element) {"[Step 9] rels: #{rels.inspect} revs: #{revs.inspect}"} rels.each do |r| if attrs[:inlist] # If the current list mapping does not contain a list associated with this IRI, # instantiate a new list unless list_mapping[r] list_mapping[r] = RDF::List.new add_debug(element) {"list(#{r}): create #{list_mapping[r].inspect}"} end add_debug(element) {"[Step 9] add #{current_object_resource.to_ntriples} to #{r} #{list_mapping[r].inspect}"} list_mapping[r] << current_object_resource else add_triple(element, new_subject, r, current_object_resource) end end revs.each do |r| add_triple(element, current_object_resource, r, new_subject) end elsif attrs[:rel] || attrs[:rev] # Incomplete triples and bnode creation [Step 10] add_debug(element) {"[Step 10] incompletes: rels: #{rels}, revs: #{revs}"} current_object_resource = RDF::Node.new # predicate: full IRI # direction: forward/reverse # lists: Save into list, don't generate triple rels.each do |r| if attrs[:inlist] # If the current list mapping does not contain a list associated with this IRI, # instantiate a new list unless list_mapping[r] list_mapping[r] = RDF::List.new add_debug(element) {"[Step 10] list(#{r}): create #{list_mapping[r].inspect}"} end incomplete_triples << {:list => list_mapping[r], :direction => :none} else incomplete_triples << {:predicate => r, :direction => :forward} end end revs.each do |r| incomplete_triples << {:predicate => r, :direction => :reverse} end end # Establish current object literal [Step 11] # # If the current element has a @inlist attribute, add the property to the # list associated with that property, creating a new list if necessary. if attrs[:property] properties = process_uris(element, attrs[:property], evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, :restrictions => TERMorCURIEorAbsIRI.fetch(@version, [])) properties.reject! 
do |p| if p.is_a?(RDF::URI) false else add_warning(element, "[Step 11] predicate #{p.to_ntriples} must be a URI") true end end datatype = process_uri(element, attrs[:datatype], evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, :restrictions => TERMorCURIEorAbsIRI.fetch(@version, [])) unless attrs[:datatype].to_s.empty? begin current_property_value = case when datatype && ![RDF.XMLLiteral, RDF.HTML].include?(datatype) # typed literal add_debug(element, "[Step 11] typed literal (#{datatype})") RDF::Literal.new(attrs[:datetime] || attrs[:value] || attrs[:content] || element.inner_text.to_s, :datatype => datatype, :language => language, :validate => validate?, :canonicalize => canonicalize?) when @version == :"rdfa1.1" case when datatype == RDF.XMLLiteral # XML Literal add_debug(element) {"[Step 11] XML Literal: #{element.inner_html}"} # In order to maintain maximum portability of this literal, any children of the current node that are # elements must have the current in scope XML namespace declarations (if any) declared on the # serialized element using their respective attributes. Since the child element node could also # declare new XML namespaces, the RDFa Processor must be careful to merge these together when # generating the serialized element definition. For avoidance of doubt, any re-declarations on the # child node must take precedence over declarations that were active on the current node. begin c14nxl = element.children.c14nxl( :library => @library, :language => language, :namespaces => {nil => XHTML}.merge(namespaces)) RDF::Literal.new(c14nxl, :library => @library, :datatype => RDF.XMLLiteral, :validate => validate?, :canonicalize => canonicalize?) 
rescue ArgumentError => e add_error(element, e.message) end when datatype == RDF.HTML # HTML Literal add_debug(element) {"[Step 11] HTML Literal: #{element.inner_html}"} # Just like XMLLiteral, but without the c14nxl begin RDF::Literal.new(element.children.to_html, :library => @library, :datatype => RDF.HTML, :validate => validate?, :canonicalize => canonicalize?) rescue ArgumentError => e add_error(element, e.message) end when element.name == 'time' # HTML5 support # Lexically scan value and assign appropriate type, otherwise, leave untyped v = (attrs[:datetime] || element.inner_text).to_s datatype = %w(Date Time DateTime Year YearMonth Duration).map {|t| RDF::Literal.const_get(t)}.detect do |dt| v.match(dt::GRAMMAR) end || RDF::Literal add_debug(element) {"[Step 11]