lib/rdf/rdfa/reader.rb in rdf-rdfa-0.3.6 vs lib/rdf/rdfa/reader.rb in rdf-rdfa-0.3.7

- old
+ new

@@ -1,6 +1,7 @@ require 'nokogiri' # FIXME: Implement using different modules as in RDF::TriX +require 'rdf/ntriples' module RDF::RDFa ## # An RDFa parser in Ruby # @@ -48,16 +49,10 @@ # Version # @attr [:"rdfa1.0", :"rdfa1.1"] attr_reader :version - ## - # Returns the base URI determined by this reader. - # - # @attr [RDF::URI] - attr_reader :base_uri - # The Recursive Baggage # @private class EvaluationContext # :nodoc: ## # The base. @@ -141,15 +136,15 @@ # # @attr [RDF::URI] attr :default_vocabulary, true ## - # collections + # lists # - # A hash associating collections with properties. + # A hash associating lists with properties. # @attr [Hash{RDF::URI => Array<RDF::Resource>}] - attr :collection_mappings, true + attr :list_mapping, true # @param [RDF::URI] base # @param [Hash] host_defaults # @option host_defaults [Hash{String => RDF::URI}] :term_mappings Hash of NCName => URI # @option host_defaults [Hash{String => RDF::URI}] :vocabulary Hash of prefix => URI @@ -172,21 +167,21 @@ def initialize_copy(from) # clone the evaluation context correctly @uri_mappings = from.uri_mappings.clone @incomplete_triples = from.incomplete_triples.clone @namespaces = from.namespaces.clone - @collection_mappings = from.collection_mappings # Don't clone + @list_mapping = from.list_mapping # Don't clone end def inspect v = ['base', 'parent_subject', 'parent_object', 'language', 'default_vocabulary'].map do |a| "#{a}='#{self.send(a).inspect}'" end v << "uri_mappings[#{uri_mappings.keys.length}]" v << "incomplete_triples[#{incomplete_triples.length}]" v << "term_mappings[#{term_mappings.keys.length}]" - v << "collections[#{collection_mappings.keys.length}]" if collection_mappings + v << "lists[#{list_mapping.keys.length}]" if list_mapping v.join(", ") end end ## @@ -226,11 +221,10 @@ # @yieldreturn [void] ignored # @raise [Error]:: Raises RDF::ReaderError if _validate_ def initialize(input = $stdin, options = {}, &block) super do @debug = options[:debug] - @base_uri = uri(options[:base_uri]) detect_host_language_version(input, options) @processor_graph = options[:processor_graph] @@ -244,21 +238,21 @@ # Otherwise, default is utf-8 options[:encoding] ||= 'utf-8' case @host_language when :html4, :html5 - Nokogiri::HTML.parse(input, @base_uri.to_s, options[:encoding]) + Nokogiri::HTML.parse(input, base_uri.to_s, options[:encoding]) else - Nokogiri::XML.parse(input, @base_uri.to_s, options[:encoding]) + Nokogiri::XML.parse(input, base_uri.to_s, options[:encoding]) end end - if (@doc.nil? || @doc.root.nil?) + if ((@doc.nil? || @doc.root.nil?) && validate?) add_error(nil, "Empty document", RDF::RDFA.DocumentError) raise RDF::ReaderError, "Empty Document" end - add_warning(nil, "Synax errors:\n#{@doc.errors}", RDF::RDFA.DocumentError) if !@doc.errors.empty? && validate? + add_warning(nil, "Syntax errors:\n#{@doc.errors}", RDF::RDFA.DocumentError) if !@doc.errors.empty? && validate? # Section 4.2 RDFa Host Language Conformance # # The Host Language may require the automatic inclusion of one or more default RDFa Profiles. @host_defaults = { @@ -314,11 +308,11 @@ # Determine from head of document head = if input.respond_to?(:read) input.rewind string = input.read(1000) input.rewind - string + string.to_s else input.to_s[0..1000] end doc_type_string = head.match(%r(<!DOCTYPE[^>]*>)m).to_s @@ -392,11 +386,11 @@ @host_defaults[:uri_mappings].each_pair do |prefix, value| prefix(prefix, value) end # parse - parse_whole_document(@doc, @base_uri) + parse_whole_document(@doc, RDF::URI(base_uri)) end end ## # Iterates the given block for each RDF triple in the input. @@ -420,11 +414,11 @@ @bnode_cache[value.to_s] ||= RDF::Node.new(value) end # Figure out the document path, if it is a Nokogiri::XML::Element or Attribute def node_path(node) - "<#{@base_uri}>" + case node + "<#{base_uri}>" + case node when Nokogiri::XML::Node then node.display_path else node.to_s end end @@ -458,11 +452,11 @@ if @processor_graph n = RDF::Node.new @processor_graph << RDF::Statement.new(n, RDF["type"], process_class) @processor_graph << RDF::Statement.new(n, RDF::DC.description, message) @processor_graph << RDF::Statement.new(n, RDF::DC.date, RDF::Literal::Date.new(DateTime.now)) - @processor_graph << RDF::Statement.new(n, RDF::RDFA.context, @base_uri) + @processor_graph << RDF::Statement.new(n, RDF::RDFA.context, base_uri) nc = RDF::Node.new @processor_graph << RDF::Statement.new(nc, RDF["type"], RDF::PTR.XPathPointer) @processor_graph << RDF::Statement.new(nc, RDF::PTR.expression, node.path) if node.respond_to?(:path) @processor_graph << RDF::Statement.new(n, RDF::RDFA.context, nc) end @@ -526,11 +520,11 @@ def process_profile(element, profiles) profiles. map {|uri| uri(uri).normalize}. each do |uri| # Don't try to open ourselves! - if @base_uri == uri + if base_uri == uri add_debug(element, "process_profile: skip recursive profile <#{uri}>") next end old_debug = RDF::RDFa.debug? @@ -635,11 +629,11 @@ namespaces = evaluation_context.namespaces.clone incomplete_triples = [] language = evaluation_context.language term_mappings = evaluation_context.term_mappings.clone default_vocabulary = evaluation_context.default_vocabulary - collection_mappings = evaluation_context.collection_mappings + list_mapping = evaluation_context.list_mapping # shortcut attrs = element.attributes about = attrs['about'] @@ -657,16 +651,16 @@ datatype = attrs['datatype'].to_s if attrs['datatype'] content = attrs['content'].to_s if attrs['content'] rel = attrs['rel'].to_s.strip if attrs['rel'] rev = attrs['rev'].to_s.strip if attrs['rev'] - # Collections: - # @member + # Lists: + # @inlist # an attribute (value ignored) used to indicate that the object associated with a - # @rel or @property attribute on the same element is to be added to the collection - # for that property. Causes a collection to be created if it does not already exist. - member = attrs['member'].to_s.strip if attrs.has_key?('member') + # @rel or @property attribute on the same element is to be added to the list + # for that property. Causes a list to be created if it does not already exist. + inlist = attrs['inlist'].to_s.strip if attrs.has_key?('inlist') add_debug(element) do attrs = { :about => about, :src => src, @@ -677,11 +671,11 @@ :property => property, :typeof => typeof, :datatype => datatype, :rel => rel, :rev => rev, - :member => member, + :inlist => inlist, }.select {|k,v| v} "attrs " + attrs.map {|a| "#{a.first}: #{a.last}"}.join(", ") end unless attrs.empty? @@ -736,16 +730,16 @@ # rels and revs rels = process_uris(element, rel, evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, - :restrictions => TERMorCURIEorAbsURI[@version]) + :restrictions => TERMorCURIEorAbsURI.fetch(@version, [])) revs = process_uris(element, rev, evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, - :restrictions => TERMorCURIEorAbsURI[@version]) + :restrictions => TERMorCURIEorAbsURI.fetch(@version, [])) add_debug(element) do "rels: #{rels.join(" ")}, revs: #{revs.join(" ")}" end unless (rels + revs).empty? @@ -753,19 +747,19 @@ # Establishing a new subject if no rel/rev [7.5 Step 5] # May not be valid, but can exist new_subject = if about process_uri(element, about, evaluation_context, base, :uri_mappings => uri_mappings, - :restrictions => SafeCURIEorCURIEorURI[@version]) - elsif src - process_uri(element, src, evaluation_context, base, :restrictions => [:uri]) + :restrictions => SafeCURIEorCURIEorURI.fetch(@version, [])) elsif resource process_uri(element, resource, evaluation_context, base, :uri_mappings => uri_mappings, - :restrictions => SafeCURIEorCURIEorURI[@version]) + :restrictions => SafeCURIEorCURIEorURI.fetch(@version, [])) elsif href process_uri(element, href, evaluation_context, base, :restrictions => [:uri]) + elsif src + process_uri(element, src, evaluation_context, base, :restrictions => [:uri]) end # If no URI is provided by a resource attribute, then the first match from the following rules # will apply: # if @typeof is present, then new subject is set to be a newly created bnode. @@ -784,21 +778,21 @@ else # if it's null, it's null and nothing changes skip = true unless property evaluation_context.parent_object end - add_debug(element, "[Step 5] new_subject: #{new_subject}, skip = #{skip}") + add_debug(element, "[Step 5] new_subject: #{new_subject.to_ntriples rescue 'nil'}, skip = #{skip}") else # [7.5 Step 6] # If the current element does contain a @rel or @rev attribute, then the next step is to # establish both a value for new subject and a value for current object resource: new_subject = process_uri(element, about, evaluation_context, base, :uri_mappings => uri_mappings, - :restrictions => SafeCURIEorCURIEorURI[@version]) || - process_uri(element, src, evaluation_context, base, + :restrictions => SafeCURIEorCURIEorURI.fetch(@version, [])) + new_subject ||= process_uri(element, src, evaluation_context, base, :uri_mappings => uri_mappings, - :restrictions => [:uri]) + :restrictions => [:uri]) if @version == :"rdfa1.0" # If no URI is provided then the first match from the following rules will apply new_subject ||= if element == @doc.root && base uri(base) elsif [:xhtml1, :xhtml5, :html4, :html5].include?(@host_language) && element.name =~ /^(head|body)$/ @@ -816,14 +810,17 @@ # Then the current object resource is set to the URI obtained from the first match from the following rules: current_object_resource = if resource process_uri(element, resource, evaluation_context, base, :uri_mappings => uri_mappings, - :restrictions => SafeCURIEorCURIEorURI[@version]) + :restrictions => SafeCURIEorCURIEorURI.fetch(@version, [])) elsif href process_uri(element, href, evaluation_context, base, :restrictions => [:uri]) + elsif src && @version != :"rdfa1.0" + process_uri(element, src, evaluation_context, base, + :restrictions => [:uri]) end add_debug(element, "[Step 6] new_subject: #{new_subject}, current_object_resource = #{current_object_resource.nil? ? 'nil' : current_object_resource}") end @@ -832,96 +829,97 @@ # Typeof is TERMorCURIEorAbsURIs types = process_uris(element, typeof, evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, - :restrictions => TERMorCURIEorAbsURI[@version]) + :restrictions => TERMorCURIEorAbsURI.fetch(@version, [])) add_debug(element, "typeof: #{typeof}") types.each do |one_type| add_triple(element, new_subject, RDF["type"], one_type) end end - # Collections: If new subject is set and is not the same as parent subject, - # replace the collection mappings taken from - # the evaluation context with a new empty mappings. - if (new_subject && new_subject != evaluation_context.parent_subject) || collection_mappings.nil? - collection_mappings = {} + # Create new List mapping [step 8] + # + # If in any of the previous steps a new subject was set to a non-null value different from the parent object; + # The list mapping taken from the evaluation context is set to a new, empty mapping. + if (new_subject && (new_subject != evaluation_context.parent_subject || list_mapping.nil?)) + list_mapping = {} add_debug(element) do - "collections: create new collection mappings(#{collection_mappings.object_id}) " + - "ns: #{new_subject}, " + - "ps: #{evaluation_context.parent_subject}" + "[Step 8]: create new list mapping(#{list_mapping.object_id}) " + + "ns: #{new_subject.to_ntriples}, " + + "ps: #{evaluation_context.parent_subject.to_ntriples rescue 'nil'}" end end - # Generate triples with given object [Step 8] + # Generate triples with given object [Step 9] # - # Collections: if the current element has a @member attribute, add the property to the - # collection associated with that property, creating a new collection if necessary. + # If the current element has a @inlist attribute, add the property to the + # list associated with that property, creating a new list if necessary. if new_subject and current_object_resource rels.each do |r| - if member - # If the current collection mappings does not contain a collection associated with this IRI, - # instantiate a new collection - unless collection_mappings[r] - collection_mappings[r] = RDF::List.new - add_debug(element) {"collections(#{r}): create #{collection_mappings[r]}"} + if inlist + # If the current list mapping does not contain a list associated with this IRI, + # instantiate a new list + unless list_mapping[r] + list_mapping[r] = RDF::List.new + add_debug(element) {"list(#{r}): create #{list_mapping[r].inspect}"} end - add_debug(element, "member: add #{current_object_resource} to #{r} collection") - collection_mappings[r] << current_object_resource + add_debug(element, "[Step 9] add #{current_object_resource.to_ntriples} to #{r} #{list_mapping[r].inspect}") + list_mapping[r] << current_object_resource else add_triple(element, new_subject, r, current_object_resource) end end revs.each do |r| add_triple(element, current_object_resource, r, new_subject) end elsif rel || rev - # Incomplete triples and bnode creation [Step 9] - add_debug(element) {"[Step 9] incompletes: rels: #{rels}, revs: #{revs}"} + # Incomplete triples and bnode creation [Step 10] + add_debug(element) {"[Step 10] incompletes: rels: #{rels}, revs: #{revs}"} current_object_resource = RDF::Node.new # predicate: full IRI # direction: forward/reverse - # collection: Save into collection, don't generate triple + # lists: Save into list, don't generate triple rels.each do |r| - if member - # If the current collection mappings does not contain a collection associated with this IRI, - # instantiate a new collection - unless collection_mappings[r] - collection_mappings[r] = RDF::List.new - add_debug(element) {"collections(#{r}): create #{collection_mappings[r]}"} + if inlist + # If the current list mapping does not contain a list associated with this IRI, + # instantiate a new list + unless list_mapping[r] + list_mapping[r] = RDF::List.new + add_debug(element) {"[Step 10] list(#{r}): create #{list_mapping[r].inspect}"} end - incomplete_triples << {:collection => collection_mappings[r]} + incomplete_triples << {:list => list_mapping[r], :direction => :none} else incomplete_triples << {:predicate => r, :direction => :forward} end end revs.each do |r| incomplete_triples << {:predicate => r, :direction => :reverse} end end - # Establish current object literal [Step 10] + # Establish current object literal [Step 11] # - # Collections: if the current element has a @member attribute, add the property to the - # collection associated with that property, creating a new collection if necessary. + # If the current element has a @inlist attribute, add the property to the + # list associated with that property, creating a new list if necessary. if property properties = process_uris(element, property, evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, - :restrictions => TERMorCURIEorAbsURIprop[@version]) + :restrictions => TERMorCURIEorAbsURIprop.fetch(@version, [])) properties.reject! do |p| if p.is_a?(RDF::URI) false else - add_debug(element, "predicate #{p.inspect} must be a URI") + add_debug(element, "[Step 11] predicate #{p.to_ntriples} must be a URI") true end end # get the literal datatype @@ -930,20 +928,20 @@ # the following 3 IF clauses should be mutually exclusive. Written as is to prevent extensive indentation. datatype = process_uri(element, datatype, evaluation_context, base, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary, - :restrictions => TERMorCURIEorAbsURI[@version]) unless datatype.to_s.empty? + :restrictions => TERMorCURIEorAbsURI.fetch(@version, [])) unless datatype.to_s.empty? begin current_object_literal = if !datatype.to_s.empty? && datatype.to_s != RDF.XMLLiteral.to_s # typed literal - add_debug(element, "[Step 10] typed literal (#{datatype})") + add_debug(element, "[Step 11] typed literal (#{datatype})") RDF::Literal.new(content || element.inner_text.to_s, :datatype => datatype, :language => language, :validate => validate?, :canonicalize => canonicalize?) elsif @version == :"rdfa1.1" if datatype.to_s == RDF.XMLLiteral.to_s # XML Literal - add_debug(element) {"[Step 10(1.1)] XML Literal: #{element.inner_html}"} + add_debug(element) {"[Step 11(1.1)] XML Literal: #{element.inner_html}"} # In order to maintain maximum portability of this literal, any children of the current node that are # elements must have the current in scope XML namespace declarations (if any) declared on the # serialized element using their respective attributes. Since the child element node could also # declare new XML namespaces, the RDFa Processor must be careful to merge these together when @@ -959,21 +957,21 @@ rescue ArgumentError => e add_error(element, e.message) end else # plain literal - add_debug(element, "[Step 10(1.1)] plain literal") + add_debug(element, "[Step 11(1.1)] plain literal") RDF::Literal.new(content || element.inner_text.to_s, :language => language, :validate => validate?, :canonicalize => canonicalize?) end else if content || (children_node_types == [Nokogiri::XML::Text]) || (element.children.length == 0) || datatype == "" # plain literal - add_debug(element, "[Step 10 (1.0)] plain literal") + add_debug(element, "[Step 11 (1.0)] plain literal") RDF::Literal.new(content || element.inner_text.to_s, :language => language, :validate => validate?, :canonicalize => canonicalize?) elsif children_node_types != [Nokogiri::XML::Text] and (datatype == nil or datatype.to_s == RDF.XMLLiteral.to_s) # XML Literal - add_debug(element) {"[Step 10 (1.0)] XML Literal: #{element.inner_html}"} + add_debug(element) {"[Step 11 (1.0)] XML Literal: #{element.inner_html}"} recurse = false RDF::Literal.new(element.inner_html, :datatype => RDF.XMLLiteral, :language => language, :namespaces => namespaces, @@ -985,67 +983,68 @@ add_error(element, e.message) end # add each property properties.each do |p| - # Collections: If element has an @member attribute, add the value to a collection - if member - # If the current collection mappings does not contain a collection associated with this IRI, - # instantiate a new collection - unless collection_mappings[p] - collection_mappings[p] = RDF::List.new - add_debug(element) {"collections(#{p}): create #{collection_mappings[p]}"} + # Lists: If element has an @inlist attribute, add the value to a list + if inlist + # If the current list mapping does not contain a list associated with this IRI, + # instantiate a new list + unless list_mapping[p] + list_mapping[p] = RDF::List.new + add_debug(element) {"[Step 11] lists(#{p}): create #{list_mapping[p].inspect}"} end - add_debug(element) {"member: add #{current_object_literal} to #{p} collection"} - collection_mappings[p] << current_object_literal + add_debug(element) {"[Step 11] add #{current_object_literal.to_ntriples} to #{p.to_ntriples} #{list_mapping[p].inspect}"} + list_mapping[p] << current_object_literal elsif new_subject add_triple(element, new_subject, p, current_object_literal) end end end if not skip and new_subject && !evaluation_context.incomplete_triples.empty? - # Complete the incomplete triples from the evaluation context [Step 11] + # Complete the incomplete triples from the evaluation context [Step 12] add_debug(element) do - "[Step 11] complete incomplete triples: " + - "new_subject=#{new_subject.inspect}, " + + "[Step 12] complete incomplete triples: " + + "new_subject=#{new_subject.to_ntriples}, " + "completes=#{evaluation_context.incomplete_triples.inspect}" end evaluation_context.incomplete_triples.each do |trip| - if trip[:collection] - add_debug(element) {"member: add #{current_object_resource} to #{trip[:collection]} collection"} - trip[:collection] << new_subject - elsif trip[:direction] == :forward + case trip[:direction] + when :none + add_debug(element) {"[Step 12] add #{new_subject.to_ntriples} to #{trip[:list].inspect}"} + trip[:list] << new_subject + when :forward add_triple(element, evaluation_context.parent_subject, trip[:predicate], new_subject) - elsif trip[:direction] == :reverse + when :reverse add_triple(element, new_subject, trip[:predicate], evaluation_context.parent_subject) end end end - # Create a new evaluation context and proceed recursively [Step 12] + # Create a new evaluation context and proceed recursively [Step 13] if recurse if skip if language == evaluation_context.language && uri_mappings == evaluation_context.uri_mappings && term_mappings == evaluation_context.term_mappings && default_vocabulary == evaluation_context.default_vocabulary && base == evaluation_context.base && - collection_mappings == evaluation_context.collection_mappings + list_mapping == evaluation_context.list_mapping new_ec = evaluation_context - add_debug(element, "[Step 12] skip: reused ec") + add_debug(element, "[Step 13] skip: reused ec") else new_ec = evaluation_context.clone new_ec.base = base new_ec.language = language new_ec.uri_mappings = uri_mappings new_ec.namespaces = namespaces new_ec.term_mappings = term_mappings new_ec.default_vocabulary = default_vocabulary - new_ec.collection_mappings = collection_mappings - add_debug(element, "[Step 12] skip: cloned ec") + new_ec.list_mapping = list_mapping + add_debug(element, "[Step 13] skip: cloned ec") end else # create a new evaluation context new_ec = EvaluationContext.new(base, @host_defaults) new_ec.parent_subject = new_subject || evaluation_context.parent_subject @@ -1054,37 +1053,37 @@ new_ec.namespaces = namespaces new_ec.incomplete_triples = incomplete_triples new_ec.language = language new_ec.term_mappings = term_mappings new_ec.default_vocabulary = default_vocabulary - new_ec.collection_mappings = collection_mappings - add_debug(element, "[Step 12] new ec") + new_ec.list_mapping = list_mapping + add_debug(element, "[Step 13] new ec") end element.children.each do |child| # recurse only if it's an element traverse(child, new_ec) if child.class == Nokogiri::XML::Element end - # Collections: after traversing through child elements, for each collection associated with + # Step 14: after traversing through child elements, for each list associated with # a property - collection_mappings.each do |p, c| - # if that collection is different from the evaluation context - ec_col = evaluation_context.collection_mappings[p] if evaluation_context.collection_mappings - add_debug(element) {"collections: time to create #{c}? #{(ec_col != c).inspect}"} - if ec_col != c - add_debug(element) {"collection(#{p}) create #{c}"} - # Generate an rdf:List with the elements of that collection. - c.each_statement do |st| + (list_mapping || {}).each do |p, l| + # if that list is different from the evaluation context + ec_list = evaluation_context.list_mapping[p] if evaluation_context.list_mapping + add_debug(element) {"[Step 14] time to create #{l.inspect}? #{(ec_list != l).inspect}"} + if ec_list != l + add_debug(element) {"[Step 14] list(#{p}) create #{l.inspect}"} + # Generate an rdf:List with the elements of that list. + l.each_statement do |st| add_triple(element, st.subject, st.predicate, st.object) unless st.object == RDF.List end - # Generate a triple relating new_subject, property and the collection BNode, - # or rdf:nil if the collection is empty. - if c.empty? + # Generate a triple relating new_subject, property and the list BNode, + # or rdf:nil if the list is empty. + if l.empty? add_triple(element, new_subject, p, RDF.nil) else - add_triple(element, new_subject, p, c.subject) + add_triple(element, new_subject, p, l.subject) end end end end end