lib/rdf/rdfa/reader.rb in rdf-rdfa-2.2.2 vs lib/rdf/rdfa/reader.rb in rdf-rdfa-2.2.3

- old
+ new

@@ -29,11 +29,11 @@ format Format include Expansion include RDF::Util::Logger XHTML = "http://www.w3.org/1999/xhtml" - + # Content model for @about and @resource. In RDFa 1.0, this was URIorSafeCURIE SafeCURIEorCURIEorIRI = { :"rdfa1.0" => [:safe_curie, :uri, :bnode], :"rdfa1.1" => [:safe_curie, :curie, :uri, :bnode], } @@ -81,21 +81,21 @@ # Host language # @!attribute [r] host_language # @return [:xml, :xhtml1, :xhtml5, :html4, :html5, :svg] attr_reader :host_language - + # Version # @!attribute [r] version # @return [:"rdfa1.0", :"rdfa1.1"] attr_reader :version - + # Repository used for collecting triples. # @!attribute [r] repository # @return [RDF::Repository] attr_reader :repository - + # Returns the XML implementation module for this reader instance. # # @!attribute [rw] implementation # @return [Module] attr_reader :implementation @@ -122,11 +122,11 @@ # but it will usually change during the course of processing. # # @!attribute [rw] parent_subject # @return [RDF::URI] attr_accessor :parent_subject - + ## # The parent object. # # In some situations the object of a statement becomes the subject of any nested statements, # and this property is used to convey this value. @@ -136,26 +136,26 @@ # and this property is used to convey this value. # # @!attribute [rw] parent_object # @return [RDF::URI] attr_accessor :parent_object - + ## # A list of current, in-scope URI mappings. # # @!attribute [rw] uri_mappings # @return [Hash{Symbol => String}] attr_accessor :uri_mappings - + ## # A list of current, in-scope Namespaces. This is the subset of uri_mappings # which are defined using xmlns. # # @!attribute [rw] namespaces # @return [Hash{String => Namespace}] attr_accessor :namespaces - + ## # A list of incomplete triples. # # A triple can be incomplete when no object resource # is provided alongside a predicate that requires a resource (i.e., @rel or @rev). 
@@ -163,29 +163,29 @@ # which will be when the next subject is specified (part of the process called chaining). # # @!attribute [rw] incomplete_triples # @return [Array<Array<RDF::URI, RDF::Resource>>] attr_accessor :incomplete_triples - + ## # The language. Note that there is no default language. # # @!attribute [rw] language # @return [Symbol] attr_accessor :language - + ## # The term mappings, a list of terms and their associated URIs. # # This specification does not define an initial list. # Host Languages may define an initial list. # If a Host Language provides an initial list, it should do so via an RDFa Context document. # # @!attribute [rw] term_mappings # @return [Hash{Symbol => RDF::URI}] attr_accessor :term_mappings - + ## # The default vocabulary # # A value to use as the prefix URI when a term is used. # This specification does not define an initial setting for the default vocabulary. @@ -228,11 +228,11 @@ @uri_mappings = from.uri_mappings.clone @incomplete_triples = from.incomplete_triples.clone @namespaces = from.namespaces.clone @list_mapping = from.list_mapping # Don't clone end - + def inspect v = ['base', 'parent_subject', 'parent_object', 'language', 'default_vocabulary'].map do |a| "#{a}=#{o = self.send(a); o.respond_to?(:to_ntriples) ? o.to_ntriples : o.inspect}" end v << "uri_mappings[#{uri_mappings.keys.length}]" @@ -327,12 +327,24 @@ end self.extend(@implementation) detect_host_language_version(input, options) - add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{@library}, rdfagraph = #{@options[:rdfagraph].inspect}, expand = #{@options[:vocab_expansion]}") + parse_lib = if @library == :nokogiri && @host_language == :html5 + begin + require 'nokogumbo' unless defined?(::Nokogumbo) + :nokobumbo + rescue LoadError + :nokogiri + end + else + @library + end + parse_lib = @library == :nokogiri && defined?(::Nokogumbo) ? 
:nokogumbo : @library + add_info(@doc, "version = #{@version}, host_language = #{@host_language}, library = #{parse_lib}, rdfagraph = #{@options[:rdfagraph].inspect}, expand = #{@options[:vocab_expansion]}") + begin initialize_xml(input, options) rescue add_error(nil, "Malformed document: #{$!.message}") end @@ -417,11 +429,11 @@ reader.new(doc, options).each(&block) else add_debug(el, "=> no reader found") end end - + # Look for Embedded RDF/XML unless @root.xpath("//rdf:RDF", "rdf" => "http://www.w3.org/1999/02/22-rdf-syntax-ns#").empty? extract_script(@root, @doc, "application/rdf+xml", @options) do |statement| @repository << statement end @@ -435,11 +447,11 @@ extract_script(el, text, type, @options) do |statement| @repository << statement end end - + # Look for Embedded microdata unless @root.xpath("//@itemscope").empty? begin require 'rdf/microdata' add_debug(@doc, "process microdata") @@ -452,11 +464,11 @@ # Perform property copying copy_properties(@repository) if @options[:reference_folding] # Perform vocabulary expansion expand(@repository) if @options[:vocab_expansion] - + @processed = true end # Return statements in the default graph for # statements in the associated named or default graph from the @@ -491,24 +503,24 @@ block.call(*statement.to_triple) end end enum_for(:each_triple) end - + private # Keep track of allocated BNodes def bnode(value = nil) @bnode_cache ||= {} @bnode_cache[value.to_s] ||= RDF::Node.new(value) end - + # Figure out the document path, if it is an Element or Attribute def node_path(node) "<#{base_uri}>#{node.respond_to?(:display_path) ? 
node.display_path : node}" end - + # Add debug event to debug array, if specified # # @param [#display_path, #to_s] node XML Node or string for showing context # @param [String] message # @yieldreturn [String] appended to message, to allow for lazy-evaluation of message @@ -517,19 +529,19 @@ end def add_info(node, message, process_class = RDF::RDFA.Info, &block) add_processor_message(node, message, process_class, &block) end - + def add_warning(node, message, process_class = RDF::RDFA.Warning) add_processor_message(node, message, process_class) end - + def add_error(node, message, process_class = RDF::RDFA.Error) add_processor_message(node, message, process_class) end - + def add_processor_message(node, message, process_class, &block) case process_class when RDF::RDFA.Error then log_error(node_path(node), message, &block) when RDF::RDFA.Warning then log_warn(node_path(node), message, &block) when RDF::RDFA.Info then log_info(node_path(node), message, &block) @@ -550,11 +562,11 @@ RDF::Statement.new(n, RDF::RDFA.context, nc, graph_name: RDF::RDFA.ProcessorGraph), RDF::Statement.new(nc, RDF["type"], RDF::PTR.XPathPointer, graph_name: RDF::RDFA.ProcessorGraph), RDF::Statement.new(nc, RDF::PTR.expression, node.path, graph_name: RDF::RDFA.ProcessorGraph) ] end - + @repository.insert(*processor_statements) if cb = @options[:processor_callback] processor_statements.each {|s| cb.call(s)} end end @@ -589,11 +601,11 @@ add_debug("") {"parse_whole_doc: base='#{base}'"} end # initialize the evaluation context with the appropriate base evaluation_context = EvaluationContext.new(base, @host_defaults) - + if @version != :"rdfa1.0" # Process default vocabularies load_initial_contexts(@host_defaults[:initial_contexts]) do |which, value| add_debug(root) { "parse_whole_document, #{which}: #{value.inspect}"} case which @@ -601,15 +613,15 @@ when :term_mappings then evaluation_context.term_mappings.merge!(value) when :default_vocabulary then evaluation_context.default_vocabulary = 
value end end end - + traverse(root, evaluation_context) add_debug("", "parse_whole_doc: traversal complete'") end - + # Parse and process URI mappings, Term mappings and a default vocabulary from @context # # Yields each mapping def load_initial_contexts(initial_contexts) initial_contexts. @@ -723,11 +735,11 @@ def traverse(element, evaluation_context) if element.nil? add_error(element, "Can't parse nil element") return nil end - + add_debug(element) { "ec: #{evaluation_context.inspect}" } # local variables [7.5 Step 1] recurse = true skip = false @@ -805,16 +817,16 @@ end add_debug(element) { "[Step 2] default_vocaulary: #{default_vocabulary.inspect}" } end - + # Local term mappings [7.5 Step 3] # Next, the current element is then examined for URI mappings and these are added to the local list of URI mappings. # Note that a URI mapping will simply overwrite any current mapping in the list that has the same name extract_mappings(element, uri_mappings, namespaces) - + # Language information [7.5 Step 4] language = element.language || language language = nil if language.to_s.empty? add_debug(element) {"HTML5 [3.2.3.3] lang: #{language.inspect}"} if language @@ -842,18 +854,18 @@ revs = process_uris(element, attrs[:rev], evaluation_context, base, uri_mappings: uri_mappings, term_mappings: term_mappings, vocab: default_vocabulary, restrictions: TERMorCURIEorAbsIRI.fetch(@version, [])) - + add_debug(element) do "rels: #{rels.join(" ")}, revs: #{revs.join(" ")}" end unless (rels + revs).empty? 
if !(attrs[:rel] || attrs[:rev]) # Establishing a new subject if no rel/rev [7.5 Step 5] - + if @version == :"rdfa1.0" new_subject = if attrs[:about] process_uri(element, attrs[:about], evaluation_context, base, uri_mappings: uri_mappings, restrictions: SafeCURIEorCURIEorIRI.fetch(@version, [])) @@ -987,11 +999,11 @@ uri_mappings: uri_mappings, restrictions: SafeCURIEorCURIEorIRI.fetch(@version, [])) new_subject ||= process_uri(element, attrs[:src], evaluation_context, base, uri_mappings: uri_mappings, restrictions: [:uri]) if @version == :"rdfa1.0" - + # if the @typeof attribute is present, set typed resource to new subject typed_resource = new_subject if attrs[:typeof] # If no URI is provided then the first match from the following rules will apply new_subject ||= if element == root && base @@ -1006,11 +1018,11 @@ else # if it's null, it's null and nothing changes evaluation_context.parent_object # no skip flag set this time end - + # Then the current object resource is set to the URI obtained from the first match from the following rules: current_object_resource = process_uri(element, attrs[:resource], evaluation_context, base, uri_mappings: uri_mappings, restrictions: SafeCURIEorCURIEorIRI.fetch(@version, [])) if attrs[:resource] current_object_resource ||= process_uri(element, attrs[:href], evaluation_context, base, @@ -1032,11 +1044,11 @@ "[Step 6] new_subject: #{new_subject}, " + "current_object_resource = #{current_object_resource.nil? ? 
'nil' : current_object_resource} " + "typed_resource: #{typed_resource.to_ntriples rescue 'nil'}, " } end - + # [Step 7] If in any of the previous steps a typed resource was set to a non-null value, it is now used to provide a subject for type values; if typed_resource # Typeof is TERMorCURIEorAbsIRIs types = process_uris(element, attrs[:typeof], evaluation_context, base, uri_mappings: uri_mappings, @@ -1080,19 +1092,19 @@ else # Predicates for the current object resource can be set by using one or both of the @rel and the @rev attributes but, in case of the @rel attribute, only if the @inlist is not present: add_triple(element, new_subject, r, current_object_resource) end end - + revs.each do |r| add_triple(element, current_object_resource, r, new_subject) end elsif attrs[:rel] || attrs[:rev] # Incomplete triples and bnode creation [Step 10] add_debug(element) {"[Step 10] incompletes: rels: #{rels}, revs: #{revs}"} current_object_resource = RDF::Node.new - + # predicate: full IRI # direction: forward/reverse # lists: Save into list, don't generate triple rels.each do |r| @@ -1106,16 +1118,16 @@ incomplete_triples << {list: list_mapping[r], direction: :none} else incomplete_triples << {predicate: r, direction: :forward} end end - + revs.each do |r| incomplete_triples << {predicate: r, direction: :reverse} end end - + # Establish current object literal [Step 11] # # If the current element has a @inlist attribute, add the property to the # list associated with that property, creating a new list if necessary. 
if attrs[:property] @@ -1265,15 +1277,15 @@ add_debug(element) {"[Step 11] lists(#{p}): create #{list_mapping[p].inspect}"} end add_debug(element) {"[Step 11] add #{current_property_value.to_ntriples} to #{p.to_ntriples} #{list_mapping[p].inspect}"} list_mapping[p] << current_property_value elsif new_subject - add_triple(element, new_subject, p, current_property_value) + add_triple(element, new_subject, p, current_property_value) end end end - + if !skip and new_subject && !evaluation_context.incomplete_triples.empty? # Complete the incomplete triples from the evaluation context [Step 12] add_debug(element) do "[Step 12] complete incomplete triples: " + "new_subject=#{new_subject.to_ntriples}, " + @@ -1327,16 +1339,16 @@ new_ec.term_mappings = term_mappings new_ec.default_vocabulary = default_vocabulary new_ec.list_mapping = list_mapping add_debug(element, "[Step 13] new ec") end - + element.children.each do |child| # recurse only if it's an element traverse(child, new_ec) if child.element? end - + # Step 14: after traversing through child elements, for each list associated with # a property (list_mapping || {}).each do |p, l| # if that list is different from the evaluation context ec_list = evaluation_context.list_mapping[p] if evaluation_context.list_mapping @@ -1442,19 +1454,19 @@ add_warning(element, "Undefined prefix #{$1}") else add_warning(element, "Relative URI #{value}") end end - + # [7.4.3] General Use of Terms in Attributes def process_term(element, value, options) if options[:vocab] # If there is a local default vocabulary, the IRI is obtained by concatenating that value and the term return uri(options[:vocab] + value) elsif options[:term_mappings].is_a?(Hash) # If the term is in the local term mappings, use the associated URI (case sensitive). 
return uri(options[:term_mappings][value.to_s.to_sym]) if options[:term_mappings].has_key?(value.to_s.to_sym) - + # Otherwise, check for case-insensitive match options[:term_mappings].each_pair do |term, uri| return uri(uri) if term.to_s.downcase == value.to_s.downcase end end