require 'rdf/xsd' module RDF::TriX ## # TriX parser. # # This class supports [REXML][], [LibXML][] and [Nokogiri][] for XML # processing, and will automatically select the most performant # implementation (Nokogiri or LibXML) that is available. If need be, you # can explicitly override the used implementation by passing in a # `:library` option to `Reader.new` or `Reader.open`. # # [REXML]: https://www.germane-software.com/software/rexml/ # [LibXML]: https://rubygems.org/gems/libxml-ruby/ # [Nokogiri]: https://nokogiri.org/ # # @example Loading TriX parsing support # require 'rdf/trix' # # @example Obtaining a TriX reader class # RDF::Reader.for(:trix) #=> RDF::TriX::Reader # RDF::Reader.for("etc/doap.xml") # RDF::Reader.for(:file_name => "etc/doap.xml") # RDF::Reader.for(:file_extension => "xml") # RDF::Reader.for(:content_type => "application/trix") # # @example Instantiating a Nokogiri-based reader # RDF::TriX::Reader.new(input, :library => :nokogiri) # # @example Instantiating a LibXML-based reader # RDF::TriX::Reader.new(input, :library => :libxml) # # @example Instantiating a REXML-based reader # RDF::TriX::Reader.new(input, :library => :rexml) # # @example Parsing RDF statements from a TriX file # RDF::TriX::Reader.open("etc/doap.xml") do |reader| # reader.each_statement do |statement| # puts statement.inspect # end # end # # @example Parsing RDF statements from a TriX string # data = StringIO.new(File.read("etc/doap.xml")) # RDF::TriX::Reader.new(data) do |reader| # reader.each_statement do |statement| # puts statement.inspect # end # end # # @see https://www.w3.org/2004/03/trix/ class Reader < RDF::Reader format RDF::TriX::Format ## # Returns the XML implementation module for this reader instance. # # @return [Module] attr_reader :implementation ## # Returns the Base URI as provided, or found from xml:base # # @return [RDF::URI] attr_reader :base_uri ## # Initializes the TriX reader instance. # # @param [IO, File, String] input # @param [Hash{Symbol => Object}] options # any additional options (see `RDF::Reader#initialize`) # @option options [Symbol] :library (:nokogiri, :libxml, or :rexml) # @option options [#to_s] :base_uri (nil) # the base URI to use when resolving relative URIs # @yield [reader] `self` # @yieldparam [RDF::Reader] reader # @yieldreturn [void] ignored def initialize(input = $stdin, **options, &block) super do @library = case options[:library] when nil # Use Nokogiri or LibXML when available, and REXML otherwise: begin require 'nokogiri' :nokogiri rescue LoadError => e begin require 'libxml' :libxml rescue LoadError => e :rexml end end when :nokogiri, :libxml, :rexml options[:library] else raise ArgumentError.new("expected :rexml, :libxml or :nokogiri, but got #{options[:library].inspect}") end require "rdf/trix/reader/#{@library}" @implementation = case @library when :nokogiri then Nokogiri when :libxml then LibXML when :rexml then REXML end self.extend(@implementation) begin initialize_xml(input, **options) rescue log_error("Malformed document: #{$!.message}") end if block_given? case block.arity when 0 then instance_eval(&block) else block.call(self) end end end end ## # @private # @see RDF::Reader#each_graph def each_graph(&block) if block_given? base = read_base @base_uri = base_uri ? base : base_uri.join(base) find_graphs do |graph_element| graph_name = read_graph(graph_element) graph_name = base_uri.join(graph_name) if base_uri && graph_name && graph_name.relative? graph = RDF::Graph.new(graph_name: graph_name) read_statements(graph_element) { |statement| graph << statement } block.call(graph) end if validate? && log_statistics[:error] raise RDF::ReaderError, "Errors found during processing" end end enum_graph end ## # @private # @see RDF::Reader#each_statement def each_statement(&block) if block_given? base = read_base @base_uri = base_uri ? base_uri.join(base) : base find_graphs do |graph_element| read_statements(graph_element, &block) end if validate? && log_statistics[:error] raise RDF::ReaderError, "Errors found during processing" end end enum_statement end ## # @private # @see RDF::Reader#each_triple def each_triple(&block) if block_given? each_statement do |statement| block.call(*statement.to_triple) end end enum_triple end ## # @private # @see RDF::Reader#each_quad def each_quad(&block) if block_given? each_statement do |statement| block.call(*statement.to_quad) end end enum_quad end ## # Yield each statement from a graph # # @param [Object] element # @yield statement # @yieldparam [RDF::Statement] statement def read_statements(graph_element, &block) graph_name = read_graph(graph_element) graph_name = base_uri.join(graph_name) if base_uri && graph_name && graph_name.relative? triple_elements(graph_element).each do |triple_element| block.call(read_triple(triple_element, graph_name: graph_name)) end end ## # Read a <triple> # @param [Hash{String => Object}] element # @return [RDF::Statement] statement def read_triple(element, graph_name: nil) terms = element_elements(element)[0..2].map do |element| parse_element(element.name, element, element_content(element)) end RDF::Statement(*terms, graph_name: graph_name) end ## # Returns the RDF value of the given TriX element. # # @param [String] name # @param [Hash{String => Object}] element # @param [String] content # @return [RDF::Value] def parse_element(name, element, content) case name.to_sym when :id RDF::Node.intern(content.strip) when :uri uri = RDF::URI.new(content.strip) # TODO: interned URIs uri = base_uri.join(uri) if base_uri && uri.relative? uri.validate! if validate? uri.canonicalize! if canonicalize? uri when :triple # RDF-star log_error "expected 'triple' element" unless @options[:rdfstar] read_triple(element) when :typedLiteral content = element.children.c14nxl(library: @library) if element['datatype'] == RDF.XMLLiteral literal = RDF::Literal.new(content, :datatype => RDF::URI(element['datatype'])) literal.validate! if validate? literal.canonicalize! if canonicalize? literal when :plainLiteral literal = case when lang = element['xml:lang'] || element['lang'] RDF::Literal.new(content, :language => lang) else RDF::Literal.new(content) end literal.validate! if validate? literal.canonicalize! if canonicalize? literal else log_error "expected element name to be 'id', 'uri', 'triple', 'typedLiteral', or 'plainLiteral', but got #{name.inspect}" end end end # Reader end # RDF::TriX