module RdfContext # Generic RdfContext Parser class class Parser attr_reader :debug # URI of parsed document # @return [RdfContext::URIRef] attr_reader :uri # Source of parsed document # @return [Nokogiri::XML::Document, #read] attr_accessor :doc # Graph instance containing parsed statements # @return [RdfContext::Graph] attr_accessor :graph # Graph instance containing informational, warning and error statements # @return [RdfContext::Graph] attr_accessor :processor_graph ## # Creates a new parser # # @option options [Graph] :processor_graph (nil) Graph to record information, warnings and errors. # @option options [:rdfxml, :html, :n3] :type (nil) # @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise def initialize(options = {}) # initialize the triplestore @processor_graph = options[:processor_graph] if options[:processor_graph] @debug = options[:debug] # XXX deprecated @strict = options[:strict] @named_bnodes = {} end # Instantiate Parser and parse document # # @param [#read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document # @param [String] uri (nil) the URI of the document # @option options [Graph] :graph (Graph.new) Graph to parse into, otherwise a new Graph instance is created # @option options [Graph] :processor_graph (nil) Graph to record information, warnings and errors. 
# @option options [:rdfxml, :html, :n3] :type (nil)
    # @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise
    # @yield [triple]
    # @yieldparam [Triple] triple
    # @return [Graph] Returns the graph containing parsed triples
    # @raise [Error] Raises RdfError if _strict_
    def self.parse(stream, uri = nil, options = {}, &block) # :yields: triple
      # The same options hash configures the new parser and the parse call
      new(options).parse(stream, uri, options, &block)
    end

    # Parse RDF document from a string or input stream to closure or graph.
    #
    # If the parser is called with a block, triples are passed to the block rather
    # than added to the graph.
    #
    # Virtual Class, prototype for Parser subclass.
    #
    # @param [#read, #to_s] stream the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
    # @param [String] uri (nil) the URI of the document
    # @option options [Graph] :graph (Graph.new) Graph to parse into, otherwise a new Graph instance is created
    # @option options [Graph] :processor_graph (nil) Graph to record information, warnings and errors.
# @option options [:rdfxml, :html, :n3] :type (nil) # @option options [Boolean] :strict (false) Raise Error if true, continue with lax parsing, otherwise # @return [Graph] Returns the graph containing parsed triples # @yield [triple] # @yieldparam [Triple] triple # @raise [Error]:: Raises RdfError if _strict_ # @return [Graph]:: Returns the graph containing parsed triples # @raise [Error]:: Raises RdfError if _strict_ def parse(stream, uri = nil, options = {}, &block) # :yields: triple @graph = options[:graph] || Graph.new(:identifier => @uri) if self.class == Parser options[:strict] ||= @strict if @strict options[:graph] ||= @graph options[:debug] ||= @debug if @debug # XXX deprecated # Intuit type, if not provided options[:type] ||= detect_format(stream, uri) # Create a delegate of a specific parser class @delegate ||= case options[:type].to_s when "n3", "ntriples", "turtle", "ttl", "n3", "notation3" then N3Parser.new(options) when "rdfa", "html", "xhtml" then RdfaParser.new(options) when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options) else RdfXmlParser.new(options) # raise ParserException.new("type option must be one of :rdfxml, :html, or :n3") end @delegate.parse(stream, uri, options, &block) else # Common parser operations @uri = URIRef.new(uri.to_s) unless uri.nil? @strict = options[:strict] if options.has_key?(:strict) @debug = options[:debug] if options.has_key?(:debug) end end # @return [Graph] def graph; @delegate ? @delegate.graph : (@graph || Graph.new); end # @return [Graph] def processor_graph; @delegate ? @delegate.processor_graph : (@processor_graph || Graph.new); end # @return [Array] def debug; @delegate ? 
@delegate.debug : @debug; end # Return N3 Parser instance # @return [N3Parser] def self.n3_parser(options = {}); N3Parser.new(options); end # Return RDF/XML Parser instance # @return [RdfXmlParser] def self.rdfxml_parser(options = {}); RdfXmlParser.new(options); end # Return Rdfa Parser instance # @return [RdfaParser] def self.rdfa_parser(options = {}); RdfaParser.new(options); end # Heuristically detect the format of the uri # @param [#read, #to_s] stream # @param [#to_s] uri (nil) # @return [:rdfxml, :rdfa, :n3] def detect_format(stream, uri = nil) uri ||= stream.path if stream.respond_to?(:path) format = case uri.to_s when /\.(rdf|xml)$/ then :rdfxml when /\.(html|xhtml)$/ then :rdfa when /\.(nt|n3|txt)$/ then :n3 else # Got to look into the file to see if stream.respond_to?(:read) stream.rewind string = stream.read(1000) stream.rewind else string = stream.to_s end case string when /<\w+:RDF/ then :rdfxml when / e add_debug(node, "add_triple raised #{e.class}: #{e.message}") puts e.backtrace if ::RdfContext::debug? raise if @strict end end end