require File.join(File.dirname(__FILE__), 'graph')

module RdfContext
  # Generic RdfContext Parser class.
  #
  # When #parse is invoked on an instance of Parser itself, the serialization
  # is taken from options[:type] (or guessed by #detect_format) and the work is
  # handed to an N3Parser, RdfaParser or RdfXmlParser delegate. When #parse is
  # invoked on an instance of any other class, only the set-up shared by all
  # parsers is performed.
  class Parser
    # The +graph+ and +debug+ readers are written out by hand further down
    # because they must consult the delegate; only the remaining accessors are
    # generated here.
    attr_accessor :doc
    attr_writer :graph

    ##
    # Creates a new parser.
    #
    # @param [Hash] options:: Options from
    # options[:graph]:: Graph to parse into, otherwise a new RdfContext::Graph instance is created
    # options[:debug]:: Array to place debug messages
    # options[:strict]:: Raise Error if true, continue with lax parsing, otherwise
    #
    # Any other key (for example options[:type]) is not read here.
    def initialize(options = {})
      # initialize the triplestore
      @graph = options[:graph]
      @debug = options[:debug]
      @strict = options[:strict]
      @named_bnodes = {}
    end

    # Instantiate Parser and parse document
    #
    # @param [IO, String] stream:: the RDF IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
    # @param [String] uri:: the URI of the document
    # @param [Hash] options:: Options from
    # options[:debug]:: Array to place debug messages
    # options[:type]:: One of _rdfxml_, _html_, or _n3_
    # options[:strict]:: Raise Error if true, continue with lax parsing, otherwise
    # @return [Graph]:: Returns the graph containing parsed triples
    # @raise [Error]:: Raises RdfError if _strict_
    def self.parse(stream, uri = nil, options = {}, &block) # :yields: triple
      new(options).parse(stream, uri, options, &block)
    end

    # Parse RDF document from a string or input stream to closure or graph.
    #
    # If the parser is called with a block, triples are passed to the block rather
    # than added to the graph.
    #
    # Virtual Class, prototype for Parser subclass.
    #
    # @param [IO, String] stream:: the RDF IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
    # @param [String] uri:: the URI of the document
    # @param [Hash] options:: Options from
    # options[:debug]:: Array to place debug messages
    # options[:type]:: Serialization; when absent it is guessed by #detect_format
    # options[:strict]:: Raise Error if true, continue with lax parsing, otherwise
    # @return [Graph]:: Returns the graph containing parsed triples
    # @raise [Error]:: Raises RdfError if _strict_
    def parse(stream, uri = nil, options = {}, &block) # :yields: triple
      if self.class == Parser
        # Work on a copy: the defaults and the detected :type written below
        # must not leak into the caller's hash, where a stale :type would be
        # picked up if the same hash were reused for another document.
        options = options.dup
        options[:strict] ||= @strict if @strict
        options[:graph] ||= @graph if @graph
        options[:debug] ||= @debug if @debug

        # Intuit type, if not provided
        options[:type] ||= detect_format(stream, uri)

        # Create a delegate of a specific parser class. The delegate is kept,
        # so later calls on this instance reuse it whatever their :type.
        @delegate ||= case options[:type].to_s
                      when "n3", "ntriples", "turtle" then N3Parser.new(options)
                      when "rdfa", "html", "xhtml" then RdfaParser.new(options)
                      when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
                      else
                        # Unknown or missing type falls back to RDF/XML.
                        # raise ParserException.new("type option must be one of :rdfxml, :html, or :n3")
                        RdfXmlParser.new(options)
                      end
        @delegate.parse(stream, uri, options, &block)
      else
        # Common parser operations
        @uri = Addressable::URI.parse(uri.to_s).to_s unless uri.nil?
        @strict = options[:strict] if options.has_key?(:strict)
        @debug = options[:debug] if options.has_key?(:debug)

        @graph ||= Graph.new(:identifier => @uri)
      end
    end

    # Graph of the delegate when there is one, otherwise this parser's own
    # graph; a fresh Graph is returned (and not stored) when neither exists.
    def graph
      @delegate ? @delegate.graph : (@graph || Graph.new)
    end

    # Debug value of the delegate when there is one, otherwise this parser's own.
    def debug
      @delegate ? @delegate.debug : @debug
    end

    # Return N3 Parser instance
    def self.n3_parser(options = {})
      N3Parser.new(options)
    end

    # Return RDF/XML Parser instance
    def self.rdfxml_parser(options = {})
      RdfXmlParser.new(options)
    end

    # Return Rdfa Parser instance
    def self.rdfa_parser(options = {})
      RdfaParser.new(options)
    end

    # Heuristically detect the format of the uri
    #
    # The extension of +uri+ is tried first; when it is not conclusive the
    # beginning of +stream+ is inspected instead.
    #
    # @param [IO, String] stream:: document content; anything answering to +rewind+ and +read+ is sampled, anything else is converted with +to_s+
    # @param [String] uri:: the URI of the document
    # @return [Symbol, nil]:: :rdfxml, :rdfa or :n3, or nil when nothing matched
    def detect_format(stream, uri = nil)
      case uri.to_s
      when /\.(rdf|xml)$/ then :rdfxml
      when /\.(html|xhtml)$/ then :rdfa
      when /\.(nt|n3|txt)$/ then :n3
      else
        # Got to look into the file to see
        # Duck-typed rather than is_a?(IO) || is_a?(StringIO): this file does
        # not require 'stringio', and other IO-like objects qualify as well.
        if stream.respond_to?(:rewind) && stream.respond_to?(:read)
          stream.rewind
          string = stream.read(1000)
          stream.rewind
        else
          string = stream.to_s
        end

        case string
        when /<\w+:RDF/ then :rdfxml
        # FIXME(review): at least one further content pattern followed here
        # but is missing from this copy of the file; restore it from version
        # control. Until then unrecognised content yields nil, for which
        # #parse falls back to RdfXmlParser.
        end
      end
    end

    # FIXME(review): the source text between the content patterns of
    # detect_format above and the end of the class is missing from this copy of
    # the file. Only the fragment below survived; it looks like the rescue
    # clause of the method that adds a triple, and it calls an +add_debug+
    # helper whose definition is also missing. Restore the lost definitions
    # from version control rather than re-creating them from this fragment.
    #
    #   ... e
    #     add_debug(node, "add_triple raised #{e.class}: #{e.message}")
    #     puts e.backtrace if $DEBUG
    #     raise if @strict
    #   end
  end
end