require File.join(File.dirname(__FILE__), 'graph')
module RdfContext
# Generic RdfContext Parser class
class Parser
attr_reader :debug
attr_accessor :doc, :graph
##
# Creates a new generic parser; the format-specific parser is selected later, when #parse is called.
#
# @param [Hash] options:: Options from
# options[:graph]:: Graph to parse into, otherwise a new RdfContext::Graph instance is created
# options[:debug]:: Array to place debug messages
# options[:type]:: One of _rdfxml_, _html_, or _n3_
# options[:strict]:: Raise Error if true; otherwise continue with lax parsing
def initialize(options = {})
  # Capture the target graph (the triplestore), the debug sink and the
  # strictness flag in one step; any key that is absent yields nil.
  @graph, @debug, @strict = options.values_at(:graph, :debug, :strict)
  # Starts out as an empty lookup table; nothing is registered yet.
  @named_bnodes = {}
end
# Instantiate Parser and parse document
#
# @param [IO, String] stream:: the RDF IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
# @param [String] uri:: the URI of the document
# @param [Hash] options:: Options from
# options[:debug]:: Array to place debug messages
# options[:type]:: One of _rdfxml_, _html_, or _n3_
# options[:strict]:: Raise Error if true; otherwise continue with lax parsing
# @return [Graph]:: Returns the graph containing parsed triples
# @raise [Error]:: Raises RdfError if _strict_
def self.parse(stream, uri = nil, options = {}, &block) # :yields: triple
  # Build a one-off parser from the options, then hand the same options,
  # the document and any triple block straight to its instance-level #parse.
  new(options).parse(stream, uri, options, &block)
end
# Parse RDF document from a string or input stream to closure or graph.
#
# If the parser is called with a block, triples are passed to the block rather
# than added to the graph.
#
# Virtual Class, prototype for Parser subclass.
#
# @param [IO, String] stream:: the RDF IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
# @param [String] uri:: the URI of the document
# @param [Hash] options:: Options from
# options[:debug]:: Array to place debug messages
# options[:strict]:: Raise Error if true; otherwise continue with lax parsing
# @return [Graph]:: Returns the graph containing parsed triples
# @raise [Error]:: Raises RdfError if _strict_
def parse(stream, uri = nil, options = {}, &block) # :yields: triple
  if instance_of?(Parser)
    # Generic parser: pick a format-specific delegate and forward the call.
    #
    # Work on a shallow copy so the defaults filled in below (notably the
    # detected :type) never leak into the caller's hash. A reused hash would
    # otherwise carry a stale :type into the next document, and a frozen
    # hash would raise.
    options = options.dup

    # Settings given at construction time act as defaults for this call.
    options[:strict] ||= @strict if @strict
    options[:graph] ||= @graph if @graph
    options[:debug] ||= @debug if @debug

    # Intuit type, if not provided
    options[:type] ||= detect_format(stream, uri)

    # Create a delegate of a specific parser class. The delegate is created
    # once and reused by later calls on this instance.
    @delegate ||=
      case options[:type].to_s
      when "n3", "ntriples", "turtle" then N3Parser.new(options)
      when "rdfa", "html", "xhtml" then RdfaParser.new(options)
      when "xml", "rdf", "rdfxml" then RdfXmlParser.new(options)
      else
        # Unrecognised types fall back to RDF/XML rather than raising.
        RdfXmlParser.new(options)
      end

    @delegate.parse(stream, uri, options, &block)
  else
    # Common parser operations, run when a subclass calls up into this method.
    @uri = Addressable::URI.parse(uri.to_s).to_s unless uri.nil?
    @strict = options[:strict] if options.key?(:strict)
    @debug = options[:debug] if options.key?(:debug)
    # Reuse a graph supplied earlier, otherwise create one identified by the
    # document URI. The graph is this branch's return value.
    @graph ||= Graph.new(:identifier => @uri)
  end
end
def graph
  # Once a delegate exists, it owns the graph.
  return @delegate.graph if @delegate

  # Otherwise use the graph held here. With none set, a new Graph is
  # returned and not stored, so each such call yields a separate instance.
  @graph || Graph.new
end
def debug
  # Read the debug sink from the delegate when there is one; otherwise
  # return the value held on this instance.
  if @delegate
    @delegate.debug
  else
    @debug
  end
end
# Return N3 Parser instance
def self.n3_parser(options = {})
  # Factory shortcut: a new N3Parser built from the given options.
  N3Parser.new(options)
end
# Return RDF/XML Parser instance
def self.rdfxml_parser(options = {})
  # Factory shortcut: a new RdfXmlParser built from the given options.
  RdfXmlParser.new(options)
end
# Return Rdfa Parser instance
def self.rdfa_parser(options = {})
  # Factory shortcut: a new RdfaParser built from the given options.
  RdfaParser.new(options)
end
# Heuristically detect the document format, first from the uri's file extension and otherwise from the content
def detect_format(stream, uri = nil)
format = case uri.to_s
when /\.(rdf|xml)$/ then :rdfxml
when /\.(html|xhtml)$/ then :rdfa
when /\.(nt|n3|txt)$/ then :n3
else
# Got to look into the file to see
if stream.is_a?(IO) || stream.is_a?(StringIO)
stream.rewind
string = stream.read(1000)
stream.rewind
else
string = stream.to_s
end
case string
when /<\w+:RDF/ then :rdfxml
when / e
add_debug(node, "add_triple raised #{e.class}: #{e.message}")
puts e.backtrace if $DEBUG
raise if @strict
end
end
end