lib/rdf/rdfa/reader.rb in rdf-rdfa-0.0.2 vs lib/rdf/rdfa/reader.rb in rdf-rdfa-0.0.3
- old
+ new
@@ -1,18 +1,15 @@
require 'nokogiri' # FIXME: Implement using different modules as in RDF::TriX
-require 'rdf'
-require 'rdf/rdfa/vocab'
module RDF::RDFa
##
# An RDFa parser in Ruby
#
# @author [Gregg Kellogg](http://kellogg-assoc.com/)
class Reader < RDF::Reader
format Format
- autoload :VERSION, 'rdf/rdfa/version'
-
+
NC_REGEXP = Regexp.new(
%{^
(?!\\\\u0301) # ́ is a non-spacing acute accent.
# It is legal within an XML Name, but not as the first character.
( [a-zA-Z_]
@@ -24,16 +21,10 @@
$},
Regexp::EXTENDED)
XML_LITERAL = RDF['XMLLiteral']
- attr_reader :debug
-
- ##
- # @return [RDF::Graph]
- attr_reader :graph
-
# Host language, One of:
# :xhtml_rdfa_1_0
# :xhtml_rdfa_1_1
attr_reader :host_language
@@ -87,13 +78,13 @@
@default_voabulary = host_defaults.fetch(:voabulary, nil)
end
# Copy this Evaluation Context
def initialize_copy(from)
- # clone the evaluation context correctly
- @uri_mappings = from.uri_mappings.clone
- @incomplete_triples = from.incomplete_triples.clone
+ # clone the evaluation context correctly
+ @uri_mappings = from.uri_mappings.clone
+ @incomplete_triples = from.incomplete_triples.clone
end
def inspect
v = %w(base parent_subject parent_object language default_vocabulary).map {|a| "#{a}='#{self.send(a).nil? ? '<nil>' : self.send(a)}'"}
v << "uri_mappings[#{uri_mappings.keys.length}]"
@@ -101,81 +92,38 @@
v << "term_mappings[#{term_mappings.keys.length}]"
v.join(",")
end
end
- # Parse XHTML+RDFa document from a string or input stream to closure or graph.
- #
- # If the parser is called with a block, triples are passed to the block rather
- # than added to the graph.
- #
- # Optionally, the stream may be a Nokogiri::HTML::Document or Nokogiri::XML::Document
- # With a block, yeilds each statement with URI, BNode or Literal elements
- #
- # @param [IO] stream:: the HTML+RDFa IO stream, string, Nokogiri::HTML::Document or Nokogiri::XML::Document
- # @param [String] uri:: the URI of the document
- # @param [Hash] options:: Parser options, one of
- # <em>options[:debug]</em>:: Array to place debug messages
- # <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
- # @return [Graph]:: Returns the graph containing parsed triples
- # @raise [Error]:: Raises RdfError if _strict_
-
##
# Initializes the RDFa reader instance.
#
- # @param [IO, File, String] input
- # @param [Hash{Symbol => Object}] options
+ # @param [IO, File, String]:: input
+ # @param [Hash{Symbol => Object}]:: options
+ # <em>options[:debug]</em>:: Array to place debug messages
+ # <em>options[:strict]</em>:: Raise Error if true, continue with lax parsing, otherwise
+ # <em>options[:base_uri]</em>:: Base URI to use for relative URIs.
# @yield [reader]
# @yieldparam [Reader] reader
+ # @raise [RDF::ReaderError]:: Raises RDF::ReaderError if _strict_
def initialize(input = $stdin, options = {}, &block)
- super
-
- @graph = RDF::Graph.new
+ super do
@debug = options[:debug]
@strict = options[:strict]
- @base_uri = options[:base_uri]
- @base_uri = RDF::URI.parse(@base_uri) if @base_uri.is_a?(String)
- @named_bnodes = {}
+ @base_uri = RDF::URI.new(options[:base_uri])
@@vocabulary_cache ||= {}
@doc = case input
when Nokogiri::HTML::Document then input
when Nokogiri::XML::Document then input
else Nokogiri::XML.parse(input, @base_uri.to_s)
end
- raise ParserException, "Empty document" if @doc.nil? && @strict
- @callback = block
-
- # Determine host language
- # XXX - right now only XHTML defined
- @host_language = case @doc.root.attributes["version"].to_s
- when /XHTML+RDFa/ then :xhtml
- end
-
- # If none found, assume xhtml
- @host_language ||= :xhtml
-
- @host_defaults = {}
- @host_defaults = case @host_language
- when :xhtml
- {
- :vocabulary => RDF::XHV["uri"],
- :prefix => "xhv",
- :term_mappings => %w(
- alternate appendix bookmark cite chapter contents copyright first glossary help icon index
- last license meta next p3pv1 prev role section stylesheet subsection start top up
- ).inject({}) { |hash, term| hash[term] = RDF::XHV[term]; hash },
- }
- else
- {}
- end
-
- # parse
- parse_whole_document(@doc, @base_uri)
-
+ raise RDF::ReaderError, "Synax errors:\n#{@doc.errors}" if !@doc.errors.empty? && @strict
+ raise RDF::ReaderError, "Empty document" if (@doc.nil? || @doc.root.nil?) && @strict
block.call(self) if block_given?
+ end
end
# XXX Invoke the parser, and allow add_triple to make the callback?
##
@@ -183,11 +131,38 @@
#
# @yield [statement]
# @yieldparam [RDF::Statement] statement
# @return [void]
def each_statement(&block)
- @graph.each_statement(&block)
+ @callback = block
+
+ # Determine host language
+ # XXX - right now only XHTML defined
+ @host_language = case @doc.root.attributes["version"].to_s
+ when /XHTML+RDFa/ then :xhtml
+ end
+
+ # If none found, assume xhtml
+ @host_language ||= :xhtml
+
+ @host_defaults = {}
+ @host_defaults = case @host_language
+ when :xhtml
+ {
+ :vocabulary => RDF::XHV.to_s,
+ :prefix => "xhv",
+ :term_mappings => %w(
+ alternate appendix bookmark cite chapter contents copyright first glossary help icon index
+ last license meta next p3pv1 prev role section stylesheet subsection start top up
+ ).inject({}) { |hash, term| hash[term] = RDF::XHV[term]; hash },
+ }
+ else
+ {}
+ end
+
+ # parse
+ parse_whole_document(@doc, @base_uri)
end
##
# Iterates the given block for each RDF triple in the input.
#
@@ -195,11 +170,13 @@
# @yieldparam [RDF::Resource] subject
# @yieldparam [RDF::URI] predicate
# @yieldparam [RDF::Value] object
# @return [void]
def each_triple(&block)
- @graph.each_triple(&block)
+ each_statement do |statement|
+ block.call(*statement.to_triple)
+ end
end
private
# Figure out the document path, if it is a Nokogiri::XML::Element or Attribute
@@ -225,21 +202,15 @@
# @param [Nokogiri::XML::Node, any] node:: XML Node or string for showing context
# @param [URI, BNode] subject:: the subject of the statement
# @param [URI] predicate:: the predicate of the statement
# @param [URI, BNode, Literal] object:: the object of the statement
# @return [Statement]:: Added statement
- # @raise [Exception]:: Checks parameter types and raises if they are incorrect if parsing mode is _strict_.
+ # @raise [ReaderError]:: Checks parameter types and raises if they are incorrect if parsing mode is _strict_.
def add_triple(node, subject, predicate, object)
statement = RDF::Statement.new(subject, predicate, object)
add_debug(node, "statement: #{statement}")
- @graph << statement
- statement
- # FIXME: rescue RdfException => e
- rescue Exception => e
- add_debug(node, "add_triple raised #{e.class}: #{e.message}")
- puts e.backtrace if $DEBUG
- raise if @strict
+ @callback.call(statement)
end
# Parsing an RDFa document (this is *not* the recursive method)
def parse_whole_document(doc, base)
@@ -281,61 +252,44 @@
:term_mappings => {}
}
um = @@vocabulary_cache[profile][:uri_mappings]
tm = @@vocabulary_cache[profile][:term_mappings]
add_debug(element, "extract_mappings: profile open <#{profile}>")
- require 'patron' unless defined?(Patron)
- sess = Patron::Session.new
- sess.timeout = 10
- resp = sess.get(profile)
- raise RuntimeError, "HTTP returned status #{resp.status} when reading #{profile}" if resp.status >= 400
-
- # Parse profile, and extract mappings from graph
+
old_debug, old_verbose, = $DEBUG, $verbose
$DEBUG, $verbose = false, false
- p_graph = Parser.parse(resp.body, profile)
- ttl = p_graph.serialize(:format => :ttl) if @debug || $DEBUG
+ # FIXME: format shouldn't need to be specified here
+ p_graph = RDF::Graph.load(profile, :base_uri => profile, :format => :rdfa)
$DEBUG, $verbose = old_debug, old_verbose
- add_debug(element, ttl) if ttl
- p_graph.subjects.each do |subject|
- props = p_graph.properties(subject)
- #puts props.inspect
-
- # If one of the objects is not a Literal or if there are additional rdfa:uri or rdfa:term
- # predicates sharing the same subject, no mapping is created.
- uri = props[RDF::RDFA["uri"].to_s]
- term = props[RDF::RDFA["term"].to_s]
- prefix = props[RDF::RDFA["prefix"].to_s]
+ p_graph.each_subject do |subject|
+ # If one of the objects is not a Literal no mapping is created.
+ uri = p_graph.first_object([subject, RDF::RDFA['uri'], nil])
+ term = p_graph.first_object([subject, RDF::RDFA['term'], nil])
+ prefix = p_graph.first_object([subject, RDF::RDFA['prefix'], nil])
add_debug(element, "extract_mappings: uri=#{uri.inspect}, term=#{term.inspect}, prefix=#{prefix.inspect}")
next if !uri || (!term && !prefix)
- raise ParserException, "multi-valued rdf:uri" if uri.length != 1
- raise ParserException, "multi-valued rdf:term." if term && term.length != 1
- raise ParserException, "multi-valued rdf:prefix" if prefix && prefix.length != 1
+ raise RDF::ReaderError, "rdf:uri must be a Literal" unless uri.is_a?(RDF::Literal)
+ raise RDF::ReaderError, "rdf:term must be a Literal" unless term.nil? || term.is_a?(RDF::Literal)
+ raise RDF::ReaderError, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(RDF::Literal)
- uri = uri.first
- term = term.first if term
- prefix = prefix.first if prefix
- raise ParserException, "rdf:uri must be a Literal" unless uri.is_a?(Literal)
- raise ParserException, "rdf:term must be a Literal" unless term.nil? || term.is_a?(Literal)
- raise ParserException, "rdf:prefix must be a Literal" unless prefix.nil? || prefix.is_a?(Literal)
-
# For every extracted triple that is the common subject of an rdfa:prefix and an rdfa:uri
# predicate, create a mapping from the object literal of the rdfa:prefix predicate to the
# object literal of the rdfa:uri predicate. Add or update this mapping in the local list of
# URI mappings after transforming the 'prefix' component to lower-case.
# For every extracted
- um[prefix.to_s.downcase] = RDF::URI.new(uri) if prefix
+ um[prefix.value.downcase] = uri.value if prefix
# triple that is the common subject of an rdfa:term and an rdfa:uri predicate, create a
# mapping from the object literal of the rdfa:term predicate to the object literal of the
# rdfa:uri predicate. Add or update this mapping in the local term mappings.
- tm[term.to_s] = RDF::URI.new(uri) if term
+ tm[term.value] = RDF::URI.new(uri.value) if term
end
- rescue ParserException
- add_debug(element, "extract_mappings: profile subject #{subject.to_s}: #{e.message}")
- raise if @strict
+ # FIXME: subject isn't in scope here
+ #rescue RDF::ReaderError
+ # add_debug(element, "extract_mappings: profile subject #{subject.to_s}: #{e.message}")
+ # raise if @strict
rescue RuntimeError => e
add_debug(element, "extract_mappings: profile: #{e.message}")
raise if @strict
end
end
@@ -351,13 +305,12 @@
# and the URI is not processed in any way; in particular if it is a relative path it is
# not resolved against the current base.
element.namespaces.each do |attr_name, attr_value|
begin
abbr, prefix = attr_name.split(":")
- uri_mappings[prefix.to_s.downcase] = RDF::URI.new(attr_value) if abbr.downcase == "xmlns" && prefix
- # FIXME: rescue RdfException => e
- rescue Exception => e
+ uri_mappings[prefix.to_s.downcase] = attr_value.to_s if abbr.downcase == "xmlns" && prefix
+ rescue ReaderError => e
add_debug(element, "extract_mappings raised #{e.class}: #{e.message}")
raise if @strict
end
end
@@ -370,22 +323,22 @@
prefix, uri = mappings.shift.downcase, mappings.shift
#puts "uri_mappings prefix #{prefix} <#{uri}>"
next unless prefix.match(/:$/)
prefix.chop!
- uri_mappings[prefix] = RDF::URI.new(uri)
+ uri_mappings[prefix] = uri
end
- add_debug(element, "uri_mappings: #{uri_mappings.values.map{|ns|ns.to_s}.join(", ")}")
- add_debug(element, "term_mappings: #{term_mappings.keys.join(", ")}")
+ add_debug(element, "uri_mappings: #{uri_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
+ add_debug(element, "term_mappings: #{term_mappings.map{|k,v|"#{k}='#{v}'"}.join(", ")}")
end
# The recursive helper function
def traverse(element, evaluation_context)
if element.nil?
add_debug(element, "traverse nil element")
- raise ParserException, "Can't parse nil element" if @strict
+ raise RDF::ReaderError, "Can't parse nil element" if @strict
return nil
end
add_debug(element, "traverse, ec: #{evaluation_context.inspect}")
@@ -426,11 +379,11 @@
unless vocab.nil?
default_vocabulary = if vocab.to_s.empty?
# Set default_vocabulary to host language default
@host_defaults.fetch(:voabulary, nil)
else
- RDF::URI.new(vocab)
+ vocab.to_s
end
add_debug(element, "[Step 2] traverse, default_vocaulary: #{default_vocabulary.inspect}")
end
# Local term mappings [7.5 Steps 3 & 4]
@@ -536,11 +489,11 @@
if new_subject and typeof
# Typeof is TERMorCURIEorURIs
types = process_uris(element, typeof, evaluation_context, :uri_mappings => uri_mappings, :term_mappings => term_mappings, :vocab => default_vocabulary)
add_debug(element, "typeof: #{typeof}")
types.each do |one_type|
- add_triple(element, new_subject, RDF_TYPE, one_type)
+ add_triple(element, new_subject, RDF.type, one_type)
end
end
# Generate triples with given object [Step 9]
if current_object_resource
@@ -679,11 +632,11 @@
# If it is a valid CURIE, the resulting URI is used; otherwise, the value will be processed as a URI.
uri = curie_to_resource_or_bnode(element, value, options[:uri_mappings], evaluation_context.parent_subject)
if uri
add_debug(element, "process_uri: #{value} => CURIE => <#{uri}>")
else
- #FIXME: uri = URIRef.new(value, evaluation_context.base)
+ ## FIXME: throw exception if there is no base uri set?
uri = RDF::URI.new(evaluation_context.base + value)
add_debug(element, "process_uri: #{value} => URI => <#{uri}>")
end
uri
end
@@ -701,11 +654,11 @@
# If the term is in the local term mappings, use the associated URI.
# XXX Spec Confusion: are terms always downcased? Or only for XHTML Vocab?
options[:term_mappings][value.to_s.downcase]
when options[:vocab]
# Otherwise, if there is a local default vocabulary the URI is obtained by concatenating that value and the term.
- options[:vocab].join(value)
+ RDF::URI.new(options[:vocab] + value)
else
# Finally, if there is no local default vocabulary, the term has no associated URI and must be ignored.
nil
end
end
@@ -715,27 +668,25 @@
# URI mappings for CURIEs default to XHV, rather than the default doc namespace
prefix, reference = curie.to_s.split(":")
# consider the bnode situation
if prefix == "_"
- # we force a non-nil name, otherwise it generates a new name
- # FIXME: BNode.new(reference || "", @named_bnodes)
- RDF::Node.new(reference || nil)
+ RDF::Node.new(reference)
elsif curie.to_s.match(/^:/)
# Default prefix
if uri_mappings[""]
- uri_mappings[""].join(reference)
+ RDF::URI.new(uri_mappings[""] + reference)
elsif @host_defaults[:prefix]
- @host_defaults[:prefix].join(reference)
+ RDF::URI.new(@host_defaults[:prefix] + reference)
end
elsif !curie.to_s.match(/:/)
# No prefix, undefined (in this context, it is evaluated as a term elsewhere)
nil
else
# Prefixes always downcased
ns = uri_mappings[prefix.to_s.downcase]
if ns
- ns.join(reference)
+ RDF::URI.new(ns +reference)
else
add_debug(element, "curie_to_resource_or_bnode No namespace mapping for #{prefix.downcase}")
nil
end
end
\ No newline at end of file