lib/rdf/turtle/reader.rb in rdf-turtle-1.0.0 vs lib/rdf/turtle/reader.rb in rdf-turtle-1.0.2
- old
+ new
@@ -1,205 +1,210 @@
require 'rdf/turtle/meta'
-require 'rdf/ll1/parser'
+require 'ebnf/ll1/parser'
module RDF::Turtle
##
# A parser for the Turtle 2
class Reader < RDF::Reader
format Format
include RDF::Turtle::Meta
- include RDF::LL1::Parser
+ include EBNF::LL1::Parser
include RDF::Turtle::Terminals
# Terminals passed to lexer. Order matters!
- terminal(:ANON, ANON) do |reader, prod, token, input|
- input[:resource] = reader.bnode
+ terminal(:ANON, ANON) do |prod, token, input|
+ input[:resource] = self.bnode
end
- terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |reader, prod, token, input|
- input[:resource] = reader.bnode(token.value[2..-1])
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input|
+ input[:resource] = self.bnode(token.value[2..-1])
end
- terminal(:IRIREF, IRIREF, :unescape => true) do |reader, prod, token, input|
+ terminal(:IRIREF, IRIREF, :unescape => true) do |prod, token, input|
begin
- input[:resource] = reader.process_iri(token.value[1..-2])
+ input[:resource] = process_iri(token.value[1..-2])
rescue ArgumentError => e
raise RDF::ReaderError, e.message
end
end
- terminal(:DOUBLE, DOUBLE) do |reader, prod, token, input|
+ terminal(:DOUBLE, DOUBLE) do |prod, token, input|
# Note that a Turtle Double may begin with a '.[eE]', so tack on a leading
# zero if necessary
value = token.value.sub(/\.([eE])/, '.0\1')
- input[:resource] = reader.literal(value, :datatype => RDF::XSD.double)
+ input[:resource] = literal(value, :datatype => RDF::XSD.double)
end
- terminal(:DECIMAL, DECIMAL) do |reader, prod, token, input|
+ terminal(:DECIMAL, DECIMAL) do |prod, token, input|
# Note that a Turtle Decimal may begin with a '.', so tack on a leading
# zero if necessary
value = token.value
value = "0#{token.value}" if token.value[0,1] == "."
- input[:resource] = reader.literal(value, :datatype => RDF::XSD.decimal)
+ input[:resource] = literal(value, :datatype => RDF::XSD.decimal)
end
- terminal(:INTEGER, INTEGER) do |reader, prod, token, input|
- input[:resource] = reader.literal(token.value, :datatype => RDF::XSD.integer)
+ terminal(:INTEGER, INTEGER) do |prod, token, input|
+ input[:resource] = literal(token.value, :datatype => RDF::XSD.integer)
end
# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
- terminal(:PNAME_LN, PNAME_LN, :unescape => true) do |reader, prod, token, input|
+ terminal(:PNAME_LN, PNAME_LN, :unescape => true) do |prod, token, input|
prefix, suffix = token.value.split(":", 2)
- input[:resource] = reader.pname(prefix, suffix)
+ input[:resource] = pname(prefix, suffix)
end
# Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
- terminal(:PNAME_NS, PNAME_NS) do |reader, prod, token, input|
+ terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
prefix = token.value[0..-2]
# Two contexts, one when prefix is being defined, the other when being used
case prod
when :prefixID, :sparqlPrefix
input[:prefix] = prefix
else
- input[:resource] = reader.pname(prefix, '')
+ input[:resource] = pname(prefix, '')
end
end
- terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, :unescape => true) do |reader, prod, token, input|
+ terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
input[:string_value] = token.value[3..-4]
end
- terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, :unescape => true) do |reader, prod, token, input|
+ terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, :unescape => true) do |prod, token, input|
input[:string_value] = token.value[3..-4]
end
- terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, :unescape => true) do |reader, prod, token, input|
+ terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, :unescape => true) do |prod, token, input|
input[:string_value] = token.value[1..-2]
end
- terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, :unescape => true) do |reader, prod, token, input|
+ terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
input[:string_value] = token.value[1..-2]
end
# String terminals
- terminal(nil, %r([\(\),.;\[\]a]|\^\^|@base|@prefix|true|false)) do |reader, prod, token, input|
+ terminal(nil, %r([\(\),.;\[\]a]|\^\^|@base|@prefix|true|false)) do |prod, token, input|
case token.value
when 'a' then input[:resource] = RDF.type
when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value)
when '@base', '@prefix' then input[:lang] = token.value[1..-1]
else input[:string] = token.value
end
end
- terminal(:LANGTAG, LANGTAG) do |reader, prod, token, input|
+ terminal(:LANGTAG, LANGTAG) do |prod, token, input|
input[:lang] = token.value[1..-1]
end
- terminal(:SPARQL_PREFIX, SPARQL_PREFIX) do |reader, prod, token, input|
+ terminal(:SPARQL_PREFIX, SPARQL_PREFIX) do |prod, token, input|
input[:string_value] = token.value.downcase
end
- terminal(:SPARQL_BASE, SPARQL_BASE) do |reader, prod, token, input|
+ terminal(:SPARQL_BASE, SPARQL_BASE) do |prod, token, input|
input[:string_value] = token.value.downcase
end
# Productions
# [4] prefixID defines a prefix mapping
- production(:prefixID) do |reader, phase, input, current, callback|
- next unless phase == :finish
+ production(:prefixID) do |input, current, callback|
prefix = current[:prefix]
iri = current[:resource]
callback.call(:trace, "prefixID", lambda {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"})
- reader.prefix(prefix, iri)
+ prefix(prefix, iri)
end
# [5] base set base_uri
- production(:base) do |reader, phase, input, current, callback|
- next unless phase == :finish
+ production(:base) do |input, current, callback|
iri = current[:resource]
callback.call(:trace, "base", lambda {"Defined base as #{iri}"})
- reader.options[:base_uri] = iri
+ options[:base_uri] = iri
end
# [28s] sparqlPrefix ::= [Pp][Rr][Ee][Ff][Ii][Xx] PNAME_NS IRIREF
- production(:sparqlPrefix) do |reader, phase, input, current, callback|
- next unless phase == :finish
+ production(:sparqlPrefix) do |input, current, callback|
prefix = current[:prefix]
iri = current[:resource]
callback.call(:trace, "sparqlPrefix", lambda {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"})
- reader.prefix(prefix, iri)
+ prefix(prefix, iri)
end
# [29s] sparqlBase ::= [Bb][Aa][Ss][Ee] IRIREF
- production(:sparqlBase) do |reader, phase, input, current, callback|
- next unless phase == :finish
+ production(:sparqlBase) do |input, current, callback|
iri = current[:resource]
callback.call(:trace, ":sparqlBase", lambda {"Defined base as #{iri}"})
- reader.options[:base_uri] = iri
+ options[:base_uri] = iri
end
# [6] triples
- production(:triples) do |reader, phase, input, current, callback|
+ start_production(:triples) do |input, current, callback|
# Note production as triples for blankNodePropertyList
# to set :subject instead of :resource
current[:triples] = true
end
+ production(:triples) do |input, current, callback|
+ # Note production as triples for blankNodePropertyList
+ # to set :subject instead of :resource
+ current[:triples] = true
+ end
# [9] verb ::= predicate | "a"
- production(:verb) do |reader, phase, input, current, callback|
- input[:predicate] = current[:resource] if phase == :finish
+ production(:verb) do |input, current, callback|
+ input[:predicate] = current[:resource]
end
# [10] subject ::= IRIref | BlankNode | collection
- production(:subject) do |reader, phase, input, current, callback|
- current[:triples] = nil if phase == :start
- input[:subject] = current[:resource] if phase == :finish
+ start_production(:subject) do |input, current, callback|
+ current[:triples] = nil
end
+ production(:subject) do |input, current, callback|
+ input[:subject] = current[:resource]
+ end
+
# [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal
- production(:object) do |reader, phase, input, current, callback|
- next unless phase == :finish
+ production(:object) do |input, current, callback|
if input[:object_list]
# Part of an rdf:List collection
input[:object_list] << current[:resource]
else
callback.call(:trace, "object", lambda {"current: #{current.inspect}"})
callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource])
end
end
# [14] blankNodePropertyList ::= "[" predicateObjectList "]"
- production(:blankNodePropertyList) do |reader, phase, input, current, callback|
- if phase == :start
- current[:subject] = reader.bnode
- elsif input[:triples]
+ start_production(:blankNodePropertyList) do |input, current, callback|
+ current[:subject] = self.bnode
+ end
+
+ production(:blankNodePropertyList) do |input, current, callback|
+ if input[:triples]
input[:subject] = current[:subject]
else
input[:resource] = current[:subject]
end
end
# [15] collection ::= "(" object* ")"
- production(:collection) do |reader, phase, input, current, callback|
- if phase == :start
- current[:object_list] = [] # Tells the object production to collect and not generate statements
- else
- # Create an RDF list
- bnode = reader.bnode
- objects = current[:object_list]
- list = RDF::List.new(bnode, nil, objects)
- list.each_statement do |statement|
- # Spec Confusion, referenced section "Collection" is missing from the spec.
- # Anicdodal evidence indicates that some expect each node to be of type rdf:list,
- # but existing Notation3 and Turtle tests (http://www.w3.org/2001/sw/DataAccess/df1/tests/manifest.ttl) do not.
- next if statement.predicate == RDF.type && statement.object == RDF.List
- callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
- end
- bnode = RDF.nil if list.empty?
-
- # Return bnode as resource
- input[:resource] = bnode
+ start_production(:collection) do |input, current, callback|
+ # Tells the object production to collect and not generate statements
+ current[:object_list] = []
+ end
+
+ production(:collection) do |input, current, callback|
+ # Create an RDF list
+ bnode = self.bnode
+ objects = current[:object_list]
+ list = RDF::List.new(bnode, nil, objects)
+ list.each_statement do |statement|
+ # Spec Confusion, referenced section "Collection" is missing from the spec.
+ # Anicdodal evidence indicates that some expect each node to be of type rdf:list,
+ # but existing Notation3 and Turtle tests (http://www.w3.org/2001/sw/DataAccess/df1/tests/manifest.ttl) do not.
+ next if statement.predicate == RDF.type && statement.object == RDF.List
+ callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
end
+ bnode = RDF.nil if list.empty?
+
+ # Return bnode as resource
+ input[:resource] = bnode
end
# [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )?
- production(:RDFLiteral) do |reader, phase, input, current, callback|
- next unless phase == :finish
+ production(:RDFLiteral) do |input, current, callback|
opts = {}
opts[:datatype] = current[:resource] if current[:resource]
opts[:language] = current[:lang] if current[:lang]
- input[:resource] = reader.literal(current[:string_value], opts)
+ input[:resource] = literal(current[:string_value], opts)
end
##
# Initializes a new reader instance.
#
@@ -227,11 +232,15 @@
# @option options [Boolean] :debug
# Detailed debug output
# @return [RDF::Turtle::Reader]
def initialize(input = nil, options = {}, &block)
super do
- @options = {:anon_base => "b0", :validate => false}.merge(options)
+ @options = {
+ :anon_base => "b0",
+ :validate => false,
+ :debug => RDF::Turtle.debug?,
+ }.merge(options)
@options = {:prefixes => {nil => ""}}.merge(@options) unless @options[:validate]
debug("base IRI") {base_uri.inspect}
debug("validate") {validate?.inspect}
@@ -270,11 +279,11 @@
add_statement(loc, RDF::Statement.from(data))
when :trace
debug(loc, *(data.dup << {:level => 0}))
end
end
- rescue ArgumentError, RDF::LL1::Parser::Error => e
+ rescue ArgumentError, EBNF::LL1::Parser::Error => e
progress("Parsing completed with errors:\n\t#{e.message}")
raise RDF::ReaderError, e.message if validate?
end
##
@@ -296,12 +305,12 @@
# @param [Nokogiri::XML::Node, any] node XML Node or string for showing context
# @param [RDF::Statement] statement the subject of the statement
# @return [RDF::Statement] Added statement
# @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
def add_statement(node, statement)
- error(node, "Statement is invalid: #{statement.inspect}") unless statement.valid?
- progress(node) {"generate statement: #{statement}"}
+ error(node, "Statement is invalid: #{statement.inspect.inspect}") unless statement.valid?
+ progress(node) {"generate statement: #{statement.to_ntriples}"}
@callback.call(statement)
end
def process_iri(iri)
iri(base_uri, iri)
@@ -374,21 +383,25 @@
# @overload debug(message)
# @param [String] message ("")
#
# @yieldreturn [String] added to message
def debug(*args)
- return unless @options[:debug] || RDF::Turtle.debug?
+ return unless @options[:debug]
options = args.last.is_a?(Hash) ? args.pop : {}
debug_level = options.fetch(:level, 1)
return unless debug_level <= DEBUG_LEVEL
depth = options[:depth] || self.depth
message = args.pop
message = message.call if message.is_a?(Proc)
args << message if message
args << yield if block_given?
message = "#{args.join(': ')}"
str = "[#{@lineno}]#{' ' * depth}#{message}"
- @options[:debug] << str if @options[:debug].is_a?(Array)
- $stderr.puts(str) if RDF::Turtle.debug?
+ case @options[:debug]
+ when Array
+ @options[:debug] << str
+ when TrueClass
+ $stderr.puts str
+ end
end
end # class Reader
end # module RDF::Turtle