lib/rdf/trig/reader.rb in rdf-trig-1.0.0.beta3 vs lib/rdf/trig/reader.rb in rdf-trig-1.0.0
- old
+ new
@@ -8,38 +8,219 @@
# Leverages the Turtle reader
class Reader < RDF::Turtle::Reader
format Format
include RDF::TriG::Meta
+ # Terminals passed to lexer. Order matters!
+ terminal(:ANON, ANON) do |prod, token, input|
+ input[:resource] = self.bnode # bnode is called without a label; the token text itself is not used
+ end
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input|
+ input[:resource] = self.bnode(token.value[2..-1]) # skip the token's first two characters (presumably the "_:" marker) and pass the rest as the label
+ end
+ terminal(:IRIREF, IRIREF, :unescape => true) do |prod, token, input|
+ input[:resource] = process_iri(token.value[1..-2]) # drop the first and last character (presumably the enclosing angle brackets) before handing to process_iri
+ end
+ terminal(:DOUBLE, DOUBLE) do |prod, token, input|
+ # A Turtle double may have its '.' directly followed by the exponent marker
+ # (e.g. "1.e3"), so insert a '0' between the dot and the [eE] when that happens
+ value = token.value.sub(/\.([eE])/, '.0\1')
+ input[:resource] = literal(value, :datatype => RDF::XSD.double)
+ end
+ terminal(:DECIMAL, DECIMAL) do |prod, token, input|
+ # A Turtle decimal may start with a bare '.', so prepend a zero when the
+ # token's first character is a dot
+ value = token.value
+ value = "0#{token.value}" if token.value[0,1] == "." # NOTE(review): only index 0 is checked, so a signed form such as "-.5" is left unchanged
+ input[:resource] = literal(value, :datatype => RDF::XSD.decimal)
+ end
+ terminal(:INTEGER, INTEGER) do |prod, token, input|
+ input[:resource] = literal(token.value, :datatype => RDF::XSD.integer) # token text is passed through unmodified
+ end
+ # Spec confusion: spec says: "Literals, prefixed names and IRIs may also contain escape sequences"
+ terminal(:PNAME_LN, PNAME_LN, :unescape => true) do |prod, token, input|
+ prefix, suffix = token.value.split(":", 2) # limit 2: split at the first ':' only, any later ':' stays in suffix
+ input[:resource] = pname(prefix, suffix)
+ end
+ # Spec confusion: spec says: "Literals, prefixed names and IRIs may also contain escape sequences"
+ terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
+ prefix = token.value[0..-2] # everything but the last character (presumably the trailing ':')
+
+ # Two contexts: inside a prefix declaration the prefix is being defined, anywhere else it is being used
+ case prod
+ when :prefixID, :sparqlPrefix
+ input[:prefix] = prefix
+ else
+ input[:resource] = pname(prefix, '') # used as a name with an empty local part
+ end
+ end
+ terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
+ input[:string_value] = token.value[3..-4] # strip three characters from each end (presumably the triple-quote delimiters)
+ end
+ terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, :unescape => true) do |prod, token, input|
+ input[:string_value] = token.value[3..-4] # strip three characters from each end (presumably the triple-quote delimiters)
+ end
+ terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, :unescape => true) do |prod, token, input|
+ input[:string_value] = token.value[1..-2] # strip one character from each end (presumably the quote delimiters)
+ end
+ terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
+ input[:string_value] = token.value[1..-2] # strip one character from each end (presumably the quote delimiters)
+ end
+
# String terminals
terminal(nil, %r([\{\}\(\),.;\[\]a]|\^\^|@base|@prefix|true|false)) do |prod, token, input|
case token.value
when 'a' then input[:resource] = RDF.type # the 'a' keyword is mapped to RDF.type
when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value)
when '@base', '@prefix' then input[:lang] = token.value[1..-1] # stored under :lang with the first character ('@') removed
else input[:string] = token.value # every other matched token is recorded verbatim under :string
end
end
+ terminal(:LANGTAG, LANGTAG) do |prod, token, input|
+ input[:lang] = token.value[1..-1] # drop the first character (presumably the leading '@')
+ end
+
+ terminal(:SPARQL_PREFIX, SPARQL_PREFIX) do |prod, token, input|
+ input[:string_value] = token.value.downcase # lower-cased copy of the token text
+ end
+ terminal(:SPARQL_BASE, SPARQL_BASE) do |prod, token, input|
+ input[:string_value] = token.value.downcase # lower-cased copy of the token text
+ end
+
# Productions
# [3g] graph defines the basic creation of context
start_production(:graph) do |input, current, callback|
callback.call(:context, "graph", nil) # emit a :context event carrying nil when the production starts
end
production(:graph) do |input, current, callback|
callback.call(:context, "graph", nil) # and the same nil :context event again when it finishes
end
-
+
# [4g] graphIri
# Normally, just returns the IRIref, but if called from [3g], also
# sets the context for triples defined within that graph
production(:graphIri) do |input, current, callback|
# Pass the resource collected for this production to the callback as a :context event
debug("graphIri") {"Set graph context to #{current[:resource]}"}
callback.call(:context, "graphIri", current[:resource])
end
-
+
+
+ # Productions
+ # [4] prefixID defines a prefix mapping
+ production(:prefixID) do |input, current, callback|
+ prefix = current[:prefix] # presumably the value the PNAME_NS terminal stored under :prefix
+ iri = current[:resource]
+ debug("prefixID") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
+ prefix(prefix, iri) # method call (parentheses and arguments), not the local variable of the same name
+ end
+
+ # [5] base sets base_uri
+ production(:base) do |input, current, callback|
+ iri = current[:resource]
+ debug("base") {"Defined base as #{iri}"}
+ options[:base_uri] = iri # record the IRI under :base_uri in options
+ end
+
+ # [28s] sparqlPrefix ::= [Pp][Rr][Ee][Ff][Ii][Xx] PNAME_NS IRIREF
+ production(:sparqlPrefix) do |input, current, callback|
+ prefix = current[:prefix] # presumably the value the PNAME_NS terminal stored under :prefix
+ iri = current[:resource]
+ debug("sparqlPrefix") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
+ prefix(prefix, iri) # method call (parentheses and arguments), not the local variable of the same name
+ end
+
+ # [29s] sparqlBase ::= [Bb][Aa][Ss][Ee] IRIREF
+ production(:sparqlBase) do |input, current, callback|
+ iri = current[:resource] # NOTE(review): the debug call below is labelled "base", not "sparqlBase"
+ debug("base") {"Defined base as #{iri}"}
+ options[:base_uri] = iri # record the IRI under :base_uri in options
+ end
+
+ # [6] triples
+ start_production(:triples) do |input, current, callback|
+ # Flag this production as triples so that blankNodePropertyList
+ # sets :subject instead of :resource
+ current[:triples] = true
+ end
+ production(:triples) do |input, current, callback|
+ # The same flag is set again when the production completes
+ # (see the start handler for why it exists)
+ current[:triples] = true
+ end
+
+ # [9] verb ::= predicate | "a"
+ production(:verb) do |input, current, callback|
+ input[:predicate] = current[:resource] # store this production's resource under :predicate in input
+ end
+
+ # [10] subject ::= IRIref | BlankNode | collection
+ start_production(:subject) do |input, current, callback|
+ current[:triples] = nil # clear the :triples flag in this production's data
+ end
+
+ production(:subject) do |input, current, callback|
+ input[:subject] = current[:resource] # store this production's resource under :subject in input
+ end
+
+ # [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal
+ production(:object) do |input, current, callback|
+ if input[:object_list]
+ # Inside a collection: only accumulate the object, no statement is emitted here
+ input[:object_list] << current[:resource]
+ else
+ debug("object") {"current: #{current.inspect}"}
+ callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource]) # subject and predicate come from input, the object from current
+ end
+ end
+
+ # [14] blankNodePropertyList ::= "[" predicateObjectList "]"
+ start_production(:blankNodePropertyList) do |input, current, callback|
+ current[:subject] = self.bnode # bnode called without a label; stored as :subject in this production's data
+ end
+
+ production(:blankNodePropertyList) do |input, current, callback|
+ if input[:triples] # flag set by the triples handlers
+ input[:subject] = current[:subject]
+ else
+ input[:resource] = current[:subject] # otherwise expose the same node under :resource
+ end
+ end
+
+ # [15] collection ::= "(" object* ")"
+ start_production(:collection) do |input, current, callback|
+ # Tells the object production to collect objects and not generate statements
+ current[:object_list] = []
+ end
+
+ production(:collection) do |input, current, callback|
+ # Create an RDF list from the collected objects
+ bnode = self.bnode
+ objects = current[:object_list]
+ list = RDF::List.new(bnode, nil, objects)
+ list.each_statement do |statement|
+ # Spec confusion: the referenced section "Collection" is missing from the spec.
+ # Anecdotal evidence indicates that some expect each node to be of type rdf:List,
+ # but existing Notation3 and Turtle tests (http://www.w3.org/2001/sw/DataAccess/df1/tests/manifest.ttl) do not.
+ next if statement.predicate == RDF.type && statement.object == RDF.List # so type statements with object RDF.List are not emitted
+ callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
+ end
+ bnode = RDF.nil if list.empty? # an empty list yields RDF.nil instead of the bnode
+
+ # Return bnode (or RDF.nil) as resource
+ input[:resource] = bnode
+ end
+
+ # [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )?
+ production(:RDFLiteral) do |input, current, callback|
+ opts = {}
+ opts[:datatype] = current[:resource] if current[:resource] # only set when a resource was collected
+ opts[:language] = current[:lang] if current[:lang] # only set when a language value was collected
+ input[:resource] = literal(current[:string_value], opts)
+ end
+
##
# Iterates the given block for each RDF statement in the input.
#
# @yield [statement]
# @yieldparam [RDF::Statement] statement
@@ -47,28 +228,40 @@
def each_statement(&block)
@callback = block
parse(@input, START.to_sym, @options.merge(:branch => BRANCH,
:first => FIRST,
- :follow => FOLLOW)
+ :follow => FOLLOW,
+ :reset_on_start => true)
) do |context, *data|
- loc = data.shift
case context
when :context
- @context = data[0]
+ @context = data[1] # presumably data[0] is the label string the handlers pass and data[1] the context value
when :statement
data << @context if @context # append the current context only when one is set
debug("each_statement") {"data: #{data.inspect}, context: #{@context.inspect}"}
+ loc = data.shift # remove the leading element; the remaining elements build the statement
add_statement(loc, RDF::Statement.from(data))
when :trace
- debug(loc, *data)
+ level, lineno, depth, *args = data
+ message = "#{args.join(': ')}"
+ d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth # one space per depth level, capped at 100 spaces followed by a '+'
+ str = "[#{lineno}](#{level})#{d_str}#{message}"
+ case @options[:debug]
+ when Array
+ @options[:debug] << str # append the formatted line to the array held in @options[:debug]
+ when TrueClass
+ $stderr.puts str
+ when Integer
+ $stderr.puts(str) if level <= @options[:debug] # the integer acts as the highest level that is printed
+ end
end
end
rescue EBNF::LL1::Parser::Error => e
debug("Parsing completed with errors:\n\t#{e.message}")
raise RDF::ReaderError, e.message if validate? # parser errors are re-raised as RDF::ReaderError only when validate? is true
end
-
+
##
# Iterates the given block for each RDF quad in the input.
#
# @yield [subject, predicate, object, context]
# @yieldparam [RDF::Resource] subject