lib/rdf/turtle/reader.rb in rdf-turtle-1.1.7 vs lib/rdf/turtle/reader.rb in rdf-turtle-1.1.8
- old
+ new
@@ -1,206 +1,47 @@
-require 'rdf/turtle/meta'
-require 'ebnf/ll1/parser'
+# coding: utf-8
+require 'ebnf/ll1/lexer'
module RDF::Turtle
##
# A parser for the Turtle 2
class Reader < RDF::Reader
format Format
- include RDF::Turtle::Meta
include EBNF::LL1::Parser
include RDF::Turtle::Terminals
# Terminals passed to lexer. Order matters!
- terminal(:ANON, ANON) do |prod, token, input|
- input[:resource] = self.bnode
- end
- terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input|
- input[:resource] = self.bnode(token.value[2..-1])
- end
- terminal(:IRIREF, IRIREF, unescape: true) do |prod, token, input|
- input[:resource] = process_iri(token.value[1..-2])
- end
- terminal(:DOUBLE, DOUBLE) do |prod, token, input|
- # Note that a Turtle Double may begin with a '.[eE]', so tack on a leading
- # zero if necessary
- value = token.value.sub(/\.([eE])/, '.0\1')
- input[:resource] = literal(value, datatype: RDF::XSD.double)
- end
- terminal(:DECIMAL, DECIMAL) do |prod, token, input|
- # Note that a Turtle Decimal may begin with a '.', so tack on a leading
- # zero if necessary
- value = token.value
- value = "0#{token.value}" if token.value[0,1] == "."
- input[:resource] = literal(value, datatype: RDF::XSD.decimal)
- end
- terminal(:INTEGER, INTEGER) do |prod, token, input|
- input[:resource] = literal(token.value, datatype: RDF::XSD.integer)
- end
- # Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
- terminal(:PNAME_LN, PNAME_LN, unescape: true) do |prod, token, input|
- prefix, suffix = token.value.split(":", 2)
- input[:resource] = pname(prefix, suffix)
- end
- # Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences"
- terminal(:PNAME_NS, PNAME_NS) do |prod, token, input|
- prefix = token.value[0..-2]
-
- # Two contexts, one when prefix is being defined, the other when being used
- case prod
- when :prefixID, :sparqlPrefix
- input[:prefix] = prefix
- else
- input[:resource] = pname(prefix, '')
- end
- end
- terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true) do |prod, token, input|
- input[:string_value] = token.value[3..-4]
- end
- terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true) do |prod, token, input|
- input[:string_value] = token.value[3..-4]
- end
- terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true) do |prod, token, input|
- input[:string_value] = token.value[1..-2]
- end
- terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true) do |prod, token, input|
- input[:string_value] = token.value[1..-2]
- end
+ terminal(:ANON, ANON)
+ terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
+ terminal(:IRIREF, IRIREF, unescape: true)
+ terminal(:DOUBLE, DOUBLE)
+ terminal(:DECIMAL, DECIMAL)
+ terminal(:INTEGER, INTEGER)
+ terminal(:PNAME_LN, PNAME_LN, unescape: true)
+ terminal(:PNAME_NS, PNAME_NS)
+ terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true)
+ terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true)
+ terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
+ terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)
# String terminals
- terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false)) do |prod, token, input|
- case token.value
- when 'A', 'a' then input[:resource] = RDF.type
- when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value)
- when '@base', '@prefix' then input[:lang] = token.value[1..-1]
- when '.' then input[:terminated] = true
- else input[:string] = token.value
- end
- end
+ terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false))
- terminal(:PREFIX, PREFIX) do |prod, token, input|
- input[:string_value] = token.value
- end
- terminal(:BASE, BASE) do |prod, token, input|
- input[:string_value] = token.value
- end
+ terminal(:PREFIX, PREFIX)
+ terminal(:BASE, BASE)
+ terminal(:LANGTAG, LANGTAG)
- terminal(:LANGTAG, LANGTAG) do |prod, token, input|
- input[:lang] = token.value[1..-1]
- end
+ ##
+ # Accumulated errors found during processing
+ # @return [Array<String>]
+ attr_reader :errors
- # Productions
- # [4] prefixID defines a prefix mapping
- production(:prefixID) do |input, current, callback|
- prefix = current[:prefix]
- iri = current[:resource]
- lexical = current[:string_value]
- terminated = current[:terminated]
- debug("prefixID") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
- if lexical.start_with?('@') && lexical != '@prefix'
- error(:prefixID, "should be downcased")
- elsif lexical == '@prefix'
- error(:prefixID, "directive not terminated") unless terminated
- else
- error(:prefixID, "directive should not be terminated") if terminated
- end
- prefix(prefix, iri)
- end
-
- # [5] base set base_uri
- production(:base) do |input, current, callback|
- iri = current[:resource]
- lexical = current[:string_value]
- terminated = current[:terminated]
- debug("base") {"Defined base as #{iri}"}
- if lexical.start_with?('@') && lexical != '@base'
- error(:base, "should be downcased")
- elsif lexical == '@base'
- error(:base, "directive not terminated") unless terminated
- else
- error(:base, "directive should not be terminated") if terminated
- end
- options[:base_uri] = iri
- end
-
- # [6] triples
- start_production(:triples) do |input, current, callback|
- # Note production as triples for blankNodePropertyList
- # to set :subject instead of :resource
- current[:triples] = true
- end
- production(:triples) do |input, current, callback|
- # Note production as triples for blankNodePropertyList
- # to set :subject instead of :resource
- current[:triples] = true
- end
+ ##
+ # Accumulated warnings found during processing
+ # @return [Array<String>]
+ attr_reader :warnings
- # [9] verb ::= predicate | "a"
- production(:verb) do |input, current, callback|
- input[:predicate] = current[:resource]
- end
-
- # [10] subject ::= IRIref | BlankNode | collection
- start_production(:subject) do |input, current, callback|
- current[:triples] = nil
- end
-
- production(:subject) do |input, current, callback|
- input[:subject] = current[:resource]
- end
-
- # [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal
- production(:object) do |input, current, callback|
- if input[:object_list]
- # Part of an rdf:List collection
- input[:object_list] << current[:resource]
- else
- debug("object") {"current: #{current.inspect}"}
- callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource])
- end
- end
-
- # [14] blankNodePropertyList ::= "[" predicateObjectList "]"
- start_production(:blankNodePropertyList) do |input, current, callback|
- current[:subject] = self.bnode
- end
-
- production(:blankNodePropertyList) do |input, current, callback|
- if input[:triples]
- input[:subject] = current[:subject]
- else
- input[:resource] = current[:subject]
- end
- end
-
- # [15] collection ::= "(" object* ")"
- start_production(:collection) do |input, current, callback|
- # Tells the object production to collect and not generate statements
- current[:object_list] = []
- end
-
- production(:collection) do |input, current, callback|
- # Create an RDF list
- objects = current[:object_list]
- list = RDF::List[*objects]
- list.each_statement do |statement|
- next if statement.predicate == RDF.type && statement.object == RDF.List
- callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
- end
-
- # Return bnode as resource
- input[:resource] = list.subject
- end
-
- # [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )?
- production(:RDFLiteral) do |input, current, callback|
- opts = {}
- opts[:datatype] = current[:resource] if current[:resource]
- opts[:language] = current[:lang] if current[:lang]
- input[:resource] = literal(current[:string_value], opts)
- end
-
##
# Redirect for Freebase Reader
#
# @private
def self.new(input = nil, options = {}, &block)
@@ -227,17 +68,17 @@
# the prefix mappings to use (for accessing intermediate parser productions)
# @option options [#to_s] :base_uri (nil)
# the base URI to use when resolving relative URIs (for accessing intermediate parser productions)
# @option options [#to_s] :anon_base ("b0")
# Basis for generating anonymous Nodes
- # @option options [Boolean] :resolve_uris (false)
- # Resolve prefix and relative IRIs, otherwise, when serializing the parsed SSE
- # as S-Expressions, use the original prefixed and relative URIs along with `base` and `prefix`
- # definitions.
# @option options [Boolean] :validate (false)
# whether to validate the parsed statements and values. If not validating,
# the parser will attempt to recover from errors.
+ # @option options [Array] :errors
+ # array for placing errors found when parsing
+ # @option options [Array] :warnings
+ # array for placing warnings found when parsing
# @option options [Boolean] :progress
# Show progress of parser productions
# @option options [Boolean, Integer, Array] :debug
# Detailed debug output. If set to an Integer, output is restricted
# to messages of that priority: `0` for errors, `1` for warnings,
@@ -253,10 +94,15 @@
anon_base: "b0",
validate: false,
whitespace: WS,
}.merge(options)
@options = {prefixes: {nil => ""}}.merge(@options) unless @options[:validate]
+ @errors = @options[:errors] || []
+ @warnings = @options[:warnings] || []
+ @depth = 0
+ @prod_stack = []
+
@options[:debug] ||= case
when RDF::Turtle.debug? then true
when @options[:progress] then 2
when @options[:validate] then 1
end
@@ -266,10 +112,12 @@
debug("validate") {validate?.inspect}
debug("canonicalize") {canonicalize?.inspect}
debug("intern") {intern?.inspect}
+ @lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, @options)
+
if block_given?
case block.arity
when 0 then instance_eval(&block)
else block.call(self)
end
@@ -287,45 +135,32 @@
# @yield [statement]
# @yieldparam [RDF::Statement] statement
# @return [void]
def each_statement(&block)
if block_given?
+ @recovering = false
@callback = block
- parse(@input, START.to_sym, @options.merge(branch: BRANCH,
- first: FIRST,
- follow: FOLLOW,
- reset_on_start: true)
- ) do |context, *data|
- case context
- when :statement
- loc = data.shift
- s = RDF::Statement.from(data, lineno: lineno)
- add_statement(loc, s) unless !s.valid? && validate?
- when :trace
- level, lineno, depth, *args = data
- message = "#{args.join(': ')}"
- d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
- str = "[#{lineno}](#{level})#{d_str}#{message}"
- case @options[:debug]
- when Array
- @options[:debug] << str
- when TrueClass
- $stderr.puts str
- when Integer
- $stderr.puts(str) if level <= @options[:debug]
- end
+ # Read statements until the lexer has no further token. If peeking at the
+ # lexer raises, the inline rescue yields true so read_statement is still invoked.
+ begin
+ while (@lexer.first rescue true)
+ read_statement
end
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
+ # Terminate loop if EOF found while recovering
end
+
+ # Only validate mode turns accumulated errors into an exception. Messages are
+ # written to $stderr unless the caller supplied the :errors / :warnings arrays.
+ if validate?
+ if !warnings.empty? && !@options[:warnings]
+ $stderr.puts "Warnings: #{warnings.join("\n")}"
+ end
+ if !errors.empty?
+ $stderr.puts "Errors: #{errors.join("\n")}" unless @options[:errors]
+ raise RDF::ReaderError, "Errors found during processing"
+ end
+ end
end
enum_for(:each_statement)
- rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e
- if validate?
- raise RDF::ReaderError.new(e.message, lineno: e.lineno, token: e.token)
- else
- $stderr.puts e.message
- end
end
##
# Iterates the given block for each RDF triple in the input.
#
@@ -343,30 +178,33 @@
enum_for(:each_triple)
end
# add a statement, object can be literal or URI or bnode
#
- # @param [Nokogiri::XML::Node, any] node XML Node or string for showing context
+ # @param [Symbol] production
# @param [RDF::Statement] statement the statement to add
# @return [RDF::Statement] Added statement
# @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_.
- def add_statement(node, statement)
- error(node, "Statement is invalid: #{statement.inspect.inspect}") if validate? && statement.invalid?
- progress(node) {"generate statement: #{statement.to_ntriples}"}
+ def add_statement(production, statement)
+ # In validate mode an invalid statement is reported through #error, tagged
+ # with the production that generated it. The statement is passed to the
+ # callback only when subject, predicate and object are all present (and,
+ # when validating, the statement is valid).
+ error("Statement is invalid: #{statement.inspect.inspect}", production: production) if validate? && statement.invalid?
@callback.call(statement) if statement.subject &&
statement.predicate &&
statement.object &&
(validate? ? statement.valid? : true)
end
# Process a URI against base
def process_iri(iri)
+ # Accepts either an :IRIREF lexer token or a String. A token is reduced to
+ # its value minus the first and last character (presumably the enclosing
+ # '<' and '>' - confirm against the IRIREF terminal).
- value = base_uri.join(iri)
+ iri = iri.value[1..-2] if iri === :IRIREF
+ value = RDF::URI(iri)
+ # Only relative IRIs are joined to the base URI
+ value = base_uri.join(value) if value.relative?
value.validate! if validate?
value.canonicalize! if canonicalize?
value = RDF::URI.intern(value) if intern?
value
+ rescue ArgumentError => e
+ # Route the failure through #error rather than letting ArgumentError escape
+ error("process_iri", e)
end
# Create a literal
def literal(value, options = {})
debug("literal") do
@@ -374,10 +212,12 @@
"options: #{options.inspect}, " +
"validate: #{validate?.inspect}, " +
"c14n?: #{canonicalize?.inspect}"
end
RDF::Literal.new(value, options.merge(validate: validate?, canonicalize: canonicalize?))
+ rescue ArgumentError => e
+ error("Argument Error #{e.message}", production: :literal, token: @lexer.first)
end
##
# Override #prefix to take a relative IRI
#
@@ -395,11 +235,11 @@
def pname(prefix, suffix)
# Prefixes must be defined, except special case for empty prefix being alias for current @base
if prefix(prefix)
base = prefix(prefix).to_s
elsif !prefix(prefix)
- error("pname", "undefined prefix #{prefix.inspect}")
+ error("undefined prefix", production: :pname, token: prefix)
base = ''
end
suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
debug("pname") {"base: '#{base}', suffix: '#{suffix}'"}
process_iri(base + suffix.to_s)
@@ -408,8 +248,446 @@
# Keep track of allocated BNodes
def bnode(value = nil)
+ # Without a label, every call returns a new node
return RDF::Node.new unless value
+ # Labelled nodes are cached by the label's string form, so the same label
+ # always returns the same node object
@bnode_cache ||= {}
@bnode_cache[value.to_s] ||= RDF::Node.new(value)
+ end
+
+ protected
+ # @return [void]
+ def read_statement
+ prod(:statement, %w{.}) do
+ error("read_statement", "Unexpected end of file") unless token = @lexer.first
+ case token.type
+ when :BASE, :PREFIX
+ read_directive || error("Failed to parse directive", production: :directive, token: token)
+ else
+ read_triples || error("Expected token", production: :statement, token: token)
+ if !@recovering || @lexer.first === '.'
+ # If recovering, we will have eaten the closing '.'
+ token = @lexer.shift
+ unless token && token.value == '.'
+ error("Expected '.' following triple", production: :statement, token: token)
+ end
+ end
+ end
+ end
+ end
+
+ # @return [void]
+ def read_directive
+   # Reads a base or prefix directive. The '@base' / '@prefix' spellings must
+   # be followed by '.', any other spelling of the keyword must not be.
+   prod(:directive, %w{.}) do
+     token = @lexer.first
+     case token.type
+     when :BASE
+       prod(:base) do
+         @lexer.shift
+         terminated = token.value == '@base'
+         iri = @lexer.shift
+         error("Expected IRIREF", production: :base, token: iri) unless iri === :IRIREF
+         @options[:base_uri] = process_iri(iri)
+         error("base", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@base'
+
+         if terminated
+           error("base", "Expected #{token} to be terminated") unless @lexer.first === '.'
+           @lexer.shift
+         elsif @lexer.first === '.'
+           error("base", "Expected #{token} not to be terminated")
+         else
+           true
+         end
+       end
+     when :PREFIX
+       prod(:prefixID, %w{.}) do
+         @lexer.shift
+         pfx, iri = @lexer.shift, @lexer.shift
+         terminated = token.value == '@prefix'
+         # Errors are labelled with the production actually being parsed (:prefixID)
+         error("Expected PNAME_NS", production: :prefixID, token: pfx) unless pfx === :PNAME_NS
+         error("Expected IRIREF", production: :prefixID, token: iri) unless iri === :IRIREF
+         debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
+         # Drop the trailing ':' from the PNAME_NS token to get the prefix name
+         prefix(pfx.value[0..-2], process_iri(iri))
+         error("prefixID", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'
+
+         if terminated
+           error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
+           @lexer.shift
+         elsif @lexer.first === '.'
+           error("prefixID", "Expected #{token} not to be terminated")
+         else
+           true
+         end
+       end
+     end
+   end
+ end
+
+ # @return [Object] returns the last verb matched, or subject BNode on predicateObjectList?
+ def read_triples
+ prod(:triples, %w{.}) do
+ error("read_triples", "Unexpected end of file") unless token = @lexer.first
+ case token.type || token.value
+ when '['
+ # blankNodePropertyList predicateObjectList?
+ subject = read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples, token: @lexer.first)
+ read_predicateObjectList(subject) || subject
+ else
+ # subject predicateObjectList
+ subject = read_subject || error("Failed to parse subject", production: :triples, token: @lexer.first)
+ read_predicateObjectList(subject) || error("Expected predicateObjectList", production: :triples, token: @lexer.first)
+ end
+ end
+ end
+
+ # @param [RDF::Resource] subject
+ # @return [RDF::URI] the last matched verb
+ def read_predicateObjectList(subject)
+   # Reads `verb objectList (';' (verb objectList)?)*`, generating statements
+   # about +subject+. Returns the last verb read, or nil when no verb is present.
+   # recover_to is given as an Array (%w), matching the other productions.
+   prod(:predicateObjectList, %w{;}) do
+     last_verb = nil
+     while (verb = read_verb)
+       last_verb = verb
+       prod(:_predicateObjectList_5) do
+         read_objectList(subject, verb) || error("Expected objectList", production: :predicateObjectList, token: @lexer.first)
+       end
+       break unless @lexer.first === ';'
+       # Consume the separator together with any repeated ';'
+       @lexer.shift while @lexer.first === ';'
+     end
+     last_verb
+   end
+ end
+
+ # @return [RDF::Term] the last matched object
+ def read_objectList(subject, predicate)
+ prod(:objectList, %{,}) do
+ last_object = nil
+ while object = prod(:_objectList_2) {read_object(subject, predicate)}
+ last_object = object
+ break unless @lexer.first === ','
+ @lexer.shift while @lexer.first === ','
+ end
+ last_object
+ end
+ end
+
+ # @return [RDF::URI]
+ def read_verb
+ error("read_verb", "Unexpected end of file") unless token = @lexer.first
+ case token.type || token.value
+ when 'a' then prod(:verb) {@lexer.shift && RDF.type}
+ else prod(:verb) {read_iri}
+ end
+ end
+
+ # @return [RDF::Resource]
+ def read_subject
+ prod(:subject) do
+ read_iri ||
+ read_BlankNode ||
+ read_collection ||
+ error( "Expected subject", production: :subject, token: @lexer.first)
+ end
+ end
+
+ # @return [RDF::Term, nil] the object that was read, or nil if no object matched
+ def read_object(subject = nil, predicate = nil)
+ prod(:object) do
+ if object = read_iri ||
+ read_BlankNode ||
+ read_collection ||
+ read_blankNodePropertyList ||
+ read_literal
+
+ add_statement(:object, RDF::Statement(subject, predicate, object)) if subject && predicate
+ object
+ end
+ end
+ end
+
+ # @return [RDF::Literal]
+ def read_literal
+ error("Unexpected end of file", production: :literal) unless token = @lexer.first
+ case token.type || token.value
+ when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer)}
+ when :DECIMAL
+ prod(:litearl) do
+ value = @lexer.shift.value
+ value = "0#{value}" if value.start_with?(".")
+ literal(value, datatype: RDF::XSD.decimal)
+ end
+ when :DOUBLE then prod(:literal) {literal(@lexer.shift.value.sub(/\.([eE])/, '.0\1'), datatype: RDF::XSD.double)}
+ when "true", "false" then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.boolean)}
+ when :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE
+ prod(:literal) do
+ value = @lexer.shift.value[1..-2]
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
+ case token.type || token.value
+ when :LANGTAG
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
+ when '^^'
+ @lexer.shift
+ literal(value, datatype: read_iri)
+ else
+ literal(value)
+ end
+ end
+ when :STRING_LITERAL_LONG_QUOTE, :STRING_LITERAL_LONG_SINGLE_QUOTE
+ prod(:literal) do
+ value = @lexer.shift.value[3..-4]
+ error("read_literal", "Unexpected end of file") unless token = @lexer.first
+ case token.type || token.value
+ when :LANGTAG
+ literal(value, language: @lexer.shift.value[1..-1].to_sym)
+ when '^^'
+ @lexer.shift
+ literal(value, datatype: read_iri)
+ else
+ literal(value)
+ end
+ end
+ end
+ end
+
+ # @return [RDF::Node]
+ def read_blankNodePropertyList
+ token = @lexer.first
+ if token === '['
+ prod(:blankNodePropertyList, %{]}) do
+ @lexer.shift
+ progress("blankNodePropertyList") {"token: #{token.inspect}"}
+ node = bnode
+ read_predicateObjectList(node)
+ error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
+ @lexer.shift
+ node
+ end
+ end
+ end
+
+ # @return [RDF::Node]
+ def read_collection
+   # collection ::= '(' object* ')'
+   # Returns the subject of the RDF list built from the objects read, or nil
+   # when the next token is not '('.
+   return unless @lexer.first === '('
+
+   # recover_to is given as an Array (%w), matching the other productions.
+   prod(:collection, %w{)}) do
+     @lexer.shift
+     token = @lexer.first
+     progress("collection") {"token: #{token.inspect}"}
+     objects = []
+     # read_object is called without subject/predicate, so it only collects terms
+     while (object = read_object)
+       objects << object
+     end
+     list = RDF::List.new(nil, nil, objects)
+     list.each_statement do |statement|
+       # The production is passed as a Symbol, as #read_object does
+       add_statement(:collection, statement)
+     end
+     error("collection", "Expected closing ')'") unless @lexer.first === ')'
+     @lexer.shift
+     list.subject
+   end
+ end
+
+ # @return [RDF::URI]
+ def read_iri
+ token = @lexer.first
+ case token && token.type
+ when :IRIREF then prod(:iri) {process_iri(@lexer.shift)}
+ when :PNAME_LN, :PNAME_NS then prod(:iri) {pname(*@lexer.shift.value.split(':', 2))}
+ end
+ end
+
+ # @return [RDF::Node]
+ def read_BlankNode
+ token = @lexer.first
+ case token && token.type
+ when :BLANK_NODE_LABEL then prod(:BlankNode) {bnode(@lexer.shift.value[2..-1])}
+ when :ANON then @lexer.shift && prod(:BlankNode) {bnode}
+ end
+ end
+
+ def prod(production, recover_to = [])
+ # Runs the given block as the named production. The production and its
+ # recovery terminals are pushed on @prod_stack for the duration of the block.
+ # On a lexer or syntax error, tokens are skipped until one is found among the
+ # recover_to terminals of this or any stacked production; if that token
+ # belongs to this production it is consumed, otherwise Recovery is raised so
+ # an enclosing production can handle it.
+ @prod_stack << {prod: production, recover_to: recover_to}
+ @depth += 1
+ @recovering = false
+ progress("#{production}(start)") {"token: #{@lexer.first.inspect}"}
+ yield
+ rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
+ # Lexer encountered an illegal token or the parser encountered
+ # a terminal which is inappropriate for the current production.
+ # Perform error recovery to find a reasonable terminal based
+ # on the follow sets of the relevant productions. This includes
+ # remaining terms from the current production and the stacked
+ # productions
+ case e
+ when EBNF::LL1::Lexer::Error
+ @lexer.recover
+ begin
+ # #error records the message and then raises SyntaxError; swallow that
+ # here because recovery continues below
+ error("Lexer error", "With input '#{e.input}': #{e.message}",
+ production: production,
+ token: e.token)
+ rescue SyntaxError
+ end
+ end
+ raise EOFError, "End of input found when recovering" if @lexer.first.nil?
+ debug("recovery", "current token: #{@lexer.first.inspect}", :level => 4)
+
+ unless e.is_a?(Recovery)
+ # Get the list of follows for this sequence, this production and the stacked productions.
+ debug("recovery", "stack follows:", :level => 4)
+ @prod_stack.reverse.each do |prod|
+ debug("recovery", :level => 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"}
+ end
+ end
+
+ # Find all follows to the top of the stack
+ # (Array() lets recover_to be either an Array or a single String)
+ follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq
+
+ # Skip tokens until one is found in follows
+ while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t}
+ skipped = @lexer.shift
+ progress("recovery") {"skip #{skipped.inspect}"}
+ end
+ debug("recovery") {"found #{token.inspect} in follows"}
+
+ # Re-raise the error unless token is a follows of this production
+ raise Recovery unless Array(recover_to).any? {|t| token === t}
+
+ # Skip that token to get something reasonable to start the next production with
+ @lexer.shift
+ ensure
+ # Always undo the bookkeeping done on entry, whether or not an error occurred
+ progress("#{production}(finish)")
+ @depth -= 1
+ @prod_stack.pop
+ end
+
+ ##
+ # Warning information, used as level `1` debug messages.
+ #
+ # @param [String] node Relevant location associated with message
+ # @param [String] message Error string
+ # @param [Hash] options
+ # @option options [URI, #to_s] :production
+ # @option options [Token] :token
+ # @see {#debug}
+ def warn(node, message, options = {})
+ m = "WARNING "
+ m += "[line: #{@lineno}] " if @lineno
+ m += message
+ m += " (found #{options[:token].inspect})" if options[:token]
+ m += ", production = #{options[:production].inspect}" if options[:production]
+ @warnings << m unless @recovering
+ debug(node, m, options.merge(:level => 1))
+ end
+
+ ##
+ # Error information, used as level `0` debug messages.
+ #
+ # @overload error(node, message, options)
+ # @param [String] node Relevant location associated with message
+ # @param [String] message Error string
+ # @param [Hash] options
+ # @option options [URI, #to_s] :production
+ # @option options [Token] :token
+ # @see {#debug}
+ def error(*args)
+ return if @recovering
+ options = args.last.is_a?(Hash) ? args.pop : {}
+ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
+ message = "#{args.join(': ')}"
+ m = "ERROR "
+ m += "[line: #{lineno}] " if lineno
+ m += message
+ m += " (found #{options[:token].inspect})" if options[:token]
+ m += ", production = #{options[:production].inspect}" if options[:production]
+ @recovering = true
+ @errors << m
+ debug(m, options.merge(level: 0))
+ raise SyntaxError.new(m, lineno: lineno, token: options[:token], production: options[:production])
+ end
+
+ ##
+ # Progress output when debugging.
+ #
+ # The call is ignored, unless `@options[:debug]` is set, in which
+ # case it records tracing information as indicated. Additionally,
+ # if `@options[:debug]` is an Integer, the call is aborted if the
+ # `:level` option is greater than `@options[:debug]`.
+ #
+ # @overload debug(node, message, options)
+ # @param [Array<String>] args Relevant location associated with message
+ # @param [Hash] options
+ # @option options [Integer] :depth
+ # Recursion depth for indenting output
+ # @option options [Integer] :level
+ # Level assigned to message, by convention, level `0` is for
+ # errors, level `1` is for warnings, level `2` is for parser
+ # progress information, and anything higher is for various levels
+ # of debug information.
+ #
+ # @yieldparam [:trace] trace
+ # @yieldparam [Integer] level
+ # @yieldparam [Integer] lineno
+ # @yieldparam [Integer] depth Recursive depth of productions
+ # @yieldparam [Array<String>] args
+ # @yieldreturn [String] added to message
+ def debug(*args)
+ return unless @options[:debug]
+ options = args.last.is_a?(Hash) ? args.pop : {}
+ debug_level = options.fetch(:level, 3)
+ return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
+
+ depth = options[:depth] || @depth
+ args << yield if block_given?
+
+ message = "#{args.join(': ')}"
+ d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
+ str = "[#{lineno}](#{debug_level})#{d_str}#{message}"
+ case @options[:debug]
+ when Array
+ @options[:debug] << str
+ when TrueClass
+ $stderr.puts str
+ when Integer
+ case debug_level
+ when 0 then return if @options[:errors]
+ when 1 then return if @options[:warnings]
+ end
+ $stderr.puts(str) if debug_level <= @options[:debug]
+ end
+ end
+
+ # Used for internal error recovery
+ class Recovery < StandardError; end
+
+ class SyntaxError < RDF::ReaderError
+ ##
+ # The current production.
+ #
+ # @return [Symbol]
+ attr_reader :production
+
+ ##
+ # The invalid token which triggered the error.
+ #
+ # @return [String]
+ attr_reader :token
+
+ ##
+ # The line number where the error occurred.
+ #
+ # @return [Integer]
+ attr_reader :lineno
+
+ ##
+ # Initializes a new syntax error instance.
+ #
+ # @param [String, #to_s] message
+ # @param [Hash{Symbol => Object}] options
+ # @option options [Symbol] :production (nil)
+ # @option options [String] :token (nil)
+ # @option options [Integer] :lineno (nil)
+ def initialize(message, options = {})
+ @production = options[:production]
+ @token = options[:token]
+ @lineno = options[:lineno] || (@token.lineno if @token.respond_to?(:lineno))
+ super(message.to_s)
+ end
end
end # class Reader
end # module RDF::Turtle