lib/rdf/turtle/reader.rb in rdf-turtle-1.1.7 vs lib/rdf/turtle/reader.rb in rdf-turtle-1.1.8

- old
+ new

@@ -1,206 +1,47 @@ -require 'rdf/turtle/meta' -require 'ebnf/ll1/parser' +# coding: utf-8 +require 'ebnf/ll1/lexer' module RDF::Turtle ## # A parser for the Turtle 2 class Reader < RDF::Reader format Format - include RDF::Turtle::Meta include EBNF::LL1::Parser include RDF::Turtle::Terminals # Terminals passed to lexer. Order matters! - terminal(:ANON, ANON) do |prod, token, input| - input[:resource] = self.bnode - end - terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input| - input[:resource] = self.bnode(token.value[2..-1]) - end - terminal(:IRIREF, IRIREF, unescape: true) do |prod, token, input| - input[:resource] = process_iri(token.value[1..-2]) - end - terminal(:DOUBLE, DOUBLE) do |prod, token, input| - # Note that a Turtle Double may begin with a '.[eE]', so tack on a leading - # zero if necessary - value = token.value.sub(/\.([eE])/, '.0\1') - input[:resource] = literal(value, datatype: RDF::XSD.double) - end - terminal(:DECIMAL, DECIMAL) do |prod, token, input| - # Note that a Turtle Decimal may begin with a '.', so tack on a leading - # zero if necessary - value = token.value - value = "0#{token.value}" if token.value[0,1] == "." 
- input[:resource] = literal(value, datatype: RDF::XSD.decimal) - end - terminal(:INTEGER, INTEGER) do |prod, token, input| - input[:resource] = literal(token.value, datatype: RDF::XSD.integer) - end - # Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences" - terminal(:PNAME_LN, PNAME_LN, unescape: true) do |prod, token, input| - prefix, suffix = token.value.split(":", 2) - input[:resource] = pname(prefix, suffix) - end - # Spec confusion: spec says : "Literals , prefixed names and IRIs may also contain escape sequences" - terminal(:PNAME_NS, PNAME_NS) do |prod, token, input| - prefix = token.value[0..-2] - - # Two contexts, one when prefix is being defined, the other when being used - case prod - when :prefixID, :sparqlPrefix - input[:prefix] = prefix - else - input[:resource] = pname(prefix, '') - end - end - terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true) do |prod, token, input| - input[:string_value] = token.value[3..-4] - end - terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true) do |prod, token, input| - input[:string_value] = token.value[3..-4] - end - terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true) do |prod, token, input| - input[:string_value] = token.value[1..-2] - end - terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true) do |prod, token, input| - input[:string_value] = token.value[1..-2] - end + terminal(:ANON, ANON) + terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) + terminal(:IRIREF, IRIREF, unescape: true) + terminal(:DOUBLE, DOUBLE) + terminal(:DECIMAL, DECIMAL) + terminal(:INTEGER, INTEGER) + terminal(:PNAME_LN, PNAME_LN, unescape: true) + terminal(:PNAME_NS, PNAME_NS) + terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, unescape: true) + terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true) + terminal(:STRING_LITERAL_QUOTE, 
STRING_LITERAL_QUOTE, unescape: true) + terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true) # String terminals - terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false)) do |prod, token, input| - case token.value - when 'A', 'a' then input[:resource] = RDF.type - when 'true', 'false' then input[:resource] = RDF::Literal::Boolean.new(token.value) - when '@base', '@prefix' then input[:lang] = token.value[1..-1] - when '.' then input[:terminated] = true - else input[:string] = token.value - end - end + terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false)) - terminal(:PREFIX, PREFIX) do |prod, token, input| - input[:string_value] = token.value - end - terminal(:BASE, BASE) do |prod, token, input| - input[:string_value] = token.value - end + terminal(:PREFIX, PREFIX) + terminal(:BASE, BASE) + terminal(:LANGTAG, LANGTAG) - terminal(:LANGTAG, LANGTAG) do |prod, token, input| - input[:lang] = token.value[1..-1] - end + ## + # Accumulated errors found during processing + # @return [Array<String>] + attr_reader :errors - # Productions - # [4] prefixID defines a prefix mapping - production(:prefixID) do |input, current, callback| - prefix = current[:prefix] - iri = current[:resource] - lexical = current[:string_value] - terminated = current[:terminated] - debug("prefixID") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"} - if lexical.start_with?('@') && lexical != '@prefix' - error(:prefixID, "should be downcased") - elsif lexical == '@prefix' - error(:prefixID, "directive not terminated") unless terminated - else - error(:prefixID, "directive should not be terminated") if terminated - end - prefix(prefix, iri) - end - - # [5] base set base_uri - production(:base) do |input, current, callback| - iri = current[:resource] - lexical = current[:string_value] - terminated = current[:terminated] - debug("base") {"Defined base as #{iri}"} - if lexical.start_with?('@') && lexical != '@base' - error(:base, "should be downcased") - elsif lexical == 
'@base' - error(:base, "directive not terminated") unless terminated - else - error(:base, "directive should not be terminated") if terminated - end - options[:base_uri] = iri - end - - # [6] triples - start_production(:triples) do |input, current, callback| - # Note production as triples for blankNodePropertyList - # to set :subject instead of :resource - current[:triples] = true - end - production(:triples) do |input, current, callback| - # Note production as triples for blankNodePropertyList - # to set :subject instead of :resource - current[:triples] = true - end + ## + # Accumulated warnings found during processing + # @return [Array<String>] + attr_reader :warnings - # [9] verb ::= predicate | "a" - production(:verb) do |input, current, callback| - input[:predicate] = current[:resource] - end - - # [10] subject ::= IRIref | BlankNode | collection - start_production(:subject) do |input, current, callback| - current[:triples] = nil - end - - production(:subject) do |input, current, callback| - input[:subject] = current[:resource] - end - - # [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal - production(:object) do |input, current, callback| - if input[:object_list] - # Part of an rdf:List collection - input[:object_list] << current[:resource] - else - debug("object") {"current: #{current.inspect}"} - callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource]) - end - end - - # [14] blankNodePropertyList ::= "[" predicateObjectList "]" - start_production(:blankNodePropertyList) do |input, current, callback| - current[:subject] = self.bnode - end - - production(:blankNodePropertyList) do |input, current, callback| - if input[:triples] - input[:subject] = current[:subject] - else - input[:resource] = current[:subject] - end - end - - # [15] collection ::= "(" object* ")" - start_production(:collection) do |input, current, callback| - # Tells the object production to collect and not generate statements - 
current[:object_list] = [] - end - - production(:collection) do |input, current, callback| - # Create an RDF list - objects = current[:object_list] - list = RDF::List[*objects] - list.each_statement do |statement| - next if statement.predicate == RDF.type && statement.object == RDF.List - callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object) - end - - # Return bnode as resource - input[:resource] = list.subject - end - - # [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )? - production(:RDFLiteral) do |input, current, callback| - opts = {} - opts[:datatype] = current[:resource] if current[:resource] - opts[:language] = current[:lang] if current[:lang] - input[:resource] = literal(current[:string_value], opts) - end - ## # Redirect for Freebase Reader # # @private def self.new(input = nil, options = {}, &block) @@ -227,17 +68,17 @@ # the prefix mappings to use (for acessing intermediate parser productions) # @option options [#to_s] :base_uri (nil) # the base URI to use when resolving relative URIs (for acessing intermediate parser productions) # @option options [#to_s] :anon_base ("b0") # Basis for generating anonymous Nodes - # @option options [Boolean] :resolve_uris (false) - # Resolve prefix and relative IRIs, otherwise, when serializing the parsed SSE - # as S-Expressions, use the original prefixed and relative URIs along with `base` and `prefix` - # definitions. # @option options [Boolean] :validate (false) # whether to validate the parsed statements and values. If not validating, # the parser will attempt to recover from errors. + # @option options [Array] :errors + # array for placing errors found when parsing + # @option options [Array] :warnings + # array for placing warnings found when parsing # @option options [Boolean] :progress # Show progress of parser productions # @option options [Boolean, Integer, Array] :debug # Detailed debug output. 
If set to an Integer, output is restricted # to messages of that priority: `0` for errors, `1` for warnings, @@ -253,10 +94,15 @@ anon_base: "b0", validate: false, whitespace: WS, }.merge(options) @options = {prefixes: {nil => ""}}.merge(@options) unless @options[:validate] + @errors = @options[:errors] || [] + @warnings = @options[:warnings] || [] + @depth = 0 + @prod_stack = [] + @options[:debug] ||= case when RDF::Turtle.debug? then true when @options[:progress] then 2 when @options[:validate] then 1 end @@ -266,10 +112,12 @@ debug("validate") {validate?.inspect} debug("canonicalize") {canonicalize?.inspect} debug("intern") {intern?.inspect} + @lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, @options) + if block_given? case block.arity when 0 then instance_eval(&block) else block.call(self) end @@ -287,45 +135,32 @@ # @yield [statement] # @yieldparam [RDF::Statement] statement # @return [void] def each_statement(&block) if block_given? + @recovering = false @callback = block - parse(@input, START.to_sym, @options.merge(branch: BRANCH, - first: FIRST, - follow: FOLLOW, - reset_on_start: true) - ) do |context, *data| - case context - when :statement - loc = data.shift - s = RDF::Statement.from(data, lineno: lineno) - add_statement(loc, s) unless !s.valid? && validate? - when :trace - level, lineno, depth, *args = data - message = "#{args.join(': ')}" - d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth - str = "[#{lineno}](#{level})#{d_str}#{message}" - case @options[:debug] - when Array - @options[:debug] << str - when TrueClass - $stderr.puts str - when Integer - $stderr.puts(str) if level <= @options[:debug] - end + begin + while (@lexer.first rescue true) + read_statement end + rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery + # Terminate loop if EOF found while recovering end + + if validate? + if !warnings.empty? && !@options[:warnings] + $stderr.puts "Warnings: #{warnings.join("\n")}" + end + if !errors.empty? 
+ $stderr.puts "Errors: #{errors.join("\n")}" unless @options[:errors] + raise RDF::ReaderError, "Errors found during processing" + end + end end enum_for(:each_statement) - rescue EBNF::LL1::Parser::Error, EBNF::LL1::Lexer::Error => e - if validate? - raise RDF::ReaderError.new(e.message, lineno: e.lineno, token: e.token) - else - $stderr.puts e.message - end end ## # Iterates the given block for each RDF triple in the input. # @@ -343,30 +178,33 @@ enum_for(:each_triple) end # add a statement, object can be literal or URI or bnode # - # @param [Nokogiri::XML::Node, any] node XML Node or string for showing context + # @param [Symbol] production # @param [RDF::Statement] statement the subject of the statement # @return [RDF::Statement] Added statement # @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_. - def add_statement(node, statement) - error(node, "Statement is invalid: #{statement.inspect.inspect}") if validate? && statement.invalid? - progress(node) {"generate statement: #{statement.to_ntriples}"} + def add_statement(production, statement) + error("Statement is invalid: #{statement.inspect.inspect}", production: produciton) if validate? && statement.invalid? @callback.call(statement) if statement.subject && statement.predicate && statement.object && (validate? ? statement.valid? : true) end # Process a URI against base def process_iri(iri) - value = base_uri.join(iri) + iri = iri.value[1..-2] if iri === :IRIREF + value = RDF::URI(iri) + value = base_uri.join(value) if value.relative? value.validate! if validate? value.canonicalize! if canonicalize? value = RDF::URI.intern(value) if intern? 
value + rescue ArgumentError => e + error("process_iri", e) end # Create a literal def literal(value, options = {}) debug("literal") do @@ -374,10 +212,12 @@ "options: #{options.inspect}, " + "validate: #{validate?.inspect}, " + "c14n?: #{canonicalize?.inspect}" end RDF::Literal.new(value, options.merge(validate: validate?, canonicalize: canonicalize?)) + rescue ArgumentError => e + error("Argument Error #{e.message}", production: :literal, token: @lexer.first) end ## # Override #prefix to take a relative IRI # @@ -395,11 +235,11 @@ def pname(prefix, suffix) # Prefixes must be defined, except special case for empty prefix being alias for current @base if prefix(prefix) base = prefix(prefix).to_s elsif !prefix(prefix) - error("pname", "undefined prefix #{prefix.inspect}") + error("undefined prefix", production: :pname, token: prefix) base = '' end suffix = suffix.to_s.sub(/^\#/, "") if base.index("#") debug("pname") {"base: '#{base}', suffix: '#{suffix}'"} process_iri(base + suffix.to_s) @@ -408,8 +248,446 @@ # Keep track of allocated BNodes def bnode(value = nil) return RDF::Node.new unless value @bnode_cache ||= {} @bnode_cache[value.to_s] ||= RDF::Node.new(value) + end + + protected + # @return [void] + def read_statement + prod(:statement, %w{.}) do + error("read_statement", "Unexpected end of file") unless token = @lexer.first + case token.type + when :BASE, :PREFIX + read_directive || error("Failed to parse directive", production: :directive, token: token) + else + read_triples || error("Expected token", production: :statement, token: token) + if !@recovering || @lexer.first === '.' + # If recovering, we will have eaten the closing '.' + token = @lexer.shift + unless token && token.value == '.' + error("Expected '.' 
following triple", production: :statement, token: token) + end + end + end + end + end + + # @return [void] + def read_directive + prod(:directive, %w{.}) do + token = @lexer.first + case token.type + when :BASE + prod(:base) do + @lexer.shift + terminated = token.value == '@base' + iri = @lexer.shift + error("Expected IRIREF", :production => :base, token: iri) unless iri === :IRIREF + @options[:base_uri] = process_iri(iri) + error("base", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@base' + + if terminated + error("base", "Expected #{token} to be terminated") unless @lexer.first === '.' + @lexer.shift + elsif @lexer.first === '.' + error("base", "Expected #{token} not to be terminated") + else + true + end + end + when :PREFIX + prod(:prefixID, %w{.}) do + @lexer.shift + pfx, iri = @lexer.shift, @lexer.shift + terminated = token.value == '@prefix' + error("Expected PNAME_NS", :production => :prefix, token: pfx) unless pfx === :PNAME_NS + error("Expected IRIREF", :production => :prefix, token: iri) unless iri === :IRIREF + debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"} + prefix(pfx.value[0..-2], process_iri(iri)) + error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix' + + if terminated + error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.' + @lexer.shift + elsif @lexer.first === '.' + error("prefixID", "Expected #{token} not to be terminated") + else + true + end + end + end + end + end + + # @return [Object] returns the last verb matched, or subject BNode on predicateObjectList? + def read_triples + prod(:triples, %w{.}) do + error("read_triples", "Unexpected end of file") unless token = @lexer.first + case token.type || token.value + when '[' + # blankNodePropertyList predicateObjectList? 
+ subject = read_blankNodePropertyList || error("Failed to parse blankNodePropertyList", production: :triples, token: @lexer.first) + read_predicateObjectList(subject) || subject + else + # subject predicateObjectList + subject = read_subject || error("Failed to parse subject", production: :triples, token: @lexer.first) + read_predicateObjectList(subject) || error("Expected predicateObjectList", production: :triples, token: @lexer.first) + end + end + end + + # @param [RDF::Resource] subject + # @return [RDF::URI] the last matched verb + def read_predicateObjectList(subject) + prod(:predicateObjectList, %{;}) do + last_verb = nil + while verb = read_verb + last_verb = verb + prod(:_predicateObjectList_5) do + read_objectList(subject, verb) || error("Expected objectList", production: :predicateObjectList, token: @lexer.first) + end + break unless @lexer.first === ';' + @lexer.shift while @lexer.first === ';' + end + last_verb + end + end + + # @return [RDF::Term] the last matched subject + def read_objectList(subject, predicate) + prod(:objectList, %{,}) do + last_object = nil + while object = prod(:_objectList_2) {read_object(subject, predicate)} + last_object = object + break unless @lexer.first === ',' + @lexer.shift while @lexer.first === ',' + end + last_object + end + end + + # @return [RDF::URI] + def read_verb + error("read_verb", "Unexpected end of file") unless token = @lexer.first + case token.type || token.value + when 'a' then prod(:verb) {@lexer.shift && RDF.type} + else prod(:verb) {read_iri} + end + end + + # @return [RDF::Resource] + def read_subject + prod(:subject) do + read_iri || + read_BlankNode || + read_collection || + error( "Expected subject", production: :subject, token: @lexer.first) + end + end + + # @return [void] + def read_object(subject = nil, predicate = nil) + prod(:object) do + if object = read_iri || + read_BlankNode || + read_collection || + read_blankNodePropertyList || + read_literal + + add_statement(:object, 
RDF::Statement(subject, predicate, object)) if subject && predicate + object + end + end + end + + # @return [RDF::Literal] + def read_literal + error("Unexpected end of file", production: :literal) unless token = @lexer.first + case token.type || token.value + when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer)} + when :DECIMAL + prod(:literal) do + value = @lexer.shift.value + value = "0#{value}" if value.start_with?(".") + literal(value, datatype: RDF::XSD.decimal) + end + when :DOUBLE then prod(:literal) {literal(@lexer.shift.value.sub(/\.([eE])/, '.0\1'), datatype: RDF::XSD.double)} + when "true", "false" then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.boolean)} + when :STRING_LITERAL_QUOTE, :STRING_LITERAL_SINGLE_QUOTE + prod(:literal) do + value = @lexer.shift.value[1..-2] + error("read_literal", "Unexpected end of file") unless token = @lexer.first + case token.type || token.value + when :LANGTAG + literal(value, language: @lexer.shift.value[1..-1].to_sym) + when '^^' + @lexer.shift + literal(value, datatype: read_iri) + else + literal(value) + end + end + when :STRING_LITERAL_LONG_QUOTE, :STRING_LITERAL_LONG_SINGLE_QUOTE + prod(:literal) do + value = @lexer.shift.value[3..-4] + error("read_literal", "Unexpected end of file") unless token = @lexer.first + case token.type || token.value + when :LANGTAG + literal(value, language: @lexer.shift.value[1..-1].to_sym) + when '^^' + @lexer.shift + literal(value, datatype: read_iri) + else + literal(value) + end + end + end + end + + # @return [RDF::Node] + def read_blankNodePropertyList + token = @lexer.first + if token === '[' + prod(:blankNodePropertyList, %{]}) do + @lexer.shift + progress("blankNodePropertyList") {"token: #{token.inspect}"} + node = bnode + read_predicateObjectList(node) + error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']' + @lexer.shift + node + end + end + end + + # @return [RDF::Node] + def 
read_collection + if @lexer.first === '(' + prod(:collection, %{)}) do + @lexer.shift + token = @lexer.first + progress("collection") {"token: #{token.inspect}"} + objects = [] + while object = read_object + objects << object + end + list = RDF::List.new(nil, nil, objects) + list.each_statement do |statement| + add_statement("collection", statement) + end + error("collection", "Expected closing ')'") unless @lexer.first === ')' + @lexer.shift + list.subject + end + end + end + + # @return [RDF::URI] + def read_iri + token = @lexer.first + case token && token.type + when :IRIREF then prod(:iri) {process_iri(@lexer.shift)} + when :PNAME_LN, :PNAME_NS then prod(:iri) {pname(*@lexer.shift.value.split(':', 2))} + end + end + + # @return [RDF::Node] + def read_BlankNode + token = @lexer.first + case token && token.type + when :BLANK_NODE_LABEL then prod(:BlankNode) {bnode(@lexer.shift.value[2..-1])} + when :ANON then @lexer.shift && prod(:BlankNode) {bnode} + end + end + + def prod(production, recover_to = []) + @prod_stack << {prod: production, recover_to: recover_to} + @depth += 1 + @recovering = false + progress("#{production}(start)") {"token: #{@lexer.first.inspect}"} + yield + rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e + # Lexer encountered an illegal token or the parser encountered + # a terminal which is inappropriate for the current production. + # Perform error recovery to find a reasonable terminal based + # on the follow sets of the relevant productions. This includes + # remaining terms from the current production and the stacked + # productions + case e + when EBNF::LL1::Lexer::Error + @lexer.recover + begin + error("Lexer error", "With input '#{e.input}': #{e.message}", + production: production, + token: e.token) + rescue SyntaxError + end + end + raise EOFError, "End of input found when recovering" if @lexer.first.nil? 
+ debug("recovery", "current token: #{@lexer.first.inspect}", :level => 4) + + unless e.is_a?(Recovery) + # Get the list of follows for this sequence, this production and the stacked productions. + debug("recovery", "stack follows:", :level => 4) + @prod_stack.reverse.each do |prod| + debug("recovery", :level => 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"} + end + end + + # Find all follows to the top of the stack + follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq + + # Skip tokens until one is found in follows + while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t} + skipped = @lexer.shift + progress("recovery") {"skip #{skipped.inspect}"} + end + debug("recovery") {"found #{token.inspect} in follows"} + + # Re-raise the error unless token is a follows of this production + raise Recovery unless Array(recover_to).any? {|t| token === t} + + # Skip that token to get something reasonable to start the next production with + @lexer.shift + ensure + progress("#{production}(finish)") + @depth -= 1 + @prod_stack.pop + end + + ## + # Warning information, used as level `1` debug messages. + # + # @param [String] node Relevant location associated with message + # @param [String] message Error string + # @param [Hash] options + # @option options [URI, #to_s] :production + # @option options [Token] :token + # @see {#debug} + def warn(node, message, options = {}) + m = "WARNING " + m += "[line: #{@lineno}] " if @lineno + m += message + m += " (found #{options[:token].inspect})" if options[:token] + m += ", production = #{options[:production].inspect}" if options[:production] + @warnings << m unless @recovering + debug(node, m, options.merge(:level => 1)) + end + + ## + # Error information, used as level `0` debug messages. 
+ # + # @overload debug(node, message, options) + # @param [String] node Relevant location associated with message + # @param [String] message Error string + # @param [Hash] options + # @option options [URI, #to_s] :production + # @option options [Token] :token + # @see {#debug} + def error(*args) + return if @recovering + options = args.last.is_a?(Hash) ? args.pop : {} + lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno)) + message = "#{args.join(': ')}" + m = "ERROR " + m += "[line: #{lineno}] " if lineno + m += message + m += " (found #{options[:token].inspect})" if options[:token] + m += ", production = #{options[:production].inspect}" if options[:production] + @recovering = true + @errors << m + debug(m, options.merge(level: 0)) + raise SyntaxError.new(m, lineno: lineno, token: options[:token], production: options[:production]) + end + + ## + # Progress output when debugging. + # + # The call is ignored, unless `@options[:debug]` is set, in which + # case it records tracing information as indicated. Additionally, + # if `@options[:debug]` is an Integer, the call is aborted if the + # `:level` option is less than than `:level`. + # + # @overload debug(node, message, options) + # @param [Array<String>] args Relevant location associated with message + # @param [Hash] options + # @option options [Integer] :depth + # Recursion depth for indenting output + # @option options [Integer] :level + # Level assigned to message, by convention, level `0` is for + # errors, level `1` is for warnings, level `2` is for parser + # progress information, and anything higher is for various levels + # of debug information. + # + # @yieldparam [:trace] trace + # @yieldparam [Integer] level + # @yieldparam [Integer] lineno + # @yieldparam [Integer] depth Recursive depth of productions + # @yieldparam [Array<String>] args + # @yieldreturn [String] added to message + def debug(*args) + return unless @options[:debug] + options = args.last.is_a?(Hash) ? 
args.pop : {} + debug_level = options.fetch(:level, 3) + return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug] + + depth = options[:depth] || @depth + args << yield if block_given? + + message = "#{args.join(': ')}" + d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth + str = "[#{lineno}](#{debug_level})#{d_str}#{message}" + case @options[:debug] + when Array + @options[:debug] << str + when TrueClass + $stderr.puts str + when Integer + case debug_level + when 0 then return if @options[:errors] + when 1 then return if @options[:warnings] + end + $stderr.puts(str) if debug_level <= @options[:debug] + end + end + + # Used for internal error recovery + class Recovery < StandardError; end + + class SyntaxError < RDF::ReaderError + ## + # The current production. + # + # @return [Symbol] + attr_reader :production + + ## + # The invalid token which triggered the error. + # + # @return [String] + attr_reader :token + + ## + # The line number where the error occurred. + # + # @return [Integer] + attr_reader :lineno + + ## + # Initializes a new syntax error instance. + # + # @param [String, #to_s] message + # @param [Hash{Symbol => Object}] options + # @option options [Symbol] :production (nil) + # @option options [String] :token (nil) + # @option options [Integer] :lineno (nil) + def initialize(message, options = {}) + @production = options[:production] + @token = options[:token] + @lineno = options[:lineno] || (@token.lineno if @token.respond_to?(:lineno)) + super(message.to_s) + end end end # class Reader end # module RDF::Turtle