lib/rdf/turtle/reader.rb in rdf-turtle-1.99.0 vs lib/rdf/turtle/reader.rb in rdf-turtle-2.0.0.beta1

- old
+ new

@@ -6,10 +6,11 @@ # A parser for the Turtle 2 class Reader < RDF::Reader format Format include EBNF::LL1::Parser include RDF::Turtle::Terminals + include RDF::Util::Logger # Terminals passed to lexer. Order matters! terminal(:ANON, ANON) terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) terminal(:IRIREF, IRIREF, unescape: true) @@ -29,20 +30,23 @@ terminal(:PREFIX, PREFIX) terminal(:BASE, BASE) terminal(:LANGTAG, LANGTAG) ## - # Accumulated errors found during processing - # @return [Array<String>] - attr_reader :errors + # Reader options + # @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method + def self.options + super + [ + RDF::CLI::Option.new( + symbol: :freebase, + datatype: TrueClass, + on: ["--freebase"], + description: "Use optimized Freebase reader.") {true}, + ] + end ## - # Accumulated warnings found during processing - # @return [Array<String>] - attr_reader :warnings - - ## # Redirect for Freebase Reader # # @private def self.new(input = nil, options = {}, &block) klass = if options[:freebase] @@ -71,50 +75,32 @@ # @option options [#to_s] :anon_base ("b0") # Basis for generating anonymous Nodes # @option options [Boolean] :validate (false) # whether to validate the parsed statements and values. If not validating, # the parser will attempt to recover from errors. - # @option options [Array] :errors - # array for placing errors found when parsing - # @option options [Array] :warnings - # array for placing warnings found when parsing - # @option options [Boolean] :progress - # Show progress of parser productions - # @option options [Boolean, Integer, Array] :debug - # Detailed debug output. If set to an Integer, output is restricted - # to messages of that priority: `0` for errors, `1` for warnings, - # `2` for processor tracing, and anything else for various levels - # of debug. If set to an Array, information is collected in the array - # instead of being output to `$stderr`. + # @option options [Logger, #write, #<<] :logger + # Record error/info/debug output # @option options [Boolean] :freebase (false) # Use optimized Freebase reader # @return [RDF::Turtle::Reader] def initialize(input = nil, options = {}, &block) super do @options = { anon_base: "b0", validate: false, whitespace: WS, + log_depth: 0, }.merge(options) @options = {prefixes: {nil => ""}}.merge(@options) unless @options[:validate] - @errors = @options[:errors] || [] - @warnings = @options[:warnings] || [] - @depth = 0 @prod_stack = [] - @options[:debug] ||= case - when RDF::Turtle.debug? then true - when @options[:progress] then 2 - when @options[:validate] then 1 - end - @options[:base_uri] = RDF::URI(base_uri || "") - debug("base IRI") {base_uri.inspect} + log_debug("base IRI") {base_uri.inspect} - debug("validate") {validate?.inspect} - debug("canonicalize") {canonicalize?.inspect} - debug("intern") {intern?.inspect} + log_debug("validate") {validate?.inspect} + log_debug("canonicalize") {canonicalize?.inspect} + log_debug("intern") {intern?.inspect} @lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, @options) if block_given? case block.arity @@ -135,29 +121,23 @@ # @yield [statement] # @yieldparam [RDF::Statement] statement # @return [void] def each_statement(&block) if block_given? - @recovering = false + log_recover @callback = block begin while (@lexer.first rescue true) read_statement end rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery # Terminate loop if EOF found while recovering end - if validate? - if !warnings.empty? && !@options[:warnings] - $stderr.puts "Warnings: #{warnings.join("\n")}" - end - if !errors.empty? - $stderr.puts "Errors: #{errors.join("\n")}" unless @options[:errors] - raise RDF::ReaderError, "Errors found during processing" - end + if validate? && log_statistics[:error] + raise RDF::ReaderError, "Errors found during processing" end end enum_for(:each_statement) end @@ -205,11 +185,11 @@ error("process_iri", e) end # Create a literal def literal(value, options = {}) - debug("literal") do + log_debug("literal") do "value: #{value.inspect}, " + "options: #{options.inspect}, " + "validate: #{validate?.inspect}, " + "c14n?: #{canonicalize?.inspect}" end @@ -239,11 +219,11 @@ elsif !prefix(prefix) error("undefined prefix", production: :pname, token: prefix) base = '' end suffix = suffix.to_s.sub(/^\#/, "") if base.index("#") - debug("pname") {"base: '#{base}', suffix: '#{suffix}'"} + log_debug("pname") {"base: '#{base}', suffix: '#{suffix}'"} process_iri(base + suffix.to_s) end # Keep track of allocated BNodes def bnode(value = nil) @@ -260,11 +240,11 @@ case token.type when :BASE, :PREFIX read_directive || error("Failed to parse directive", production: :directive, token: token) else read_triples || error("Expected token", production: :statement, token: token) - if !@recovering || @lexer.first === '.' + if !log_recovering? || @lexer.first === '.' # If recovering, we will have eaten the closing '.' token = @lexer.shift unless token && token.value == '.' error("Expected '.' following triple", production: :statement, token: token) end @@ -301,11 +281,11 @@ @lexer.shift pfx, iri = @lexer.shift, @lexer.shift terminated = token.value == '@prefix' error("Expected PNAME_NS", production: :prefix, token: pfx) unless pfx === :PNAME_NS error("Expected IRIREF", production: :prefix, token: iri) unless iri === :IRIREF - debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"} + log_debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"} prefix(pfx.value[0..-2], process_iri(iri)) error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix' if terminated error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.' @@ -449,11 +429,11 @@ def read_blankNodePropertyList token = @lexer.first if token === '[' prod(:blankNodePropertyList, %{]}) do @lexer.shift - progress("blankNodePropertyList") {"token: #{token.inspect}"} + log_info("blankNodePropertyList") {"token: #{token.inspect}"} node = bnode read_predicateObjectList(node) error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']' @lexer.shift node @@ -465,16 +445,16 @@ def read_collection if @lexer.first === '(' prod(:collection, %{)}) do @lexer.shift token = @lexer.first - progress("collection") {"token: #{token.inspect}"} + log_info("collection") {"token: #{token.inspect}"} objects = [] while object = read_object objects << object end - list = RDF::List.new(nil, nil, objects) + list = RDF::List.new(values: objects) list.each_statement do |statement| add_statement("collection", statement) end error("collection", "Expected closing ')'") unless @lexer.first === ')' @lexer.shift @@ -501,13 +481,12 @@ end end def prod(production, recover_to = []) @prod_stack << {prod: production, recover_to: recover_to} - @depth += 1 - @recovering = false - progress("#{production}(start)") {"token: #{@lexer.first.inspect}"} + @options[:log_depth] += 1 + log_recover("#{production}(start)") {"token: #{@lexer.first.inspect}"} yield rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e # Lexer encountered an illegal token or the parser encountered # a terminal which is inappropriate for the current production. # Perform error recovery to find a reasonable terminal based @@ -523,134 +502,60 @@ token: e.token) rescue SyntaxError end end raise EOFError, "End of input found when recovering" if @lexer.first.nil? - debug("recovery", "current token: #{@lexer.first.inspect}", level: 4) + log_debug("recovery", "current token: #{@lexer.first.inspect}") unless e.is_a?(Recovery) # Get the list of follows for this sequence, this production and the stacked productions. - debug("recovery", "stack follows:", level: 4) + log_debug("recovery", "stack follows:") @prod_stack.reverse.each do |prod| - debug("recovery", level: 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"} + log_debug("recovery", level: 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"} end end # Find all follows to the top of the stack follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq # Skip tokens until one is found in follows while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t} skipped = @lexer.shift - progress("recovery") {"skip #{skipped.inspect}"} + log_debug("recovery") {"skip #{skipped.inspect}"} end - debug("recovery") {"found #{token.inspect} in follows"} + log_debug("recovery") {"found #{token.inspect} in follows"} # Re-raise the error unless token is a follows of this production raise Recovery unless Array(recover_to).any? {|t| token === t} # Skip that token to get something reasonable to start the next production with @lexer.shift ensure - progress("#{production}(finish)") - @depth -= 1 + log_info("#{production}(finish)") + @options[:log_depth] -= 1 @prod_stack.pop end ## - # Warning information, used as level `1` debug messages. - # - # @param [String] node Relevant location associated with message - # @param [String] message Error string - # @param [Hash] options - # @option options [URI, #to_s] :production - # @option options [Token] :token - # @see {#debug} - def warn(node, message, options = {}) - m = "WARNING " - m += "[line: #{@lineno}] " if @lineno - m += message - m += " (found #{options[:token].inspect})" if options[:token] - m += ", production = #{options[:production].inspect}" if options[:production] - @warnings << m unless @recovering - debug(node, m, options.merge(level: 1)) - end - - ## # Error information, used as level `0` debug messages. # - # @overload debug(node, message, options) + # @overload error(node, message, options) # @param [String] node Relevant location associated with message # @param [String] message Error string # @param [Hash] options # @option options [URI, #to_s] :production # @option options [Token] :token # @see {#debug} def error(*args) - return if @recovering - options = args.last.is_a?(Hash) ? args.pop : {} + ctx = "" + ctx += "(found #{options[:token].inspect})" if options[:token] + ctx += ", production = #{options[:production].inspect}" if options[:production] lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno)) - message = "#{args.join(': ')}" - m = "ERROR " - m += "[line: #{lineno}] " if lineno - m += message - m += " (found #{options[:token].inspect})" if options[:token] - m += ", production = #{options[:production].inspect}" if options[:production] - @recovering = true - @errors << m - debug(m, options.merge(level: 0)) - raise SyntaxError.new(m, lineno: lineno, token: options[:token], production: options[:production]) - end - - ## - # Progress output when debugging. - # - # The call is ignored, unless `@options[:debug]` is set, in which - # case it records tracing information as indicated. Additionally, - # if `@options[:debug]` is an Integer, the call is aborted if the - # `:level` option is less than than `:level`. - # - # @overload debug(node, message, options) - # @param [Array<String>] args Relevant location associated with message - # @param [Hash] options - # @option options [Integer] :depth - # Recursion depth for indenting output - # @option options [Integer] :level - # Level assigned to message, by convention, level `0` is for - # errors, level `1` is for warnings, level `2` is for parser - # progress information, and anything higher is for various levels - # of debug information. - # - # @yieldparam [:trace] trace - # @yieldparam [Integer] level - # @yieldparam [Integer] lineno - # @yieldparam [Integer] depth Recursive depth of productions - # @yieldparam [Array<String>] args - # @yieldreturn [String] added to message - def debug(*args) - return unless @options[:debug] - options = args.last.is_a?(Hash) ? args.pop : {} - debug_level = options.fetch(:level, 3) - return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug] - - depth = options[:depth] || @depth - args << yield if block_given? - - message = "#{args.join(': ')}" - d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth - str = "[#{lineno}](#{debug_level})#{d_str}#{message}" - case @options[:debug] - when Array - @options[:debug] << str - when TrueClass - $stderr.puts str - when Integer - case debug_level - when 0 then return if @options[:errors] - when 1 then return if @options[:warnings] - end - $stderr.puts(str) if debug_level <= @options[:debug] - end + log_error(*args, ctx, + lineno: lineno, + token: options[:token], + production: options[:production], + exception: SyntaxError) end # Used for internal error recovery class Recovery < StandardError; end