lib/rdf/turtle/reader.rb in rdf-turtle-1.99.0 vs lib/rdf/turtle/reader.rb in rdf-turtle-2.0.0.beta1
- old
+ new
@@ -6,10 +6,11 @@
# A parser for the Turtle 2
class Reader < RDF::Reader
format Format
include EBNF::LL1::Parser
include RDF::Turtle::Terminals
+ include RDF::Util::Logger
# Terminals passed to lexer. Order matters!
terminal(:ANON, ANON)
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL)
terminal(:IRIREF, IRIREF, unescape: true)
@@ -29,20 +30,23 @@
terminal(:PREFIX, PREFIX)
terminal(:BASE, BASE)
terminal(:LANGTAG, LANGTAG)
##
- # Accumulated errors found during processing
- # @return [Array<String>]
- attr_reader :errors
+ # Reader options
+ # @see http://www.rubydoc.info/github/ruby-rdf/rdf/RDF/Reader#options-class_method
+ def self.options
# Review note: the result is the superclass's option list (`super`) with one
# Turtle-specific entry appended.
+ super + [
# Declares the `--freebase` switch under the :freebase symbol, the same key
# that self.new inspects when choosing which reader class to build.
# NOTE(review): the trailing `{true}` block presumably maps the bare flag to
# `true` - confirm against RDF::CLI::Option.
+ RDF::CLI::Option.new(
+ symbol: :freebase,
+ datatype: TrueClass,
+ on: ["--freebase"],
+ description: "Use optimized Freebase reader.") {true},
+ ]
+ end
##
- # Accumulated warnings found during processing
- # @return [Array<String>]
- attr_reader :warnings
-
- ##
# Redirect for Freebase Reader
#
# @private
def self.new(input = nil, options = {}, &block)
klass = if options[:freebase]
@@ -71,50 +75,32 @@
# @option options [#to_s] :anon_base ("b0")
# Basis for generating anonymous Nodes
# @option options [Boolean] :validate (false)
# whether to validate the parsed statements and values. If not validating,
# the parser will attempt to recover from errors.
- # @option options [Array] :errors
- # array for placing errors found when parsing
- # @option options [Array] :warnings
- # array for placing warnings found when parsing
- # @option options [Boolean] :progress
- # Show progress of parser productions
- # @option options [Boolean, Integer, Array] :debug
- # Detailed debug output. If set to an Integer, output is restricted
- # to messages of that priority: `0` for errors, `1` for warnings,
- # `2` for processor tracing, and anything else for various levels
- # of debug. If set to an Array, information is collected in the array
- # instead of being output to `$stderr`.
+ # @option options [Logger, #write, #<<] :logger
+ # Record error/info/debug output
# @option options [Boolean] :freebase (false)
# Use optimized Freebase reader
# @return [RDF::Turtle::Reader]
def initialize(input = nil, options = {}, &block)
super do
@options = {
anon_base: "b0",
validate: false,
whitespace: WS,
+ log_depth: 0,
}.merge(options)
@options = {prefixes: {nil => ""}}.merge(@options) unless @options[:validate]
- @errors = @options[:errors] || []
- @warnings = @options[:warnings] || []
- @depth = 0
@prod_stack = []
- @options[:debug] ||= case
- when RDF::Turtle.debug? then true
- when @options[:progress] then 2
- when @options[:validate] then 1
- end
-
@options[:base_uri] = RDF::URI(base_uri || "")
- debug("base IRI") {base_uri.inspect}
+ log_debug("base IRI") {base_uri.inspect}
- debug("validate") {validate?.inspect}
- debug("canonicalize") {canonicalize?.inspect}
- debug("intern") {intern?.inspect}
+ log_debug("validate") {validate?.inspect}
+ log_debug("canonicalize") {canonicalize?.inspect}
+ log_debug("intern") {intern?.inspect}
@lexer = EBNF::LL1::Lexer.new(input, self.class.patterns, @options)
if block_given?
case block.arity
@@ -135,29 +121,23 @@
# @yield [statement]
# @yieldparam [RDF::Statement] statement
# @return [void]
def each_statement(&block)
# With a block: stores it in @callback and reads statements until the lexer is
# exhausted or a terminating error is rescued. With or without a block, the
# method's value is enum_for(:each_statement).
if block_given?
- @recovering = false
# NOTE(review): takes the place of the old `@recovering = false`; presumably
# resets the logger's recovery state - confirm against RDF::Util::Logger.
+ log_recover
@callback = block
begin
# An exception raised while peeking at the next token is rescued to `true`,
# so read_statement is still called for the offending input.
while (@lexer.first rescue true)
read_statement
end
rescue EBNF::LL1::Lexer::Error, SyntaxError, EOFError, Recovery
# Terminate loop if EOF found while recovering
end
- if validate?
- if !warnings.empty? && !@options[:warnings]
- $stderr.puts "Warnings: #{warnings.join("\n")}"
- end
- if !errors.empty?
- $stderr.puts "Errors: #{errors.join("\n")}" unless @options[:errors]
- raise RDF::ReaderError, "Errors found during processing"
- end
# When validating, raise if the logger statistics hold an :error entry.
# NOTE(review): presumably a count of logged errors - confirm.
+ if validate? && log_statistics[:error]
+ raise RDF::ReaderError, "Errors found during processing"
end
end
enum_for(:each_statement)
end
@@ -205,11 +185,11 @@
error("process_iri", e)
end
# Create a literal
def literal(value, options = {})
- debug("literal") do
+ log_debug("literal") do
"value: #{value.inspect}, " +
"options: #{options.inspect}, " +
"validate: #{validate?.inspect}, " +
"c14n?: #{canonicalize?.inspect}"
end
@@ -239,11 +219,11 @@
elsif !prefix(prefix)
error("undefined prefix", production: :pname, token: prefix)
base = ''
end
suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
- debug("pname") {"base: '#{base}', suffix: '#{suffix}'"}
+ log_debug("pname") {"base: '#{base}', suffix: '#{suffix}'"}
process_iri(base + suffix.to_s)
end
# Keep track of allocated BNodes
def bnode(value = nil)
@@ -260,11 +240,11 @@
case token.type
when :BASE, :PREFIX
read_directive || error("Failed to parse directive", production: :directive, token: token)
else
read_triples || error("Expected token", production: :statement, token: token)
- if !@recovering || @lexer.first === '.'
+ if !log_recovering? || @lexer.first === '.'
# If recovering, we will have eaten the closing '.'
token = @lexer.shift
unless token && token.value == '.'
error("Expected '.' following triple", production: :statement, token: token)
end
@@ -301,11 +281,11 @@
@lexer.shift
pfx, iri = @lexer.shift, @lexer.shift
terminated = token.value == '@prefix'
error("Expected PNAME_NS", production: :prefix, token: pfx) unless pfx === :PNAME_NS
error("Expected IRIREF", production: :prefix, token: iri) unless iri === :IRIREF
- debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
+ log_debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
prefix(pfx.value[0..-2], process_iri(iri))
error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'
if terminated
error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
@@ -449,11 +429,11 @@
def read_blankNodePropertyList
token = @lexer.first
if token === '['
prod(:blankNodePropertyList, %{]}) do
@lexer.shift
- progress("blankNodePropertyList") {"token: #{token.inspect}"}
+ log_info("blankNodePropertyList") {"token: #{token.inspect}"}
node = bnode
read_predicateObjectList(node)
error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
@lexer.shift
node
@@ -465,16 +445,16 @@
def read_collection
if @lexer.first === '('
prod(:collection, %{)}) do
@lexer.shift
token = @lexer.first
- progress("collection") {"token: #{token.inspect}"}
+ log_info("collection") {"token: #{token.inspect}"}
objects = []
while object = read_object
objects << object
end
- list = RDF::List.new(nil, nil, objects)
+ list = RDF::List.new(values: objects)
list.each_statement do |statement|
add_statement("collection", statement)
end
error("collection", "Expected closing ')'") unless @lexer.first === ')'
@lexer.shift
@@ -501,13 +481,12 @@
end
end
def prod(production, recover_to = [])
@prod_stack << {prod: production, recover_to: recover_to}
- @depth += 1
- @recovering = false
- progress("#{production}(start)") {"token: #{@lexer.first.inspect}"}
+ @options[:log_depth] += 1
+ log_recover("#{production}(start)") {"token: #{@lexer.first.inspect}"}
yield
rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
# Lexer encountered an illegal token or the parser encountered
# a terminal which is inappropriate for the current production.
# Perform error recovery to find a reasonable terminal based
@@ -523,134 +502,60 @@
token: e.token)
rescue SyntaxError
end
end
raise EOFError, "End of input found when recovering" if @lexer.first.nil?
- debug("recovery", "current token: #{@lexer.first.inspect}", level: 4)
+ log_debug("recovery", "current token: #{@lexer.first.inspect}")
unless e.is_a?(Recovery)
# Get the list of follows for this sequence, this production and the stacked productions.
- debug("recovery", "stack follows:", level: 4)
+ log_debug("recovery", "stack follows:")
@prod_stack.reverse.each do |prod|
- debug("recovery", level: 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"}
+ log_debug("recovery", level: 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"}
end
end
# Find all follows to the top of the stack
follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq
# Skip tokens until one is found in follows
while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t}
skipped = @lexer.shift
- progress("recovery") {"skip #{skipped.inspect}"}
+ log_debug("recovery") {"skip #{skipped.inspect}"}
end
- debug("recovery") {"found #{token.inspect} in follows"}
+ log_debug("recovery") {"found #{token.inspect} in follows"}
# Re-raise the error unless token is a follows of this production
raise Recovery unless Array(recover_to).any? {|t| token === t}
# Skip that token to get something reasonable to start the next production with
@lexer.shift
ensure
- progress("#{production}(finish)")
- @depth -= 1
+ log_info("#{production}(finish)")
+ @options[:log_depth] -= 1
@prod_stack.pop
end
##
- # Warning information, used as level `1` debug messages.
- #
- # @param [String] node Relevant location associated with message
- # @param [String] message Error string
- # @param [Hash] options
- # @option options [URI, #to_s] :production
- # @option options [Token] :token
- # @see {#debug}
- def warn(node, message, options = {})
- m = "WARNING "
- m += "[line: #{@lineno}] " if @lineno
- m += message
- m += " (found #{options[:token].inspect})" if options[:token]
- m += ", production = #{options[:production].inspect}" if options[:production]
- @warnings << m unless @recovering
- debug(node, m, options.merge(level: 1))
- end
-
- ##
# Error information, used as level `0` debug messages.
#
- # @overload debug(node, message, options)
+ # @overload error(node, message, options)
# @param [String] node Relevant location associated with message
# @param [String] message Error string
# @param [Hash] options
# @option options [URI, #to_s] :production
# @option options [Token] :token
- # @see {#debug}
+ # @see RDF::Util::Logger#log_error
def error(*args)
- return if @recovering
+ # Split the trailing options hash (:production, :token) off the message
+ # parts. Without this local, `options` below is a method call on the reader
+ # (presumably RDF::Reader#options) instead of the hash the caller passed, so
+ # the token and production were ignored and the hash itself was forwarded to
+ # log_error as a message part.
options = args.last.is_a?(Hash) ? args.pop : {}
+ # Context suffix appended to the message: offending token and production.
+ ctx = ""
+ ctx += "(found #{options[:token].inspect})" if options[:token]
+ ctx += ", production = #{options[:production].inspect}" if options[:production]
lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno))
- message = "#{args.join(': ')}"
- m = "ERROR "
- m += "[line: #{lineno}] " if lineno
- m += message
- m += " (found #{options[:token].inspect})" if options[:token]
- m += ", production = #{options[:production].inspect}" if options[:production]
- @recovering = true
- @errors << m
- debug(m, options.merge(level: 0))
- raise SyntaxError.new(m, lineno: lineno, token: options[:token], production: options[:production])
- end
-
- ##
- # Progress output when debugging.
- #
- # The call is ignored, unless `@options[:debug]` is set, in which
- # case it records tracing information as indicated. Additionally,
- # if `@options[:debug]` is an Integer, the call is aborted if the
- # `:level` option is less than than `:level`.
- #
- # @overload debug(node, message, options)
- # @param [Array<String>] args Relevant location associated with message
- # @param [Hash] options
- # @option options [Integer] :depth
- # Recursion depth for indenting output
- # @option options [Integer] :level
- # Level assigned to message, by convention, level `0` is for
- # errors, level `1` is for warnings, level `2` is for parser
- # progress information, and anything higher is for various levels
- # of debug information.
- #
- # @yieldparam [:trace] trace
- # @yieldparam [Integer] level
- # @yieldparam [Integer] lineno
- # @yieldparam [Integer] depth Recursive depth of productions
- # @yieldparam [Array<String>] args
- # @yieldreturn [String] added to message
- def debug(*args)
- return unless @options[:debug]
- options = args.last.is_a?(Hash) ? args.pop : {}
- debug_level = options.fetch(:level, 3)
- return if @options[:debug].is_a?(Integer) && debug_level > @options[:debug]
-
- depth = options[:depth] || @depth
- args << yield if block_given?
-
- message = "#{args.join(': ')}"
- d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
- str = "[#{lineno}](#{debug_level})#{d_str}#{message}"
- case @options[:debug]
- when Array
- @options[:debug] << str
- when TrueClass
- $stderr.puts str
- when Integer
- case debug_level
- when 0 then return if @options[:errors]
- when 1 then return if @options[:warnings]
- end
- $stderr.puts(str) if debug_level <= @options[:debug]
- end
+ # Delegate recording to the logger mixin. The `exception:` option takes the
+ # place of the explicit `raise SyntaxError` in the removed code.
+ # NOTE(review): confirm log_error raises it.
+ log_error(*args, ctx,
+ lineno: lineno,
+ token: options[:token],
+ production: options[:production],
+ exception: SyntaxError)
end
# Used for internal error recovery.
# Raised by #prod when the token reached while skipping input is not one of
# that production's own recover_to tokens; it is rescued by #prod itself and
# by #each_statement.
class Recovery < StandardError; end