lib/rdf/turtle/reader.rb in rdf-turtle-1.0.3 vs lib/rdf/turtle/reader.rb in rdf-turtle-1.0.4
- old
+ new
@@ -16,15 +16,11 @@
end
# Terminal handler for blank node labels: the node built by `bnode` from the
# label text is stored under input[:resource]. The label is the token text
# minus its first two characters (presumably the "_:" prefix -- confirm
# against the BLANK_NODE_LABEL pattern).
terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |_prod, tok, result|
  label = tok.value[2..-1]
  result[:resource] = self.bnode(label)
end
terminal(:IRIREF, IRIREF, :unescape => true) do |prod, token, input|
# Stores the value returned by process_iri under input[:resource]. The slice
# [1..-2] drops the first and last character of the token text (presumably
# the enclosing angle brackets -- confirm against the IRIREF pattern).
# Removed side (1.0.3): an ArgumentError was re-raised as RDF::ReaderError.
- begin
- input[:resource] = process_iri(token.value[1..-2])
- rescue ArgumentError => e
- raise RDF::ReaderError, e.message
- end
# Added side (1.0.4): no rescue in this handler, so whatever process_iri
# raises leaves this block unchanged.
+ input[:resource] = process_iri(token.value[1..-2])
end
terminal(:DOUBLE, DOUBLE) do |prod, token, input|
# Note that a Turtle Double may have a '.' directly followed by the exponent
# marker (e.g. `1.e5`), so insert a zero after the decimal point if necessary
value = token.value.sub(/\.([eE])/, '.0\1')
@@ -203,10 +199,25 @@
opts[:language] = current[:lang] if current[:lang]
input[:resource] = literal(current[:string_value], opts)
end
##
+ # Redirect for Freebase Reader
+ #
+ # @private
+ def self.new(input = nil, options = {}, &block) # factory: chooses the class to instantiate from options
+ klass = if options[:freebase] # a truthy :freebase option selects FreebaseReader
+ FreebaseReader
+ else
+ self # otherwise the receiver class itself is instantiated
+ end
+ reader = klass.allocate # allocate + explicit initialize instead of klass.new; NOTE(review): presumably avoids re-entering this override -- confirm
+ reader.send(:initialize, input, options, &block) # initialize is invoked through send with the caller's arguments and block
+ reader # the initialized instance is the return value
+ end
+
+ ##
# Initializes a new reader instance.
#
# Note, the spec does not define a default mapping for the empty prefix,
# but it is so commonly used in examples that we define it to be the
# empty string anyway, except when validating.
@@ -226,22 +237,33 @@
# @option options [Boolean] :validate (false)
# whether to validate the parsed statements and values. If not validating,
# the parser will attempt to recover from errors.
# @option options [Boolean] :progress
# Show progress of parser productions
- # @option options [Boolean] :debug
- # Detailed debug output
+ # @option options [Boolean, Integer, Array] :debug
+ # Detailed debug output. If set to an Integer, output is restricted
+ # to messages whose level is less than or equal to that value: `0` for
+ # errors, `1` for warnings, `2` for processor tracing, and anything else
+ # for various levels of debug. If set to an Array, messages are collected
+ # in the array instead of being written to `$stderr`.
+ # @option options [Boolean] :freebase (false)
+ # Use optimized Freebase reader
# @return [RDF::Turtle::Reader]
def initialize(input = nil, options = {}, &block)
super do
@options = {
:anon_base => "b0",
:validate => false,
- :debug => RDF::Turtle.debug?,
}.merge(options)
@options = {:prefixes => {nil => ""}}.merge(@options) unless @options[:validate]
+ @options[:debug] ||= case
+ when RDF::Turtle.debug? then true
+ when @options[:progress] then 2
+ when @options[:validate] then 1
+ end
+ @options[:base_uri] = RDF::URI(base_uri || "")
debug("base IRI") {base_uri.inspect}
debug("validate") {validate?.inspect}
debug("canonicalize") {canonicalize?.inspect}
debug("intern") {intern?.inspect}
@@ -271,17 +293,31 @@
parse(@input, START.to_sym, @options.merge(:branch => BRANCH,
:first => FIRST,
:follow => FOLLOW,
:reset_on_start => true)
) do |context, *data|
- loc = data.shift
case context
when :statement
- add_statement(loc, RDF::Statement.from(data))
+ loc = data.shift
+ s = RDF::Statement.from(data, :lineno => lineno)
+ add_statement(loc, s) unless !s.valid? && validate?
+ when :trace
+ level, lineno, depth, *args = data
+ message = "#{args.join(': ')}"
+ d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth
+ str = "[#{lineno}](#{level})#{d_str}#{message}"
+ case @options[:debug]
+ when Array
+ @options[:debug] << str
+ when TrueClass
+ $stderr.puts str
+ when Integer
+ $stderr.puts(str) if level <= @options[:debug]
+ end
end
end
- rescue ArgumentError, EBNF::LL1::Parser::Error => e
+ rescue EBNF::LL1::Parser::Error => e
progress("Parsing completed with errors:\n\t#{e.message}")
raise RDF::ReaderError, e.message if validate?
end
##
@@ -303,28 +339,26 @@
# @param [Nokogiri::XML::Node, any] node XML Node or string for showing context
# @param [RDF::Statement] statement the statement to add
# @return [RDF::Statement] Added statement
# @raise [RDF::ReaderError] if the statement is invalid and the parsing mode is _validate_.
def add_statement(node, statement)
# Reports an invalid statement through `error`, logs the statement through
# `progress`, then passes it to @callback.
# Removed side (1.0.3): `error` was called for every invalid statement.
# Added side (1.0.4): `error` is only called when validate? is true.
# NOTE(review): `statement.inspect.inspect` inspects the already-inspected
# value a second time; this looks unintentional -- confirm.
- error(node, "Statement is invalid: #{statement.inspect.inspect}") unless statement.valid?
+ error(node, "Statement is invalid: #{statement.inspect.inspect}") if validate? && statement.invalid?
progress(node) {"generate statement: #{statement.to_ntriples}"}
# Removed side (1.0.3): the callback was always invoked.
- @callback.call(statement)
# Added side (1.0.4): the callback is invoked only when subject, predicate and
# object are all truthy and, when validate? is true, the statement is valid.
# The progress line above is emitted even if the callback is then skipped.
+ @callback.call(statement) if statement.subject &&
+ statement.predicate &&
+ statement.object &&
+ (validate? ? statement.valid? : true)
end
+ # Process a URI against base
def process_iri(iri)
# Removed side (1.0.3): process_iri delegated to the `iri` helper, which built
# an RDF::URI from its first argument, joined the second argument onto it when
# one was given, and called validate! only if the value responded to :validate.
- iri(base_uri, iri)
- end
-
- # Create IRIs
- def iri(value, append = nil)
- value = RDF::URI.new(value)
- value = value.join(append) if append
- value.validate! if validate? && value.respond_to?(:validate)
# Added side (1.0.4): joins `iri` directly onto base_uri, so base_uri must
# respond to `join` here (NOTE(review): assumes it is never nil -- confirm),
# and validate! is called whenever validate? is true, with no respond_to? guard.
+ value = base_uri.join(iri)
+ value.validate! if validate?
# Shared tail: canonicalize in place when canonicalize? is true, replace the
# value with RDF::URI.intern(value) when intern? is true, then return it.
value.canonicalize! if canonicalize?
value = RDF::URI.intern(value) if intern?
value
end
-
+
# Create a literal
def literal(value, options = {})
options = options.dup
# Internal representation is to not use xsd:string, although it could arguably go the other way.
options.delete(:datatype) if options[:datatype] == RDF::XSD.string