lib/rdf/turtle/reader.rb in rdf-turtle-1.0.3 vs lib/rdf/turtle/reader.rb in rdf-turtle-1.0.4

- old
+ new

@@ -16,15 +16,11 @@ end terminal(:BLANK_NODE_LABEL, BLANK_NODE_LABEL) do |prod, token, input| input[:resource] = self.bnode(token.value[2..-1]) end terminal(:IRIREF, IRIREF, :unescape => true) do |prod, token, input| - begin - input[:resource] = process_iri(token.value[1..-2]) - rescue ArgumentError => e - raise RDF::ReaderError, e.message - end + input[:resource] = process_iri(token.value[1..-2]) end terminal(:DOUBLE, DOUBLE) do |prod, token, input| # Note that a Turtle Double may begin with a '.[eE]', so tack on a leading # zero if necessary value = token.value.sub(/\.([eE])/, '.0\1') @@ -203,10 +199,25 @@ opts[:language] = current[:lang] if current[:lang] input[:resource] = literal(current[:string_value], opts) end ## + # Redirect for Freebase Reader + # + # @private + def self.new(input = nil, options = {}, &block) + klass = if options[:freebase] + FreebaseReader + else + self + end + reader = klass.allocate + reader.send(:initialize, input, options, &block) + reader + end + + ## # Initializes a new reader instance. # # Note, the spec does not define a default mapping for the empty prefix, # but it is so commonly used in examples that we define it to be the # empty string anyway, except when validating. @@ -226,22 +237,33 @@ # @option options [Boolean] :validate (false) # whether to validate the parsed statements and values. If not validating, # the parser will attempt to recover from errors. # @option options [Boolean] :progress # Show progress of parser productions - # @option options [Boolean] :debug - # Detailed debug output + # @option options [Boolean, Integer, Array] :debug + # Detailed debug output. If set to an Integer, output is restricted + # to messages of that priority: `0` for errors, `1` for warnings, + # `2` for processor tracing, and anything else for various levels + # of debug. If set to an Array, information is collected in the array + # instead of being output to `$stderr`. + # @option options [Boolean] :freebase (false) + # Use optimized Freebase reader # @return [RDF::Turtle::Reader] def initialize(input = nil, options = {}, &block) super do @options = { :anon_base => "b0", :validate => false, - :debug => RDF::Turtle.debug?, }.merge(options) @options = {:prefixes => {nil => ""}}.merge(@options) unless @options[:validate] + @options[:debug] ||= case + when RDF::Turtle.debug? then true + when @options[:progress] then 2 + when @options[:validate] then 1 + end + @options[:base_uri] = RDF::URI(base_uri || "") debug("base IRI") {base_uri.inspect} debug("validate") {validate?.inspect} debug("canonicalize") {canonicalize?.inspect} debug("intern") {intern?.inspect} @@ -271,17 +293,31 @@ parse(@input, START.to_sym, @options.merge(:branch => BRANCH, :first => FIRST, :follow => FOLLOW, :reset_on_start => true) ) do |context, *data| - loc = data.shift case context when :statement - add_statement(loc, RDF::Statement.from(data)) + loc = data.shift + s = RDF::Statement.from(data, :lineno => lineno) + add_statement(loc, s) unless !s.valid? && validate? + when :trace + level, lineno, depth, *args = data + message = "#{args.join(': ')}" + d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth + str = "[#{lineno}](#{level})#{d_str}#{message}" + case @options[:debug] + when Array + @options[:debug] << str + when TrueClass + $stderr.puts str + when Integer + $stderr.puts(str) if level <= @options[:debug] + end end end - rescue ArgumentError, EBNF::LL1::Parser::Error => e + rescue EBNF::LL1::Parser::Error => e progress("Parsing completed with errors:\n\t#{e.message}") raise RDF::ReaderError, e.message if validate? end ## @@ -303,28 +339,26 @@ # @param [Nokogiri::XML::Node, any] node XML Node or string for showing context # @param [RDF::Statement] statement the subject of the statement # @return [RDF::Statement] Added statement # @raise [RDF::ReaderError] Checks parameter types and raises if they are incorrect if parsing mode is _validate_. def add_statement(node, statement) - error(node, "Statement is invalid: #{statement.inspect.inspect}") unless statement.valid? + error(node, "Statement is invalid: #{statement.inspect.inspect}") if validate? && statement.invalid? progress(node) {"generate statement: #{statement.to_ntriples}"} - @callback.call(statement) + @callback.call(statement) if statement.subject && + statement.predicate && + statement.object && + (validate? ? statement.valid? : true) end + # Process a URI against base def process_iri(iri) - iri(base_uri, iri) - end - - # Create IRIs - def iri(value, append = nil) - value = RDF::URI.new(value) - value = value.join(append) if append - value.validate! if validate? && value.respond_to?(:validate) + value = base_uri.join(iri) + value.validate! if validate? value.canonicalize! if canonicalize? value = RDF::URI.intern(value) if intern? value end - + # Create a literal def literal(value, options = {}) options = options.dup # Internal representation is to not use xsd:string, although it could arguably go the other way. options.delete(:datatype) if options[:datatype] == RDF::XSD.string