lib/rdf/turtle/reader.rb in rdf-turtle-3.1.0 vs lib/rdf/turtle/reader.rb in rdf-turtle-3.1.1

- old
+ new

@@ -23,11 +23,11 @@
     terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, unescape: true, partial_regexp: /^"""/)
     terminal(:STRING_LITERAL_QUOTE, STRING_LITERAL_QUOTE, unescape: true)
     terminal(:STRING_LITERAL_SINGLE_QUOTE, STRING_LITERAL_SINGLE_QUOTE, unescape: true)

     # String terminals
-    terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false))
+    terminal(nil, %r([\(\),.;\[\]Aa]|\^\^|true|false|<<|>>))

     terminal(:PREFIX, PREFIX)
     terminal(:BASE, BASE)
     terminal(:LANGTAG, LANGTAG)

@@ -85,11 +85,11 @@
     def initialize(input = nil, **options, &block)
       super do
         @options = {
           anon_base: "b0",
           whitespace: WS,
-          log_depth: 0,
+          depth: 0,
         }.merge(@options)
         @prod_stack = []

         @options[:base_uri] = RDF::URI(base_uri || "")
         log_debug("base IRI") {base_uri.inspect}

@@ -183,11 +183,11 @@
       error("process_iri", e)
     end

     # Create a literal
     def literal(value, **options)
-      log_debug("literal") do
+      log_debug("literal", depth: @options[:depth]) do
         "value: #{value.inspect}, " +
         "options: #{options.inspect}, " +
         "validate: #{validate?.inspect}, " +
         "c14n?: #{canonicalize?.inspect}"
       end

@@ -219,11 +219,11 @@
       else
         error("undefined prefix", production: :pname, token: prefix)
         ''
       end
       suffix = suffix.to_s.sub(/^\#/, "") if base.index("#")
-      log_debug("pname") {"base: '#{base}', suffix: '#{suffix}'"}
+      log_debug("pname", depth: options[:depth]) {"base: '#{base}', suffix: '#{suffix}'"}
       process_iri(base + suffix.to_s)
     end

     # Keep track of allocated BNodes
     def bnode(value = nil)

@@ -281,11 +281,11 @@
       @lexer.shift
       pfx, iri = @lexer.shift, @lexer.shift
       terminated = token.value == '@prefix'
       error("Expected PNAME_NS", production: :prefix, token: pfx) unless pfx === :PNAME_NS
       error("Expected IRIREF", production: :prefix, token: iri) unless iri === :IRIREF
-      log_debug("prefixID") {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
+      log_debug("prefixID", depth: options[:depth]) {"Defined prefix #{pfx.inspect} mapping to #{iri.inspect}"}
       prefix(pfx.value[0..-2], process_iri(iri))
       error("prefixId", "#{token} should be downcased") if token.value.start_with?('@') && token.value != '@prefix'

       if terminated
         error("prefixID", "Expected #{token} to be terminated") unless @lexer.first === '.'
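The new `<<`/`>>` alternatives in the untyped terminal above let the lexer emit the RDF* embedded-triple delimiters as single tokens. A minimal sketch of the Turtle input this makes lexable, held in a Ruby heredoc (the prefix and IRIs are illustrative, not from the gem):

```ruby
# Hypothetical RDF* Turtle input: '<<' and '>>' wrap an embedded triple,
# which is then used as the subject of an ordinary triple.
ttl = <<~TURTLE
  @prefix : <http://example.org/> .
  << :alice :knows :bob >> :statedBy :carol .
TURTLE
```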
@@ -360,10 +360,11 @@
     def read_subject
       prod(:subject) do
         read_iri ||
         read_BlankNode ||
         read_collection ||
+        read_rdfstar ||
         error( "Expected subject", production: :subject, token: @lexer.first)
       end
     end

     # @return [void]

@@ -371,18 +372,41 @@
       prod(:object) do
         if object = read_iri ||
           read_BlankNode ||
           read_collection ||
           read_blankNodePropertyList ||
-          read_literal
+          read_literal ||
+          read_rdfstar

           add_statement(:object, RDF::Statement(subject, predicate, object)) if subject && predicate
           object
         end
       end
     end

+    # Read an RDF* reified statement
+    # @return [RDF::Statement]
+    def read_rdfstar
+      return unless @options[:rdfstar]
+      if @lexer.first.value == '<<'
+        prod(:rdfstar) do
+          @lexer.shift # eat <<
+          subject = read_subject || error("Failed to parse subject", production: :rdfstar, token: @lexer.first)
+          predicate = read_verb || error("Failed to parse predicate", production: :rdfstar, token: @lexer.first)
+          object = read_object || error("Failed to parse object", production: :rdfstar, token: @lexer.first)
+          unless @lexer.first.value == '>>'
+            error("Failed to find end of embedded triple", production: :rdfstar, token: @lexer.first)
+          end
+          @lexer.shift
+          statement = RDF::Statement(subject, predicate, object)
+          # Emit the statement if in Property Graph mode
+          add_statement(:rdfstar, statement) if @options[:rdfstar] == :PG
+          statement
+        end
+      end
+    end
+
     # @return [RDF::Literal]
     def read_literal
       error("Unexpected end of file", production: :literal) unless token = @lexer.first
       case token.type || token.value
       when :INTEGER then prod(:literal) {literal(@lexer.shift.value, datatype: RDF::XSD.integer)}

@@ -429,11 +453,11 @@
     def read_blankNodePropertyList
       token = @lexer.first
       if token === '['
         prod(:blankNodePropertyList, %{]}) do
           @lexer.shift
-          log_info("blankNodePropertyList") {"token: #{token.inspect}"}
+          log_info("blankNodePropertyList", depth: options[:depth]) {"token: #{token.inspect}"}
           node = bnode
           read_predicateObjectList(node)
           error("blankNodePropertyList", "Expected closing ']'") unless @lexer.first === ']'
           @lexer.shift
           node

@@ -445,11 +469,11 @@
     def read_collection
       if @lexer.first === '('
         prod(:collection, %{)}) do
           @lexer.shift
           token = @lexer.first
-          log_info("collection") {"token: #{token.inspect}"}
+          log_info("collection", depth: options[:depth]) {"token: #{token.inspect}"}
           objects = []
           while object = read_object
             objects << object
           end
           list = RDF::List.new(values: objects)

@@ -481,12 +505,12 @@
       end
     end

     def prod(production, recover_to = [])
       @prod_stack << {prod: production, recover_to: recover_to}
-      @options[:log_depth] += 1
-      log_recover("#{production}(start)") {"token: #{@lexer.first.inspect}"}
+      @options[:depth] += 1
+      log_recover("#{production}(start)", depth: options[:depth]) {"token: #{@lexer.first.inspect}"}
       yield
     rescue EBNF::LL1::Lexer::Error, SyntaxError, Recovery => e
       # Lexer encountered an illegal token or the parser encountered
       # a terminal which is inappropriate for the current production.
       # Perform error recovery to find a reasonable terminal based

@@ -502,38 +526,38 @@
               token: e.token)
           rescue SyntaxError
           end
         end
         raise EOFError, "End of input found when recovering" if @lexer.first.nil?
-        log_debug("recovery", "current token: #{@lexer.first.inspect}")
+        log_debug("recovery", "current token: #{@lexer.first.inspect}", depth: options[:depth])

         unless e.is_a?(Recovery)
           # Get the list of follows for this sequence, this production and the stacked productions.
- log_debug("recovery", "stack follows:") + log_debug("recovery", "stack follows:", depth: options[:depth]) @prod_stack.reverse.each do |prod| - log_debug("recovery", level: 4) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"} + log_debug("recovery", level: 4, depth: options[:depth]) {" #{prod[:prod]}: #{prod[:recover_to].inspect}"} end end # Find all follows to the top of the stack follows = @prod_stack.map {|prod| Array(prod[:recover_to])}.flatten.compact.uniq # Skip tokens until one is found in follows while (token = (@lexer.first rescue @lexer.recover)) && follows.none? {|t| token === t} skipped = @lexer.shift - log_debug("recovery") {"skip #{skipped.inspect}"} + log_debug("recovery", depth: options[:depth]) {"skip #{skipped.inspect}"} end - log_debug("recovery") {"found #{token.inspect} in follows"} + log_debug("recovery", depth: options[:depth]) {"found #{token.inspect} in follows"} # Re-raise the error unless token is a follows of this production raise Recovery unless Array(recover_to).any? {|t| token === t} # Skip that token to get something reasonable to start the next production with @lexer.shift ensure - log_info("#{production}(finish)") - @options[:log_depth] -= 1 + log_info("#{production}(finish)", depth: options[:depth]) + @options[:depth] -= 1 @prod_stack.pop end ## # Error information, used as level `0` debug messages. @@ -552,10 +576,11 @@ lineno = @lineno || (options[:token].lineno if options[:token].respond_to?(:lineno)) || @lexer.lineno log_error(*args, ctx, lineno: lineno, token: options[:token], production: options[:production], - exception: SyntaxError) + depth: options[:depth], + exception: SyntaxError,) end # Used for internal error recovery class Recovery < StandardError; end