reader.rb in rdf-trig-1.0.0

- old
+ new

@@ -8,38 +8,219 @@
   # Leverages the Turtle reader
   class Reader < RDF::Turtle::Reader
     format Format
     include RDF::TriG::Meta
 
+    # Terminals passed to lexer. Order matters!
+    terminal(:ANON,                 ANON) do |prod, token, input|
+      input[:resource] = self.bnode # bnode called without a label
+    end
+    terminal(:BLANK_NODE_LABEL,     BLANK_NODE_LABEL) do |prod, token, input|
+      input[:resource] = self.bnode(token.value[2..-1]) # drop the first two characters (presumably the "_:" marker)
+    end
+    terminal(:IRIREF,               IRIREF, :unescape => true) do |prod, token, input|
+      input[:resource] = process_iri(token.value[1..-2]) # drop the first and last character (presumably "<" and ">")
+    end
+    terminal(:DOUBLE,               DOUBLE) do |prod, token, input|
+      # Note that a Turtle Double may have its '.' directly followed by the exponent
+      # ('.[eE]'), so insert a zero after the '.' if necessary (only the first match is changed)
+      value = token.value.sub(/\.([eE])/, '.0\1')
+      input[:resource] = literal(value, :datatype => RDF::XSD.double)
+    end
+    terminal(:DECIMAL,              DECIMAL) do |prod, token, input|
+      # Note that a Turtle Decimal may begin with a '.', so tack on a leading
+      # zero if necessary (only a '.' in the very first position is handled)
+      value = token.value
+      value = "0#{token.value}" if token.value[0,1] == "."
+      input[:resource] = literal(value, :datatype => RDF::XSD.decimal)
+    end
+    terminal(:INTEGER,              INTEGER) do |prod, token, input|
+      input[:resource] = literal(token.value, :datatype => RDF::XSD.integer)
+    end
+    # Spec confusion: the spec says "Literals, prefixed names and IRIs may also contain escape sequences"
+    terminal(:PNAME_LN,             PNAME_LN, :unescape => true) do |prod, token, input|
+      prefix, suffix = token.value.split(":", 2) # split on the first ":" only; later colons stay in the suffix
+      input[:resource] = pname(prefix, suffix)
+    end
+    # Spec confusion: the spec says "Literals, prefixed names and IRIs may also contain escape sequences"
+    terminal(:PNAME_NS,             PNAME_NS) do |prod, token, input|
+      prefix = token.value[0..-2] # drop the final character (presumably the trailing ":")
+
+      # Two contexts, one when prefix is being defined, the other when being used
+      case prod
+      when :prefixID, :sparqlPrefix
+        input[:prefix] = prefix
+      else
+        input[:resource] = pname(prefix, '')
+      end
+    end
+    terminal(:STRING_LITERAL_LONG_SINGLE_QUOTE, STRING_LITERAL_LONG_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
+      input[:string_value] = token.value[3..-4] # drop three characters from each end (presumably the long-quote delimiters)
+    end
+    terminal(:STRING_LITERAL_LONG_QUOTE, STRING_LITERAL_LONG_QUOTE, :unescape => true) do |prod, token, input|
+      input[:string_value] = token.value[3..-4] # drop three characters from each end (presumably the long-quote delimiters)
+    end
+    terminal(:STRING_LITERAL_QUOTE,      STRING_LITERAL_QUOTE, :unescape => true) do |prod, token, input|
+      input[:string_value] = token.value[1..-2] # drop one character from each end (presumably the quote marks)
+    end
+    terminal(:STRING_LITERAL_SINGLE_QUOTE,      STRING_LITERAL_SINGLE_QUOTE, :unescape => true) do |prod, token, input|
+      input[:string_value] = token.value[1..-2] # drop one character from each end (presumably the quote marks)
+    end
+
     # String terminals: punctuation and keywords matched by an unnamed (nil) terminal
     terminal(nil,                  %r([\{\}\(\),.;\[\]a]|\^\^|@base|@prefix|true|false)) do |prod, token, input|
       case token.value
       when 'a'                then input[:resource] = RDF.type
       when 'true', 'false'    then input[:resource] = RDF::Literal::Boolean.new(token.value)
       when '@base', '@prefix' then input[:lang] = token.value[1..-1] # stores "base"/"prefix" (leading "@" dropped) under :lang
       else                         input[:string] = token.value
       end
     end
 
+    terminal(:LANGTAG,              LANGTAG) do |prod, token, input|
+      input[:lang] = token.value[1..-1] # drop the first character (presumably the leading "@")
+    end
+
+    terminal(:SPARQL_PREFIX,      SPARQL_PREFIX) do |prod, token, input|
+      input[:string_value] = token.value.downcase # store the matched keyword in lower case
+    end
+    terminal(:SPARQL_BASE,      SPARQL_BASE) do |prod, token, input|
+      input[:string_value] = token.value.downcase # store the matched keyword in lower case
+    end
+
     # Productions
     # [3g] graph reports a nil context from both its start handler and its completion handler
     start_production(:graph) do |input, current, callback|
       callback.call(:context, "graph", nil) # each_statement receives this as a :context event
     end
     production(:graph) do |input, current, callback|
       callback.call(:context, "graph", nil) # same nil-context event as the start handler
     end
-    
+
     # [4g] graphIri
     # Normally, just returns the IRIref, but if called from [3g], also
     # sets the context for triples defined within that graph
     production(:graphIri) do |input, current, callback|
       # NOTE(review): no set_graph_iri check is visible here; the resource is always reported as the context
       debug("graphIri") {"Set graph context to #{current[:resource]}"}
       callback.call(:context, "graphIri", current[:resource])
     end
-    
+
+
+    # Productions
+    # [4] prefixID defines a prefix mapping from current[:prefix] to current[:resource]
+    production(:prefixID) do |input, current, callback|
+      prefix = current[:prefix]
+      iri = current[:resource]
+      debug("prefixID") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
+      prefix(prefix, iri) # method call: the argument list makes Ruby dispatch to #prefix, not read the local
+    end
+
+    # [5] base sets options[:base_uri] to the IRI held in current[:resource]
+    production(:base) do |input, current, callback|
+      iri = current[:resource]
+      debug("base") {"Defined base as #{iri}"}
+      options[:base_uri] = iri
+    end
+
+    # [28s] sparqlPrefix ::= [Pp][Rr][Ee][Ff][Ii][Xx] PNAME_NS IRIREF
+    production(:sparqlPrefix) do |input, current, callback|
+      prefix = current[:prefix]
+      iri = current[:resource]
+      debug("sparqlPrefix") {"Defined prefix #{prefix.inspect} mapping to #{iri.inspect}"}
+      prefix(prefix, iri) # method call: the argument list makes Ruby dispatch to #prefix, not read the local
+    end
+
+    # [29s] sparqlBase ::= [Bb][Aa][Ss][Ee] IRIREF
+    production(:sparqlBase) do |input, current, callback|
+      iri = current[:resource]
+      debug("base") {"Defined base as #{iri}"} # NOTE(review): debug label is "base", same as the [5] base production
+      options[:base_uri] = iri
+    end
+
+    # [6] triples
+    start_production(:triples) do |input, current, callback|
+      # Mark the production as triples so that blankNodePropertyList
+      # sets :subject instead of :resource
+      current[:triples] = true
+    end
+    production(:triples) do |input, current, callback|
+      # Completion handler sets the same :triples marker again (the flag
+      # blankNodePropertyList checks to set :subject instead of :resource)
+      current[:triples] = true
+    end
+
+    # [9] verb ::= predicate | "a"
+    production(:verb) do |input, current, callback|
+      input[:predicate] = current[:resource] # the verb's resource becomes :predicate in the input data
+    end
+
+    # [10] subject ::= IRIref | BlankNode | collection
+    start_production(:subject) do |input, current, callback|
+      current[:triples] = nil # clear the :triples marker in this production's own data
+    end
+
+    production(:subject) do |input, current, callback|
+      input[:subject] = current[:resource] # the subject's resource becomes :subject in the input data
+    end
+
+    # [12] object ::= iri | BlankNode | collection | blankNodePropertyList | literal
+    production(:object) do |input, current, callback|
+      if input[:object_list]
+        # Part of an rdf:List collection: collect the object instead of emitting a statement
+        input[:object_list] << current[:resource]
+      else
+        debug("object") {"current: #{current.inspect}"}
+        callback.call(:statement, "object", input[:subject], input[:predicate], current[:resource])
+      end
+    end
+
+    # [14] blankNodePropertyList ::= "[" predicateObjectList "]"
+    start_production(:blankNodePropertyList) do |input, current, callback|
+      current[:subject] = self.bnode # bnode called without a label becomes this list's subject
+    end
+
+    production(:blankNodePropertyList) do |input, current, callback|
+      if input[:triples] # marker set by the triples production handlers
+        input[:subject] = current[:subject]
+      else
+        input[:resource] = current[:subject]
+      end
+    end
+
+    # [15] collection ::= "(" object* ")"
+    start_production(:collection) do |input, current, callback|
+      # Tells the object production to collect and not generate statements
+      current[:object_list] = []
+    end
+
+    production(:collection) do |input, current, callback|
+      # Create an RDF list
+      bnode = self.bnode
+      objects = current[:object_list]
+      list = RDF::List.new(bnode, nil, objects)
+      list.each_statement do |statement|
+        # Spec confusion: the referenced section "Collection" is missing from the spec.
+        # Anecdotal evidence indicates that some expect each node to be of type rdf:List,
+        # but existing Notation3 and Turtle tests (http://www.w3.org/2001/sw/DataAccess/df1/tests/manifest.ttl) do not.
+        next if statement.predicate == RDF.type && statement.object == RDF.List
+        callback.call(:statement, "collection", statement.subject, statement.predicate, statement.object)
+      end
+      bnode = RDF.nil if list.empty? # an empty collection is returned as RDF.nil instead of the blank node
+
+      # Return bnode as resource
+      input[:resource] = bnode
+    end
+
+    # [16] RDFLiteral ::= String ( LanguageTag | ( "^^" IRIref ) )?
+    production(:RDFLiteral) do |input, current, callback|
+      opts = {}
+      opts[:datatype] = current[:resource] if current[:resource] # a resource in this production's data is used as the datatype
+      opts[:language] = current[:lang] if current[:lang]
+      input[:resource] = literal(current[:string_value], opts)
+    end
+
     ##
     # Iterates the given block for each RDF statement in the input.
     #
     # @yield  [statement]
     # @yieldparam [RDF::Statement] statement
@@ -47,28 +228,40 @@
     def each_statement(&block)
       @callback = block
 
       parse(@input, START.to_sym, @options.merge(:branch => BRANCH,
                                                  :first => FIRST,
-                                                 :follow => FOLLOW)
+                                                 :follow => FOLLOW,
+                                                 :reset_on_start => true)
       ) do |context, *data|
-        loc = data.shift
         case context
         when :context
-          @context = data[0]
+          @context = data[1] # data is [production name, context value], as passed to callback.call
         when :statement
           data << @context if @context
           debug("each_statement") {"data: #{data.inspect}, context: #{@context.inspect}"}
+          loc = data.shift # leading element is the production name given to callback.call
           add_statement(loc, RDF::Statement.from(data))
         when :trace
-          debug(loc, *data)
+          level, lineno, depth, *args = data
+          message = "#{args.join(': ')}"
+          d_str = depth > 100 ? ' ' * 100 + '+' : ' ' * depth # cap the indent at 100 spaces, marking deeper levels with "+"
+          str = "[#{lineno}](#{level})#{d_str}#{message}"
+          case @options[:debug] # Array collects messages, true prints all, Integer prints when level <= that value
+          when Array
+            @options[:debug] << str
+          when TrueClass
+            $stderr.puts str
+          when Integer
+            $stderr.puts(str) if level <= @options[:debug]
+          end
         end
       end
     rescue EBNF::LL1::Parser::Error => e
       debug("Parsing completed with errors:\n\t#{e.message}")
       raise RDF::ReaderError, e.message if validate?
     end
-    
+
     ##
     # Iterates the given block for each RDF quad in the input.
     #
     # @yield  [subject, predicate, object, context]
     # @yieldparam [RDF::Resource] subject