require 'rdf' # @see http://rubygems.org/gems/rdf
require 'sparql/algebra'
require 'json'
require 'sxp'
module SPARQL
##
# A SPARQL grammar for RDF.rb.
#
# ## Representation
# The parser natively generates SPARQL S-Expressions (SSE),
# a hierarchy of `SPARQL::Algebra::Operator` instances
# which can be executed against a queryable object, such as a Repository identically
# to `RDF::Query`.
#
# Other elements within the hierarchy
# are generated using RDF objects, such as `RDF::URI`, `RDF::Node`, `RDF::Literal`, and `RDF::Query`.
#
# See {SPARQL::Grammar::Parser} for a full listing
# of algebra operations and RDF objects generated by the parser.
#
# The native SSE representation may be serialized to a textual representation of SSE as
# serialized general S-Expressions (SXP).
# The SXP generated closely follows that of [OpenJena ARQ](http://openjena.org/wiki/SSE), which is intended principally for
# running the SPARQL rules. Additionally, SSE is generated for CONSTRUCT, ASK, DESCRIBE and FROM operators.
#
# SXP is generated by serializing the parser result as follows:
#
# sse = SPARQL::Grammar.parse("SELECT * WHERE { ?s ?p ?o }")
# sxp = sse.to_sxp
#
# The following examples illustrate SPARQL transformations:
#
# SPARQL:
#
# SELECT * WHERE { ?a ?b ?c }
#
# SSE:
#
# RDF::Query.new {
# pattern [RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b"), RDF::Query::Variable.new("c")]
# }
#
# SXP:
#
# (bgp (triple ?a ?b ?c))
#
# SPARQL:
#
# SELECT * FROM <a> WHERE { ?a ?b ?c }
#
# SSE:
#
# SPARQL::Algebra::Operator::Dataset.new(
# [RDF::URI("a")],
# RDF::Query.new {
# pattern [RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b"), RDF::Query::Variable.new("c")]
# }
# )
#
# SXP:
#
# (dataset (<a>) (bgp (triple ?a ?b ?c)))
#
# SPARQL:
#
# SELECT * FROM NAMED <a> WHERE { ?a ?b ?c }
#
# SSE:
#
# SPARQL::Algebra::Operator::Dataset.new(
# [[:named, RDF::URI("a")]],
# RDF::Query.new {
# pattern [RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b"), RDF::Query::Variable.new("c")]
# }
# )
#
# SXP:
#
# (dataset ((named <a>)) (bgp (triple ?a ?b ?c)))
#
# SPARQL:
#
# SELECT DISTINCT * WHERE {?a ?b ?c}
#
# SSE:
#
# SPARQL::Algebra::Operator::Distinct.new(
# RDF::Query.new {
# pattern [RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b"), RDF::Query::Variable.new("c")]
# }
# )
#
# SXP:
#
# (distinct (bgp (triple ?a ?b ?c)))
#
# SPARQL:
#
# SELECT ?a ?b WHERE {?a ?b ?c}
#
# SSE:
#
# SPARQL::Algebra::Operator::Project.new(
# [RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b")],
# RDF::Query.new {
# pattern [RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b"), RDF::Query::Variable.new("c")]
# }
# )
#
# SXP:
#
# (project (?a ?b) (bgp (triple ?a ?b ?c)))
#
# SPARQL:
#
# CONSTRUCT {?a ?b ?c} WHERE {?a ?b ?c FILTER (?a)}
#
# SSE:
#
# SPARQL::Algebra::Operator::Construct.new(
# [RDF::Query::Pattern.new(RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b"), RDF::Query::Variable.new("c"))],
# SPARQL::Algebra::Operator::Filter.new(
# RDF::Query::Variable.new("a"),
# RDF::Query.new {
# pattern [RDF::Query::Variable.new("a"), RDF::Query::Variable.new("b"), RDF::Query::Variable.new("c")]
# }
# )
# )
#
# SXP:
#
# (construct ((triple ?a ?b ?c)) (filter ?a (bgp (triple ?a ?b ?c))))
#
# SPARQL:
#
# SELECT * WHERE {<a> <b> <c> OPTIONAL {<d> <e> <f>}}
#
# SSE:
#
# SPARQL::Algebra::Operator::LeftJoin.new(
# RDF::Query.new {
# pattern [RDF::URI("a"), RDF::URI("b"), RDF::URI("c")]
# },
# RDF::Query.new {
# pattern [RDF::URI("d"), RDF::URI("e"), RDF::URI("f")]
# }
# )
#
# SXP:
#
# (leftjoin (bgp (triple <a> <b> <c>)) (bgp (triple <d> <e> <f>)))
#
# SPARQL:
#
# SELECT * WHERE {<a> <b> <c> {<d> <e> <f>}}
#
# SSE:
#
# SPARQL::Algebra::Operator::Join.new(
# RDF::Query.new {
# pattern [RDF::URI("a"), RDF::URI("b"), RDF::URI("c")]
# },
# RDF::Query.new {
# pattern [RDF::URI("d"), RDF::URI("e"), RDF::URI("f")]
# }
# )
#
# SXP:
#
# (join (bgp (triple <a> <b> <c>)) (bgp (triple <d> <e> <f>)))
#
# SPARQL:
#
# PREFIX : <http://example/>
#
# SELECT *
# {
# { ?s ?p ?o }
# UNION
# { GRAPH ?g { ?s ?p ?o } }
# }
#
# SSE:
#
# SPARQL::Algebra::Operator::Prefix.new(
# [[:":", RDF::URI("http://example/")]],
# SPARQL::Algebra::Operator::Union.new(
# RDF::Query.new {
# pattern [RDF::Query::Variable.new("s"), RDF::Query::Variable.new("p"), RDF::Query::Variable.new("o")]
# },
# RDF::Query.new(:context => RDF::Query::Variable.new("g")) {
# pattern [RDF::Query::Variable.new("s"), RDF::Query::Variable.new("p"), RDF::Query::Variable.new("o")]
# }
# )
# )
#
# SXP:
#
# (prefix ((: <http://example/>))
# (union
# (bgp (triple ?s ?p ?o))
# (graph ?g
# (bgp (triple ?s ?p ?o)))))
#
# ## Implementation Notes
# The parser is driven through a rules table contained in lib/sparql/grammar/parser/meta.rb. This includes
# branch rules to indicate productions to be taken based on a current production.
#
# The meta.rb file is generated from etc/sparql-selectors.n3 which is the result of parsing
# http://www.w3.org/2000/10/swap/grammar/sparql.n3 (along with bnf-token-rules.n3) using cwm using the following command sequence:
#
# cwm ../grammar/sparql.n3 bnf-token-rules.n3 --think --purge --data > sparql-selectors.n3
#
# sparql-selectors.n3 is itself used to generate lib/sparql/grammar/parser/meta.rb using script/build_meta.
#
# Note that the SWAP version of sparql.n3 is an older version of the grammar with the newest in http://www.w3.org/2001/sw/DataAccess/rq23/parsers/sparql.ttl,
# which uses the EBNF form. The sparql.n3 file has been updated by hand to be consistent with the etc/sparql.ttl version.
# A future direction will be to generate rules from etc/sparql.ttl to generate branch tables similar to those
# expressed in meta.rb, but this requires rules not currently available.
#
# ## Next Steps for Parsing EBNF
# A more modern approach is to use the EBNF grammar (e.g., etc/sparql.bnf) to generate a Turtle/N3 representation of the grammar, transform
# this to an LL1 representation and use this to create meta.rb.
#
# Using SWAP utilities, this would seemingly be done as follows:
#
# python http://www.w3.org/2000/10/swap/grammar/ebnf2turtle.py \
# http://www.w3.org/2001/sw/DataAccess/rq23/parsers/sparql.bnf \
# en \
# 'http://www.w3.org/2001/sw/DataAccess/parsers/sparql#' > etc/sparql.ttl
#
# python http://www.w3.org/2000/10/swap/cwm.py etc/sparql.ttl \
# http://www.w3.org/2000/10/swap/grammar/ebnf2bnf.n3 \
# http://www.w3.org/2000/10/swap/grammar/first_follow.n3 \
# --think --data > etc/sparql-ll1.n3
#
# At this point, a variation of script/build_meta should be able to extract first/follow information to re-create the meta branch tables.
#
# @see http://www.w3.org/TR/rdf-sparql-query/#grammar
module Grammar
# Constants that are loaded on first reference, listed as
# [constant name, feature path] pairs.
[
  [:Lexer,   'sparql/grammar/lexer'],
  [:Parser,  'sparql/grammar/parser'],
  [:Meta,    'sparql/grammar/parser/meta'],
  [:VERSION, 'sparql/grammar/version']
].each { |const_name, feature| autoload const_name, feature }
# The four SPARQL query forms.
METHODS = [:SELECT, :CONSTRUCT, :DESCRIBE, :ASK]
# Keyword symbols: the query forms first, followed by the remaining keywords.
KEYWORDS = METHODS + [
  :BASE, :PREFIX, :LIMIT, :OFFSET, :DISTINCT, :REDUCED,
  :ORDER, :BY, :ASC, :DESC, :FROM, :NAMED, :WHERE, :GRAPH,
  :OPTIONAL, :UNION, :FILTER
]
# Built-in function names, kept in their original letter case.
FUNCTIONS = [
  :STR, :LANGMATCHES, :LANG, :DATATYPE, :BOUND, :sameTerm,
  :isIRI, :isURI, :isBLANK, :isLITERAL, :REGEX
]
# Freeze the value of every constant defined so far in this module,
# skipping names that are still registered for autoload.
constants.each do |const_name|
  next if autoload?(const_name)
  const_get(const_name).freeze
end
##
# Parses the given SPARQL `query` and returns the parser's result.
#
# @example
#   result = SPARQL::Grammar.parse("SELECT * WHERE { ?s ?p ?o }")
#
# @param  [IO, StringIO, Lexer, Array, String, #to_s] query
#   the query source: a string, an open file, a lexer, or an array of
#   already-lexed tokens
# @param  [Hash{Symbol => Object}] options
#   handed unchanged to `Parser.new`
# @return [Object]
#   whatever `Parser#parse` returns; per the module overview this is the
#   SSE hierarchy (`SPARQL::Algebra::Operator` / `RDF::Query` objects)
# @raise  [Parser::Error] on invalid input
# @note The `&block` parameter is accepted but is not used by this method.
def self.parse(query, options = {}, &block)
  parser = Parser.new(query, options)
  parser.parse
end
##
# Opens the given file name or URL and parses its contents as a SPARQL query.
#
# The handle yielded by `RDF::Util::File.open_file` is passed to {parse}
# together with the same `options`.
#
# @param  [String, #to_s] filename
#   handed to `RDF::Util::File.open_file`
# @param  [Hash{Symbol => Object}] options
#   passed both to `RDF::Util::File.open_file` and to {parse}
# @yield  the block, if any, is forwarded to {parse} untouched
# @return [Object]
#   the value of the `open_file` call -- presumably the parse result
#   produced inside its block (TODO confirm against `RDF::Util::File`)
# @note Errors raised by `open_file` or {parse} are not rescued here.
def self.open(filename, options = {}, &block)
  RDF::Util::File.open_file(filename, options) { |io| parse(io, options, &block) }
end
##
# Reports whether the given SPARQL `query` string is valid.
#
# @example
#   SPARQL::Grammar.valid?("SELECT ?s WHERE { ?s ?p ?o }")  #=> true
#   SPARQL::Grammar.valid?("SELECT s WHERE { ?s ?p ?o }")   #=> false
#
# @param  [String, #to_s] query
# @param  [Hash{Symbol => Object}] options
#   handed unchanged to `Parser.new`
# @return [Boolean] the answer given by `Parser#valid?`
def self.valid?(query, options = {})
  parser = Parser.new(query, options)
  parser.valid?
end
##
# Tokenizes the given SPARQL `query` by delegating to `Lexer.tokenize`.
#
# @example
#   lexer = SPARQL::Grammar.tokenize("SELECT * WHERE { ?s ?p ?o }")
#   lexer.each_token do |token|
#     puts token.inspect
#   end
#
# @param  [String, #to_s] query
# @param  [Hash{Symbol => Object}] options
#   handed unchanged to `Lexer.tokenize`
# @yield  [lexer] the block, if any, is forwarded to `Lexer.tokenize`
# @yieldparam [Lexer] lexer
# @return [Lexer] the value returned by `Lexer.tokenize`
# @raise  [Lexer::Error] on invalid input
def self.tokenize(query, options = {}, &block)
  lexer = Lexer.tokenize(query, options, &block)
  lexer
end
# Vocabulary class rooted at the SWAP SPARQL grammar namespace
# (http://www.w3.org/2000/10/swap/grammar/sparql#); its superclass is
# whatever `RDF::Vocabulary(uri)` returns for that namespace.
# NOTE(review): presumably used to refer to grammar terms from sparql.n3 -- confirm against callers.
class SPARQL_GRAMMAR < RDF::Vocabulary("http://www.w3.org/2000/10/swap/grammar/sparql#"); end
end # Grammar
end # SPARQL