lib/sparql/grammar/parser11.rb in sparql-1.0.7 vs lib/sparql/grammar/parser11.rb in sparql-1.0.8

- old
+ new

@@ -230,11 +230,11 @@ self.prefix(pfx, data[:iri].last) prefix_op = SPARQL::Algebra::Operator::Prefix.new([["#{pfx}:".to_sym, data[:iri].last]], []) add_prod_datum(:PrefixDecl, prefix_op) end end - + # [7] SelectQuery ::= SelectClause DatasetClause* WhereClause SolutionModifier production(:SelectQuery) do |input, data, callback| query = merge_modifiers(data) add_prod_datum :query, query end @@ -354,63 +354,13 @@ end # [54] GroupGraphPatternSub ::= TriplesBlock? # ( GraphPatternNotTriples '.'? TriplesBlock? )* production(:GroupGraphPatternSub) do |input, data, callback| - query_list = data[:query_list] - debug("GroupGraphPatternSub") {"ql #{query_list.to_a.inspect}"} - debug("GroupGraphPatternSub") {"q #{data[:query] ? data[:query].first.inspect : 'nil'}"} - - if query_list - lhs = data[:query].to_a.first + debug("GroupGraphPatternSub") {"q #{data[:query].inspect}"} - # Bind terminates the TriplesBlock? - if data[:extend] - lhs ||= SPARQL::Algebra::Operator::BGP.new - # query should be nil - lhs = SPARQL::Algebra::Operator::Extend.new(data.delete(:extend), lhs) - end - - while !query_list.empty? - rhs = query_list.shift - # Make the right-hand-side a Join with only a single operand, if it's not already and Operator - rhs = SPARQL::Algebra::Expression.for(:join, :placeholder, rhs) unless rhs.is_a?(SPARQL::Algebra::Operator) - debug("GroupGraphPatternSub") {"<= q: #{rhs.inspect}"} - debug("GroupGraphPatternSub") {"<= lhs: #{lhs ? lhs.inspect : 'nil'}"} - lhs ||= SPARQL::Algebra::Operator::BGP.new if rhs.is_a?(SPARQL::Algebra::Operator::LeftJoin) - if lhs - if rhs.operand(0) == :placeholder - rhs.operands[0] = lhs - else - rhs = SPARQL::Algebra::Operator::Join.new(lhs, rhs) - end - end - lhs = rhs - lhs = lhs.operand(1) if lhs.operand(0) == :placeholder - debug("GroupGraphPatternSub(itr)") {"=> lhs: #{lhs.inspect}"} - end - # Trivial simplification for :join or :union of one query - case lhs - when SPARQL::Algebra::Operator::Join, SPARQL::Algebra::Operator::Union - if lhs.operand(0) == :placeholder - lhs = lhs.operand(1) - debug("GroupGraphPatternSub(simplify)") {"=> lhs: #{lhs.inspect}"} - end - end - res = lhs - elsif data[:query] - res = data[:query].first - end - - debug("GroupGraphPatternSub(pre-extend)") {"res: #{res.inspect}"} - - if data[:extend] - res ||= SPARQL::Algebra::Operator::BGP.new - # query should be nil - res = SPARQL::Algebra::Operator::Extend.new(data[:extend], res) - end - + res = data[:query].first debug("GroupGraphPatternSub(pre-filter)") {"res: #{res.inspect}"} if data[:filter] expr, query = flatten_filter(data[:filter]) query = res || SPARQL::Algebra::Operator::BGP.new @@ -418,64 +368,68 @@ res = SPARQL::Algebra::Operator::Filter.new(expr, query) end add_prod_datum(:query, res) end - # _GroupGraphPatternSub_2 ::= ( GraphPatternNotTriples '.'? TriplesBlock? ) - # Create a stack of GroupQuerys having a single graph element and resolve in GroupGraphPattern - production(:_GroupGraphPatternSub_2) do |input, data, callback| - lhs = data[:query_list] - [data[:query]].flatten.compact.each do |rhs| - rhs = SPARQL::Algebra::Expression.for(:join, :placeholder, rhs) if rhs.is_a?(RDF::Query) - add_prod_data(:query_list, rhs) - end - add_prod_datum(:query_list, lhs) - add_prod_datum(:extend, data[:extend]) - add_prod_datum(:filter, data[:filter]) - end - - # _GroupGraphPatternSub_3 - # [55] TriplesBlock ::= TriplesSameSubjectPath # ( '.' TriplesBlock? )? production(:TriplesBlock) do |input, data, callback| query = SPARQL::Algebra::Operator::BGP.new data[:pattern].to_a.each {|p| query << p} - + # Append triples from ('.' TriplesBlock? )? data[:query].to_a.each {|q| query += q} + if (lhs = (input.delete(:query) || []).first) && !lhs.empty? + query = SPARQL::Algebra::Expression.for(:join, lhs, query) + end add_prod_datum(:query, query) end # [56] GraphPatternNotTriples ::= GroupOrUnionGraphPattern # | OptionalGraphPattern # | MinusGraphPattern # | GraphGraphPattern # | ServiceGraphPattern # | Filter | Bind + start_production(:GraphPatternNotTriples) do |input, data, callback| + # Modifies previous graph + data[:input_query] = input.delete(:query) || [SPARQL::Algebra::Operator::BGP.new] + end production(:GraphPatternNotTriples) do |input, data, callback| - add_prod_datum(:extend, data[:extend]) + lhs = data[:input_query].first + + # Filter trickls up to GroupGraphPatternSub add_prod_datum(:filter, data[:filter]) - if data[:query] - res = data[:query].to_a.first - add_prod_data(:query, res) + if data[:extend] && lhs.is_a?(SPARQL::Algebra::Operator::Extend) + # Coalesce extensions + lhs = lhs.dup + lhs.operands.first.concat(data[:extend]) + add_prod_datum(:query, lhs) + elsif data[:extend] + add_prod_datum(:query, SPARQL::Algebra::Expression.for(:extend, data[:extend], lhs)) + elsif data[:leftjoin] + add_prod_datum(:query, SPARQL::Algebra::Expression.for(:leftjoin, lhs, *data[:leftjoin])) + elsif data[:query] && !lhs.empty? + add_prod_datum(:query, SPARQL::Algebra::Expression.for(:join, lhs, *data[:query])) + elsif data[:query] + add_prod_datum(:query, data[:query]) + else + add_prod_datum(:query, lhs) end end # [57] OptionalGraphPattern ::= 'OPTIONAL' GroupGraphPattern production(:OptionalGraphPattern) do |input, data, callback| - if data[:query] - expr = nil - query = data[:query].first - if query.is_a?(SPARQL::Algebra::Operator::Filter) - # Change to expression on left-join with query element - expr, query = query.operands - add_prod_data(:query, SPARQL::Algebra::Expression.for(:leftjoin, :placeholder, query, expr)) - else - add_prod_data(:query, SPARQL::Algebra::Expression.for(:leftjoin, :placeholder, query)) - end + expr = nil + query = data[:query] ? data[:query].first : SPARQL::Algebra::Operator::BGP.new + if query.is_a?(SPARQL::Algebra::Operator::Filter) + # Change to expression on left-join with query element + expr, query = query.operands + add_prod_data(:leftjoin, query, expr) + elsif !query.empty? + add_prod_data(:leftjoin, query) end end # [58] GraphGraphPattern ::= 'GRAPH' VarOrIri GroupGraphPattern production(:GraphGraphPattern) do |input, data, callback| @@ -784,13 +738,13 @@ # | '/' UnaryExpression )* production(:_MultiplicativeExpression_1) do |input, data, callback| accumulate_operator_expressions(:MultiplicativeExpression, :_Mul_Div, data) end - # [118] UnaryExpression ::= '!' PrimaryExpression - # | '+' PrimaryExpression - # | '-' PrimaryExpression + # [118] UnaryExpression ::= '!' PrimaryExpression + # | '+' PrimaryExpression + # | '-' PrimaryExpression # | PrimaryExpression production(:UnaryExpression) do |input, data, callback| case data[:UnaryExpression] when ["!"] add_prod_datum(:Expression, SPARQL::Algebra::Expression[:not, data[:Expression].first]) @@ -828,64 +782,64 @@ # Keep track of this for parent UnaryExpression production add_prod_datum(:UnaryExpression, data[:UnaryExpression]) end # [121] BuiltInCall ::= Aggregate - # | 'STR' '(' Expression ')' - # | 'LANG' '(' Expression ')' - # | 'LANGMATCHES' '(' Expression ',' Expression ')' - # | 'DATATYPE' '(' Expression ')' - # | 'BOUND' '(' Var ')' - # | 'IRI' '(' Expression ')' - # | 'URI' '(' Expression ')' - # | 'BNODE' ( '(' Expression ')' | NIL ) - # | 'RAND' NIL - # | 'ABS' '(' Expression ')' - # | 'CEIL' '(' Expression ')' - # | 'FLOOR' '(' Expression ')' - # | 'ROUND' '(' Expression ')' - # | 'CONCAT' ExpressionList - # | SubstringExpression - # | 'STRLEN' '(' Expression ')' - # | StrReplaceExpression - # | 'UCASE' '(' Expression ')' - # | 'LCASE' '(' Expression ')' - # | 'ENCODE_FOR_URI' '(' Expression ')' - # | 'CONTAINS' '(' Expression ',' Expression ')' - # | 'STRSTARTS' '(' Expression ',' Expression ')' - # | 'STRENDS' '(' Expression ',' Expression ')' - # | 'STRBEFORE' '(' Expression ',' Expression ')' - # | 'STRAFTER' '(' Expression ',' Expression ')' - # | 'YEAR' '(' Expression ')' - # | 'MONTH' '(' Expression ')' - # | 'DAY' '(' Expression ')' - # | 'HOURS' '(' Expression ')' - # | 'MINUTES' '(' Expression ')' - # | 'SECONDS' '(' Expression ')' - # | 'TIMEZONE' '(' Expression ')' - # | 'TZ' '(' Expression ')' - # | 'NOW' NIL + # | 'STR' '(' Expression ')' + # | 'LANG' '(' Expression ')' + # | 'LANGMATCHES' '(' Expression ',' Expression ')' + # | 'DATATYPE' '(' Expression ')' + # | 'BOUND' '(' Var ')' + # | 'IRI' '(' Expression ')' + # | 'URI' '(' Expression ')' + # | 'BNODE' ( '(' Expression ')' | NIL ) + # | 'RAND' NIL + # | 'ABS' '(' Expression ')' + # | 'CEIL' '(' Expression ')' + # | 'FLOOR' '(' Expression ')' + # | 'ROUND' '(' Expression ')' + # | 'CONCAT' ExpressionList + # | SubstringExpression + # | 'STRLEN' '(' Expression ')' + # | StrReplaceExpression + # | 'UCASE' '(' Expression ')' + # | 'LCASE' '(' Expression ')' + # | 'ENCODE_FOR_URI' '(' Expression ')' + # | 'CONTAINS' '(' Expression ',' Expression ')' + # | 'STRSTARTS' '(' Expression ',' Expression ')' + # | 'STRENDS' '(' Expression ',' Expression ')' + # | 'STRBEFORE' '(' Expression ',' Expression ')' + # | 'STRAFTER' '(' Expression ',' Expression ')' + # | 'YEAR' '(' Expression ')' + # | 'MONTH' '(' Expression ')' + # | 'DAY' '(' Expression ')' + # | 'HOURS' '(' Expression ')' + # | 'MINUTES' '(' Expression ')' + # | 'SECONDS' '(' Expression ')' + # | 'TIMEZONE' '(' Expression ')' + # | 'TZ' '(' Expression ')' + # | 'NOW' NIL # | 'UUID' NIL # | 'STRUUID' NIL - # | 'MD5' '(' Expression ')' - # | 'SHA1' '(' Expression ')' - # | 'SHA224' '(' Expression ')' - # | 'SHA256' '(' Expression ')' - # | 'SHA384' '(' Expression ')' - # | 'SHA512' '(' Expression ')' - # | 'COALESCE' ExpressionList - # | 'IF' '(' Expression ',' Expression ',' Expression ')' - # | 'STRLANG' '(' Expression ',' Expression ')' - # | 'STRDT' '(' Expression ',' Expression ')' - # | 'sameTerm' '(' Expression ',' Expression ')' - # | 'isIRI' '(' Expression ')' - # | 'isURI' '(' Expression ')' - # | 'isBLANK' '(' Expression ')' - # | 'isLITERAL' '(' Expression ')' - # | 'isNUMERIC' '(' Expression ')' - # | RegexExpression - # | ExistsFunc + # | 'MD5' '(' Expression ')' + # | 'SHA1' '(' Expression ')' + # | 'SHA224' '(' Expression ')' + # | 'SHA256' '(' Expression ')' + # | 'SHA384' '(' Expression ')' + # | 'SHA512' '(' Expression ')' + # | 'COALESCE' ExpressionList + # | 'IF' '(' Expression ',' Expression ',' Expression ')' + # | 'STRLANG' '(' Expression ',' Expression ')' + # | 'STRDT' '(' Expression ',' Expression ')' + # | 'sameTerm' '(' Expression ',' Expression ')' + # | 'isIRI' '(' Expression ')' + # | 'isURI' '(' Expression ')' + # | 'isBLANK' '(' Expression ')' + # | 'isLITERAL' '(' Expression ')' + # | 'isNUMERIC' '(' Expression ')' + # | RegexExpression + # | ExistsFunc # | NotExistsFunc production(:BuiltInCall) do |input, data, callback| if builtin = data.keys.detect {|k| BUILTINS.include?(k)} add_prod_datum(:BuiltInCall, SPARQL::Algebra::Expression.for( @@ -931,16 +885,16 @@ # [126] NotExistsFunc ::= 'NOT' 'EXISTS' GroupGraphPattern production(:NotExistsFunc) do |input, data, callback| add_prod_datum(:notexists, data[:query]) end - # [127] Aggregate ::= 'COUNT' '(' 'DISTINCT'? ( '*' | Expression ) ')' - # | 'SUM' '(' 'DISTINCT'? Expression ')' - # | 'MIN' '(' 'DISTINCT'? Expression ')' - # | 'MAX' '(' 'DISTINCT'? Expression ')' - # | 'AVG' '(' 'DISTINCT'? Expression ')' - # | 'SAMPLE' '(' 'DISTINCT'? Expression ')' + # [127] Aggregate ::= 'COUNT' '(' 'DISTINCT'? ( '*' | Expression ) ')' + # | 'SUM' '(' 'DISTINCT'? Expression ')' + # | 'MIN' '(' 'DISTINCT'? Expression ')' + # | 'MAX' '(' 'DISTINCT'? Expression ')' + # | 'AVG' '(' 'DISTINCT'? Expression ')' + # | 'SAMPLE' '(' 'DISTINCT'? Expression ')' # | 'GROUP_CONCAT' '(' 'DISTINCT'? Expression # ( ';' 'SEPARATOR' '=' String )? ')' production(:Aggregate) do |input, data, callback| if aggregate_rule = data.keys.detect {|k| AGGREGATE_RULES.include?(k)} parts = [aggregate_rule] @@ -963,11 +917,11 @@ # [129] RDFLiteral ::= String ( LANGTAG | ( '^^' iri ) )? production(:RDFLiteral) do |input, data, callback| if data[:string] lit = data.dup - str = lit.delete(:string).last + str = lit.delete(:string).last lit[:datatype] = lit.delete(:iri).last if lit[:iri] lit[:language] = lit.delete(:language).last.downcase if lit[:language] add_prod_datum(:literal, RDF::Literal.new(str, lit)) if str end end @@ -1000,11 +954,11 @@ end ## # Initializes a new parser instance. # - # @param [String, #to_s] input + # @param [String, IO, StringIO, #to_s] input # @param [Hash{Symbol => Object}] options # @option options [Hash] :prefixes (Hash.new) # the prefix mappings to use (for acessing intermediate parser productions) # @option options [#to_s] :base_uri (nil) # the base URI to use when resolving relative URIs (for acessing intermediate parser productions) @@ -1018,13 +972,19 @@ # whether to validate the parsed statements and values # @option options [Boolean] :progress # Show progress of parser productions # @option options [Boolean] :debug # Detailed debug output + # @yield [parser] `self` + # @yieldparam [SPARQL::Grammar::Parser] parser + # @yieldreturn [void] ignored # @return [SPARQL::Grammar::Parser] - def initialize(input = nil, options = {}) - @input = input.to_s.dup + def initialize(input = nil, options = {}, &block) + @input = case input + when IO, StringIO then input.read + else input.to_s.dup + end @input.force_encoding(Encoding::UTF_8) @options = {:anon_base => "b0", :validate => false}.merge(options) @options[:debug] ||= case when options[:progress] then 2 when options[:validate] then 1 @@ -1051,16 +1011,16 @@ def valid? parse rescue Error false end - + # @return [String] def to_sxp_bin @result end - + def to_s @result.to_sxp end alias_method :ll1_parse, :parse @@ -1235,11 +1195,11 @@ # is a disinguished or non-distinguished variable. Non-distinguished # variables are effectively the same as BNodes. # @return [RDF::Query::Variable] def variable(id, distinguished = true) id = nil if id.to_s.empty? - + if id @vars[id] ||= begin v = RDF::Query::Variable.new(id) v.distinguished = distinguished v @@ -1279,11 +1239,11 @@ iri = iri(base + suffix.to_s) # Cause URI to be serialized as a lexical iri.lexical = "#{prefix}:#{suffix}" unless resolve_iris? iri end - + # Create a literal def literal(value, options = {}) options = options.dup # Internal representation is to not use xsd:string, although it could arguably go the other way. options.delete(:datatype) if options[:datatype] == RDF::XSD.string @@ -1298,11 +1258,11 @@ # Take collection of objects and create RDF Collection using rdf:first, rdf:rest and rdf:nil # @param [Hash] data Production Data def expand_collection(data) # Add any triples generated from deeper productions add_prod_datum(:pattern, data[:pattern]) - + # Create list items for each element in data[:GraphNode] first = data[:Collection] list = data[:GraphNode].to_a.flatten.compact last = list.pop @@ -1310,11 +1270,11 @@ add_pattern(:Collection, :subject => first, :predicate => RDF["first"], :object => r) rest = bnode() add_pattern(:Collection, :subject => first, :predicate => RDF["rest"], :object => rest) first = rest end - + if last add_pattern(:Collection, :subject => first, :predicate => RDF["first"], :object => last) end add_pattern(:Collection, :subject => first, :predicate => RDF["rest"], :object => RDF["nil"]) end @@ -1341,15 +1301,15 @@ end # Flatten a Data in form of :filter => [op+ bgp?], without a query into filter and query creating exprlist, if necessary # @return [Array[:expr, query]] def flatten_filter(data) - query = data.pop if data.last.respond_to?(:execute) + query = data.pop if data.last.is_a?(SPARQL::Algebra::Query) expr = data.length > 1 ? SPARQL::Algebra::Operator::Exprlist.new(*data) : data.first [expr, query] end - + # Merge query modifiers, datasets, and projections # # This includes tranforming aggregates if also used with a GROUP BY # # @see http://www.w3.org/TR/sparql11-query/#convertGroupAggSelectExpressions @@ -1362,11 +1322,11 @@ # Add datasets and modifiers in order if data[:group] query = SPARQL::Algebra::Expression[:group, data[:group].first, query] end - if data[:extend] + if data[:extend] # FIXME: needed? # extension variables must not appear in projected variables. # Add them to the projection otherwise data[:extend].each do |(var, expr)| raise Error, "Extension variable #{var} also in SELECT" if vars.map(&:to_s).include?(var.to_s) vars << var @@ -1380,12 +1340,12 @@ query = SPARQL::Algebra::Expression[:project, vars, query] unless vars.empty? query = SPARQL::Algebra::Expression[data[:DISTINCT_REDUCED].first, query] if data[:DISTINCT_REDUCED] query = SPARQL::Algebra::Expression[:slice, data[:slice][0], data[:slice][1], query] if data[:slice] - + query = SPARQL::Algebra::Expression[:dataset, data[:dataset], query] if data[:dataset] - + query end # Add joined expressions in for prod1 (op prod2)* to form (op (op 1 2) 3) def add_operator_expressions(production, data)