# $Id$
#
# SparkQL grammar

class Sparkql::Parser

###############################################################################
# READ THIS!
# The grammar documentation is parsed from this file and is in a sensitive 
# syntax between the START_MARKDOWN and STOP_MARKDOWN keywords. In general, all
# line comments will be treated as markdown text, and everything else is padded
# for code formatting
###############################################################################

#START_MARKDOWN

### SparkQL BNF Grammar
# 
# This document explains the rules for the Spark API filter language syntax and
# is a living document generated from the reference implementation at 
# https://github.com/sparkapi/sparkql.

#### Precedence Rules
# 
# Unless otherwise specified, SparkQL follows SQL precedence conventions for
# operators and conjunctions.
# 
# Unary minus is always tied to its value, such as for negative numbers.
prechigh
  nonassoc UMINUS
preclow
  

#### Grammar Rules
# 
# A filter (target) is a composition of basic filter expressions. An empty
# filter is also accepted.
rule
  target
    : expressions
    | /* none */ { result = 0 }
    ;

##### Expressions
# 
# One or more expressions: a single expression, a conjunction of expressions,
# or a unary conjunction applied to an expression.
  expressions
    : expression
    | conjunction
    | unary_conjunction
    ;

##### Expression
# 
# The core of the filtering system. An expression requires a field, an operator
# and a condition whose value(s) the value of the field is compared against.
# Evaluating an expression on a resource yields true or false, depending on
# whether the resource matches the criteria. A parenthesized group is also an
# expression.
  expression
    : field OPERATOR condition { result = tokenize_expression(val[0], val[1], val[2]) }
    | field RANGE_OPERATOR range { result = tokenize_expression(val[0], val[1], val[2]) }
    | group
    ;
  
##### Unary Conjunction
# 
# Some conjunctions (e.g. 'NOT') apply to a single expression rather than
# joining two expressions.
  unary_conjunction
    : UNARY_CONJUNCTION expression { result = tokenize_unary_conjunction(val[0], val[1]) }
    ;  
  
##### Conjunction
# 
# Expressions joined together using a supported conjunction. The left-hand side
# may itself be a conjunction, so any number of expressions can be chained. A
# unary conjunction may also be used to join expressions.
  conjunction
    : expressions CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1], val[2]) }
    | expressions UNARY_CONJUNCTION expression { result = tokenize_conjunction(val[0], val[1], val[2]) }
    ;
  
##### Group
# 
# One or more expressions encased in parentheses. There are limitations on
# nesting depth at the time of this writing.
  group
    : LPAREN expressions RPAREN { result = tokenize_group(val[1]) }
    ;

##### Field
# 
# The keyword to search on. Fields should be discovered using the metadata
# rules. In general, keywords that cannot be found will be dropped from the
# filter. A function may also be used in place of a field.
  field
    : STANDARD_FIELD
    | CUSTOM_FIELD
    | function
    ;
  
##### Condition
# 
# The determinant of the filter, this is typically a value or set of values of
# a type that the field supports (review the field metadata for support).
# Functions are also supported on some field types, and provide more flexibility
# on filtering values.
  condition
    : literal
    | function
    | literal_list { result = tokenize_list(val[0]) }
    ;
    
##### Function
# 
# Functions may replace static values for conditions with supported field
# types. Functions may have parameters that match types supported by
# fields. A function is written as a keyword followed by a parenthesized,
# possibly empty, argument list.
  function
    : function_name LPAREN RPAREN { result = tokenize_function(val[0], []) }
    | function_name LPAREN function_args RPAREN { result = tokenize_function(val[0], val[2]) }
    ;
    
  function_name
    : KEYWORD
    ;
    
##### Function Arguments
# 
# Functions may optionally have a comma delimited list of parameters. Each
# parameter is a literal value or a field; because a field may itself be a
# function, function calls can be used as parameters.
  function_args
    : function_arg
    | function_args COMMA function_arg { result = tokenize_function_args(val[0], val[2]) }
    ; 
    
  function_arg
    : literal
    | literals
    | field { result = tokenize_field_arg(val[0]) }
    ;
    
##### Literal List
# 
# A comma delimited list of functions and values. Only functions and the
# multi-value literals (see Literals) may appear in a list.
  literal_list
    : literals
    | function
    | literal_list COMMA literals { result = tokenize_multiple(val[0], val[2]) }
    | literal_list COMMA function { result = tokenize_multiple(val[0], val[2]) }
    ;
    
##### Range List
# 
# A pair of comma delimited values that support ranges for the Between operator
# (see rangeable).
  range
    : rangeable COMMA rangeable { result = tokenize_multiple(val[0], val[2]) }
    ;

##### Literals
# 
# Literals that support multiple values in a list for a condition: INTEGER,
# DECIMAL and CHARACTER values.
  literals
    : INTEGER
    | DECIMAL
    | CHARACTER
    ;
  
##### Literal
# 
# Literals that only support a single value in a condition: DATE, DATETIME,
# TIME, BOOLEAN and NULL values.
  literal
    : DATE
    | DATETIME
    | TIME
    | BOOLEAN
    | NULL
    ;

##### Rangeable
# 
# Functions and literals that can be used in a range.
  rangeable
    : INTEGER
    | DECIMAL
    | DATE
    | DATETIME
    | TIME
    | function
    ;

#STOP_MARKDOWN

    
end

---- header
# $Id$
---- inner
  # Code in this section is copied by racc into the body of the generated
  # Sparkql::Parser class, so these modules are mixed into the parser.
  # NOTE(review): the tokenize_* helpers called from the grammar actions are
  # presumably provided by these mixins -- confirm.
  include Sparkql::ParserTools
  include Sparkql::ParserCompatibility
  
---- footer

# END PARSER