lib/sql_tree/node/expression.rb in sql_tree-0.1.0 vs lib/sql_tree/node/expression.rb in sql_tree-0.1.1

- old
+ new

@@ -1,216 +1,484 @@ module SQLTree::Node - # Base class for all SQL expressions. + # Abstract base class for all SQL expressions. # - # This is an asbtract class and should not be used directly. Use - # one of the subclasses instead. + # To parse a string as an SQL expression, use: + # + # SQLTree::Node::Expression["(3 + 2 = 10 / 2) AND MD5('$ecret') = password"] + # + # This is an abtract class: its parse method will never return an + # <tt>SQLTree::Node::Expression</tt> instance, but always an instance + # of one of its subclasses. The concrete expression classes are defined in the + # SQLTree::Node::Expression namespace. class Expression < Base + # Parses an SQL expression from a stream of tokens. + # + # This method will start trying to parse the token stream as a + # <tt>SQLTree::Node::Expression::BinaryOperator</tt>, which will in turn try + # to parse it as other kinds of expressions if a binary expression is not appropriate. + # + # <tt>tokens</tt>:: The token stream to parse from, which is an instance + # of <tt> SQLTree::Parser</tt>. def self.parse(tokens) - SQLTree::Node::LogicalExpression.parse(tokens) + SQLTree::Node::Expression::BinaryOperator.parse(tokens) end # Parses a single, atomic SQL expression. This can be either: # * a full expression (or set of expressions) within parentheses. # * a logical NOT expression # * an SQL variable # * an SQL function # * a literal SQL value (numeric or string) + # + # <tt>tokens</tt>:: The token stream to parse from, which is an instance + # of <tt> SQLTree::Parser</tt>. def self.parse_atomic(tokens) - case tokens.peek - when SQLTree::Token::LPAREN + if SQLTree::Token::LPAREN === tokens.peek tokens.consume(SQLTree::Token::LPAREN) - expr = SQLTree::Node::Expression.parse(tokens) + expr = self.parse(tokens) tokens.consume(SQLTree::Token::RPAREN) expr - when SQLTree::Token::NOT - SQLTree::Node::LogicalNotExpression.parse(tokens) - when SQLTree::Token::Variable - if tokens.peek(2) == SQLTree::Token::LPAREN - SQLTree::Node::FunctionExpression.parse(tokens) + elsif tokens.peek.prefix_operator? + PrefixOperator.parse(tokens) + elsif tokens.peek.variable? + if SQLTree::Token::LPAREN === tokens.peek(2) + FunctionCall.parse(tokens) + elsif SQLTree::Token::DOT === tokens.peek(2) + Field.parse(tokens) else - SQLTree::Node::Variable.parse(tokens) + Variable.parse(tokens) end else - SQLTree::Node::Value.parse(tokens) + Value.parse(tokens) end end - end - - class LogicalNotExpression < Expression - - attr_accessor :expression - - def initialize(expression) - @expression = expression + + # A prefix operator expression parses a construct that consists of an + # operator and an expression. Currently, the only prefix operator that + # is supported is the NOT keyword. + # + # This node has two child nodes: <tt>operator</tt> and <tt>rhs</tt>. + class PrefixOperator < SQLTree::Node::Expression + + # The list of operator tokens that can be used as prefix operator. + TOKENS = [SQLTree::Token::NOT] + + # The SQL operator as <tt>String</tt> that was used for this expression. + leaf :operator + + # The right hand side of the prefix expression, i.e. the <tt>SQLTree::Node::Expression</tt> + # instance that appeared after the operator. + child :rhs + + # Generates an SQL fragment for this prefix operator expression. + def to_sql(options = {}) + "#{operator} #{rhs.to_sql(options)}" + end + + # Parses the operator from the token stream. + # <tt>tokens</tt>:: the token stream to parse from. + def self.parse_operator(tokens) + tokens.next.literal.upcase + end + + # Parses a prefix operator expression, by first parsing the operator + # and then parsing the right hand side expression. + # <tt>tokens</tt>:: the token stream to parse from, which is an instance + # of <tt> SQLTree::Parser</tt>. + def self.parse(tokens) + if tokens.peek.prefix_operator? + node = self.new + node.operator = parse_operator(tokens) + node.rhs = SQLTree::Node::Expression.parse(tokens) + return node + else + raise UnexpectedTokenException.new(tokens.peek) + end + end end - - def to_sql - "NOT(#{@expression.to_sql})" + + # A postfix operator expression is a construct in which the operator appears + # after a (left-hand side) expression. + # + # This operator has two child nodes: <tt>operator</tt> and <tt>lhs</tt>. + # + # Currently, SQLTreedoes not support any postfix operator. + class PostfixOperator < SQLTree::Node::Expression + + # The left-hand side <tt>SQLTree::Node::Expression</tt> instance that was parsed + # before the postfix operator. + child :lhs + + # The postfoix operator for this expression as <tt>String</tt>. + leaf :operator + + # Generates an SQL fragment for this postfix operator expression. + def to_sql(options = {}) + "#{lhs.to_sql(options)} #{operator}" + end + + # Parses a postfix operator expression. This method is not yet implemented. + # <tt>tokens</tt>:: The token stream to parse from, which is an instance + # of <tt> SQLTree::Parser</tt>. + def self.parse(tokens) + raise "Not yet implemented" + end end + + # A binary operator expression consists of a left-hand side expression (lhs), the + # binary operator itself and a right-hand side expression (rhs). It therefore has + # three children: <tt>operator</tt>, <tt>lhs</tt> and <tt>rhs</tt>. + # + # When multiple binary operators appear in an expression, they can be grouped + # using parenthesis (e.g. "(1 + 3) / 2", or "1 + (3 / 2)" ). If the parentheses + # are absent, the grouping is determined using the precedence of the operator. + class BinaryOperator < SQLTree::Node::Expression + + # The token precedence list. Tokens that occur first in this list have + # the lowest precedence, the last tokens have the highest. This impacts + # parsing when no parentheses are used to indicate how operators should + # be grouped. + # + # The token precedence list is taken from the SQLite3 documentation: + # http://www.sqlite.org/lang_expr.html + TOKEN_PRECEDENCE = [ + [SQLTree::Token::OR], + [SQLTree::Token::AND], + [SQLTree::Token::EQ, SQLTree::Token::NE, SQLTree::Token::IN, SQLTree::Token::LIKE, SQLTree::Token::ILIKE, SQLTree::Token::IS], + [SQLTree::Token::LT, SQLTree::Token::LTE, SQLTree::Token::GT, SQLTree::Token::GTE], + [SQLTree::Token::LSHIFT, SQLTree::Token::RSHIFT, SQLTree::Token::BINARY_AND, SQLTree::Token::BINARY_OR], + [SQLTree::Token::PLUS, SQLTree::Token::MINUS], + [SQLTree::Token::MULTIPLY, SQLTree::Token::DIVIDE, SQLTree::Token::MODULO], + [SQLTree::Token::CONCAT], + ] + + # A list of binary operator tokens, taken from the operator precedence list. + TOKENS = TOKEN_PRECEDENCE.flatten + + # The operator to use for this binary operator expression. + leaf :operator + + # The left hand side <tt>SQLTree::Node::Expression</tt> instance for this operator. + child :lhs + + # The rights hand side <tt>SQLTree::Node::Expression</tt> instance for this operator. + child :rhs + + # Generates an SQL fragment for this exression. + def to_sql(options = {}) + "(#{lhs.to_sql(options)} #{operator} #{rhs.to_sql(options)})" + end - def ==(other) - other.kind_of?(self.class) && other.expression == self.expression - end - - def self.parse(tokens) - tokens.consume(SQLTree::Token::NOT) - self.new(SQLTree::Node::Expression.parse(tokens)) - end - end - - class LogicalExpression < Expression - attr_accessor :operator, :expressions - - def initialize(operator, expressions) - @expressions = expressions - @operator = operator.to_s.downcase.to_sym - end - - def to_sql - "(" + @expressions.map { |e| e.to_sql }.join(" #{@operator.to_s.upcase} ") + ")" - end - - def ==(other) - self.operator == other.operator && self.expressions == other.expressions - end - - def self.parse(tokens) - expr = ComparisonExpression.parse(tokens) - while [SQLTree::Token::AND, SQLTree::Token::OR].include?(tokens.peek) - expr = SQLTree::Node::LogicalExpression.new(tokens.next.literal, [expr, ComparisonExpression.parse(tokens)]) + # Parses the operator for this expression. + # + # Some operators can be negated using the NOT operator (e.g. <tt>IS NOT</tt>, + # <tt>NOT LIKE</tt>). This is handled in this function as well. + # + # <tt>tokens</tt>:: The token stream to parse from. + def self.parse_operator(tokens) + if tokens.peek.optional_not_suffix? && tokens.peek(2).not? + return "#{tokens.next.literal.upcase} #{tokens.next.literal.upcase}" + elsif tokens.peek.not? && tokens.peek(2).optional_not_prefix? + return "#{tokens.next.literal.upcase} #{tokens.next.literal.upcase}" + else + return tokens.next.literal.upcase + end end - return expr - end - end - class ComparisonExpression < Expression - attr_accessor :lhs, :rhs, :operator - - def initialize(operator, lhs, rhs) - @lhs = lhs - @rhs = rhs - @operator = operator - end - - def to_sql - "(#{@lhs.to_sql} #{@operator} #{@rhs.to_sql})" - end - - def self.parse_comparison_operator(tokens) - operator_token = tokens.next - if SQLTree::Token::IS === operator_token - if SQLTree::Token::NOT === tokens.peek - tokens.consume(SQLTree::Token::NOT) - 'IS NOT' + # Parses the right hand side expression of the operator. + # + # Usually, this will parse another BinaryOperator expression with a higher + # precedence, but for some operators (+IN+ and +IS+), the default behavior + # is overriden to implement exceptions. + # + # <tt>tokens</tt>:: The token stream to parse from, which is an instance + # of <tt> SQLTree::Parser</tt>. + # <tt>precedence</tt>:: The current precedence level. By default, this method + # will try to parse a BinaryOperator expression with a + # one higher precedence level than the current level. + # <tt>operator</tt>:: The operator that was parsed. + def self.parse_rhs(tokens, precedence, operator = nil) + if ['IN', 'NOT IN'].include?(operator) + return List.parse(tokens) + elsif ['IS', 'IS NOT'].include?(operator) + tokens.consume(SQLTree::Token::NULL) + return SQLTree::Node::Expression::Value.new(nil) else - 'IS' + return parse(tokens, precedence + 1) end - elsif SQLTree::Token::NOT === operator_token - case tokens.peek - when SQLTree::Token::LIKE, SQLTree::Token::ILIKE, SQLTree::Token::BETWEEN, SQLTree::Token::IN - "NOT #{tokens.next.literal.upcase}" + end + + # Parses the binary operator by first parsing the left hand side, then the operator + # itself, and finally the right hand side. + # + # BinaryOperator -> Expression <operator> Expression + # + # This method will try to parse the lowest precedence operator first, and gradually + # try to parse operators with a higher precedence level. The left and right hand side + # will both be parsed with a higher precedence level. This ensures that the resulting + # expression is grouped correctly. + # + # If no binary operator is found of any precedence level, this method will back on + # pasring an atomic expression, see {SQLTree::Node::Expression.parse_atomic}. + # + # @param [SQLTree::Parser] tokens The token stream to parse from. + # @param [Integer] precedence The current precedence level. Starts with the lowest + # precedence level (0) by default. + # @return [SQLTree::Node::Expression] The parsed expression. This may not be + # a binary operator expression, as this method falls back on parsing other + # expresison types if no binary operator is found. + # @raise [SQLTree::Parser::UnexpectedToken] if an unexpected token is + # encountered during parsing. + def self.parse(tokens, precedence = 0) + if precedence >= TOKEN_PRECEDENCE.length + return SQLTree::Node::Expression.parse_atomic(tokens) else - raise SQLTree::Parser::UnexpectedToken.new(tokens.peek) + expr = parse(tokens, precedence + 1) + while TOKEN_PRECEDENCE[precedence].include?(tokens.peek.class) || (tokens.peek && tokens.peek.not?) + operator = parse_operator(tokens) + rhs = parse_rhs(tokens, precedence, operator) + expr = self.new(:operator => operator, :lhs => expr, :rhs => rhs) + end + return expr end - else - operator_token.literal end end + + # Parses a comma-separated list of expressions, which is used after the IN operator. + # The attribute <tt>items</tt> contains the array of child nodes, all instances of + # {SQLTree::Node::Expression}. + class List < SQLTree::Node::Expression + + # Include the enumerable module to simplify handling the items in this list. + include Enumerable + + # The items that appear in the list, i.e. an array of {SQLTree::Node::Expression} + # instances. + child :items - def self.parse(tokens) - lhs = SQLTree::Node::ArithmeticExpression.parse(tokens) - while SQLTree::Token::COMPARISON_OPERATORS.include?(tokens.peek) - comparison_operator = parse_comparison_operator(tokens) - rhs = ['IN', 'NOT IN'].include?(comparison_operator) ? - SQLTree::Node::SetExpression.parse(tokens) : - SQLTree::Node::ArithmeticExpression.parse(tokens) + def initialize(*items) + if items.length == 1 && items.first.kind_of?(Array) + @items = items.first + elsif items.length == 1 && items.first.kind_of?(Hash) + super(items.first) + else + @items + end + end - lhs = self.new(comparison_operator, lhs, rhs) + # Generates an SQL fragment for this list. + def to_sql(options = {}) + "(#{items.map {|i| i.to_sql(options)}.join(', ')})" end - return lhs - end - end - class SetExpression < Expression - attr_accessor :items + # Returns true if this list has no items. + def empty? + items.empty? + end + + # Makes sure the enumerable module works over the items in the list. + def each(&block) # :nodoc: + items.each(&block) + end - def initialize(items = []) - @items = items + # Parses a list of expresison by parsing expressions as long as it sees + # a comma that indicates the presence of a next expression. + # + # List -> LPAREN (Expression (COMMA Expression)*)? RPAREN + # + # @param [SQLTree::Parser] tokens The token stream to parse from. + # @return [SQLTree::Node::Expression::List] The parsed list instance. + # @raise [SQLTree::Parser::UnexpectedToken] if an unexpected token is + # encountered during parsing. + def self.parse(tokens) + tokens.consume(SQLTree::Token::LPAREN) + items = [] + unless SQLTree::Token::RPAREN === tokens.peek + items = self.parse_list(tokens, SQLTree::Node::Expression) + end + tokens.consume(SQLTree::Token::RPAREN) + self.new(items) + end end + + # Represents a SQL function call expression. This node has two child nodes: + # <tt>function</tt> and <tt>argument_list</tt>. + class FunctionCall < SQLTree::Node::Expression - def to_sql - "(#{items.map {|i| i.to_sql}.join(', ')})" - end + # The name of the function that is called as <tt>String</tt>. + leaf :function + + # The argument list as {SQLTree::Node::Expression::List} instance. + child :arguments - def self.parse(tokens) - tokens.consume(SQLTree::Token::LPAREN) - items = [SQLTree::Node::Expression.parse(tokens)] - while tokens.peek == SQLTree::Token::COMMA - tokens.consume(SQLTree::Token::COMMA) - items << SQLTree::Node::Expression.parse(tokens) + # Generates an SQL fragment for this function call. + def to_sql(options = {}) + "#{function}(" + arguments.map { |e| e.to_sql(options) }.join(', ') + ")" end - tokens.consume(SQLTree::Token::RPAREN) - - self.new(items) + + # Parses an SQL function call. + # + # FunctionCall -> <identifier> List + # + # @param [SQLTree::Parser] tokens The token stream to parse from. + # @return [SQLTree::Node::Expression::FunctionCall] The parsed function call instance. + # @raise [SQLTree::Parser::UnexpectedToken] if an unexpected token is + # encountered during parsing. + def self.parse(tokens) + function_call = self.new(:function => tokens.next.literal, :arguments => []) + tokens.consume(SQLTree::Token::LPAREN) + function_call.arguments = self.parse_list(tokens) unless SQLTree::Token::RPAREN === tokens.peek + tokens.consume(SQLTree::Token::RPAREN) + return function_call + end end - end + + # Represents alitreal value in an SQL expression. This node is a leaf node + # and thus has no child nodes. + # + # A value can either be: + # * the SQL <tt>NULL</tt> keyword, which is represented by <tt>nil</tt>. + # * an SQL string, which is represented by a <tt>String</tt> instance. + # * an SQL date or time value, which can be represented as a <tt>Date</tt>, + # <tt>Time</tt> or <tt>DateTime</tt> instance. + # * an integer or decimal value, which is represented by an appropriate + # <tt>Numeric</tt> instance. + class Value < SQLTree::Node::Expression + + # The actual value this node represents. + leaf :value - class FunctionExpression < Expression - attr_accessor :function, :arguments + def initialize(value) # :nodoc: + @value = value + end + + # Generates an SQL representation for this value. + # + # This method supports nil, string, numeric, date and time values. + # + # @return [String] A correctly quoted value that can be used safely + # within an SQL query + def to_sql(options = {}) + case value + when nil then 'NULL' + when String then quote_str(@value) + when Numeric then @value.to_s + when Date then @value.strftime("'%Y-%m-%d'") + when DateTime, Time then @value.strftime("'%Y-%m-%d %H:%M:%S'") + else raise "Don't know how te represent this value in SQL!" + end + end - def initialize(function, arguments = []) - @function = function - @arguments = arguments + # Parses a literal value. + # + # Value -> (NULL | <string> | <number>) + # + # @param [SQLTree::Parser] tokens The token stream to parse from. + # @return [SQLTree::Node::Expression::Value] The parsed value instance. + # @raise [SQLTree::Parser::UnexpectedToken] if an unexpected token is + # encountered during parsing. + def self.parse(tokens) + case tokens.next + when SQLTree::Token::String, SQLTree::Token::Number + SQLTree::Node::Expression::Value.new(tokens.current.literal) + when SQLTree::Token::NULL + SQLTree::Node::Expression::Value.new(nil) + else + raise SQLTree::Parser::UnexpectedToken.new(tokens.current, :literal) + end + end end + + # Represents a variable within an SQL expression. This is a leaf node, so it + # does not have any child nodes. A variale can point to a field of a table or + # to another expression that was declared elsewhere. + class Variable < SQLTree::Node::Expression + + # The name of the variable as <tt>String</tt>. + leaf :name - def to_sql - "#{@function}(" + @arguments.map { |e| e.to_sql }.join(', ') + ")" - end + def initialize(name) # :nodoc: + @name = name + end - def self.parse(tokens) - expr = self.new(tokens.next.literal) - tokens.consume(SQLTree::Token::LPAREN) - until tokens.peek == SQLTree::Token::RPAREN - expr.arguments << SQLTree::Node::Expression.parse(tokens) - tokens.consume(SQLTree::Token::COMMA) if tokens.peek == SQLTree::Token::COMMA + # Generates a quoted reference to the variable. + # + # @return [String] A correctly quoted variable that can be safely + # used in SQL queries + def to_sql(options = {}) + quote_var(@name) end - tokens.consume(SQLTree::Token::RPAREN) - return expr - end - end - class ArithmeticExpression < Expression - attr_accessor :lhs, :rhs, :operator - - def initialize(operator, lhs, rhs) - @lhs = lhs - @rhs = rhs - @operator = operator + # Parses an SQL variable. + # + # Variable -> <identifier> + # + # @param [SQLTree::Parser] tokens The token stream to parse from. + # @return [SQLTree::Node::Expression::Variable] The parsed variable instance. + # @raise [SQLTree::Parser::UnexpectedToken] if an unexpected token is + # encountered during parsing. + def self.parse(tokens) + if SQLTree::Token::Identifier === tokens.peek + self.new(tokens.next.literal) + else + raise SQLTree::Parser::UnexpectedToken.new(tokens.peek, :variable) + end + end end + + # Represents a reference to a field of a table in an SQL expression. + # This is a leaf node, which means that it does not have any child nodes. + class Field < Variable - def to_sql - "(#{@lhs.to_sql} #{@operator} #{@rhs.to_sql})" - end + # The table in which the field resides. This can be +nil+, in which case + # the table the field belongs to is inferred from the rest of the query. + leaf :table + + # The name of the field. + leaf :name - def self.parse(tokens) - self.parse_primary(tokens) - end + alias :field :name + alias :field= :name= - def self.parse_primary(tokens) - expr = self.parse_secondary(tokens) - while [SQLTree::Token::PLUS, SQLTree::Token::MINUS].include?(tokens.peek) - expr = self.new(tokens.next.literal, expr, self.parse_secondary(tokens)) + # Initializes a new Field + def initialize(name, table = nil) + @name = name + @table = table end - return expr - end - def self.parse_secondary(tokens) - expr = Expression.parse_atomic(tokens) - while [SQLTree::Token::PLUS, SQLTree::Token::MINUS].include?(tokens.peek) - expr = self.new(tokens.next.literal, expr, SQLTree::Node::Expression.parse_atomic(tokens)) + # Generates a correctly quoted reference to the field, which can + # be incorporated safely into an SQL query. + def to_sql(options = {}) + @table.nil? ? quote_var(@name) : quote_var(@table) + '.' + quote_var(@name) end - return expr + + # Parses a field, either with or without the table reference. + # + # Field -> (<identifier> DOT)? <identifier> + # + # @param [SQLTree::Parser] tokens The token stream to parse from. + # @return [SQLTree::Node::Expression::Field] The parsed field instance. + # @raise [SQLTree::Parser::UnexpectedToken] if an unexpected token is + # encountered during parsing. + def self.parse(tokens) + if SQLTree::Token::Identifier === tokens.peek + field_or_table = tokens.next.literal + else + raise SQLTree::Parser::UnexpectedToken.new(tokens.next) + end + + if SQLTree::Token::DOT === tokens.peek + tokens.consume(SQLTree::Token::DOT) + if SQLTree::Token::Identifier === tokens.peek + self.new(tokens.next.literal, field_or_table) + else + raise SQLTree::Parser::UnexpectedToken.new(tokens.next) + end + else + self.new(field_or_table) + end + end end end end