require 'time'
require 'date'
module Sequel
# A Dataset represents a view of a the data in a database, constrained by
# specific parameters such as filtering conditions, order, etc. Datasets
# can be used to create, retrieve, update and delete records.
#
# Query results are always retrieved on demand, so a dataset can be kept
# around and reused indefinitely:
# my_posts = DB[:posts].filter(:author => 'david') # no records are retrieved
# p my_posts.all # records are now retrieved
# ...
# p my_posts.all # records are retrieved again
#
# In order to provide this functionality, dataset methods such as where,
# select, order, etc. return modified copies of the dataset, so you can
# use different datasets to access data:
# posts = DB[:posts]
# davids_posts = posts.filter(:author => 'david')
# old_posts = posts.filter('stamp < ?', 1.week.ago)
#
# Datasets are Enumerable objects, so they can be manipulated using any
# of the Enumerable methods, such as map, inject, etc.
class Dataset
include Enumerable
attr_reader :db, :opts
attr_accessor :model_class
# Constructs a new instance of a dataset with a database instance, initial
# options and an optional record class. Datasets are usually constructed by
# invoking Database methods:
# DB[:posts]
# Or:
# DB.dataset # the returned dataset is blank
#
# Sequel::Dataset is an abstract class that is not useful by itself. Each
# database adaptor should provide a descendant class of Sequel::Dataset.
def initialize(db, opts = nil, model_class = nil)
@db = db
@opts = opts || {}
@model_class = model_class
end
# Returns a new instance of the dataset with with the give options merged.
def dup_merge(opts)
self.class.new(@db, @opts.merge(opts), @model_class)
end
# Returns a dataset that fetches records as hashes (instead of model
# objects). If no record class is defined for the dataset, self is
# returned.
def naked
@model_class ? self.class.new(@db, opts || @opts.dup) : self
end
# Returns a valid SQL fieldname as a string. Field names specified as
# symbols can include double underscores to denote a dot separator, e.g.
# :posts__id will be converted into posts.id.
def field_name(field)
field.is_a?(Symbol) ? field.to_field_name : field
end
QUALIFIED_REGEXP = /(.*)\.(.*)/.freeze
# Returns a qualified field name (including a table name) if the field
# name isn't already qualified.
def qualified_field_name(field, table)
fn = field_name(field)
fn =~ QUALIFIED_REGEXP ? fn : "#{table}.#{fn}"
end
WILDCARD = '*'.freeze
COMMA_SEPARATOR = ", ".freeze
# Converts an array of field names into a comma seperated string of
# field names. If the array is empty, a wildcard (*) is returned.
def field_list(fields)
if fields.empty?
WILDCARD
else
fields.map {|i| field_name(i)}.join(COMMA_SEPARATOR)
end
end
# Converts an array of sources names into into a comma separated list.
def source_list(source)
if source.nil? || source.empty?
raise SequelError, 'No source specified for query'
end
source.map {|i| i.is_a?(Dataset) ? i.to_table_reference : i}.
join(COMMA_SEPARATOR)
end
NULL = "NULL".freeze
TIMESTAMP_FORMAT = "TIMESTAMP '%Y-%m-%d %H:%M:%S'".freeze
DATE_FORMAT = "DATE '%Y-%m-%d'".freeze
# Returns a literal representation of a value to be used as part
# of an SQL expression. The stock implementation supports literalization
# of String (with proper escaping to prevent SQL injections), numbers,
# Symbol (as field references), Array (as a list of literalized values),
# Time (as an SQL TIMESTAMP), Date (as an SQL DATE), Dataset (as a
# subquery) and nil (AS NULL).
#
# dataset.literal("abc'def") #=> "'abc''def'"
# dataset.literal(:items__id) #=> "items.id"
# dataset.literal([1, 2, 3]) => "(1, 2, 3)"
# dataset.literal(DB[:items]) => "(SELECT * FROM items)"
#
# If an unsupported object is given, an exception is raised.
def literal(v)
case v
when String: "'#{v.gsub(/'/, "''")}'"
when Integer, Float: v.to_s
when NilClass: NULL
when Symbol: v.to_field_name
when Array: v.empty? ? NULL : v.map {|i| literal(i)}.join(COMMA_SEPARATOR)
when Time: v.strftime(TIMESTAMP_FORMAT)
when Date: v.strftime(DATE_FORMAT)
when Dataset: "(#{v.sql})"
else
raise SequelError, "can't express #{v.inspect} as a SQL literal"
end
end
AND_SEPARATOR = " AND ".freeze
# Formats an equality expression involving a left value and a right value.
# Equality expressions differ according to the class of the right value.
# The stock implementation supports Range (inclusive and exclusive), Array
# (as a list of values to compare against), Dataset (as a subquery to
# compare against), or a regular value.
#
# dataset.format_eq_expression('id', 1..20) #=>
# "(id >= 1 AND id <= 20)"
# dataset.format_eq_expression('id', [3,6,10]) #=>
# "(id IN (3, 6, 10))"
# dataset.format_eq_expression('id', DB[:items].select(:id)) #=>
# "(id IN (SELECT id FROM items))"
# dataset.format_eq_expression('id', nil) #=>
# "(id IS NULL)"
# dataset.format_eq_expression('id', 3) #=>
# "(id = 3)"
def format_eq_expression(left, right)
case right
when Range:
right.exclude_end? ? \
"(#{left} >= #{right.begin} AND #{left} < #{right.end})" : \
"(#{left} >= #{right.begin} AND #{left} <= #{right.end})"
when Array:
"(#{left} IN (#{literal(right)}))"
when Dataset:
"(#{left} IN (#{right.sql}))"
when NilClass:
"(#{left} IS NULL)"
else
"(#{left} = #{literal(right)})"
end
end
# Formats an expression comprising a left value, a binary operator and a
# right value. The supported operators are :eql (=), :not (!=), :lt (<),
# :lte (<=), :gt (>), :gte (>=) and :like (LIKE operator). Examples:
#
# dataset.format_expression('price', :gte, 100) #=> "(price >= 100)"
# dataset.format_expression('id', :not, 30) #=> "NOT (id = 30)"
# dataset.format_expression('name', :like, 'abc%') #=>
# "(name LIKE 'abc%')"
#
# If an unsupported operator is given, an exception is raised.
def format_expression(left, op, right)
left = field_name(left)
case op
when :eql:
format_eq_expression(left, right)
when :not:
"NOT #{format_eq_expression(left, right)}"
when :lt:
"(#{left} < #{literal(right)})"
when :lte:
"(#{left} <= #{literal(right)})"
when :gt:
"(#{left} > #{literal(right)})"
when :gte:
"(#{left} >= #{literal(right)})"
when :like:
"(#{left} LIKE #{literal(right)})"
else
raise SequelError, "Invalid operator specified: #{op}"
end
end
QUESTION_MARK = '?'.freeze
# Formats a where clause. If parenthesize is true, then the whole
# generated clause will be enclosed in a set of parentheses.
def expression_list(where, parenthesize = false)
case where
when Hash:
parenthesize = false if where.size == 1
fmt = where.map {|i| format_expression(i[0], :eql, i[1])}.
join(AND_SEPARATOR)
when Array:
fmt = where.shift.gsub(QUESTION_MARK) {literal(where.shift)}
when Proc:
fmt = where.to_expressions.map {|e| format_expression(e.left, e.op, e.right)}.
join(AND_SEPARATOR)
else
fmt = where
end
parenthesize ? "(#{fmt})" : fmt
end
# Formats a join condition.
def join_cond_list(cond, join_table)
cond.map do |kv|
l = qualified_field_name(kv[0], join_table)
r = qualified_field_name(kv[1], @opts[:from])
"(#{l} = #{r})"
end.join(AND_SEPARATOR)
end
# Returns a copy of the dataset with the source changed.
def from(*source)
dup_merge(:from => source)
end
# Returns a copy of the dataset with the selected fields changed.
def select(*fields)
dup_merge(:select => fields)
end
# Returns a copy of the dataset with the distinct option.
def uniq
dup_merge(:distinct => true)
end
alias distinct uniq
# Returns a copy of the dataset with the order changed.
def order(*order)
dup_merge(:order => order)
end
# Returns a copy of the dataset with the order reversed. If no order is
# given, the existing order is inverted.
def reverse_order(*order)
order(invert_order(order.empty? ? @opts[:order] : order))
end
DESC_ORDER_REGEXP = /(.*)\sDESC/.freeze
# Inverts the given order by breaking it into a list of field references
# and inverting them.
#
# dataset.invert_order('id DESC') #=> "id"
# dataset.invert_order('category, price DESC') #=>
# "category DESC, price"
def invert_order(order)
new_order = []
order.each do |f|
f.to_s.split(',').map do |p|
p.strip!
new_order << (p =~ DESC_ORDER_REGEXP ? $1 : p.to_sym.DESC)
end
end
new_order
end
# Returns a copy of the dataset with the results grouped by the value of
# the given fields
def group(*fields)
dup_merge(:group => fields)
end
# Returns a copy of the dataset with the given conditions imposed upon it.
# If the query has been grouped, then the conditions are imposed in the
# HAVING clause. If not, then they are imposed in the WHERE clause. Filter
# accepts a Hash (formated into a list of equality expressions), an Array
# (formatted ala ActiveRecord conditions), a String (taken literally), or
# a block that is converted into expressions.
#
# dataset.filter(:id => 3).sql #=>
# "SELECT * FROM items WHERE (id = 3)"
# dataset.filter('price < ?', 100).sql #=>
# "SELECT * FROM items WHERE price < 100"
# dataset.filter('price < 100').sql #=>
# "SELECT * FROM items WHERE price < 100"
# dataset.filter {price < 100}.sql #=>
# "SELECT * FROM items WHERE (price < 100)"
#
# Multiple filter calls can be chained for scoping:
#
# software = dataset.filter(:category => 'software')
# software.filter {price < 100}.sql #=>
# "SELECT * FROM items WHERE (category = 'software') AND (price < 100)"
def filter(*cond, &block)
clause = (@opts[:group] ? :having : :where)
cond = cond.first if cond.size == 1
parenthesize = !(cond.is_a?(Hash) || cond.is_a?(Array))
if @opts[clause]
l = expression_list(@opts[clause])
r = expression_list(block || cond, parenthesize)
dup_merge(clause => "#{l} AND #{r}")
else
dup_merge(clause => expression_list(block || cond))
end
end
# Performs the inverse of Dataset#filter.
#
# dataset.exclude(:category => 'software').sql #=>
# "SELECT * FROM items WHERE NOT (category = 'software')"
def exclude(*cond, &block)
clause = (@opts[:group] ? :having : :where)
cond = cond.first if cond.size == 1
parenthesize = !(cond.is_a?(Hash) || cond.is_a?(Array))
if @opts[clause]
l = expression_list(@opts[clause])
r = expression_list(block || cond, parenthesize)
cond = "#{l} AND NOT #{r}"
else
cond = "NOT #{expression_list(block || cond, true)}"
end
dup_merge(clause => cond)
end
# Returns a copy of the dataset with the where conditions changed. Raises
# if the dataset has been grouped. See also #filter.
def where(*cond, &block)
if @opts[:group]
raise SequelError, "Can't specify a WHERE clause once the dataset has been grouped"
else
filter(*cond, &block)
end
end
# Returns a copy of the dataset with the having conditions changed. Raises
# if the dataset has not been grouped. See also #filter
def having(*cond, &block)
unless @opts[:group]
raise SequelError, "Can only specify a HAVING clause on a grouped dataset"
else
filter(*cond, &block)
end
end
def union(dataset, all = false)
dup_merge(:union => dataset, :union_all => all)
end
def intersect(dataset, all = false)
dup_merge(:intersect => dataset, :intersect_all => all)
end
def except(dataset, all = false)
dup_merge(:except => dataset, :except_all => all)
end
LEFT_OUTER_JOIN = 'LEFT OUTER JOIN'.freeze
INNER_JOIN = 'INNER JOIN'.freeze
RIGHT_OUTER_JOIN = 'RIGHT OUTER JOIN'.freeze
FULL_OUTER_JOIN = 'FULL OUTER JOIN'.freeze
# Returns a joined dataset.
def join(table, expr)
expr = {expr => :id} unless expr.is_a?(Hash)
dup_merge(:join_type => LEFT_OUTER_JOIN, :join_table => table,
:join_cond => expr)
end
alias all to_a
# Maps field values for each record in the dataset (if a field name is
# given), or performs the stock mapping functionality of Enumerable.
def map(field_name = nil, &block)
if field_name
super() {|r| r[field_name]}
else
super(&block)
end
end
# Returns a hash with one column used as key and another used as value.
def hash_column(key_column, value_column)
inject({}) do |m, r|
m[r[key_column]] = r[value_column]
m
end
end
# Inserts the given values into the table.
def <<(values)
insert(values)
end
# Inserts multiple values. If a block is given it is invoked for each
# item in the given array before inserting it.
def insert_multiple(array, &block)
if block
array.each {|i| insert(block[i])}
else
array.each {|i| insert(i)}
end
end
EMPTY = ''.freeze
SPACE = ' '.freeze
# Formats a SELECT statement using the given options and the dataset
# options.
def select_sql(opts = nil)
opts = opts ? @opts.merge(opts) : @opts
fields = opts[:select]
select_fields = fields ? field_list(fields) : WILDCARD
select_source = source_list(opts[:from])
sql = opts[:distinct] ? \
"SELECT DISTINCT #{select_fields} FROM #{select_source}" : \
"SELECT #{select_fields} FROM #{select_source}"
if join_type = opts[:join_type]
join_table = opts[:join_table]
join_cond = join_cond_list(opts[:join_cond], join_table)
sql << " #{join_type} #{join_table} ON #{join_cond}"
end
if where = opts[:where]
sql << " WHERE #{where}"
end
if group = opts[:group]
sql << " GROUP BY #{field_list(group)}"
end
if order = opts[:order]
sql << " ORDER BY #{field_list(order)}"
end
if having = opts[:having]
sql << " HAVING #{having}"
end
if limit = opts[:limit]
sql << " LIMIT #{limit}"
if offset = opts[:offset]
sql << " OFFSET #{offset}"
end
end
if union = opts[:union]
sql << (opts[:union_all] ? \
" UNION ALL #{union.sql}" : " UNION #{union.sql}")
elsif intersect = opts[:intersect]
sql << (opts[:intersect_all] ? \
" INTERSECT ALL #{intersect.sql}" : " INTERSECT #{intersect.sql}")
elsif except = opts[:except]
sql << (opts[:except_all] ? \
" EXCEPT ALL #{except.sql}" : " EXCEPT #{except.sql}")
end
sql
end
alias sql select_sql
# Formats an INSERT statement using the given values. If a hash is given,
# the resulting statement includes field names. If no values are given,
# the resulting statement includes a DEFAULT VALUES clause.
#
# dataset.insert_sql() #=> 'INSERT INTO items DEFAULT VALUES'
# dataset.insert_sql(1,2,3) #=> 'INSERT INTO items VALUES (1, 2, 3)'
# dataset.insert_sql(:a => 1, :b => 2) #=>
# 'INSERT INTO items (a, b) VALUES (1, 2)'
def insert_sql(*values)
if values.empty?
"INSERT INTO #{@opts[:from]} DEFAULT VALUES"
elsif (values.size == 1) && values[0].is_a?(Hash)
field_list = []
value_list = []
values[0].each do |k, v|
field_list << k
value_list << literal(v)
end
fl = field_list.join(COMMA_SEPARATOR)
vl = value_list.join(COMMA_SEPARATOR)
"INSERT INTO #{@opts[:from]} (#{fl}) VALUES (#{vl})"
else
"INSERT INTO #{@opts[:from]} VALUES (#{literal(values)})"
end
end
# Formats an UPDATE statement using the given values.
#
# dataset.update_sql(:price => 100, :category => 'software') #=>
# "UPDATE items SET price = 100, category = 'software'"
def update_sql(values, opts = nil)
opts = opts ? @opts.merge(opts) : @opts
if opts[:group]
raise SequelError, "Can't update a grouped dataset"
elsif (opts[:from].size > 1) or opts[:join_type]
raise SequelError, "Can't update a joined dataset"
end
set_list = values.map {|k, v| "#{k} = #{literal(v)}"}.
join(COMMA_SEPARATOR)
sql = "UPDATE #{@opts[:from]} SET #{set_list}"
if where = opts[:where]
sql << " WHERE #{where}"
end
sql
end
# Formats a DELETE statement using the given options and dataset options.
#
# dataset.filter {price >= 100}.delete_sql #=>
# "DELETE FROM items WHERE (price >= 100)"
def delete_sql(opts = nil)
opts = opts ? @opts.merge(opts) : @opts
if opts[:group]
raise SequelError, "Can't delete from a grouped dataset"
elsif opts[:from].is_a?(Array) && opts[:from].size > 1
raise SequelError, "Can't delete from a joined dataset"
end
sql = "DELETE FROM #{opts[:from]}"
if where = opts[:where]
sql << " WHERE #{where}"
end
sql
end
# Returns the first record in the dataset.
def single_record(opts = nil)
each(opts) {|r| return r}
nil
end
# Returns the first value of the first reecord in the dataset.
def single_value(opts = nil)
naked.each(opts) {|r| return r.values.first}
end
SELECT_COUNT = {:select => ["COUNT(*)"], :order => nil}.freeze
# Returns the number of records in the dataset.
def count
single_value(SELECT_COUNT).to_i
end
alias size count
# Returns a table reference for use in the FROM clause. If the dataset has
# only a :from option refering to a single table, only the table name is
# returned. Otherwise a subquery is returned.
def to_table_reference
if opts.keys == [:from] && opts[:from].size == 1
opts[:from].first.to_s
else
"(#{sql})"
end
end
# Returns the minimum value for the given field.
def min(field)
single_value(:select => [field.MIN])
end
# Returns the maximum value for the given field.
def max(field)
single_value(:select => [field.MAX])
end
# Returns the sum for the given field.
def sum(field)
single_value(:select => [field.SUM])
end
# Returns the average value for the given field.
def avg(field)
single_value(:select => [field.AVG])
end
# Returns an EXISTS clause for the dataset.
#
# dataset.exists #=> "EXISTS (SELECT 1 FROM items)"
def exists(opts = nil)
"EXISTS (#{sql({:select => [1]}.merge(opts || {}))})"
end
# If given an integer, the dataset will contain only the first l results.
# If given a range, it will contain only those at offsets within that
# range. If a second argument is given, it is used as an offset.
def limit(l, o = nil)
if l.is_a? Range
lim = (l.exclude_end? ? l.last - l.first : l.last + 1 - l.first)
dup_merge(:limit => lim, :offset=>l.first)
elsif o
dup_merge(:limit => l, :offset => o)
else
dup_merge(:limit => l)
end
end
# Returns the first record in the dataset. If the num argument is specified,
# an array is returned with the first num records.
def first(num = 1)
if num == 1
single_record
else
limit(num).all
end
end
# Returns the first record matching the condition.
def [](*conditions)
where(*conditions).first
end
# Returns the last records in the dataset by inverting the order. If no
# order is given, an exception is raised. If num is not given, the last
# record is returned. Otherwise an array is returned with the last
# num records.
def last(num = 1)
raise SequelError, 'No order specified' unless
@opts[:order] || (opts && opts[:order])
l = {:limit => num}
opts = {:order => invert_order(@opts[:order])}.
merge(opts ? opts.merge(l) : l)
if num == 1
single_record(opts)
else
dup_merge(opts).all
end
end
# Deletes all records in the dataset one at a time by invoking the destroy
# method of the associated model class.
def destroy
raise SequelError, 'Dataset not associated with model' unless @model_class
count = 0
@db.transaction {each {|r| count += 1; r.destroy}}
count
end
# Pretty prints the records in the dataset as plain-text table.
def print(*columns)
Sequel::PrettyTable.print(naked.all, columns.empty? ? nil : columns)
end
end
end
class Symbol
def DESC
"#{to_s} DESC"
end
def AS(target)
"#{to_field_name} AS #{target}"
end
def MIN; "min(#{to_field_name})"; end
def MAX; "max(#{to_field_name})"; end
def SUM; "sum(#{to_field_name})"; end
def AVG; "avg(#{to_field_name})"; end
AS_REGEXP = /(.*)___(.*)/.freeze
DOUBLE_UNDERSCORE = '__'.freeze
PERIOD = '.'.freeze
def to_field_name
s = to_s
if s =~ AS_REGEXP
s = "#{$1} AS #{$2}"
end
s.split(DOUBLE_UNDERSCORE).join(PERIOD)
end
def ALL
"#{to_s}.*"
end
end