# frozen-string-literal: true
#
# This extension changes Sequel's postgres adapter to automatically
# parameterize queries by default. Sequel's default behavior has always
# been to literalize all arguments unless specifically using
# parameters (via :$arg placeholders and the Dataset#prepare/call methods).
# This extension makes Sequel use string, numeric, blob, date, and
# time types as parameters. Example:
#
# # Default
# DB[:test].where(:a=>1)
# # SQL: SELECT * FROM test WHERE a = 1
#
# DB.extension :pg_auto_parameterize
# DB[:test].where(:a=>1)
# # SQL: SELECT * FROM test WHERE a = $1 (args: [1])
#
# Other pg_* extensions that ship with Sequel and add support for
# PostgreSQL-specific types support automatically parameterizing those
# types when used with this extension.
#
# This extension is not generally faster than the default behavior.
# In some cases it is faster, such as when using large strings.
# However, the use of parameters avoids potential security issues,
# in case Sequel does not correctly literalize one of the arguments
# that this extension would automatically parameterize.
#
# There are some known issues with automatic parameterization:
#
# 1. In order to avoid most type errors, the extension attempts to guess
# the appropriate type and automatically casts most placeholders,
# except plain Ruby strings (which PostgreSQL treats as an unknown
# type).
#
# Unfortunately, if the type guess is incorrect, or a plain Ruby
# string is used and PostgreSQL cannot determine the data type for it,
# the query may result in a DatabaseError. To fix both issues, you can
# explicitly cast values using Sequel.cast(value, type), and
# Sequel will cast to that type.
#
# 2. PostgreSQL supports a maximum of 65535 parameters per query.
# Attempts to use a query with more than this number of parameters
# will result in a Sequel::DatabaseError being raised. Sequel tries
# to mitigate this issue by turning column IN (int, ...)
# queries into column = ANY(CAST($ AS int8[])) using an
# array parameter, to reduce the number of parameters. It also limits
# inserting multiple rows at once to a maximum of 40 rows per query by
# default. While these mitigations handle the most common cases
# where a large number of parameters would be used, there are other
# cases.
#
# 3. Automatic parameterization will consider the same objects as
# equivalent when building SQL. However, for performance, it does
# not perform equality checks. So code such as:
#
# DB[:t].select{foo('a').as(:f)}.group{foo('a')}
# # SELECT foo('a') AS "f" FROM "t" GROUP BY foo('a')
#
# Will get auto paramterized as:
#
# # SELECT foo($1) AS "f" FROM "t" GROUP BY foo($2)
#
# Which will result in a DatabaseError, since that is not valid SQL.
#
# If you use the same expression, it will use the same parameter:
#
# foo = Sequel.function(:foo, 'a')
# DB[:t].select(foo.as(:f)).group(foo)
# # SELECT foo($1) AS "f" FROM "t" GROUP BY foo($1)
#
# Note that Dataset#select_group and similar methods that take arguments
# used in multiple places in the SQL will generally handle this
# automatically, since they will use the same objects:
#
# DB[:t].select_group{foo('a').as(:f)}
# # SELECT foo($1) AS "f" FROM "t" GROUP BY foo($1)
#
# You can work around any issues that come up by disabling automatic
# parameterization by calling the +no_auto_parameterize+ method on the
# dataset (which returns a clone of the dataset). You can avoid
# parameterization for specific values in the query by wrapping them
# with +Sequel.skip_pg_auto_param+.
#
# It is likely there are corner cases not mentioned above
# when using this extension. Users are encouraged to provide feedback
# when using this extension if they come across such corner cases.
#
# This extension is only compatible when using the pg driver, not
# when using the sequel-postgres-pr, jeremyevans-postgres-pr, or
# postgres-pr drivers, as those do not support bound variables.
#
# Related module: Sequel::Postgres::AutoParameterize
module Sequel
module Postgres
# Enable automatically parameterizing queries.
module AutoParameterize
# SQL query string that also holds an array of parameters
class QueryString < ::String
# The array of parameters used by this query.
attr_reader :args
# Add a new parameter to this query, which adds
# the parameter to the array of parameters, and an
# SQL placeholder to the query itself.
def add_arg(s)
unless defined?(@args)
@args = []
@arg_map = {}
@arg_map.compare_by_identity
end
unless pos = @arg_map[s]
@args << s
pos = @arg_map[s] = @args.length.to_s
end
self << '$' << pos
end
# Return a new QueryString with the given string appended
# to the receiver, and the same arguments.
def +(other)
v = self.class.new(super)
v.instance_variable_set(:@args, @args) if @args
v
end
# Whether this query string currently supports
# automatic parameterization. Automatic parameterization
# is disabled at certain points during query building where
# PostgreSQL does not support it.
def auto_param?
!@skip_auto_param
end
# Skip automatic parameterization inside the passed block.
# This is used during query generation to disable
# automatic parameterization for clauses not supporting it.
def skip_auto_param
skip_auto_param = @skip_auto_param
begin
@skip_auto_param = true
yield
ensure
@skip_auto_param = skip_auto_param
end
end
# Freeze the stored arguments when freezing the query string.
def freeze
if @args
@args.freeze
@arg_map.freeze
end
super
end
# Show args when the query string is inspected
def inspect
@args ? "#{self}; #{@args.inspect}".inspect : super
end
def initialize_copy(other)
super
if args = other.instance_variable_get(:@args)
@args = args.dup
@arg_map = other.instance_variable_get(:@arg_map).dup
end
end
end
# Wrapper class that skips auto parameterization for the wrapped object.
class SkipAutoParam < SQL::Wrapper
def to_s_append(ds, sql)
if sql.is_a?(QueryString)
sql.skip_auto_param{super}
else
super
end
end
end
# PlacholderLiteralizer subclass with support for stored auto parameters.
class PlaceholderLiteralizer < ::Sequel::Dataset::PlaceholderLiteralizer
def initialize(dataset, fragments, final_sql, arity)
s = dataset.sql.dup
s.clear
@sql_origin = s.freeze
super
end
private
def sql_origin
@sql_origin.dup
end
end
module DatabaseMethods
def self.extended(db)
unless (db.adapter_scheme == :postgres && USES_PG) || (db.adapter_scheme == :mock && db.database_type == :postgres)
raise Error, "pg_auto_parameterize is only supported when using the postgres adapter with the pg driver"
end
db.extend_datasets(DatasetMethods)
end
# If the sql string has an embedded parameter array,
# extract the parameter values from that.
def execute(sql, opts={})
if sql.is_a?(QueryString) && (args = sql.args)
opts = opts.merge(:arguments=>args)
end
super
end
private
# Disable auto_parameterization during COPY TABLE.
def copy_table_sql(table, opts=OPTS)
table = _no_auto_parameterize(table)
super
end
# Disable auto_parameterization during CREATE TABLE AS.
def create_table_as(name, sql, options)
sql = _no_auto_parameterize(sql)
super
end
# Disable auto_parameterization during CREATE VIEW.
def create_view_sql(name, source, options)
source = _no_auto_parameterize(source)
super
end
# Disable automatic parameterization for the given table if supported.
def _no_auto_parameterize(table)
if table.is_a?(DatasetMethods)
table.no_auto_parameterize
else
table
end
end
end
module DatasetMethods
# Return a clone of the dataset that will not do
# automatic parameterization.
def no_auto_parameterize
cached_dataset(:_no_auto_parameterize_ds) do
@opts[:no_auto_parameterize] ? self : clone(:no_auto_parameterize=>true)
end
end
# Do not add implicit typecasts for directly typecasted values,
# since the user is presumably doing so to set the type, not convert
# from the implicitly typecasted type.
def cast_sql_append(sql, expr, type)
if auto_param?(sql) && auto_param_type(expr)
sql << 'CAST('
sql.add_arg(expr)
sql << ' AS ' << db.cast_type_literal(type).to_s << ')'
else
super
end
end
# Transform column IN (int, ...) expressions into column = ANY($)
# and column NOT IN (int, ...) expressions into column != ALL($)
# using an integer array bound variable for the ANY/ALL argument.
# This is the same optimization PostgreSQL performs internally,
# but this reduces the number of bound variables.
def complex_expression_sql_append(sql, op, args)
case op
when :IN, :"NOT IN"
l, r = args
if auto_param?(sql) && !l.is_a?(Array) && _integer_array?(r) && r.size > 1
if op == :IN
op = :"="
func = :ANY
else
op = :!=
func = :ALL
end
args = [l, Sequel.function(func, Sequel.cast(_integer_array_auto_param(r), 'int8[]'))]
end
end
super
end
# Parameterize insertion of multiple values
def multi_insert_sql(columns, values)
if @opts[:no_auto_parameterize]
super
else
[clone(:multi_insert_values=>values.map{|r| Array(r)}).insert_sql(columns, LiteralString.new('VALUES '))]
end
end
# For strings, numeric arguments, and date/time arguments, add
# them as parameters to the query instead of literalizing them
# into the SQL.
def literal_append(sql, v)
if auto_param?(sql) && (type = auto_param_type(v))
sql.add_arg(v) << type
else
super
end
end
# The class to use for placeholder literalizers.
def placeholder_literalizer_class
if @opts[:no_auto_parameterize]
super
else
PlaceholderLiteralizer
end
end
# Disable automatic parameterization when using a cursor.
def use_cursor(*)
super.no_auto_parameterize
end
# Store receiving dataset and args when with_sql is used with a method name symbol, so sql
# can be parameterized correctly if used as a subselect.
def with_sql(*a)
ds = super
if Symbol === a[0]
ds = ds.clone(:with_sql_dataset=>self, :with_sql_args=>a.freeze)
end
ds
end
protected
# Disable automatic parameterization for prepared statements,
# since they will use manual parameterization.
def to_prepared_statement(*a)
@opts[:no_auto_parameterize] ? super : no_auto_parameterize.to_prepared_statement(*a)
end
private
# If auto parameterization is supported for the value, return a string
# for the implicit typecast to use. Return false/nil if the value should not be
# automatically parameterized.
def auto_param_type(v)
case v
when String
case v
when LiteralString
false
when Sequel::SQL::Blob
"::bytea"
else
""
end
when Integer
((v > 2147483647 || v < -2147483648) ? "::int8" : "::int4")
when Float
# PostgreSQL treats literal floats as numeric, not double precision
# But older versions of PostgreSQL don't handle Infinity/NaN in numeric
v.finite? ? "::numeric" : "::double precision"
when BigDecimal
"::numeric"
when Sequel::SQLTime
"::time"
when Time
"::#{@db.cast_type_literal(Time)}"
when DateTime
"::#{@db.cast_type_literal(DateTime)}"
when Date
"::date"
else
v.respond_to?(:sequel_auto_param_type) ? v.sequel_auto_param_type(self) : auto_param_type_fallback(v)
end
end
# Allow other extensions to support auto parameterization in ways that do not
# require adding the sequel_auto_param_type method.
def auto_param_type_fallback(v)
super if defined?(super)
end
# Whether the given query string currently supports automatic parameterization.
def auto_param?(sql)
sql.is_a?(QueryString) && sql.auto_param?
end
# Default the import slice to 40, since PostgreSQL supports a maximum of 1600
# columns per table, and it supports a maximum of 65k parameters. Technically,
# there can be more than one parameter per column, so this doesn't prevent going
# over the limit, though it does make it less likely.
def default_import_slice
40
end
# Handle parameterization of multi_insert_sql
def _insert_values_sql(sql, values)
super
if values = @opts[:multi_insert_values]
expression_list_append(sql, values.map{|r| Array(r)})
end
end
# Whether the given argument is an array of integers or NULL values, recursively.
def _integer_array?(v)
Array === v && v.all?{|x| nil == x || Integer === x}
end
# Create the bound variable string that will be used for the IN (int, ...) to = ANY($)
# optimization for integer arrays.
def _integer_array_auto_param(v)
buf = String.new
buf << '{'
comma = false
v.each do |x|
if comma
buf << ","
else
comma = true
end
buf << (x ? x.to_s : 'NULL')
end
buf << '}'
end
# Skip auto parameterization in LIMIT and OFFSET clauses
def select_limit_sql(sql)
if auto_param?(sql) && (@opts[:limit] || @opts[:offset])
sql.skip_auto_param{super}
else
super
end
end
# Skip auto parameterization in ORDER clause if used with
# integer values indicating ordering by the nth column.
def select_order_sql(sql)
if auto_param?(sql) && (order = @opts[:order]) && order.any?{|o| Integer === o || (SQL::OrderedExpression === o && Integer === o.expression)}
sql.skip_auto_param{super}
else
super
end
end
# Skip auto parameterization in CTE CYCLE clause
def select_with_sql_cte_search_cycle(sql,cte)
if auto_param?(sql) && cte[:cycle]
sql.skip_auto_param{super}
else
super
end
end
# Unless auto parameterization is disabled, use a string that
# can store the parameterized arguments.
def sql_string_origin
@opts[:no_auto_parameterize] ? super : QueryString.new
end
# If subquery uses with_sql with a method name symbol, get the dataset
# with_sql was called on, and use that as the subquery, recording the
# arguments to with_sql that will be used to calculate the sql.
def subselect_sql_dataset(sql, ds)
if ws_ds = ds.opts[:with_sql_dataset]
super(sql, ws_ds).clone(:subselect_sql_args=>ds.opts[:with_sql_args])
else
super
end
end
# If subquery used with_sql with a method name symbol, use the arguments to
# with_sql to determine the sql, so that the subselect can be parameterized.
def subselect_sql_append_sql(sql, ds)
if args = ds.opts[:subselect_sql_args]
ds.send(*args)
else
super
end
end
# Use auto parameterization for datasets with static SQL using placeholders.
def static_sql(sql)
if @opts[:append_sql] || @opts[:no_auto_parameterize] || String === sql
super
else
query_string = QueryString.new
literal_append(query_string, sql)
query_string
end
end
end
end
end
module SQL::Builders
# Skip auto parameterization for the given object when building queries.
def skip_pg_auto_param(v)
Postgres::AutoParameterize::SkipAutoParam.new(v)
end
end
Database.register_extension(:pg_auto_parameterize, Postgres::AutoParameterize::DatabaseMethods)
end