# frozen-string-literal: true # # This extension changes Sequel's postgres adapter to automatically # parameterize queries by default. Sequel's default behavior has always # been to literalize all arguments unless specifically using # parameters (via :$arg placeholders and the Dataset#prepare/call methods). # This extension makes Sequel use string, numeric, blob, date, and # time types as parameters. Example: # # # Default # DB[:test].where(:a=>1) # # SQL: SELECT * FROM test WHERE a = 1 # # DB.extension :pg_auto_parameterize # DB[:test].where(:a=>1) # # SQL: SELECT * FROM test WHERE a = $1 (args: [1]) # # Other pg_* extensions that ship with Sequel and add support for # PostgreSQL-specific types support automatically parameterizing those # types when used with this extension. # # This extension is not generally faster than the default behavior. # In some cases it is faster, such as when using large strings. # However, the use of parameters avoids potential security issues, # in case Sequel does not correctly literalize one of the arguments # that this extension would automatically parameterize. # # There are some known issues with automatic parameterization: # # 1. In order to avoid most type errors, the extension attempts to guess # the appropriate type and automatically casts most placeholders, # except plain Ruby strings (which PostgreSQL treats as an unknown # type). # # Unfortunately, if the type guess is incorrect, or a plain Ruby # string is used and PostgreSQL cannot determine the data type for it, # the query may result in a DatabaseError. To fix both issues, you can # explicitly cast values using Sequel.cast(value, type), and # Sequel will cast to that type. # # 2. PostgreSQL supports a maximum of 65535 parameters per query. # Attempts to use a query with more than this number of parameters # will result in a Sequel::DatabaseError being raised. Sequel tries # to mitigate this issue by turning column IN (int, ...) # queries into column = ANY(CAST($ AS int8[])) using an # array parameter, to reduce the number of parameters. It also limits # inserting multiple rows at once to a maximum of 40 rows per query by # default. While these mitigations handle the most common cases # where a large number of parameters would be used, there are other # cases. # # 3. Automatic parameterization will consider the same objects as # equivalent when building SQL. However, for performance, it does # not perform equality checks. So code such as: # # DB[:t].select{foo('a').as(:f)}.group{foo('a')} # # SELECT foo('a') AS "f" FROM "t" GROUP BY foo('a') # # Will get auto paramterized as: # # # SELECT foo($1) AS "f" FROM "t" GROUP BY foo($2) # # Which will result in a DatabaseError, since that is not valid SQL. # # If you use the same expression, it will use the same parameter: # # foo = Sequel.function(:foo, 'a') # DB[:t].select(foo.as(:f)).group(foo) # # SELECT foo($1) AS "f" FROM "t" GROUP BY foo($1) # # Note that Dataset#select_group and similar methods that take arguments # used in multiple places in the SQL will generally handle this # automatically, since they will use the same objects: # # DB[:t].select_group{foo('a').as(:f)} # # SELECT foo($1) AS "f" FROM "t" GROUP BY foo($1) # # You can work around any issues that come up by disabling automatic # parameterization by calling the +no_auto_parameterize+ method on the # dataset (which returns a clone of the dataset). You can avoid # parameterization for specific values in the query by wrapping them # with +Sequel.skip_pg_auto_param+. # # It is likely there are corner cases not mentioned above # when using this extension. Users are encouraged to provide feedback # when using this extension if they come across such corner cases. # # This extension is only compatible when using the pg driver, not # when using the sequel-postgres-pr, jeremyevans-postgres-pr, or # postgres-pr drivers, as those do not support bound variables. # # Related module: Sequel::Postgres::AutoParameterize module Sequel module Postgres # Enable automatically parameterizing queries. module AutoParameterize # SQL query string that also holds an array of parameters class QueryString < ::String # The array of parameters used by this query. attr_reader :args # Add a new parameter to this query, which adds # the parameter to the array of parameters, and an # SQL placeholder to the query itself. def add_arg(s) unless defined?(@args) @args = [] @arg_map = {} @arg_map.compare_by_identity end unless pos = @arg_map[s] @args << s pos = @arg_map[s] = @args.length.to_s end self << '$' << pos end # Return a new QueryString with the given string appended # to the receiver, and the same arguments. def +(other) v = self.class.new(super) v.instance_variable_set(:@args, @args) if @args v end # Whether this query string currently supports # automatic parameterization. Automatic parameterization # is disabled at certain points during query building where # PostgreSQL does not support it. def auto_param? !@skip_auto_param end # Skip automatic parameterization inside the passed block. # This is used during query generation to disable # automatic parameterization for clauses not supporting it. def skip_auto_param skip_auto_param = @skip_auto_param begin @skip_auto_param = true yield ensure @skip_auto_param = skip_auto_param end end # Freeze the stored arguments when freezing the query string. def freeze if @args @args.freeze @arg_map.freeze end super end # Show args when the query string is inspected def inspect @args ? "#{self}; #{@args.inspect}".inspect : super end def initialize_copy(other) super if args = other.instance_variable_get(:@args) @args = args.dup @arg_map = other.instance_variable_get(:@arg_map).dup end end end # Wrapper class that skips auto parameterization for the wrapped object. class SkipAutoParam < SQL::Wrapper def to_s_append(ds, sql) if sql.is_a?(QueryString) sql.skip_auto_param{super} else super end end end # PlacholderLiteralizer subclass with support for stored auto parameters. class PlaceholderLiteralizer < ::Sequel::Dataset::PlaceholderLiteralizer def initialize(dataset, fragments, final_sql, arity) s = dataset.sql.dup s.clear @sql_origin = s.freeze super end private def sql_origin @sql_origin.dup end end module DatabaseMethods def self.extended(db) unless (db.adapter_scheme == :postgres && USES_PG) || (db.adapter_scheme == :mock && db.database_type == :postgres) raise Error, "pg_auto_parameterize is only supported when using the postgres adapter with the pg driver" end db.extend_datasets(DatasetMethods) end # If the sql string has an embedded parameter array, # extract the parameter values from that. def execute(sql, opts={}) if sql.is_a?(QueryString) && (args = sql.args) opts = opts.merge(:arguments=>args) end super end private # Disable auto_parameterization during COPY TABLE. def copy_table_sql(table, opts=OPTS) table = _no_auto_parameterize(table) super end # Disable auto_parameterization during CREATE TABLE AS. def create_table_as(name, sql, options) sql = _no_auto_parameterize(sql) super end # Disable auto_parameterization during CREATE VIEW. def create_view_sql(name, source, options) source = _no_auto_parameterize(source) super end # Disable automatic parameterization for the given table if supported. def _no_auto_parameterize(table) if table.is_a?(DatasetMethods) table.no_auto_parameterize else table end end end module DatasetMethods # Return a clone of the dataset that will not do # automatic parameterization. def no_auto_parameterize cached_dataset(:_no_auto_parameterize_ds) do @opts[:no_auto_parameterize] ? self : clone(:no_auto_parameterize=>true) end end # Do not add implicit typecasts for directly typecasted values, # since the user is presumably doing so to set the type, not convert # from the implicitly typecasted type. def cast_sql_append(sql, expr, type) if auto_param?(sql) && auto_param_type(expr) sql << 'CAST(' sql.add_arg(expr) sql << ' AS ' << db.cast_type_literal(type).to_s << ')' else super end end # Transform column IN (int, ...) expressions into column = ANY($) # and column NOT IN (int, ...) expressions into column != ALL($) # using an integer array bound variable for the ANY/ALL argument. # This is the same optimization PostgreSQL performs internally, # but this reduces the number of bound variables. def complex_expression_sql_append(sql, op, args) case op when :IN, :"NOT IN" l, r = args if auto_param?(sql) && !l.is_a?(Array) && _integer_array?(r) && r.size > 1 if op == :IN op = :"=" func = :ANY else op = :!= func = :ALL end args = [l, Sequel.function(func, Sequel.cast(_integer_array_auto_param(r), 'int8[]'))] end end super end # Parameterize insertion of multiple values def multi_insert_sql(columns, values) if @opts[:no_auto_parameterize] super else [clone(:multi_insert_values=>values.map{|r| Array(r)}).insert_sql(columns, LiteralString.new('VALUES '))] end end # For strings, numeric arguments, and date/time arguments, add # them as parameters to the query instead of literalizing them # into the SQL. def literal_append(sql, v) if auto_param?(sql) && (type = auto_param_type(v)) sql.add_arg(v) << type else super end end # The class to use for placeholder literalizers. def placeholder_literalizer_class if @opts[:no_auto_parameterize] super else PlaceholderLiteralizer end end # Disable automatic parameterization when using a cursor. def use_cursor(*) super.no_auto_parameterize end # Store receiving dataset and args when with_sql is used with a method name symbol, so sql # can be parameterized correctly if used as a subselect. def with_sql(*a) ds = super if Symbol === a[0] ds = ds.clone(:with_sql_dataset=>self, :with_sql_args=>a.freeze) end ds end protected # Disable automatic parameterization for prepared statements, # since they will use manual parameterization. def to_prepared_statement(*a) @opts[:no_auto_parameterize] ? super : no_auto_parameterize.to_prepared_statement(*a) end private # If auto parameterization is supported for the value, return a string # for the implicit typecast to use. Return false/nil if the value should not be # automatically parameterized. def auto_param_type(v) case v when String case v when LiteralString false when Sequel::SQL::Blob "::bytea" else "" end when Integer ((v > 2147483647 || v < -2147483648) ? "::int8" : "::int4") when Float # PostgreSQL treats literal floats as numeric, not double precision # But older versions of PostgreSQL don't handle Infinity/NaN in numeric v.finite? ? "::numeric" : "::double precision" when BigDecimal "::numeric" when Sequel::SQLTime "::time" when Time "::#{@db.cast_type_literal(Time)}" when DateTime "::#{@db.cast_type_literal(DateTime)}" when Date "::date" else v.respond_to?(:sequel_auto_param_type) ? v.sequel_auto_param_type(self) : auto_param_type_fallback(v) end end # Allow other extensions to support auto parameterization in ways that do not # require adding the sequel_auto_param_type method. def auto_param_type_fallback(v) super if defined?(super) end # Whether the given query string currently supports automatic parameterization. def auto_param?(sql) sql.is_a?(QueryString) && sql.auto_param? end # Default the import slice to 40, since PostgreSQL supports a maximum of 1600 # columns per table, and it supports a maximum of 65k parameters. Technically, # there can be more than one parameter per column, so this doesn't prevent going # over the limit, though it does make it less likely. def default_import_slice 40 end # Handle parameterization of multi_insert_sql def _insert_values_sql(sql, values) super if values = @opts[:multi_insert_values] expression_list_append(sql, values.map{|r| Array(r)}) end end # Whether the given argument is an array of integers or NULL values, recursively. def _integer_array?(v) Array === v && v.all?{|x| nil == x || Integer === x} end # Create the bound variable string that will be used for the IN (int, ...) to = ANY($) # optimization for integer arrays. def _integer_array_auto_param(v) buf = String.new buf << '{' comma = false v.each do |x| if comma buf << "," else comma = true end buf << (x ? x.to_s : 'NULL') end buf << '}' end # Skip auto parameterization in LIMIT and OFFSET clauses def select_limit_sql(sql) if auto_param?(sql) && (@opts[:limit] || @opts[:offset]) sql.skip_auto_param{super} else super end end # Skip auto parameterization in ORDER clause if used with # integer values indicating ordering by the nth column. def select_order_sql(sql) if auto_param?(sql) && (order = @opts[:order]) && order.any?{|o| Integer === o || (SQL::OrderedExpression === o && Integer === o.expression)} sql.skip_auto_param{super} else super end end # Skip auto parameterization in CTE CYCLE clause def select_with_sql_cte_search_cycle(sql,cte) if auto_param?(sql) && cte[:cycle] sql.skip_auto_param{super} else super end end # Unless auto parameterization is disabled, use a string that # can store the parameterized arguments. def sql_string_origin @opts[:no_auto_parameterize] ? super : QueryString.new end # If subquery uses with_sql with a method name symbol, get the dataset # with_sql was called on, and use that as the subquery, recording the # arguments to with_sql that will be used to calculate the sql. def subselect_sql_dataset(sql, ds) if ws_ds = ds.opts[:with_sql_dataset] super(sql, ws_ds).clone(:subselect_sql_args=>ds.opts[:with_sql_args]) else super end end # If subquery used with_sql with a method name symbol, use the arguments to # with_sql to determine the sql, so that the subselect can be parameterized. def subselect_sql_append_sql(sql, ds) if args = ds.opts[:subselect_sql_args] ds.send(*args) else super end end # Use auto parameterization for datasets with static SQL using placeholders. def static_sql(sql) if @opts[:append_sql] || @opts[:no_auto_parameterize] || String === sql super else query_string = QueryString.new literal_append(query_string, sql) query_string end end end end end module SQL::Builders # Skip auto parameterization for the given object when building queries. def skip_pg_auto_param(v) Postgres::AutoParameterize::SkipAutoParam.new(v) end end Database.register_extension(:pg_auto_parameterize, Postgres::AutoParameterize::DatabaseMethods) end