require "singleton"

require_relative "../context"
require_relative "active_record_relation.rb"

module Immunio
  # Since every value that will be escaped is very likely to be param passed to a SQL query,
  # we hook to the method escaping the values.
  #
  # Params are then sent to the QueryTracker which will take care of matching the params to the query.
  module QuotingHooks
    extend ActiveSupport::Concern

    included do
      Immunio::Utils.alias_method_chain self, :quote, :immunio if method_defined? :quote
    end

    # Value classes that are never reported as params by quote_with_immunio.
    IGNORED_TYPES = [TrueClass, FalseClass, NilClass, Fixnum, Bignum, Float].freeze

    # Wrapper around the adapter's #quote. Records the value (as a String) with
    # the QueryTracker, keyed by this object's object_id, then delegates to the
    # original #quote and returns its result.
    #
    # value  - The value about to be quoted.
    # column - Optional column object; when given, its #name is used as the
    #          param name.
    def quote_with_immunio(value, column = nil)
      Request.time "plugin", "#{Module.nesting[0]}::#{__method__}" do
        if column
          column_name = column.name
        else
          column_name = nil
        end

        # Ignored empty strings and values that can't contain injections.
        unless value.blank? || IGNORED_TYPES.include?(value.class)
          QueryTracker.instance.add_param column_name, value.to_s, object_id
        end

        Request.pause "plugin", "#{Module.nesting[0]}::#{__method__}" do
          quote_without_immunio(value, column)
        end
      end
    end
  end

  # There is one place where a statement may be quoted without going through the
  # quote method. This occurs when a where statement is given an array, like:
  #
  #     Users.where(["email LIKE %s", "bob@example.com"])
  #
  # The first value is a sprintf format string and the rest are values
  # interpolated into it. This triggers a call into sanitize_sql_array, which
  # will pass the values through quote_string, but only if the first value is
  # not a Hash and the statement does not include '?' placeholders. Otherwise,
  # different interpolation and quoting mechanisms are used.
  #
  # The above has been verified to be the case from Rails 3.0 to 4.2.
  module SanitizeHooks
    extend ActiveSupport::Concern

    included do |base|
      base.class_eval do
        # Wrapper around sanitize_sql_array. Records the interpolated values as
        # unnamed params when the sprintf-style path will be taken, then
        # delegates to the original implementation and returns its result.
        #
        # ary - Array whose first element is the statement and whose remaining
        #       elements are the values.
        def sanitize_sql_array_with_immunio(ary)
          Request.time "plugin", "#{Module.nesting[0]}::#{__method__}" do
            statement, *values = ary

            # Check if rails will use some other mechanism for quoting
            unless (values.first.is_a?(Hash) && statement =~ /:\w+/) ||
                   (statement.include?('?')) ||
                   (statement.blank?)
              # Rails is going to use quote_string, so handle parameters
              values.each { |value| QueryTracker.instance.add_param nil, value, connection.object_id }
            end

            Request.pause "plugin", "#{Module.nesting[0]}::#{__method__}" do
              sanitize_sql_array_without_immunio ary
            end
          end
        end

        Immunio::Utils.alias_method_chain self, :sanitize_sql_array, :immunio
      end
    end
  end

  # Hooks the Arel SQL visitor so that the AST is also walked by our own
  # ArelNodeVisitor before the SQL is generated.
  module ArelToSqlHooks
    extend ActiveSupport::Concern

    included do
      Immunio::Utils.alias_method_chain self, :accept, :immunio if method_defined? :accept
    end

    # Walks the AST with an ArelNodeVisitor bound to this visitor's connection,
    # then delegates to the original #accept and returns its result.
    def accept_with_immunio(object, *args)
      Request.time "plugin", "#{Module.nesting[0]}::#{__method__}" do
        visitor = ArelNodeVisitor.new(@connection.object_id)
        visitor.accept(object)
      end

      accept_without_immunio(object, *args)
    end
  end

  # Arel AST visitor to collect params and modifiers. Based on Arel::Visitors::DepthFirst.
  #
  # Whenever a query is built in Rails, a tree of Ruby objects (AST) is built to represent that query.
  # Arel (the engine building this tree) uses a Visitor to build the SQL statement represented by tree.
  # We use the same base class as Arel, but instead of building some SQL, we track the params and modifiers.
  #
  # See http://en.wikipedia.org/wiki/Visitor_pattern
  class ArelNodeVisitor < Arel::Visitors::Visitor
    # Only accepts statements to avoid duplicates in params if branches are visited multiple times.
    VISITABLES = [Arel::Nodes::SelectStatement,
                  Arel::Nodes::InsertStatement,
                  Arel::Nodes::UpdateStatement,
                  Arel::Nodes::DeleteStatement].freeze

    # Literal expressions that are never reported as modifiers.
    IGNORED_EXPRESSIONS = ["", "*", "1 AS one"].freeze

    # Copied from Arel::Visitors::Visitor to ensure the cached dispatch table isn't shared with
    # other Arel visitors under Rails 3.2.
    DISPATCH = Hash.new do |hash, klass|
      hash[klass] = "visit_#{(klass.name || '').gsub('::', '_')}"
    end

    # connection_id - object_id of the connection; passed to every
    #                 QueryTracker call made by this visitor.
    def initialize(connection_id)
      @connection_id = connection_id
      super()
    end

    # Entry point into the visitor.
    def accept(object)
      if VISITABLES.include?(object.class)
        visit object, {}
      end
    end

    # Dispatch table mapping a class to the name of its visit method.
    def dispatch
      DISPATCH
    end

    private

    # Backported from Arel 6 Visitor::Reduce, as Arel 3 (Used in Rails 3.2) doesn't have the Reduce visitor.
    #
    # When no visit method exists for the object's class, the first ancestor
    # that has one is used, and the result is cached in the dispatch table
    # before retrying.
    def visit(object, collector, opts = {})
      send dispatch[object.class], object, collector, opts
    rescue NoMethodError => e
      # The visit method exists, so the error came from inside it: re-raise.
      raise e if respond_to?(dispatch[object.class], true)
      superklass = object.class.ancestors.find { |klass|
        respond_to?(dispatch[klass], true)
      }
      raise(TypeError, "Cannot visit #{object.class}") unless superklass
      dispatch[object.class] = dispatch[superklass]
      retry
    end

    # When an unsafe node is visited. Track the param or modifier.
    def unsafe(o, context, _opts)
      unless o.class == String
        QueryTracker.instance.add_ast_data o.class.name, @connection_id
      end

      if IGNORED_EXPRESSIONS.include?(o)
        # Ignore
      elsif context[:modifier]
        QueryTracker.instance.add_modifier context[:modifier], o.to_s, @connection_id
      end
    end
    alias :visit_Arel_Nodes_SqlLiteral :unsafe
    alias :visit_String :unsafe

    # We use a context (second argument of visit) to keep track of where we are in the AST.
    # The following methods update the contexts based on the branch of the AST being visited
    # and recursively visit the children nodes.

    def visit_Arel_Nodes_SelectCore(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.projections, modifier: :select
      visit o.source, modifier: :from
      visit o.wheres, modifier: :where
      visit o.groups, modifier: :group
      visit o.windows, context
      visit o.having, modifier: :having
    end

    def visit_Arel_Nodes_SelectStatement(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.cores, context
      visit o.orders, modifier: :order
      visit o.limit, modifier: :limit
      visit o.lock, modifier: :lock
      visit o.offset, modifier: :offset
    end

    def visit_Arel_Nodes_UpdateStatement(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.relation, context
      visit o.values, context
      visit o.wheres, modifier: :where
      visit o.orders, modifier: :order
      visit o.limit, modifier: :limit
    end

    def visit_Arel_Nodes_DeleteStatement(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.relation, context
      visit o.wheres, modifier: :where
    end

    # All other methods below are for visiting each node and their children.

    # Nodes with a single child expression (#expr).
    def unary(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.expr, context
    end
    alias :visit_Arel_Nodes_Group             :unary
    alias :visit_Arel_Nodes_Grouping          :unary
    alias :visit_Arel_Nodes_Having            :unary
    alias :visit_Arel_Nodes_Limit             :unary
    alias :visit_Arel_Nodes_Not               :unary
    alias :visit_Arel_Nodes_Offset            :unary
    alias :visit_Arel_Nodes_On                :unary
    alias :visit_Arel_Nodes_Ordering          :unary
    alias :visit_Arel_Nodes_Ascending         :unary
    alias :visit_Arel_Nodes_Descending        :unary
    alias :visit_Arel_Nodes_Top               :unary
    alias :visit_Arel_Nodes_UnqualifiedColumn :unary
    alias :visit_Arel_Nodes_Lock              :unary
    alias :visit_Arel_Nodes_Quoted            :unary

    # Function nodes; their arguments are in #expressions.
    def function(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.expressions, context
    end
    alias :visit_Arel_Nodes_Avg    :function
    alias :visit_Arel_Nodes_Exists :function
    alias :visit_Arel_Nodes_Max    :function
    alias :visit_Arel_Nodes_Min    :function
    alias :visit_Arel_Nodes_Sum    :function

    def visit_Arel_Nodes_NamedFunction(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.expressions, context
    end

    def visit_Arel_Nodes_Count(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.expressions, context
    end

    # Nodes with any number of children (#children).
    def nary(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      o.children.each { |child| visit child, context }
    end
    alias :visit_Arel_Nodes_And :nary

    # Nodes with a #left and a #right child.
    def binary(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.left, context
      visit o.right, context
    end
    alias :visit_Arel_Nodes_As                 :binary
    alias :visit_Arel_Nodes_Assignment         :binary
    alias :visit_Arel_Nodes_Between            :binary
    alias :visit_Arel_Nodes_DoesNotMatch       :binary
    alias :visit_Arel_Nodes_Equality           :binary
    alias :visit_Arel_Nodes_FullOuterJoin      :binary
    alias :visit_Arel_Nodes_GreaterThan        :binary
    alias :visit_Arel_Nodes_GreaterThanOrEqual :binary
    alias :visit_Arel_Nodes_InfixOperation     :binary
    alias :visit_Arel_Nodes_JoinSource         :binary
    alias :visit_Arel_Nodes_InnerJoin          :binary
    alias :visit_Arel_Nodes_LessThan           :binary
    alias :visit_Arel_Nodes_LessThanOrEqual    :binary
    alias :visit_Arel_Nodes_Matches            :binary
    alias :visit_Arel_Nodes_NotEqual           :binary
    alias :visit_Arel_Nodes_NotRegexp          :binary
    alias :visit_Arel_Nodes_Or                 :binary
    alias :visit_Arel_Nodes_OuterJoin          :binary
    alias :visit_Arel_Nodes_Regexp             :binary
    alias :visit_Arel_Nodes_RightOuterJoin     :binary
    alias :visit_Arel_Nodes_TableAlias         :binary
    alias :visit_Arel_Nodes_Values             :binary
    alias :visit_Arel_Nodes_Union              :binary

    # Special case the IN clause node. We don't want to add info about AST
    # nodes in the right side of an IN clause if they are terminal or an
    # array of terminal nodes.
    def visit_Arel_Nodes_In(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.left, context
      visit o.right, context, IN_children: true
    end
    alias :visit_Arel_Nodes_NotIn :visit_Arel_Nodes_In

    # Only the left side of a string join is visited.
    def visit_Arel_Nodes_StringJoin(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.left, context
    end

    def visit_Arel_Attribute(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.relation, context
    end
    alias :visit_Arel_Attributes_Integer   :visit_Arel_Attribute
    alias :visit_Arel_Attributes_Float     :visit_Arel_Attribute
    alias :visit_Arel_Attributes_String    :visit_Arel_Attribute
    alias :visit_Arel_Attributes_Time      :visit_Arel_Attribute
    alias :visit_Arel_Attributes_Boolean   :visit_Arel_Attribute
    alias :visit_Arel_Attributes_Attribute :visit_Arel_Attribute
    alias :visit_Arel_Attributes_Decimal   :visit_Arel_Attribute

    def visit_Arel_Table(o, _context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id
    end

    # Leaf nodes: nothing is recorded and nothing is visited.
    def terminal(_o, _context, _opts)
    end
    alias :visit_Arel_Nodes_Node              :terminal
    alias :visit_ActiveSupport_Multibyte_Chars :terminal
    alias :visit_ActiveSupport_StringInquirer :terminal
    alias :visit_Symbol                       :terminal
    alias :visit_Arel_Nodes_Window            :terminal
    alias :visit_Arel_Nodes_True              :terminal
    alias :visit_Arel_Nodes_False             :terminal
    alias :visit_BigDecimal                   :terminal
    alias :visit_Bignum                       :terminal
    alias :visit_Class                        :terminal
    alias :visit_Date                         :terminal
    alias :visit_DateTime                     :terminal
    alias :visit_FalseClass                   :terminal
    alias :visit_Fixnum                       :terminal
    alias :visit_Float                        :terminal
    alias :visit_Arel_Nodes_BindParam         :terminal
    alias :visit_NilClass                     :terminal
    alias :visit_Time                         :terminal
    alias :visit_TrueClass                    :terminal
    alias :visit_Object                       :terminal
    alias :visit_Arel_Nodes_Casted            :terminal

    def visit_Arel_Nodes_InsertStatement(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      visit o.relation, context
      visit o.columns, context
      visit o.values, context
    end

    # Arrays directly on the right side of an IN clause are not recorded
    # themselves; their elements are always visited.
    def visit_Array(o, context, opts)
      unless opts[:IN_children]
        QueryTracker.instance.add_ast_data o.class, @connection_id
      end

      o.each { |i| visit i, context }
    end
    alias :visit_Set :visit_Array

    def visit_Hash(o, context, _opts)
      QueryTracker.instance.add_ast_data o.class, @connection_id

      o.each { |k,v| visit(k, context); visit(v, context) }
    end
  end

  # Collects params, modifiers and context data per relation and reports each
  # executed SQL statement to the "active_record" / "sql_execute" hook.
  class QueryTracker
    include Singleton

    def initialize
      # The data in these hashes represent relations and connections whose
      # lifecycle cannot be easily inferred. A relation could be kept around
      # across multiple HTTP requests, for example. We defined finalizers to
      # clean up data in the hashes when the objects they are linked to are
      # released by the Ruby runtime.
      #
      # Note: Relations have an associated connection and are not accessed by
      # other connections. Connections are associated with a thread and are not
      # accessed by other threads at the same time. Thus, there is no need for
      # thread safety in any of the logic in this class.

      # Data about a relation. The data inside is stored at different times and
      # must be reset properly:
      #
      # * params and relation_data: Added when ActiveRecord::Relation API calls
      #                             are made, like #where. Should never be
      #                             reset for a given relation.
      # * ast_data and modifiers: Added when a relation is converted
      #                           into a SQL query statement. Should be reset
      #                           after every query execution.
      @relation_data = Hash.new do |relation_data, relation_id|
        # This should never happen, but if it does it's a sign of an impending
        # memory leak. Log it, but just let it happen as it would be hard to
        # handle elsewhere if we did not set up the data.
        unless ObjectSpace._id2ref(relation_id).is_a? ActiveRecord::Relation
          name = if ObjectSpace._id2ref(relation_id).is_a? Class
            "#{ObjectSpace._id2ref(relation_id).name} Class"
          else
            "#{ObjectSpace._id2ref(relation_id).class.name} Instance"
          end

          Immunio.logger.warn {"Creating relation data for non-relation: #{name}"}
          Immunio.logger.debug {"Call stack:\n#{caller.join "\n"}"}
        end

        # NOTE: If you hold a reference to the relation here, like say:
        #
        #     relation = ObjectSpace._id2ref(relation_id)
        #
        # the scope for the block will hold the relation and it will never be
        # released.
        ObjectSpace.define_finalizer ObjectSpace._id2ref(relation_id),
                                     self.class.finalize_relation(relation_id)

        relation_data[relation_id] = {
          params: {},
          relation_data: [],
          ast_data: [],
          modifiers: Hash.new do |modifiers, type|
            modifiers[type] = []
          end
        }
      end

      # Stacks of relations for each connection. Used to find the appropriate
      # relation for a connection when a query is executed. A stack is used
      # because some relation methods create new relations and call other
      # relation methods on the new relations.
      @relations = Hash.new do |relations, connection_id|
        connection = ObjectSpace._id2ref(connection_id)
        ObjectSpace.define_finalizer(connection, self.class.finalize_connection(connection_id))

        relations[connection_id] = []
      end

      # Last spawned relations for connections. Used for a hack to propagate
      # params to the right relation in Rails 3.
      @last_spawned_relations = {}
    end

    # Delete a relation record when the relation object is released.
    #
    # Returns a proc suitable for ObjectSpace.define_finalizer. It is built in a
    # class method so that it does not capture the relation itself.
    def self.finalize_relation(relation_id)
      proc do
        relation_data = instance.instance_variable_get(:@relation_data)
        # Defensive guard: only delete entries that actually exist.
        relation_data.delete(relation_id) if relation_data.has_key? relation_id
      end
    end

    # Delete a connection record when the connection object is released.
    #
    # Returns a proc suitable for ObjectSpace.define_finalizer.
    def self.finalize_connection(connection_id)
      proc do
        relations = instance.instance_variable_get(:@relations)
        # Defensive guard: only delete entries that actually exist.
        relations.delete(connection_id) if relations.has_key? connection_id
        instance.instance_variable_get(:@last_spawned_relations).delete connection_id
      end
    end

    # Push a relation onto the stack for its connection
    def push_relation(relation)
      @relations[relation.connection.object_id] << relation.object_id
    end

    # Pop a relation off the stack for its connection
    #
    # Logs a warning when the popped id is not the given relation's id.
    def pop_relation(relation)
      popped = @relations[relation.connection.object_id].pop

      unless popped == relation.object_id
        Immunio.logger.warn {"Popped wrong relation, expected: #{relation}, popped: #{popped}"}
        Immunio.logger.debug {"Call stack:\n#{caller.join "\n"}"}
      end
    end

    # Called when a relation is cloned. The data for the new relation must also
    # be copied from the old relation.
    def spawn_relation(orig, new)
      orig_id = orig.object_id
      new_id = new.object_id

      # If we weren't tracking the original relation, don't bother setting up
      # the new relation yet.
      if @relation_data.has_key? orig_id
        orig_data = @relation_data[orig_id]

        # ast_data and modifiers should be empty, but we must clone modifiers to
        # get the initializer.
        @relation_data[new_id] = {
          params: orig_data[:params].clone,
          relation_data: orig_data[:relation_data].clone,
          # The new relation gets its own array. Sharing one Array object with
          # the original relation would leak AST data between the two, and the
          # original relation's data must not be touched here.
          ast_data: [],
          modifiers: orig_data[:modifiers].clone
        }

        # The default block for the @relation_data hash isn't called when
        # assigning a value to a new key. We must set up the finalizer manually.
        ObjectSpace.define_finalizer(new, self.class.finalize_relation(new_id))
      end

      # Save the last spawned relation for a hack for storing params from #where
      # and #having.
      @last_spawned_relations[new.connection.object_id] = new_id
    end

    # Retrieve the last spawned relation for the connection. This is an ugly
    # hack for a poor implementation of the #where and #having methods in Rails
    # 3.
    def last_spawned_relation(connection)
      ObjectSpace._id2ref @last_spawned_relations[connection.object_id]
    end

    # Called when two relations are merged. The data for the other relation must
    # be copied into the current relation. AST data and modifiers should be
    # empty and don't need to be copied.
    def merge_relations(relation, other)
      params = @relation_data[relation.object_id][:params]
      other_params = @relation_data[other.object_id][:params]

      other_params.each_pair do |name, value|
        # Update numeric ID for current relation if name is an integer.
        name = params.size.to_s if name.to_i.to_s == name
        params[name] = value
      end

      other_data = @relation_data[other.object_id][:relation_data]
      @relation_data[relation.object_id][:relation_data] += other_data
    end

    # Add relation API context data to the relation.
    def add_relation_data(relation, data)
      @relation_data[relation.object_id][:relation_data] << data
    end

    # Add a parameter to the current relation for the connection. If the
    # relation is copied or merged into another relation, the param will also
    # be copied.
    def add_param(name, value, connection_id)
      relation_id = @relations[connection_id].last
      # This can occur if the query statement isn't generated by the app but by
      # ActiveRecord itself.
      return unless relation_id

      params = @relation_data[relation_id][:params]

      # If no name given, use index.
      if name.nil?
        name = params.size.to_s
      end

      Immunio.logger.debug { "Adding ActiveRecord SQL param to relation #{relation_id} (name: #{name}, value: #{value})" }

      params[name] = value
    end

    # Add a modifier to the current relation for the connection. This only
    # occurs during conversion of a relation to SQL statement, so modifiers are
    # never copied from one relation to another.
    def add_modifier(type, value, connection_id)
      relation_id = @relations[connection_id].last
      # This can occur if the query statement isn't generated by the app but by
      # ActiveRecord itself.
      return unless relation_id

      @relation_data[relation_id][:modifiers][type] << value
    end

    # Add data about an Arel AST node to the context data for the connection.
    # This only occurs during conversion of a relation to SQL statement, so AST
    # context data is never copied from one relation to another.
    def add_ast_data(ast_node_name, connection_id)
      relation_id = @relations[connection_id].last
      # This can occur if the query statement was cached and there's no relation
      # associated with the connection. That's ok here, though, because there's
      # such a limited number of cacheable statement structures that we don't
      # need AST info to differentiate between queries with the same stack
      # trace.
      return unless relation_id

      @relation_data[relation_id][:ast_data] << "Arel AST visited node: #{ast_node_name}"
    end

    # Evaluate a SQL call. This occurs after Arel AST conversion of a relation
    # to a statement.
    #
    # payload - Hash with :sql, :connection_id and :binds keys.
    def call(payload)
      Request.time "plugin", "#{Module.nesting[0]}::#{__method__}" do
        Immunio.logger.debug { "New ActiveRecord SQL query: #{payload}" }

        connection_id = payload[:connection_id]
        relation_id = @relations[connection_id].last

        if should_ignore? payload[:sql]
          Immunio.logger.debug { "Ignoring query as it was generated by ActiveRecord itself (#{payload[:sql]})" }
          return
        end

        if relation_id
          # Note: If a relation is released between when it is converted to a
          # SQL statement and now, we would lose the data and additionally leak
          # an empty entry in the @relation_data hash. I don't believe this is
          # possible due to how we wrap things, but there's no explicit
          # guarantee.
          relation_data = @relation_data[relation_id]

          params = relation_data[:params].clone
          context_data = (relation_data[:relation_data] + relation_data[:ast_data]).join "\n"
          # modifiers must be cloned because it will be cleared when the
          # relation is reset.
          modifiers = relation_data[:modifiers].clone
        else
          params = {}
          context_data = nil
          modifiers = {}
        end

        # Merge bound values
        question_marks = 0
        payload[:binds].each do |(column, value)|
          if column.nil?
            params["?:#{question_marks}"] = value.to_s
            question_marks = question_marks + 1
          else
            # When using the activerecord-sqlserver-adapter gem, the "column" is
            # the actual param name.
            name = column.respond_to?(:name) ? column.name : column.to_s
            params[name] = value.to_s
          end
        end

        strict_context, loose_context, stack = Immunio::Context.context context_data

        # Send in additional_context_data for debugging purposes
        Immunio.run_hook! "active_record", "sql_execute",
                          sql: payload[:sql],
                          connection_uuid: connection_id.to_s,
                          params: params,
                          modifiers: modifiers,
                          context_key: strict_context,
                          loose_context_key: loose_context,
                          stack: stack,
                          additional_context_data: context_data

        reset relation_id
      end
    end

    # Reset per-execution data for a relation.
    def reset(relation_id)
      return unless relation_id

      [:ast_data, :modifiers].each do |type|
        @relation_data[relation_id][type].clear
      end
    end

    private

    # Statement prefixes that mark a query as generated by ActiveRecord itself.
    IGNORE_START_WITH = ['PRAGMA', 'SHOW', 'SAVEPOINT', 'RELEASE', 'ROLLBACK'].freeze

    # Transaction control statements, with or without the " transaction"
    # suffix. Anchored with \A and \z so that the WHOLE statement must match:
    # ^ and $ match at every line, which would let a multi-line statement that
    # merely contains such a line be ignored.
    IGNORE_SQL = /\A(begin|commit|rollback)(?: transaction)?\z/i

    # Returns a truthy value when the statement should not be reported.
    def should_ignore?(sql)
      # Ignore queries generated by ActiveRecord.
      return sql.start_with?(*IGNORE_START_WITH) || IGNORE_SQL =~ sql
    end
  end

  # Hook into the SQL query execution methods of Rails.
  # Since all executed queries inside Rails are logged, we hook into the `log` method to catch them all.
  module QueryExecutionHooks
    extend ActiveSupport::Concern

    included do
      Immunio::Utils.alias_method_chain self, :log, :immunio if method_defined? :log
    end

    # Reports the statement to the QueryTracker, keyed by this object's
    # object_id, then delegates to the original #log with the caller's block.
    def log_with_immunio(sql, name = "SQL", binds = [], *args)
      QueryTracker.instance.call sql: sql, connection_id: object_id, binds: binds

      # Log and execute the query
      log_without_immunio(sql, name, binds, *args) { yield }
    end
  end
end

# Hook into quoting methods at the highest level possible in the ancestors chain.
# In case the quote methods were overridden in a child class.
module ActiveRecord::ConnectionAdapters
  if defined? Mysql2Adapter
    Mysql2Adapter.send :include, Immunio::QuotingHooks
  elsif defined? MysqlAdapter
    MysqlAdapter.send :include, Immunio::QuotingHooks
  end

  if defined? PostgreSQLAdapter
    PostgreSQLAdapter.send :include, Immunio::QuotingHooks
  end

  if defined? SQLite3Adapter
    SQLite3Adapter.send :include, Immunio::QuotingHooks
  elsif defined? SQLiteAdapter
    SQLiteAdapter.send :include, Immunio::QuotingHooks
  end
end

module ActiveRecord::Sanitization
  ClassMethods.send :include, Immunio::SanitizeHooks
end

Arel::Visitors::ToSql.send :include, Immunio::ArelToSqlHooks
ActiveRecord::ConnectionAdapters::AbstractAdapter.send :include, Immunio::QueryExecutionHooks