module ETL #:nodoc: module Transform #:nodoc: # Transform which looks up the value and replaces it with a foriegn key reference class ForeignKeyLookupTransform < ETL::Transform::Transform # The resolver to use if the foreign key is not found in the collection attr_accessor :resolver # The default foreign key to use if none is found. attr_accessor :default # Initialize the foreign key lookup transform. # # Configuration options: # *:collection: A Hash of natural keys mapped to surrogate keys. If this is not specified then # an empty Hash will be used. This Hash will be used to cache values that have been resolved already # for future use. # *:resolver: Object or Class which implements the method resolve(value) # *:default: A default foreign key to use if no foreign key is found def initialize(control, name, configuration={}) super @collection = (configuration[:collection] || {}) @resolver = configuration[:resolver] @resolver = @resolver.new if @resolver.is_a?(Class) @default = configuration[:default] if configuration[:cache] ||= true if resolver.respond_to?(:load_cache) resolver.load_cache else ETL::Engine.logger.info "#{resolver.class.name} does not support caching" end end end # Transform the value by resolving it to a foriegn key def transform(name, value, row) fk = @collection[value] unless fk raise ResolverError, "Foreign key for #{value} not found and no resolver specified" unless resolver raise ResolverError, "Resolver does not appear to respond to resolve method" unless resolver.respond_to?(:resolve) fk = resolver.resolve(value) fk ||= @default raise ResolverError, "Unable to resolve #{value} to foreign key for #{name} in row #{ETL::Engine.rows_read}. You may want to specify a :default value." unless fk @collection[value] = fk end fk end end # Alias class name for the ForeignKeyLookupTransform. class FkLookupTransform < ForeignKeyLookupTransform; end end end # Resolver which resolves using ActiveRecord. class ActiveRecordResolver # The ActiveRecord class to use attr_accessor :ar_class # The find method to use (as a symbol) attr_accessor :find_method # Initialize the resolver. The ar_class argument should extend from # ActiveRecord::Base. The find_method argument must be a symbol for the # finder method used. For example: # # ActiveRecordResolver.new(Person, :find_by_name) # # Note that the find method defined must only take a single argument. def initialize(ar_class, find_method) @ar_class = ar_class @find_method = find_method end # Resolve the value def resolve(value) rec = ar_class.__send__(find_method, value) rec.nil? ? nil : rec.id end end class SQLResolver # Initialize the SQL resolver. Use the given table and field name to search # for the appropriate foreign key. The field should be the name of a natural # key that is used to locate the surrogate key for the record. # # The connection argument is optional. If specified it can be either a symbol # referencing a connection defined in the ETL database.yml file or an actual # ActiveRecord connection instance. If the connection is not specified then # the ActiveRecord::Base.connection will be used. def initialize(table, field, connection=nil) @table = table @field = field @connection = (connection.respond_to?(:quote) ? connection : ETL::Engine.connection(connection)) if connection @connection ||= ActiveRecord::Base.connection end def resolve(value) if @use_cache cache[value] else q = "SELECT id FROM #{table_name} WHERE #{@field} = #{@connection.quote(value)}" ETL::Engine.logger.debug("Executing query: #{q}") @connection.select_value(q) end end def table_name ETL::Engine.table(@table, @connection) end def cache @cache ||= {} end def load_cache @use_cache = true q = "SELECT id, #{@field} FROM #{table_name}" @connection.select_all(q).each do |record| cache[record[@field]] = record['id'] end end end class FlatFileResolver # Initialize the flat file resolver. Expects to open a comma-delimited file. # Returns the column with the given result_field_index. # # The matches argument is a Hash with the key as the column index to search and # the value of the Hash as a String to match exactly. It will only match the first # result. def initialize(file, match_index, result_field_index) @file = file @match_index = match_index @result_field_index = result_field_index end # Get the rows from the file specified in the initializer. def rows @rows ||= FasterCSV.read(@file) end protected :rows # Match the row field from the column indicated by the match_index with the given # value and return the field value from the column identified by the result_field_index. def resolve(value) rows.each do |row| #puts "checking #{row.inspect} for #{value}" if row[@match_index] == value #puts "match found!, returning #{row[@result_field_index]}" return row[@result_field_index] end end nil end end