lib/loaders/loader_base.rb in datashift-0.0.1 vs lib/loaders/loader_base.rb in datashift-0.0.2

- old
+ new

@@ -1,332 +1,404 @@ -# Copyright:: (c) Autotelik Media Ltd 2011 -# Author :: Tom Statter -# Date :: Aug 2010 -# License:: MIT -# -# Details:: Base class for loaders, providing a process hook which populates a model, -# based on a method map and supplied value from a file - i.e a single column/row's string value. -# Note that although a single column, the string can be formatted to contain multiple values. -# -# Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of -# a file's column and it's correlated AR associations. -# -module DataShift - - require 'datashift/method_mapper' - - class LoaderBase - - attr_reader :headers - - attr_accessor :method_mapper - - attr_accessor :load_object_class, :load_object - attr_accessor :current_value, :current_method_detail - - attr_accessor :loaded_objects, :failed_objects - - attr_accessor :options - - # Support multiple associations being added to a base object to be specified in a single column. - # - # Entry represents the association to find via supplied name, value to use in the lookup. - # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | ) - # - # Default syntax : - # - # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2 - # - # E.G. - # Association Properties, has a column named Size, and another called Colour, - # and this combination could be used to lookup multiple associations to add to the main model Jumper - # - # Size:small # => generates find_by_size( 'small' ) - # Size:large # => generates find_by_size( 'large' ) - # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] ) - # - # Size:large|Size:medium|Size:large - # => Find 3 different associations, perform lookup via column called Size - # => Jumper.properties << [ small, medium, large ] - # - def self.name_value_delim - @name_value_delim ||= ':' - @name_value_delim - end - - def self.set_name_value_delim(x) @name_value_delim = x; end - # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR) - # - # |Category| - # name:new{ :date => '20110102', :owner = > 'blah'} - # - - - def self.multi_value_delim - @multi_value_delim ||= ',' - @multi_value_delim - end - - def self.set_multi_value_delim(x) @multi_value_delim = x; end - - # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR) - # - # |Category| - # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67} - # - def self.multi_assoc_delim - @multi_assoc_delim ||= '|' - @multi_assoc_delim - end - - def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end - - # Options - # :instance_methods => true - - def initialize(object_class, object = nil, options = {}) - @load_object_class = object_class - - # Gather list of all possible 'setter' methods on AR class (instance variables and associations) - DataShift::MethodMapper.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] ) - - @method_mapper = DataShift::MethodMapper.new - @options = options.clone - @headers = [] - - @default_values = {} - @prefixes = {} - @postfixes = {} - - reset(object) - end - - - # kinda the derived classes interface - best way in Ruby ? - def perform_load( input, options = {} ) - raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()" - end - - - # Core API - Given a list of free text column names from a file, map all headers to - # method mapper's operator list. - # Options: - # strict : report any header values that can't be mapped as an error - # - def map_headers_to_operators( headers, strict, mandatory = []) - @headers = headers - - @method_mapper.populate_methods( load_object_class, @headers ) - - unless(@method_mapper.missing_methods.empty?) - puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}" - raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict) - end - - unless(@method_mapper.contains_mandatory?(mandatory) ) - @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" } - raise MissingMandatoryError, "Mandatory columns missing - please fix and retry." - end unless(mandatory.empty?) - end - - - # Core API - Given a free text column name from a file, search method mapper for - # associated operator on base object class. - # - # If suitable association found, process row data and then assign to current load_object - def find_and_process(column_name, data) - method_detail = MethodMapper.find_method_detail( load_object_class, column_name ) - - if(method_detail) - prepare_data(method_detail, data) - process() - else - @load_object.errors.add_base( "No matching method found for column #{column_name}") - end - end - - - # Set member variables to hold detsails and value. - # - # Check supplied value, validate it, and if required : - # set to any provided default value - # prepend or append with any provided extensions - def prepare_data(method_detail, value) - - @current_value = value - - @current_method_detail = method_detail - - operator = method_detail.operator - - if(default_value(operator) && (value.nil? || value.to_s.empty?)) - @current_value = default_value(operator) - end - - @current_value = "#{prefixes(operator)}#{@current_value}" if(prefixes(operator)) - @current_value = "#{@current_value}#{postfixes(operator)}" if(postfixes(operator)) - - @current_value - end - - - # Process a value string from a column. - # Assigning value(s) to correct association on @load_object. - # Method detail represents a column from a file and it's correlated AR associations. - # Value string which may contain multiple values for a collection association. - # - def process() - - if(@current_method_detail.operator_for(:has_many)) - - if(@current_method_detail.operator_class && @current_value) - - # there are times when we need to save early, for example before assigning to - # has_and_belongs_to associations which require the load_object has an id for the join table - - save_if_new - - # A single column can contain multiple associations delimited by special char - columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim) - - # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] ) - - columns.each do |assoc| - operator, values = assoc.split(LoaderBase::name_value_delim) - - lookups = values.split(LoaderBase::multi_value_delim) - - if(lookups.size > 1) - - @current_value = @current_method_detail.operator_class.send("find_all_by_#{operator}", lookups ) - - unless(lookups.size == @current_value.size) - found = @current_value.collect {|f| f.send(operator) } - @load_object.errors.add( method_detail.operator, "Association with key(s) #{(lookups - found).inspect} NOT found") - puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added." - next if(@current_value.empty?) - end - - else - - @current_value = @current_method_detail.operator_class.send("find_by_#{operator}", lookups ) - - unless(@current_value) - @load_object.errors.add( @current_method_detail.operator, "Association with key #{lookups} NOT found") - puts "WARNING: Association with key #{lookups} NOT found - Not added." - next - end - - end - - # Lookup Assoc's Model done, now add the found value(s) to load model's collection - @current_method_detail.assign(@load_object, @current_value) - end - end - # END HAS_MANY - else - # Nice n simple straight assignment to a column variable - #puts "INFO: LOADER BASE processing #{method_detail.name}" - @current_method_detail.assign(@load_object, @current_value) - end - end - - def save - #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose]) - begin - result = @load_object.save - #puts "DEBUG: SAVED [#{result.inspect}]" - #puts "SAVED 2. #{load_object.errors.methods.inspect}" - #puts "SAVED 3. #{load_object.errors.full_messages.inspect}" - @loaded_objects << @load_object unless(@loaded_objects.include?(@load_object)) - - return result - rescue => e - @failed_objects << @load_object unless( !load_object.new_record? || @failed_objects.include?(@load_object)) - puts "Error saving #{@load_object.class} : #{e.inspect}" - puts e.backtrace - raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict]) - end - end - - def set_default_value( name, value ) - @default_values[name] = value - end - - def default_value(name) - @default_values[name] - end - - def set_prefix( name, value ) - @prefixes[name] = value - end - - def prefixes(name) - @prefixes[name] - end - - def set_postfix( name, value ) - @postfixes[name] = value - end - - def postfixes(name) - @postfixes[name] - end - - - # Reset the loader, including database object to be populated, and load counts - # - def reset(object = nil) - @load_object = object || new_load_object - @loaded_objects, @failed_objects = [],[] - @current_value = nil - end - - - def new_load_object - @load_object = @load_object_class.new - @load_object - end - - def abort_on_failure? - @options[:abort_on_failure] == 'true' - end - - def loaded_count - @loaded_objects.size - end - - def failed_count - @failed_objects.size - end - - - # Check whether headers contains supplied list - def headers_contain_mandatory?( mandatory_list ) - [ [*mandatory_list] - @headers].flatten.empty? - end - - - # Check whether headers contains supplied list - def missing_mandatory_headers( mandatory_list ) - [ [*mandatory_list] - @headers].flatten - end - - def find_or_new( klass, condition_hash = {} ) - @records[klass] = klass.find(:all, :conditions => condition_hash) - if @records[klass].any? - return @records[klass].first - else - return klass.new - end - end - - private - - def save_if_new - #puts "SAVE", load_object.inspect - save if(load_object.valid? && load_object.new_record?) - end - - end - +# Copyright:: (c) Autotelik Media Ltd 2011 +# Author :: Tom Statter +# Date :: Aug 2010 +# License:: MIT +# +# Details:: Base class for loaders, providing a process hook which populates a model, +# based on a method map and supplied value from a file - i.e a single column/row's string value. +# Note that although a single column, the string can be formatted to contain multiple values. +# +# Tightly coupled with MethodMapper classes (in lib/engine) which contains full details of +# a file's column and it's correlated AR associations. +# +module DataShift + + require 'datashift/method_mapper' + + class LoaderBase + + + include DataShift::Logging + + attr_reader :headers + + attr_accessor :method_mapper + + attr_accessor :load_object_class, :load_object + attr_accessor :current_value, :current_method_detail + + attr_accessor :loaded_objects, :failed_objects + + attr_accessor :options + + # Support multiple associations being added to a base object to be specified in a single column. + # + # Entry represents the association to find via supplied name, value to use in the lookup. + # Can contain multiple lookup name/value pairs, separated by multi_assoc_delim ( | ) + # + # Default syntax : + # + # Name1:value1, value2|Name2:value1, value2, value3|Name3:value1, value2 + # + # E.G. + # Association Properties, has a column named Size, and another called Colour, + # and this combination could be used to lookup multiple associations to add to the main model Jumper + # + # Size:small # => generates find_by_size( 'small' ) + # Size:large # => generates find_by_size( 'large' ) + # Colour:red,green,blue # => generates find_all_by_colour( ['red','green','blue'] ) + # + # Size:large|Size:medium|Size:large + # => Find 3 different associations, perform lookup via column called Size + # => Jumper.properties << [ small, medium, large ] + # + def self.name_value_delim + @name_value_delim ||= ':' + @name_value_delim + end + + def self.set_name_value_delim(x) @name_value_delim = x; end + # TODO - support embedded object creation/update via hash (which hopefully we should be able to just forward to AR) + # + # |Category| + # name:new{ :date => '20110102', :owner = > 'blah'} + # + + + def self.multi_value_delim + @multi_value_delim ||= ',' + @multi_value_delim + end + + def self.set_multi_value_delim(x) @multi_value_delim = x; end + + # TODO - support multi embedded object creation/update via hash (which hopefully we should be able to just forward to AR) + # + # |Category| + # name:new{ :a => 1, :b => 2}|name:medium{ :a => 6, :b => 34}|name:old{ :a => 12, :b => 67} + # + def self.multi_assoc_delim + @multi_assoc_delim ||= '|' + @multi_assoc_delim + end + + + def self.set_multi_assoc_delim(x) @multi_assoc_delim = x; end + + # Options + # :instance_methods => true + + def initialize(object_class, object = nil, options = {}) + @load_object_class = object_class + + # Gather list of all possible 'setter' methods on AR class (instance variables and associations) + DataShift::MethodMapper.find_operators( @load_object_class, :reload => true, :instance_methods => options[:instance_methods] ) + + @method_mapper = DataShift::MethodMapper.new + @options = options.clone + @headers = [] + + @default_data_objects ||= {} + + @default_values = {} + @override_values = {} + + @prefixes = {} + @postfixes = {} + + reset(object) + end + + + # kinda the derived classes interface - best way in Ruby ? + def perform_load( input, options = {} ) + raise "WARNING- ABSTRACT METHOD CALLED - Please implement perform_load()" + end + + + # Core API - Given a list of free text column names from a file, map all headers to + # method mapper's operator list. + # Options: + # strict : report any header values that can't be mapped as an error + # + def map_headers_to_operators( headers, strict, mandatory = []) + @headers = headers + + @method_mapper.populate_methods( load_object_class, @headers ) + + unless(@method_mapper.missing_methods.empty?) + puts "WARNING: Following column headings could not be mapped : #{@method_mapper.missing_methods.inspect}" + raise MappingDefinitionError, "Missing mappings for columns : #{@method_mapper.missing_methods.join(",")}" if(strict) + end + + unless(@method_mapper.contains_mandatory?(mandatory) ) + @method_mapper.missing_mandatory(mandatory).each { |e| puts "ERROR: Mandatory column missing - expected column '#{e}'" } + raise MissingMandatoryError, "Mandatory columns missing - please fix and retry." + end unless(mandatory.empty?) + end + + + # Core API - Given a free text column name from a file, search method mapper for + # associated operator on base object class. + # + # If suitable association found, process row data and then assign to current load_object + def find_and_process(column_name, data) + method_detail = MethodMapper.find_method_detail( load_object_class, column_name ) + + if(method_detail) + prepare_data(method_detail, data) + process() + else + @load_object.errors.add_base( "No matching method found for column #{column_name}") + end + end + + + # Default values can be provided in YAML config file + # Format : + # Load Class + # atttribute: value + + def configure_from( yaml_file ) + + data = YAML::load( File.open(yaml_file) ) + + + # TODO - MOVE DEFAULTS TO OWN MODULE + # decorate the loading class with the defaults/ove rides to manage itself + # IDEAS ..... + # + #unless(@default_data_objects[load_object_class]) + # + # @default_data_objects[load_object_class] = load_object_class.new + + # default_data_object = @default_data_objects[load_object_class] + + + # default_data_object.instance_eval do + # def datashift_defaults=(hash) + # @datashift_defaults = hash + # end + # def datashift_defaults + # @datashift_defaults + # end + #end unless load_object_class.respond_to?(:datashift_defaults) + #end + + #puts load_object_class.new.to_yaml + + puts data.inspect + + if(data[load_object_class.name]) + + deflts = data[load_object_class.name]['datashift_defaults'] + @default_values.merge!(deflts) if deflts + + ovrides = data[load_object_class.name]['datashift_overrides'] + @override_values.merge!(ovrides) if ovrides + end + + end + + # Set member variables to hold details and value. + # + # Check supplied value, validate it, and if required : + # set to any provided default value + # prepend or append with any provided extensions + def prepare_data(method_detail, value) + + @current_value = value + + @current_method_detail = method_detail + + operator = method_detail.operator + + override_value(operator) + + if((value.nil? || value.to_s.empty?) && default_value(operator)) + @current_value = default_value(operator) + end + + @current_value = "#{prefixes(operator)}#{@current_value}" if(prefixes(operator)) + @current_value = "#{@current_value}#{postfixes(operator)}" if(postfixes(operator)) + + @current_value + end + + + # Process a value string from a column. + # Assigning value(s) to correct association on @load_object. + # Method detail represents a column from a file and it's correlated AR associations. + # Value string which may contain multiple values for a collection association. + # + def process() + + if(@current_method_detail.operator_for(:has_many)) + + if(@current_method_detail.operator_class && @current_value) + + # there are times when we need to save early, for example before assigning to + # has_and_belongs_to associations which require the load_object has an id for the join table + + save_if_new + + # A single column can contain multiple associations delimited by special char + columns = @current_value.to_s.split( LoaderBase::multi_assoc_delim) + + # Size:large|Colour:red,green,blue => generates find_by_size( 'large' ) and find_all_by_colour( ['red','green','blue'] ) + + columns.each do |assoc| + operator, values = assoc.split(LoaderBase::name_value_delim) + + lookups = values.split(LoaderBase::multi_value_delim) + + if(lookups.size > 1) + + @current_value = @current_method_detail.operator_class.send("find_all_by_#{operator}", lookups ) + + unless(lookups.size == @current_value.size) + found = @current_value.collect {|f| f.send(operator) } + @load_object.errors.add( method_detail.operator, "Association with key(s) #{(lookups - found).inspect} NOT found") + puts "WARNING: Association with key(s) #{(lookups - found).inspect} NOT found - Not added." + next if(@current_value.empty?) + end + + else + + @current_value = @current_method_detail.operator_class.send("find_by_#{operator}", lookups ) + + unless(@current_value) + @load_object.errors.add( @current_method_detail.operator, "Association with key #{lookups} NOT found") + puts "WARNING: Association with key #{lookups} NOT found - Not added." + next + end + + end + + # Lookup Assoc's Model done, now add the found value(s) to load model's collection + @current_method_detail.assign(@load_object, @current_value) + end + end + # END HAS_MANY + else + # Nice n simple straight assignment to a column variable + #puts "INFO: LOADER BASE processing #{method_detail.name}" + @current_method_detail.assign(@load_object, @current_value) + end + end + + def failure + @failed_objects << @load_object unless( !load_object.new_record? || @failed_objects.include?(@load_object)) + end + + def save + #puts "DEBUG: SAVING #{load_object.class} : #{load_object.inspect}" #if(options[:verbose]) + begin + result = @load_object.save + + @loaded_objects << @load_object unless(@loaded_objects.include?(@load_object)) + + return result + rescue => e + failure + puts "Error saving #{@load_object.class} : #{e.inspect}" + logger.error e.backtrace + raise "Error in save whilst processing column #{@current_method_detail.name}" if(@options[:strict]) + end + end + + def self.default_object_for( klass ) + @default_data_objects ||= {} + @default_data_objects[klass] + end + + def set_default_value( name, value ) + @default_values[name] = value + end + + def set_override_value( operator, value ) + @override_values[operator] = value + end + + def default_value(name) + @default_values[name] + end + + def override_value( operator ) + @current_value = @override_values[operator] if(@override_values[operator]) + end + + + def set_prefix( name, value ) + @prefixes[name] = value + end + + def prefixes(name) + @prefixes[name] + end + + def set_postfix( name, value ) + @postfixes[name] = value + end + + def postfixes(name) + @postfixes[name] + end + + + # Reset the loader, including database object to be populated, and load counts + # + def reset(object = nil) + @load_object = object || new_load_object + @loaded_objects, @failed_objects = [],[] + @current_value = nil + end + + + def new_load_object + @load_object = @load_object_class.new + @load_object + end + + def abort_on_failure? + @options[:abort_on_failure] == 'true' + end + + def loaded_count + @loaded_objects.size + end + + def failed_count + @failed_objects.size + end + + + # Check whether headers contains supplied list + def headers_contain_mandatory?( mandatory_list ) + [ [*mandatory_list] - @headers].flatten.empty? + end + + + # Check whether headers contains supplied list + def missing_mandatory_headers( mandatory_list ) + [ [*mandatory_list] - @headers].flatten + end + + def find_or_new( klass, condition_hash = {} ) + @records[klass] = klass.find(:all, :conditions => condition_hash) + if @records[klass].any? + return @records[klass].first + else + return klass.new + end + end + + private + + def save_if_new + #puts "SAVE", load_object.inspect + save if(load_object.valid? && load_object.new_record?) + end + + end + end \ No newline at end of file