lib/loaders/excel_loader.rb in datashift-0.2.1 vs lib/loaders/excel_loader.rb in datashift-0.2.2

- old
+ new

@@ -1,156 +1,156 @@ -# Copyright:: (c) Autotelik Media Ltd 2011 -# Author :: Tom Statter -# Date :: Aug 2011 -# License:: MIT -# -# Details:: Specific loader to support Excel files. -# Note this only requires JRuby, Excel not required, nor Win OLE. -# -# Maps column headings to operations on the model. -# Iterates over all the rows using mapped operations to assign row data to a database object, -# i.e pulls data from each column and sends to object. -# -require 'datashift/exceptions' - - -module DataShift - - if(Guards::jruby?) - - require 'loaders/loader_base' - - require 'java' - require 'jexcel_file' - - module ExcelLoading - - # Options: - # [:header_row] : Default is 0. Use alternative row as header definition. - # [:mandatory] : Array of mandatory column names - # [:strict] : Raise exception when no mapping found for a column heading (non mandatory) - # [:sheet_number] - - def perform_excel_load( file_name, options = {} ) - - @mandatory = options[:mandatory] || [] - - @excel = JExcelFile.new - - @excel.open(file_name) - - #if(options[:verbose]) - puts "\n\n\nLoading from Excel file: #{file_name}" - - sheet_number = options[:sheet_number] || 0 - - @sheet = @excel.sheet( sheet_number ) - - header_row_index = options[:header_row] || 0 - @header_row = @sheet.getRow(header_row_index) - - raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row) - - @headers = [] - - (0..JExcelFile::MAX_COLUMNS).each do |i| - cell = @header_row.getCell(i) - break unless cell - header = "#{@excel.cell_value(cell).to_s}".strip - break if header.empty? - @headers << header - end - - raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?) - - # Create a method_mapper which maps list of headers into suitable calls on the Active Record class - # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute - map_headers_to_operators( @headers, options[:strict] , @mandatory ) - - logger.info "Excel Loader prcoessing #{@excel.num_rows} rows" - load_object_class.transaction do - @loaded_objects = [] - - (1..@excel.num_rows).collect do |row| - - # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows - # (TODO - write spec to process .xls with a huge number of rows) - # - # This is rubbish but currently manually detect when actual data ends, this isn't very smart but - # got no better idea than ending once we hit the first completely empty row - break if @excel.sheet.getRow(row).nil? - - contains_data = false - - # TODO - Smart sorting of column processing order .... - # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving - # before associations can be processed so user should ensure mandatory columns are prior to associations - - # as part of this we also attempt to save early, for example before assigning to - # has_and_belongs_to associations which require the load_object has an id for the join table - - # Iterate over the columns method_mapper found in Excel, - # pulling data out of associated column - @method_mapper.method_details.each_with_index do |method_detail, col| - - value = value_at(row, col) - - contains_data = true unless(value.nil? || value.to_s.empty?) - - prepare_data(method_detail, value) - - process() - end - - break unless(contains_data == true) - - # TODO - requirements to handle not valid ? - # all or nothing or carry on and dump out the exception list at end - #puts "DEBUG: FINAL SAVE #{load_object.inspect}" - unless(save) - failure - logger.error "Failed to save row [#{row}]" - logger.error load_object.errors.inspect - else - logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}" - end - - # don't forget to reset the object or we'll update rather than create - new_load_object - - end - end - puts "Excel loading stage complete - #{loaded_objects.size} rows added." - end - - def value_at(row, column) - @excel.get_cell_value( @excel.sheet.getRow(row), column) - end - end - - - class ExcelLoader < LoaderBase - - include ExcelLoading - - def initialize(klass, object = nil, options = {}) - super( klass, object, options ) - raise "Cannot load - failed to create a #{klass}" unless @load_object - end - - - def perform_load( file_name, options = {} ) - perform_excel_load( file_name, options ) - - puts "Excel loading stage complete - #{loaded_objects.size} rows added." - end - - end - - else - - module ExcelLoading - end - - end - +# Copyright:: (c) Autotelik Media Ltd 2011 +# Author :: Tom Statter +# Date :: Aug 2011 +# License:: MIT +# +# Details:: Specific loader to support Excel files. +# Note this only requires JRuby, Excel not required, nor Win OLE. +# +# Maps column headings to operations on the model. +# Iterates over all the rows using mapped operations to assign row data to a database object, +# i.e pulls data from each column and sends to object. +# +require 'datashift/exceptions' + + +module DataShift + + if(Guards::jruby?) + + require 'loaders/loader_base' + + require 'java' + require 'jexcel_file' + + module ExcelLoading + + # Options: + # [:header_row] : Default is 0. Use alternative row as header definition. + # [:mandatory] : Array of mandatory column names + # [:strict] : Raise exception when no mapping found for a column heading (non mandatory) + # [:sheet_number] + + def perform_excel_load( file_name, options = {} ) + + @mandatory = options[:mandatory] || [] + + @excel = JExcelFile.new + + @excel.open(file_name) + + #if(options[:verbose]) + puts "\n\n\nLoading from Excel file: #{file_name}" + + sheet_number = options[:sheet_number] || 0 + + @sheet = @excel.sheet( sheet_number ) + + header_row_index = options[:header_row] || 0 + @header_row = @sheet.getRow(header_row_index) + + raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row) + + @headers = [] + + (0..JExcelFile::MAX_COLUMNS).each do |i| + cell = @header_row.getCell(i) + break unless cell + header = "#{@excel.cell_value(cell).to_s}".strip + break if header.empty? + @headers << header + end + + raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?) + + # Create a method_mapper which maps list of headers into suitable calls on the Active Record class + # For example if model has an attribute 'price' will map columns called Price, price, PRICE etc to this attribute + map_headers_to_operators( @headers, options[:strict] , @mandatory ) + + logger.info "Excel Loader prcoessing #{@excel.num_rows} rows" + load_object_class.transaction do + @loaded_objects = [] + + (1..@excel.num_rows).collect do |row| + + # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows + # (TODO - write spec to process .xls with a huge number of rows) + # + # This is rubbish but currently manually detect when actual data ends, this isn't very smart but + # got no better idea than ending once we hit the first completely empty row + break if @excel.sheet.getRow(row).nil? + + contains_data = false + + # TODO - Smart sorting of column processing order .... + # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving + # before associations can be processed so user should ensure mandatory columns are prior to associations + + # as part of this we also attempt to save early, for example before assigning to + # has_and_belongs_to associations which require the load_object has an id for the join table + + # Iterate over the columns method_mapper found in Excel, + # pulling data out of associated column + @method_mapper.method_details.each_with_index do |method_detail, col| + + value = value_at(row, col) + + contains_data = true unless(value.nil? || value.to_s.empty?) + + prepare_data(method_detail, value) + + process() + end + + break unless(contains_data == true) + + # TODO - requirements to handle not valid ? + # all or nothing or carry on and dump out the exception list at end + #puts "DEBUG: FINAL SAVE #{load_object.inspect}" + unless(save) + failure + logger.error "Failed to save row [#{row}]" + logger.error load_object.errors.inspect + else + logger.info "Row #{row} succesfully SAVED : ID #{load_object.id}" + end + + # don't forget to reset the object or we'll update rather than create + new_load_object + + end + end + puts "Excel loading stage complete - #{loaded_objects.size} rows added." + end + + def value_at(row, column) + @excel.get_cell_value( @excel.sheet.getRow(row), column) + end + end + + + class ExcelLoader < LoaderBase + + include ExcelLoading + + def initialize(klass, object = nil, options = {}) + super( klass, object, options ) + raise "Cannot load - failed to create a #{klass}" unless @load_object + end + + + def perform_load( file_name, options = {} ) + perform_excel_load( file_name, options ) + + puts "Excel loading stage complete - #{loaded_objects.size} rows added." + end + + end + + else + + module ExcelLoading + end + + end + end \ No newline at end of file