lib/loaders/spreadsheet_loader.rb in datashift-0.5.0 vs lib/loaders/spreadsheet_loader.rb in datashift-0.6.0
- old
+ new
@@ -10,128 +10,136 @@
# Maps column headings to operations on the model.
# Iterates over all the rows using mapped operations to assign row data to a database object,
# i.e pulls data from each column and sends to object.
#
require 'datashift/exceptions'
-
+
module DataShift
+
+ unless(Guards::jruby?)
+
+ require 'loaders/loader_base'
- require 'loaders/loader_base'
+ module SpreadsheetLoading
- module SpreadsheetLoading
+ gem 'spreadsheet'
+ require 'spreadsheet'
- # Options:
- # [:header_row] : Default is 0. Use alternative row as header definition.
- # [:mandatory] : Array of mandatory column names
- # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
- # [:sheet_number]
- def perform_spreadsheet_load( file_name, options = {} )
+ # Spreadsheet.client_encoding = 'UTF-8'F
+
+ # Options:
+ # [:header_row] : Default is 0. Use alternative row as header definition.
+ # [:mandatory] : Array of mandatory column names
+ # [:strict] : Raise exception when no mapping found for a column heading (non mandatory)
+ # [:sheet_number]
- @mandatory = options[:mandatory] || []
+ def perform_spreadsheet_load( file_name, options = {} )
- @excel = JExcelFile.new
+ @mandatory = options[:mandatory] || []
- @excel.open(file_name)
+ @excel = Spreadsheet.open file_name
- #if(options[:verbose])
- puts "\n\n\nLoading from Excel file: #{file_name}"
+ #if(options[:verbose])
+ puts "\n\n\nLoading from Excel file: #{file_name}"
- sheet_number = options[:sheet_number] || 0
+ sheet_number = options[:sheet_number] || 0
- @sheet = @excel.sheet( sheet_number )
+ @sheet = @excel.sheet( sheet_number )
- header_row_index = options[:header_row] || 0
- @header_row = @sheet.getRow(header_row_index)
+ header_row_index = options[:header_row] || 0
+ @header_row = @sheet.getRow(header_row_index)
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" unless(@header_row)
- @headers = []
+ @headers = []
- (0..JExcelFile::MAX_COLUMNS).each do |i|
- cell = @header_row.getCell(i)
- break unless cell
- header = "#{@excel.cell_value(cell).to_s}".strip
- break if header.empty?
- @headers << header
- end
+ (0..JExcelFile::MAX_COLUMNS).each do |i|
+ cell = @header_row.getCell(i)
+ break unless cell
+ header = "#{@excel.cell_value(cell).to_s}".strip
+ break if header.empty?
+ @headers << header
+ end
- raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
+ raise MissingHeadersError, "No headers found - Check Sheet #{@sheet} is complete and Row #{header_row_index} contains headers" if(@headers.empty?)
- # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
- map_headers_to_operators( @headers, options[:strict] , @mandatory )
+ # Create a method_mapper which maps list of headers into suitable calls on the Active Record class
+ map_headers_to_operators( @headers, options)
- load_object_class.transaction do
- @loaded_objects = []
+ load_object_class.transaction do
+ @loaded_objects = []
- (1..@excel.num_rows).collect do |row|
+ (1..@excel.num_rows).collect do |row|
- # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
- # (TODO - write spec to process .xls with a huge number of rows)
- #
- # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
- # got no better idea than ending once we hit the first completely empty row
- break if @excel.sheet.getRow(row).nil?
+ # Excel num_rows seems to return all 'visible' rows, which appears to be greater than the actual data rows
+ # (TODO - write spec to process .xls with a huge number of rows)
+ #
+ # This is rubbish but currently manually detect when actual data ends, this isn't very smart but
+ # got no better idea than ending once we hit the first completely empty row
+ break if @excel.sheet.getRow(row).nil?
- contains_data = false
+ contains_data = false
- # TODO - Smart sorting of column processing order ....
- # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
- # before associations can be processed so user should ensure mandatory columns are prior to associations
+ # TODO - Smart sorting of column processing order ....
+ # Does not currently ensure mandatory columns (for valid?) processed first but model needs saving
+ # before associations can be processed so user should ensure mandatory columns are prior to associations
- # as part of this we also attempt to save early, for example before assigning to
- # has_and_belongs_to associations which require the load_object has an id for the join table
+ # as part of this we also attempt to save early, for example before assigning to
+ # has_and_belongs_to associations which require the load_object has an id for the join table
- # Iterate over the columns method_mapper found in Excel,
- # pulling data out of associated column
- @method_mapper.method_details.each_with_index do |method_detail, col|
+ # Iterate over the columns method_mapper found in Excel,
+ # pulling data out of associated column
+ @method_mapper.method_details.each_with_index do |method_detail, col|
- value = value_at(row, col)
+ value = value_at(row, col)
- contains_data = true unless(value.nil? || value.to_s.empty?)
+ contains_data = true unless(value.nil? || value.to_s.empty?)
- #puts "DEBUG: Excel process METHOD :#{method_detail.inspect}", value.inspect
- prepare_data(method_detail, value)
+ #puts "DEBUG: Excel process METHOD :#{method_detail.inspect}", value.inspect
+ prepare_data(method_detail, value)
- process()
- end
+ process()
+ end
- break unless(contains_data == true)
+ break unless(contains_data == true)
- # TODO - requirements to handle not valid ?
- # all or nothing or carry on and dump out the exception list at end
- #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
- save
- #puts "DEBUG: SAVED #{load_object.inspect}"
+ # TODO - requirements to handle not valid ?
+ # all or nothing or carry on and dump out the exception list at end
+ #puts "DEBUG: FINAL SAVE #{load_object.inspect}"
+ save
+ #puts "DEBUG: SAVED #{load_object.inspect}"
- # don't forget to reset the object or we'll update rather than create
- new_load_object
+ # don't forget to reset the object or we'll update rather than create
+ new_load_object
+ end
end
+ puts "Spreadsheet loading stage complete - #{loaded_objects.size} rows added."
end
- puts "Spreadsheet loading stage complete - #{loaded_objects.size} rows added."
- end
- def value_at(row, column)
- @excel.get_cell_value( @excel.sheet.getRow(row), column)
+ def value_at(row, column)
+ @excel.get_cell_value( @excel.sheet.getRow(row), column)
+ end
end
- end
- class SpreadsheetLoader < LoaderBase
+ class SpreadsheetLoader < LoaderBase
- include SpreadsheetLoading
+ include SpreadsheetLoading
- def initialize(klass, object = nil, options = {})
- super( klass, object, options )
- raise "Cannot load - failed to create a #{klass}" unless @load_object
- end
+ def initialize(klass, object = nil, options = {})
+ super( klass, object, options )
+ raise "Cannot load - failed to create a #{klass}" unless @load_object
+ end
- def perform_load( file_name, options = {} )
- perform_spreadsheet_load( file_name, options )
+ def perform_load( file_name, options = {} )
+ perform_spreadsheet_load( file_name, options )
- puts "Spreadsheet loading stage complete - #{loaded_objects.size} rows added."
- end
+ puts "Spreadsheet loading stage complete - #{loaded_objects.size} rows added."
+ end
- end
+ end
+ end
+
end
\ No newline at end of file