require 'rubygems' gem 'parseexcel', '>= 0.5.2' require 'parseexcel' CHARGUESS=false require 'charguess' if CHARGUESS module Spreadsheet # :nodoc module ParseExcel class Worksheet include Enumerable attr_reader :min_row, :max_row, :min_col, :max_col end end end # Class for handling Excel-Spreadsheets class Excel < GenericSpreadsheet EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets' # Creates a new Excel spreadsheet object. # Parameter packed: :zip - File is a zip-file def initialize(filename, packed = nil) @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end filename = open_from_uri(filename) if filename[0,7] == "http://" filename = unzip(filename) if packed and packed == :zip if File.extname(filename) != ".xls" warn "are you sure, this is an excel file?" end @filename = filename begin unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @workbook = Spreadsheet::ParseExcel.parse(filename) @default_sheet = nil # no need to set default_sheet if there is only one sheet in the document if self.sheets.size == 1 @default_sheet = self.sheets.first end # @first_row = @last_row = @first_column = @last_column = nil #if ENV["roo_local"] != "thomas-p" ensure FileUtils::rm_r(@tmpdir) #end end @first_row = Hash.new @last_row = Hash.new @first_column = Hash.new @last_column = Hash.new @cells_read = Hash.new end # returns an array of sheet names in the spreadsheet def sheets result = [] 0.upto(@workbook.sheet_count - 1) do |i| # TODO: is there a better way to do conversion? if CHARGUESS encoding = CharGuess::guess(@workbook.worksheet(i).name) encoding = 'unicode' unless encoding result << Iconv.new('utf-8',encoding).iconv( @workbook.worksheet(i).name ) else # result << platform_specific_iconv(inject_null_characters(@workbook.worksheet(i).name)) result << platform_specific_iconv(@workbook.worksheet(i).name) #case RUBY_PLATFORM.downcase #when /darwin/ # result << Iconv.new('utf-8','utf-8').iconv( # @workbook.worksheet(i).name # ) #else # result << Iconv.new('utf-8','unicode').iconv( # @workbook.worksheet(i).name # ) #end # case end end return result end #TODO: testing only def inject_null_characters(str) if str.class != String return str end new_str='' 0.upto(str.size-1) do |i| new_str += str[i,1] new_str += "\000" end new_str end # returns the content of a cell. The upper left corner is (1,1) or ('A',1) def cell(row,col,sheet=nil) sheet = @default_sheet unless sheet row,col = normalize(row,col) worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 line = 1 worksheet.each(skip) { |row_par| #TODO: nicht jedesmal durch alle Zeilen gehen, sonder aus interner Repraesentation holen? if line == row if row_par == nil return nil end cell = row_par.at(col-1) return nil unless cell case cell.type when :numeric then return cell.to_f when :text then return cell.to_s('utf-8') # when :date then return cell.date when :date if cell.to_s.to_f < 1.0 f = cell.to_s.to_f*24.0*60.0*60.0 secs = f.round h = (secs / 3600.0).floor secs = secs - 3600*h m = (secs / 60.0).floor secs = secs - 60*m s = secs return h*3600+m*60+s else return cell.date end else return nil # cell.to_s('utf-8') end end line += 1 } end # returns the type of a cell: :float, :string, :date, :time def celltype(row,col,sheet=nil) sheet = @default_sheet unless sheet row,col = normalize(row,col) worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 line = 1 worksheet.each(skip) { |row_par| if line == row return nil unless row_par cell = row_par.at(col-1) return nil unless cell case cell.type when :numeric return :float when :text return :string when :date if cell.to_s.to_f < 1.0 return :time else return :date end else return cell.type.to_sym end end line += 1 } end # returns all values in this row as an array # row numbers are 1,2,3,... like in the spreadsheet def row(rownumber,sheet=nil) sheet = @default_sheet unless sheet worksheet = @workbook.worksheet(sheet_no(sheet)) #therow = worksheet.row(rownumber-1) result = [] worksheet.row(rownumber-1).each {|cell| if cell case cell.type when :numeric then result << cell.to_i when :text then result << cell.to_s('utf-8') when :date then result << cell.date else result << cell.to_s('utf-8') end else result << nil end } return result end # returns all values in this column as an array # column numbers are 1,2,3,... like in the spreadsheet def column(columnnumber,sheet=nil) if columnnumber.class == String columnnumber = Openoffice.letter_to_number(columnnumber) end sheet = @default_sheet unless sheet worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 result = [] worksheet.each(skip) { |row_par| if defined? row_par.at(columnnumber-1) cell = row_par.at(columnnumber-1) #if defined? cell = row_par.at(columnnumber-1) if cell case cell.type when :numeric then result << cell.to_i when :text then result << cell.to_s('utf-8') when :date then result << cell.date else result << cell.to_s('utf-8') end else result << nil end else result << nil end } result end # returns the first non empty column def first_column(sheet=nil) sheet = @default_sheet unless sheet return @first_column[sheet] if @first_column[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) fc end # returns the last non empty column def last_column(sheet=nil) sheet = @default_sheet unless sheet return @last_column[sheet] if @last_column[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) lc end # returns the first non empty row def first_row(sheet=nil) sheet = @default_sheet unless sheet return @first_row[sheet] if @first_row[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) fr end # returns the last non empty row def last_row(sheet=nil) sheet = @default_sheet unless sheet return @last_row[sheet] if @last_row[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) lr end # returns NO formula in excel spreadsheets def formula(row,col,sheet=nil) raise EXCEL_NO_FORMULAS end # raises an exception because formulas are not supported for excel files def formula?(row,col,sheet=nil) raise EXCEL_NO_FORMULAS end # returns NO formulas in excel spreadsheets def formulas(sheet=nil) raise EXCEL_NO_FORMULAS end private # determine the first and last boundaries def get_firsts_lasts(sheet=nil) sheet = @default_sheet unless sheet fr = fc = 999_999 lr = lc = -999_999 worksheet = @workbook.worksheet(sheet_no(sheet)) unless worksheet.min_row fr = nil else fr = worksheet.min_row + 1 end unless worksheet.max_row lr = nil else lr = worksheet.max_row + 1 end unless worksheet.min_col fc = nil else fc = worksheet.min_col + 1 end unless worksheet.max_col lc = nil else lc = worksheet.max_col + 1 end if lr # 2007-11-05 BEGIN # parsexcel liefert (mir unverstaendlich) eine Zeile als letzte Zeile # zurueck, die aber leer ist. Deshalb Korrekturfunktion, die wirklich # die letzte nicht leere Zeile liefert. while empty_row? row(lr,sheet) lr -= 1 end end # 2007-11-05 END @first_row[sheet] = fr @last_row[sheet] = lr @first_column[sheet] = fc @last_column[sheet] = lc return fr, lr, fc, lc end # converts name of a sheet to index (0,1,2,..) def sheet_no(name) return name-1 if name.kind_of?(Fixnum) 0.upto(@workbook.sheet_count - 1) do |i| # TODO: is there a better way to do conversion? return i if name == platform_specific_iconv( @workbook.worksheet(i).name) #Iconv.new('utf-8','unicode').iconv( # @workbook.worksheet(i).name # ) end raise StandardError, "sheet '#{name}' not found" end def empty_row?(row) content = false row.each {|elem| if elem != '' #if elem.class == String and elem.size > 0 content = true end } ! content end def platform_specific_iconv(value) case RUBY_PLATFORM.downcase when /darwin/ result = Iconv.new('utf-8','utf-8').iconv(value) when /solaris/ result = Iconv.new('utf-8','utf-8').iconv(value) when /mswin32/ result = Iconv.new('utf-8','iso-8859-1').iconv(value) else result = Iconv.new('utf-8','unicode').iconv(value) end # case if every_second_null?(result) result = remove_every_second_null(result) end result end def every_second_null?(str) result = true 0.upto(str.length/2-1) do |i| c = str[i*2,1] n = str[i*2+1,1] if n != "\000" result = false break end end result end def remove_every_second_null(str) result = '' 0.upto(str.length/2-1) do |i| c = str[i*2,1] result += c end result end end