require 'rubygems' gem 'parseexcel', '>= 0.5.2' require 'parseexcel' module Spreadsheet module ParseExcel class Worksheet include Enumerable attr_reader :min_row, :max_row, :min_col, :max_col end end end # Class Excel is derived from class Openoffice. It implements almost all methods # from the Openoffice class in the same way. # Parameter packed: :zip - File is a zip-file class Excel < Openoffice EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets' def initialize(filename, packed = nil) @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end filename = open_from_uri(filename) if filename[0,7] == "http://" filename = unzip(filename) if packed and packed == :zip if filename[-4..-1] != ".xls" warn "are you sure, this is an excel file?" end @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @workbook = Spreadsheet::ParseExcel.parse(filename) @default_sheet = nil # no need to set default_sheet if there is only one sheet in the document if self.sheets.size == 1 @default_sheet = self.sheets.first end # @first_row = @last_row = @first_column = @last_column = nil #if ENV["roo_local"] != "thomas-p" FileUtils::rm_r(@tmpdir) #end @first_row = Hash.new @last_row = Hash.new @first_column = Hash.new @last_column = Hash.new @cells_read = Hash.new end def sheets result = [] 0.upto(@workbook.sheet_count - 1) do |i| # TODO: is there a better way to do conversion? result << Iconv.new('utf-8','unicode').iconv( @workbook.worksheet(i).name ) end return result end # sets the working sheet (1,2,3,..) def default_sheet=(n) if n.kind_of?(Fixnum) @default_sheet = n #-1 elsif n.kind_of?(String) raise RangeError if ! self.sheets.include?(n) # parseexcel supports now the name of a sheet @default_sheet = n else raise TypeError, "what are you trying to set as default sheet?" end @first_row[n] = @last_row[n] = @first_column[n] = @last_column[n] = nil @cells_read[n] = false end # returns the content of a cell. The upper left corner is (1,1) or ('A',1) def cell(row,col,sheet=nil) sheet = @default_sheet unless sheet row,col = normalize(row,col) worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 line = 1 worksheet.each(skip) { |row_par| if line == row if row_par == nil return nil end cell = row_par.at(col-1) return nil unless cell case cell.type when :numeric then return cell.to_f when :text then return cell.to_s('utf-8') when :date then return cell.date else return nil # cell.to_s('utf-8') end end line += 1 } end # returns the type of a cell: :float, :string, :date def celltype(row,col,sheet=nil) default_sheet_check if sheet == nil sheet = @default_sheet unless sheet row,col = normalize(row,col) worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 line = 1 worksheet.each(skip) { |row_par| if line == row return nil unless row_par cell = row_par.at(col-1) return nil unless cell case cell.type when :numeric return :float when :text return :string when :date return :date else return cell.type.to_sym end end line += 1 } end # returns all values in this row as an array # row numbers are 1,2,3,... like in the spreadsheet def row(rownumber,sheet=nil) sheet = @default_sheet unless sheet default_sheet_check worksheet = @workbook.worksheet(sheet_no(sheet)) #therow = worksheet.row(rownumber-1) result = [] worksheet.row(rownumber-1).each {|cell| if cell case cell.type when :numeric then result << cell.to_i when :text then result << cell.to_s('utf-8') when :date then result << cell.date else result << cell.to_s('utf-8') end else result << nil end } return result end # returns all values in this column as an array # column numbers are 1,2,3,... like in the spreadsheet def column(columnnumber,sheet=nil) if columnnumber.class == String columnnumber = Openoffice.letter_to_number(columnnumber) end sheet = @default_sheet unless sheet default_sheet_check worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 result = [] worksheet.each(skip) { |row_par| if defined? row_par.at(columnnumber-1) cell = row_par.at(columnnumber-1) #if defined? cell = row_par.at(columnnumber-1) if cell case cell.type when :numeric then result << cell.to_i when :text then result << cell.to_s('utf-8') when :date then result << cell.date else result << cell.to_s('utf-8') end else result << nil end else result << nil end } result end # returns the first non empty column def first_column(sheet=nil) sheet = @default_sheet unless sheet return @first_column[sheet] if @first_column[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) fc end # returns the last non empty column def last_column(sheet=nil) sheet = @default_sheet unless sheet return @last_column[sheet] if @last_column[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) lc end # returns the first non empty row def first_row(sheet=nil) sheet = @default_sheet unless sheet return @first_row[sheet] if @first_row[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) fr end # returns the last non empty row def last_row(sheet=nil) sheet = @default_sheet unless sheet return @last_row[sheet] if @last_row[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) lr end # true if a cell is empty def empty?(row, col, sheet=nil) sheet = @default_sheet unless sheet row,col = normalize(row,col) return true if row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet) return true unless cell(row, col, sheet) return true if celltype(row, col, sheet) == "string" && cell(row, col, sheet) == "" false end # first non-empty column as a letter def first_column_as_letter(sheet=nil) sheet = @default_sheet unless sheet Openoffice.number_to_letter(first_column(sheet)) end # last non-empty column as a letter def last_column_as_letter(sheet=nil) sheet = @default_sheet unless sheet Openoffice.number_to_letter(last_column(sheet)) end def formula(row,col,sheet=nil) raise EXCEL_NO_FORMULAS end def formula?(row,col,sheet=nil) raise EXCEL_NO_FORMULAS end def formulas(sheet=nil) raise EXCEL_NO_FORMULAS end private # check if default_sheet was set def default_sheet_check raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil end # determine the first and last boundaries def get_firsts_lasts(sheet=nil) sheet = @default_sheet unless sheet fr = fc = 999_999 lr = lc = -999_999 #TODO: worksheet = @workbook.worksheet(sheet_no(@default_sheet)) worksheet = @workbook.worksheet(sheet_no(sheet)) if false #============================================================= if @filename == "test/false_encoding.xls" #assert_instance_of(Spreadsheet::ParseExcel::Worksheet, worksheet) p worksheet.class p worksheet.methods p '### min_row: '+worksheet.min_row.to_s p '### max_row: '+worksheet.max_row.to_s p '### min_col: '+worksheet.min_col.to_s p '### max_col: '+worksheet.max_col.to_s end if @filename == "test/false_encoding.xls" p worksheet end skip = 0 line = 1 worksheet.each(skip) { |row_par| if row_par if @filename == "test/false_encoding.xls" p row_par end row_par.each_with_index {|cell,i| # nicht beruecksichtigen, wenn nil und vorher noch nichts war if cell fc = [fc, i+1].min lc = [lc, i+1].max fr = [fr, line].min lr = [lr, line].max end } end line += 1 } end #============================================================= fr = worksheet.min_row + 1 lr = worksheet.max_row + 1 fc = worksheet.min_col + 1 lc = worksheet.max_col + 1 # 2007-11-05 BEGIN # parsexcel liefert (mir unverstaendlich) eine Zeile als letzte Zeile # zurueck, die aber leer ist. Deshalb Korrekturfunktion, die wirklich # die letzte nicht leere Zeile liefert. while empty_row? row(lr,sheet) lr -= 1 end # 2007-11-05 END @first_row[sheet] = fr @last_row[sheet] = lr @first_column[sheet] = fc @last_column[sheet] = lc return fr, lr, fc, lc end # converts name of a sheet to index (0,1,2,..) def sheet_no(name) return name-1 if name.kind_of?(Fixnum) 0.upto(@workbook.sheet_count - 1) do |i| # TODO: is there a better way to do conversion? return i if name == Iconv.new('utf-8','unicode').iconv( @workbook.worksheet(i).name ) end raise StandardError, "sheet '#{name}' not found" end def write_csv_content(file=nil,sheet=nil) file = STDOUT unless file sheet = @default_sheet unless sheet worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 worksheet.each(skip) { |row_par| 1.upto(row_par.length) {|col| file.print(",") if col > 1 cell = row_par.at(col-1) unless cell empty = true else case cell.type when :numeric onecelltype = :float onecell = cell.to_f when :text onecelltype = :string onecell = cell.to_s('utf-8') when :date onecelltype = :date onecell = cell.date else onecelltype = nil onecell = nil end end file.print one_cell_output(onecelltype,onecell,empty) } file.print("\n") } end def empty_row?(row) content = false row.each {|elem| if elem != '' #if elem.class == String and elem.size > 0 content = true end } ! content end end