lib/roo/excel.rb in roo-0.6.1 vs lib/roo/excel.rb in roo-0.7.0

- old
+ new

@@ -1,11 +1,26 @@ require 'rubygems' gem 'parseexcel', '>= 0.5.2' require 'parseexcel' +module Spreadsheet + module ParseExcel + class Worksheet + include Enumerable + attr_reader :min_row, :max_row, :min_col, :max_col + end + end +end + + +# Class Excel is derived from class Openoffice. It implements almost all methods +# from the Openoffice class in the same way. +# Parameter packed: :zip - File is a zip-file class Excel < Openoffice + EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets' + def initialize(filename, packed = nil) @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end @@ -13,32 +28,39 @@ filename = unzip(filename) if packed and packed == :zip if filename[-4..-1] != ".xls" warn "are you sure, this is an excel file?" end @filename = filename + unless File.file?(@filename) + raise IOError, "file #{@filename} does not exist" + end @workbook = Spreadsheet::ParseExcel.parse(filename) @default_sheet = nil # no need to set default_sheet if there is only one sheet in the document if self.sheets.size == 1 @default_sheet = self.sheets.first end - @first_row = @last_row = @first_column = @last_column = nil + # @first_row = @last_row = @first_column = @last_column = nil + #if ENV["roo_local"] != "thomas-p" + FileUtils::rm_r(@tmpdir) + #end + @first_row = Hash.new + @last_row = Hash.new + @first_column = Hash.new + @last_column = Hash.new + @cells_read = Hash.new end def sheets - #if DateTime.now < Date.new(2007,6,10) - # return ["Tabelle1", "Name of Sheet 2", "Sheet3"] - #else - result = [] - 0.upto(@workbook.sheet_count - 1) do |i| - # TODO: is there a better way to do conversion? - result << Iconv.new('utf-8','unicode').iconv( - @workbook.worksheet(i).name - ) - end - return result - #end + result = [] + 0.upto(@workbook.sheet_count - 1) do |i| + # TODO: is there a better way to do conversion? + result << Iconv.new('utf-8','unicode').iconv( + @workbook.worksheet(i).name + ) + end + return result end # sets the working sheet (1,2,3,..) def default_sheet=(n) if n.kind_of?(Fixnum) @@ -48,18 +70,19 @@ # parseexcel supports now the name of a sheet @default_sheet = n else raise TypeError, "what are you trying to set as default sheet?" end - @first_row = @last_row = @first_column = @last_column = nil - @cells_read = false + @first_row[n] = @last_row[n] = @first_column[n] = @last_column[n] = nil + @cells_read[n] = false end # returns the content of a cell. The upper left corner is (1,1) or ('A',1) - def cell(row,col) + def cell(row,col,sheet=nil) + sheet = @default_sheet unless sheet row,col = normalize(row,col) - worksheet = @workbook.worksheet(sheet_no(@default_sheet)) + worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 line = 1 worksheet.each(skip) { |row_par| if line == row if row_par == nil @@ -70,110 +93,202 @@ case cell.type when :numeric then return cell.to_f when :text then return cell.to_s('utf-8') when :date then return cell.date else - return cell.to_s('utf-8') + return nil # cell.to_s('utf-8') end end line += 1 } end - # returns the type of a cell: "float", "string", "date" - def celltype(row,col) + # returns the type of a cell: :float, :string, :date + def celltype(row,col,sheet=nil) + default_sheet_check if sheet == nil + sheet = @default_sheet unless sheet row,col = normalize(row,col) - default_sheet_check - worksheet = @workbook.worksheet(sheet_no(@default_sheet)) + + worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 line = 1 worksheet.each(skip) { |row_par| if line == row + return nil unless row_par cell = row_par.at(col-1) return nil unless cell case cell.type - when :numeric then return :float - when :text then return :string - when :date then return :date - else return cell.type.to_sym + when :numeric + return :float + when :text + return :string + when :date + return :date + else + return cell.type.to_sym end end line += 1 } end - # return this row a an array off cells - def row(rownumber) + # returns all values in this row as an array + # row numbers are 1,2,3,... like in the spreadsheet + def row(rownumber,sheet=nil) + sheet = @default_sheet unless sheet default_sheet_check - worksheet = @workbook.worksheet(sheet_no(@default_sheet)) - therow = worksheet.row(rownumber-1) + worksheet = @workbook.worksheet(sheet_no(sheet)) + #therow = worksheet.row(rownumber-1) result = [] - therow.each {|cell| - case cell.type - when :numeric then result << cell.to_i - when :text then result << cell.to_s('utf-8') - when :date then result << cell.date + worksheet.row(rownumber-1).each {|cell| + if cell + case cell.type + when :numeric then result << cell.to_i + when :text then result << cell.to_s('utf-8') + when :date then result << cell.date + else + result << cell.to_s('utf-8') + end else - return result << cell.to_s('utf-8') + result << nil end } return result end + # returns all values in this column as an array + # column numbers are 1,2,3,... like in the spreadsheet + def column(columnnumber,sheet=nil) + if columnnumber.class == String + columnnumber = Openoffice.letter_to_number(columnnumber) + end + sheet = @default_sheet unless sheet + default_sheet_check + worksheet = @workbook.worksheet(sheet_no(sheet)) + skip = 0 + result = [] + worksheet.each(skip) { |row_par| + if defined? row_par.at(columnnumber-1) + cell = row_par.at(columnnumber-1) + #if defined? cell = row_par.at(columnnumber-1) + if cell + case cell.type + when :numeric then result << cell.to_i + when :text then result << cell.to_s('utf-8') + when :date then result << cell.date + else + result << cell.to_s('utf-8') + end + else + result << nil + end + else + result << nil + end + } + result + end + # returns the first non empty column - def first_column - return @first_column if @first_column - fr, lr, fc, lc = get_firsts_lasts + def first_column(sheet=nil) + sheet = @default_sheet unless sheet + return @first_column[sheet] if @first_column[sheet] + fr, lr, fc, lc = get_firsts_lasts(sheet) fc end # returns the last non empty column - def last_column - return @last_column if @last_column - fr, lr, fc, lc = get_firsts_lasts + def last_column(sheet=nil) + sheet = @default_sheet unless sheet + return @last_column[sheet] if @last_column[sheet] + fr, lr, fc, lc = get_firsts_lasts(sheet) lc end # returns the first non empty row - def first_row - return @first_row if @first_row - fr, lr, fc, lc = get_firsts_lasts + def first_row(sheet=nil) + sheet = @default_sheet unless sheet + return @first_row[sheet] if @first_row[sheet] + fr, lr, fc, lc = get_firsts_lasts(sheet) fr end # returns the last non empty row - def last_row - return @last_row if @last_row - fr, lr, fc, lc = get_firsts_lasts + def last_row(sheet=nil) + sheet = @default_sheet unless sheet + return @last_row[sheet] if @last_row[sheet] + fr, lr, fc, lc = get_firsts_lasts(sheet) lr end # true if a cell is empty - def empty?(row, col) + def empty?(row, col, sheet=nil) + sheet = @default_sheet unless sheet row,col = normalize(row,col) - return true if row < first_row || row > last_row || col < first_column || col > last_column - return true unless cell(row, col) - return true if celltype(row, col) == "string" && cell(row, col) == "" + return true if row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet) + return true unless cell(row, col, sheet) + return true if celltype(row, col, sheet) == "string" && cell(row, col, sheet) == "" false end + # first non-empty column as a letter + def first_column_as_letter(sheet=nil) + sheet = @default_sheet unless sheet + Openoffice.number_to_letter(first_column(sheet)) + end + + # last non-empty column as a letter + def last_column_as_letter(sheet=nil) + sheet = @default_sheet unless sheet + Openoffice.number_to_letter(last_column(sheet)) + end + + def formula(row,col,sheet=nil) + raise EXCEL_NO_FORMULAS + end + def formula?(row,col,sheet=nil) + raise EXCEL_NO_FORMULAS + end + def formulas(sheet=nil) + raise EXCEL_NO_FORMULAS + end + private # check if default_sheet was set def default_sheet_check raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil end # determine the first and last boundaries - def get_firsts_lasts + def get_firsts_lasts(sheet=nil) + sheet = @default_sheet unless sheet fr = fc = 999_999 lr = lc = -999_999 - worksheet = @workbook.worksheet(sheet_no(@default_sheet)) + #TODO: worksheet = @workbook.worksheet(sheet_no(@default_sheet)) + worksheet = @workbook.worksheet(sheet_no(sheet)) + if false #============================================================= + if @filename == "test/false_encoding.xls" +#assert_instance_of(Spreadsheet::ParseExcel::Worksheet, worksheet) +p worksheet.class + + p worksheet.methods + p '### min_row: '+worksheet.min_row.to_s + p '### max_row: '+worksheet.max_row.to_s + p '### min_col: '+worksheet.min_col.to_s + p '### max_col: '+worksheet.max_col.to_s + end + if @filename == "test/false_encoding.xls" + p worksheet + end skip = 0 line = 1 worksheet.each(skip) { |row_par| if row_par + if @filename == "test/false_encoding.xls" + p row_par + end row_par.each_with_index {|cell,i| # nicht beruecksichtigen, wenn nil und vorher noch nichts war if cell fc = [fc, i+1].min lc = [lc, i+1].max @@ -182,14 +297,27 @@ end } end line += 1 } - @first_row = fr - @last_row = lr - @first_column = fc - @last_column = lc + end #============================================================= + fr = worksheet.min_row + 1 + lr = worksheet.max_row + 1 + fc = worksheet.min_col + 1 + lc = worksheet.max_col + 1 + # 2007-11-05 BEGIN + # parsexcel liefert (mir unverstaendlich) eine Zeile als letzte Zeile + # zurueck, die aber leer ist. Deshalb Korrekturfunktion, die wirklich + # die letzte nicht leere Zeile liefert. + while empty_row? row(lr,sheet) + lr -= 1 + end + # 2007-11-05 END + @first_row[sheet] = fr + @last_row[sheet] = lr + @first_column[sheet] = fc + @last_column[sheet] = lc return fr, lr, fc, lc end # converts name of a sheet to index (0,1,2,..) @@ -200,8 +328,52 @@ return i if name == Iconv.new('utf-8','unicode').iconv( @workbook.worksheet(i).name ) end raise StandardError, "sheet '#{name}' not found" + end + + def write_csv_content(file=nil,sheet=nil) + file = STDOUT unless file + sheet = @default_sheet unless sheet + worksheet = @workbook.worksheet(sheet_no(sheet)) + skip = 0 + worksheet.each(skip) { |row_par| + 1.upto(row_par.length) {|col| + file.print(",") if col > 1 + cell = row_par.at(col-1) + unless cell + empty = true + else + case cell.type + when :numeric + onecelltype = :float + onecell = cell.to_f + when :text + onecelltype = :string + onecell = cell.to_s('utf-8') + when :date + onecelltype = :date + onecell = cell.date + else + onecelltype = nil + onecell = nil + end + end + file.print one_cell_output(onecelltype,onecell,empty) + } + file.print("\n") + } + end + + def empty_row?(row) + content = false + row.each {|elem| + if elem != '' + #if elem.class == String and elem.size > 0 + content = true + end + } + ! content end end