lib/roo/excel.rb in roo-1.2.0 vs lib/roo/excel.rb in roo-1.2.1

- old
+ new

@@ -1,9 +1,9 @@ require 'rubygems' gem 'parseexcel', '>= 0.5.2' require 'parseexcel' -CHARGUESS=false +CHARGUESS = false require 'charguess' if CHARGUESS module Spreadsheet # :nodoc module ParseExcel class Worksheet @@ -27,10 +27,11 @@ @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP'] unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end filename = open_from_uri(filename) if filename[0,7] == "http://" + filename = open_from_stream(filename[7..-1]) if filename[0,7] == "stream:" filename = unzip(filename) if packed and packed == :zip begin file_type_check(filename,'.xls','an Excel') @filename = filename unless File.file?(@filename) @@ -59,11 +60,12 @@ end # returns an array of sheet names in the spreadsheet def sheets result = [] - 0.upto(@workbook.sheet_count - 1) do |i| + #0.upto(@workbook.worksheets.size - 1) do |i| # spreadsheet + 0.upto(@workbook.sheet_count - 1) do |i| # parseexcel # TODO: is there a better way to do conversion? if CHARGUESS encoding = CharGuess::guess(@workbook.worksheet(i).name) encoding = 'unicode' unless encoding @@ -79,34 +81,42 @@ end # returns the content of a cell. The upper left corner is (1,1) or ('A',1) def cell(row,col,sheet=nil) sheet = @default_sheet unless sheet + raise ArgumentError unless sheet read_cells(sheet) unless @cells_read[sheet] + raise "should be read" unless @cells_read[sheet] row,col = normalize(row,col) if celltype(row,col,sheet) == :date - yyyy,mm,dd = @cell[sheet]["#{row},#{col}"].split('-') + yyyy,mm,dd = @cell[sheet][[row,col]].split('-') return Date.new(yyyy.to_i,mm.to_i,dd.to_i) end - return @cell[sheet]["#{row},#{col}"] + return @cell[sheet][[row,col]] end # returns the type of a cell: # * :float # * :string, # * :date # * :percentage # * :formula # * :time + # * :datetime def celltype(row,col,sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) - if @formula[sheet]["#{row},#{col}"] - return :formula - else - @cell_type[sheet]["#{row},#{col}"] + begin + if @formula[sheet][[row,col]] + return :formula + else + @cell_type[sheet][[row,col]] + end + rescue + puts "Error in sheet #{sheet}, row #{row}, col #{col}" + raise end end # returns the first non empty column def first_column(sheet=nil) @@ -153,90 +163,49 @@ # returns NO formulas in excel spreadsheets def formulas(sheet=nil) raise EXCEL_NO_FORMULAS end - private - - # determine the first and last boundaries - def get_firsts_lasts_parseexcel(sheet=nil) + # shows the internal representation of all cells + # mainly for debugging purposes + def to_s(sheet=nil) sheet = @default_sheet unless sheet - fr = fc = 999_999 - lr = lc = -999_999 - worksheet = @workbook.worksheet(sheet_no(sheet)) - unless worksheet.min_row - fr = nil - else - fr = worksheet.min_row + 1 - end - unless worksheet.max_row - lr = nil - else - lr = worksheet.max_row + 1 - end - unless worksheet.min_col - fc = nil - else - fc = worksheet.min_col + 1 - end - unless worksheet.max_col - lc = nil - else - lc = worksheet.max_col + 1 - end - if lr - # 2007-11-05 BEGIN - # parsexcel liefert (mir unverstaendlich) eine Zeile als letzte Zeile - # zurueck, die aber leer ist. Deshalb Korrekturfunktion, die wirklich - # die letzte nicht leere Zeile liefert. - # 2008-07-23 meine Loesung funtionierte auch noch nicht unter allen - # Umstaenden row() == nil ergaenzt - while row(lr,sheet) == nil || empty_row?(row(lr,sheet)) - lr -= 1 - end - end - # 2007-11-05 END - - if lc - letzte_spalte_leer = true - until ! letzte_spalte_leer - worksheet.each(0) {|reihe| - if reihe - cell = reihe.at(lc-1) - if cell - case cell.type - when :numeric, :date - letzte_spalte_leer = false - when :text - letzte_spalte_leer = false if cell.to_s != "" - end - end - end - } - lc -= 1 if letzte_spalte_leer - #puts "letzte Spalte auf #{lc} verringert" if letzte_spalte_leer - end - end - - return fr, lr, fc, lc + read_cells(sheet) unless @cells_read[sheet] + @cell[sheet].inspect end + private # determine the first and last boundaries def get_firsts_lasts(sheet=nil) - fr,lr,fc,lc = get_firsts_lasts_parseexcel(sheet) - if lr - # 2007-11-05 BEGIN - # parsexcel liefert (mir unverstaendlich) eine Zeile als letzte Zeile - # zurueck, die aber leer ist. Deshalb Korrekturfunktion, die wirklich - # die letzte nicht leere Zeile liefert. - # 2008-07-23 meine Loesung funtionierte auch noch nicht unter allen - # Umstaenden row() == nil ergaenzt - while row(lr,sheet) == nil || empty_row?(row(lr,sheet)) - lr -= 1 - end + + # 2008-09-14 BEGINf + fr=lr=fc=lc=nil + sheet = @default_sheet unless sheet + if ! @cells_read[sheet] + read_cells(sheet) end - # 2007-11-05 END + if @cell[sheet] # nur wenn ueberhaupt Zellen belegt sind + @cell[sheet].each {|cellitem| + key = cellitem.first + y,x = key + + if cellitem[1].class != String or + (cellitem[1].class == String and cellitem[1] != "") + fr = y unless fr + fr = y if y < fr + + lr = y unless lr + lr = y if y > lr + + fc = x unless fc + fc = x if x < fc + + lc = x unless lc + lc = x if x > lc + end + } + end @first_row[sheet] = fr @last_row[sheet] = lr @first_column[sheet] = fc @last_column[sheet] = lc return fr, lr, fc, lc @@ -244,10 +213,11 @@ # converts name of a sheet to index (0,1,2,..) def sheet_no(name) return name-1 if name.kind_of?(Fixnum) 0.upto(@workbook.sheet_count - 1) do |i| + #0.upto(@workbook.worksheets.size - 1) do |i| # TODO: is there a better way to do conversion? return i if name == platform_specific_iconv( @workbook.worksheet(i).name) #Iconv.new('utf-8','unicode').iconv( # @workbook.worksheet(i).name @@ -315,23 +285,26 @@ result end # helper function to set the internal representation of cells def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v) - key = "#{y},#{x+i}" + #key = "#{y},#{x+i}" + key = [y,x+i] @cell_type[sheet] = {} unless @cell_type[sheet] @cell_type[sheet][key] = vt @formula[sheet] = {} unless @formula[sheet] @formula[sheet][key] = formula if formula @cell[sheet] = {} unless @cell[sheet] - case @cell_type[sheet][key] + case vt # @cell_type[sheet][key] when :float @cell[sheet][key] = v.to_f when :string @cell[sheet][key] = str_v when :date @cell[sheet][key] = v + when :datetime + @cell[sheet][key] = DateTime.new(v.year,v.month,v.day,v.hour,v.min,v.sec) when :percentage @cell[sheet][key] = v.to_f when :time @cell[sheet][key] = v else @@ -342,19 +315,24 @@ # read all cells in the selected sheet def read_cells(sheet=nil) sheet = @default_sheet unless sheet raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil raise RangeError unless self.sheets.include? sheet + + if @cells_read[sheet] + raise "sheet #{sheet} already read" + end + worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 x =1 y=1 i=0 worksheet.each(skip) { |row_par| if row_par x =1 - row_par.each do # |void| + row_par.each do # |void| cell = row_par.at(x-1) if cell case cell.type when :numeric vt = :float @@ -372,39 +350,48 @@ m = (secs / 60.0).floor secs = secs - 60*m s = secs v = h*3600+m*60+s else - vt = :date - v = cell.date - v = sprintf("%04d-%02d-%02d",v.year,v.month,v.day) + if cell.datetime.hour != 0 or + cell.datetime.min != 0 or + cell.datetime.sec != 0 or + cell.datetime.msec != 0 + vt = :datetime + v = cell.datetime + else + vt = :date + v = cell.date + v = sprintf("%04d-%02d-%02d",v.year,v.month,v.day) + end end else - vt = cell.type.to_sym + vt = cell.type.to_s.downcase.to_sym v = nil end # case formula = tr = nil #TODO:??? set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v) end # if cell + x += 1 end end y += 1 } @cells_read[sheet] = true end - #TODO: testing only -# def inject_null_characters(str) -# if str.class != String -# return str -# end -# new_str='' -# 0.upto(str.size-1) do |i| -# new_str += str[i,1] -# new_str += "\000" -# end -# new_str -# end -# + #TODO: testing only + # def inject_null_characters(str) + # if str.class != String + # return str + # end + # new_str='' + # 0.upto(str.size-1) do |i| + # new_str += str[i,1] + # new_str += "\000" + # end + # new_str + # end + # end