lib/roo/excel.rb in roo-0.7.0 vs lib/roo/excel.rb in roo-0.8.0

- old
+ new

@@ -1,34 +1,35 @@ require 'rubygems' gem 'parseexcel', '>= 0.5.2' require 'parseexcel' +CHARGUESS=false +require 'charguess' if CHARGUESS module Spreadsheet module ParseExcel class Worksheet - include Enumerable - attr_reader :min_row, :max_row, :min_col, :max_col + include Enumerable + attr_reader :min_row, :max_row, :min_col, :max_col end end end -# Class Excel is derived from class Openoffice. It implements almost all methods -# from the Openoffice class in the same way. -# Parameter packed: :zip - File is a zip-file -class Excel < Openoffice +class Excel < GenericSpreadsheet #Openoffice EXCEL_NO_FORMULAS = 'formulas are not supported for excel spreadsheets' + # Creates a new Excel spreadsheet object. + # Parameter packed: :zip - File is a zip-file def initialize(filename, packed = nil) @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end filename = open_from_uri(filename) if filename[0,7] == "http://" filename = unzip(filename) if packed and packed == :zip - if filename[-4..-1] != ".xls" + if File.extname(filename) != ".xls" warn "are you sure, this is an excel file?" end @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" @@ -39,11 +40,11 @@ if self.sheets.size == 1 @default_sheet = self.sheets.first end # @first_row = @last_row = @first_column = @last_column = nil #if ENV["roo_local"] != "thomas-p" - FileUtils::rm_r(@tmpdir) + FileUtils::rm_r(@tmpdir) #end @first_row = Hash.new @last_row = Hash.new @first_column = Hash.new @last_column = Hash.new @@ -52,28 +53,38 @@ def sheets result = [] 0.upto(@workbook.sheet_count - 1) do |i| # TODO: is there a better way to do conversion? - result << Iconv.new('utf-8','unicode').iconv( - @workbook.worksheet(i).name - ) + if CHARGUESS + encoding = CharGuess::guess(@workbook.worksheet(i).name) + encoding = 'unicode' unless encoding + + + result << Iconv.new('utf-8',encoding).iconv( + @workbook.worksheet(i).name + ) + else + result << Iconv.new('utf-8','unicode').iconv( + @workbook.worksheet(i).name + ) + end end return result end # sets the working sheet (1,2,3,..) def default_sheet=(n) if n.kind_of?(Fixnum) - @default_sheet = n #-1 + # elsif n.kind_of?(String) raise RangeError if ! self.sheets.include?(n) # parseexcel supports now the name of a sheet - @default_sheet = n else raise TypeError, "what are you trying to set as default sheet?" end + @default_sheet = n @first_row[n] = @last_row[n] = @first_column[n] = @last_column[n] = nil @cells_read[n] = false end # returns the content of a cell. The upper left corner is (1,1) or ('A',1) @@ -89,13 +100,13 @@ return nil end cell = row_par.at(col-1) return nil unless cell case cell.type - when :numeric then return cell.to_f - when :text then return cell.to_s('utf-8') - when :date then return cell.date + when :numeric then return cell.to_f + when :text then return cell.to_s('utf-8') + when :date then return cell.date else return nil # cell.to_s('utf-8') end end line += 1 @@ -140,13 +151,13 @@ #therow = worksheet.row(rownumber-1) result = [] worksheet.row(rownumber-1).each {|cell| if cell case cell.type - when :numeric then result << cell.to_i - when :text then result << cell.to_s('utf-8') - when :date then result << cell.date + when :numeric then result << cell.to_i + when :text then result << cell.to_s('utf-8') + when :date then result << cell.date else result << cell.to_s('utf-8') end else result << nil @@ -167,16 +178,16 @@ skip = 0 result = [] worksheet.each(skip) { |row_par| if defined? row_par.at(columnnumber-1) cell = row_par.at(columnnumber-1) - #if defined? cell = row_par.at(columnnumber-1) + #if defined? cell = row_par.at(columnnumber-1) if cell case cell.type - when :numeric then result << cell.to_i - when :text then result << cell.to_s('utf-8') - when :date then result << cell.date + when :numeric then result << cell.to_i + when :text then result << cell.to_s('utf-8') + when :date then result << cell.date else result << cell.to_s('utf-8') end else result << nil @@ -217,44 +228,27 @@ sheet = @default_sheet unless sheet return @last_row[sheet] if @last_row[sheet] fr, lr, fc, lc = get_firsts_lasts(sheet) lr end - - # true if a cell is empty - def empty?(row, col, sheet=nil) - sheet = @default_sheet unless sheet - row,col = normalize(row,col) - return true if row < first_row(sheet) || row > last_row(sheet) || col < first_column(sheet) || col > last_column(sheet) - return true unless cell(row, col, sheet) - return true if celltype(row, col, sheet) == "string" && cell(row, col, sheet) == "" - false - end - - # first non-empty column as a letter - def first_column_as_letter(sheet=nil) - sheet = @default_sheet unless sheet - Openoffice.number_to_letter(first_column(sheet)) - end - # last non-empty column as a letter - def last_column_as_letter(sheet=nil) - sheet = @default_sheet unless sheet - Openoffice.number_to_letter(last_column(sheet)) - end - + # returns NO formula in excel spreadsheets def formula(row,col,sheet=nil) raise EXCEL_NO_FORMULAS end + + # raises an exception because formulas are not supported for excel files def formula?(row,col,sheet=nil) raise EXCEL_NO_FORMULAS end + + # returns NO formulas in excel spreadsheets def formulas(sheet=nil) raise EXCEL_NO_FORMULAS end -private + private # check if default_sheet was set def default_sheet_check raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil end @@ -262,46 +256,11 @@ # determine the first and last boundaries def get_firsts_lasts(sheet=nil) sheet = @default_sheet unless sheet fr = fc = 999_999 lr = lc = -999_999 - #TODO: worksheet = @workbook.worksheet(sheet_no(@default_sheet)) worksheet = @workbook.worksheet(sheet_no(sheet)) - if false #============================================================= - if @filename == "test/false_encoding.xls" -#assert_instance_of(Spreadsheet::ParseExcel::Worksheet, worksheet) -p worksheet.class - - p worksheet.methods - p '### min_row: '+worksheet.min_row.to_s - p '### max_row: '+worksheet.max_row.to_s - p '### min_col: '+worksheet.min_col.to_s - p '### max_col: '+worksheet.max_col.to_s - end - if @filename == "test/false_encoding.xls" - p worksheet - end - skip = 0 - line = 1 - worksheet.each(skip) { |row_par| - if row_par - if @filename == "test/false_encoding.xls" - p row_par - end - row_par.each_with_index {|cell,i| - # nicht beruecksichtigen, wenn nil und vorher noch nichts war - if cell - fc = [fc, i+1].min - lc = [lc, i+1].max - fr = [fr, line].min - lr = [lr, line].max - end - } - end - line += 1 - } - end #============================================================= fr = worksheet.min_row + 1 lr = worksheet.max_row + 1 fc = worksheet.min_col + 1 lc = worksheet.max_col + 1 # 2007-11-05 BEGIN @@ -317,23 +276,23 @@ @first_column[sheet] = fc @last_column[sheet] = lc return fr, lr, fc, lc end - # converts name of a sheet to index (0,1,2,..) def sheet_no(name) return name-1 if name.kind_of?(Fixnum) 0.upto(@workbook.sheet_count - 1) do |i| # TODO: is there a better way to do conversion? return i if name == Iconv.new('utf-8','unicode').iconv( - @workbook.worksheet(i).name - ) + @workbook.worksheet(i).name + ) end raise StandardError, "sheet '#{name}' not found" end + # writes csv output to stdout or file def write_csv_content(file=nil,sheet=nil) file = STDOUT unless file sheet = @default_sheet unless sheet worksheet = @workbook.worksheet(sheet_no(sheet)) skip = 0 @@ -343,19 +302,19 @@ cell = row_par.at(col-1) unless cell empty = true else case cell.type - when :numeric - onecelltype = :float - onecell = cell.to_f - when :text - onecelltype = :string - onecell = cell.to_s('utf-8') - when :date - onecelltype = :date - onecell = cell.date + when :numeric + onecelltype = :float + onecell = cell.to_f + when :text + onecelltype = :string + onecell = cell.to_s('utf-8') + when :date + onecelltype = :date + onecell = cell.date else onecelltype = nil onecell = nil end end @@ -367,10 +326,10 @@ def empty_row?(row) content = false row.each {|elem| if elem != '' - #if elem.class == String and elem.size > 0 + #if elem.class == String and elem.size > 0 content = true end } ! content end