require 'rubygems' require 'rexml/document' require 'fileutils' require 'zip/zipfilesystem' require 'date' #require 'base64' class String def end_with?(str) self[-str.length,str.length] == str end end class Excelx < GenericSpreadsheet @@nr = 0 # initialization and opening of a spreadsheet file # values for packed: :zip def initialize(filename, packed=nil, file_warning = :error) #, create = false) super() @file_warning = file_warning @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end filename = open_from_uri(filename) if filename[0,7] == "http://" filename = unzip(filename) if packed and packed == :zip #if File.extname(filename).downcase != ".xlsx" # warn "are you sure, this is an Excel-xlsx file?" #end #@file_warning = :error file_type_check(filename,'.xlsx','an Excel-xlsx') @cells_read = Hash.new @filename = filename begin unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @@nr += 1 @file_nr = @@nr extract_content(@filename) file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml")) @workbook_doc = REXML::Document.new file file.close @shared_table = [] if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')) file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')) @sharedstring_doc = REXML::Document.new file file.close read_shared_strings(@sharedstring_doc) end @sheet_doc = [] @sheet_files.each_with_index do |item, i| file = File.new(item) @sheet_doc[i] = REXML::Document.new file file.close end ensure #if ENV["roo_local"] != "thomas-p" FileUtils::rm_r(@tmpdir) #end end @default_sheet = nil # no need to set default_sheet if there is only one sheet in the document if self.sheets.size == 1 @default_sheet = self.sheets.first end @cell = Hash.new @cell_type = Hash.new @formula = Hash.new @first_row = Hash.new @last_row = Hash.new @first_column = Hash.new @last_column = Hash.new @header_line = 1 @excelx_type = Hash.new end # Returns the content of a spreadsheet-cell. # (1,1) is the upper left corner. # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the # cell at the first line and first row. def cell(row, col, sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) if celltype(row,col,sheet) == :date yyyy,mm,dd = @cell[sheet]["#{row},#{col}"].split('-') return Date.new(yyyy.to_i,mm.to_i,dd.to_i) end @cell[sheet]["#{row},#{col}"] end # Returns the formula at (row,col). # Returns nil if there is no formula. # The method #formula? checks if there is a formula. def formula(row,col,sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) if @formula[sheet]["#{row},#{col}"] == nil return nil else return @formula[sheet]["#{row},#{col}"] end end # true, if there is a formula def formula?(row,col,sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) formula(row,col) != nil end # set a cell to a certain value # (this will not be saved back to the spreadsheet file!) def set(row,col,value,sheet=nil) #:nodoc: sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) set_value(row,col,value,sheet) if value.class == Fixnum set_type(row,col,:float,sheet) elsif value.class == String set_type(row,col,:string,sheet) elsif value.class == Float set_type(row,col,:string,sheet) else raise ArgumentError, "Type for "+value.to_s+" not set" end end # returns the type of a cell: # * :float # * :string, # * :date # * :percentage # * :formula # * :time def celltype(row,col,sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) # if @formula and @formula[sheet] and @formula[sheet]["#{row},#{col}"] if @formula[sheet]["#{row},#{col}"] return :formula else @cell_type[sheet]["#{row},#{col}"] end end # returns the internal type of a excel cell # * :numeric_or_formula # * :string def excelx_type(row,col,sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) :numeric_or_formula end # returns an array of sheet names in the spreadsheet def sheets return_sheets = [] @workbook_doc.each_element do |workbook| workbook.each_element do |el| if el.name == "sheets" el.each_element do |sheet| return_sheets << sheet.attributes['name'] end end end end return_sheets end # shows the internal representation of all cells # for debugging purposes def to_s(sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] @cell[sheet].inspect end # returns each formula in the selected sheet as an array of elements # [row, col, formula] def formulas(sheet=nil) theformulas = Array.new sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] first_row(sheet).upto(last_row(sheet)) {|row| first_column(sheet).upto(last_column(sheet)) {|col| if formula?(row,col,sheet) f = [row, col, formula(row,col,sheet)] theformulas << f end } } theformulas end private # helper function to set the internal representation of cells def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,excelx_type=nil) key = "#{y},#{x+i}" @cell_type[sheet] = {} unless @cell_type[sheet] @cell_type[sheet][key] = vt @formula[sheet] = {} unless @formula[sheet] @formula[sheet][key] = formula if formula @cell[sheet] = {} unless @cell[sheet] case @cell_type[sheet][key] when :float @cell[sheet][key] = v.to_f when :string @cell[sheet][key] = str_v when :date @cell[sheet][key] = (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d") # tr.attributes['date-value'] when :percentage @cell[sheet][key] = v.to_f when :time #hms = v.split(':') #@cell[sheet][key] = hms[0].to_i*3600 + hms[1].to_i*60 + hms[2].to_i @cell[sheet][key] = v.to_f*(24*60*60) else @cell[sheet][key] = v end @excelx_type[sheet] = {} unless @excelx_type[sheet] @excelx_type[sheet][key] = excelx_type end def split_coord(s) letter = "" number = 0 i = 0 while i= 1.0 # 2008-07-26 tmp_type = :date # 2008-07-26 end # 2008-07-26 end # 2008-07-26 if tmp_type == :shared vt = :string str_v = @shared_table[cell.text.to_i] elsif tmp_type == :date vt = :date v = cell.text elsif tmp_type == :time vt = :time v = cell.text elsif tmp_type == :formula vt = :formula v = cell.text.to_f #TODO: !!!! else vt = :float v = cell.text end x,y = split_coordinate(row.attributes['r']) tr=nil #TODO: ???s set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v) end end end end end end end end end sheet_found = true #TODO: if !sheet_found raise RangeError end @cells_read[sheet] = true end # read all cells in the selected sheet def read_cells(sheet=nil) sheet = @default_sheet unless sheet sheet_found = false raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil raise RangeError unless self.sheets.include? sheet n = self.sheets.index(sheet) @sheet_doc[n].each_element do |worksheet| worksheet.each_element do |elem| if elem.name == 'sheetData' elem.each_element do |sheetdata| if sheetdata.name == 'row' sheetdata.each_element do |row| if row.name == 'c' if row.attributes['t'] == 's' tmp_type = :shared end if row.attributes['s'] == '2' #or #row.attributes['s'] == '1' # 2008-07-26 tmp_type = :time elsif row.attributes['s'] == '1' and row.attributes['t'] == nil # and ergaenzt 2008-07-03 tmp_type = :formula end formula = nil f_element_found = false row.each_element do |cell| if cell.name == 'f' f_element_found = true formula = cell.text end if cell.name == 'v' if tmp_type == :formula and f_element_found == false #if cell.text.to_f < 1.0 # 2008-07-26 tmp_type = :time #else # 2008-07-26 #tmp_type = :date #2008-07-26 #end #2008-07-26 end if tmp_type == :time #2008-07-26 if cell.text.to_f >= 1.0 # 2008-07-26 tmp_type = :date # 2008-07-26 end # 2008-07-26 end # 2008-07-26 excelx_type = :numeric_or_formula if tmp_type == :shared vt = :string str_v = @shared_table[cell.text.to_i] excelx_type = :string elsif tmp_type == :date vt = :date v = cell.text elsif tmp_type == :time vt = :time v = cell.text elsif tmp_type == :formula vt = :formula v = cell.text.to_f #TODO: !!!! else vt = :float v = cell.text end x,y = split_coordinate(row.attributes['r']) tr=nil #TODO: ???s set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type) end end end end end end end end end sheet_found = true #TODO: if !sheet_found raise RangeError end @cells_read[sheet] = true end # Checks if the default_sheet exists. If not an RangeError exception is # raised def check_default_sheet sheet_found = false raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil @workbook_doc.each_element do |workbook| workbook.each_element do |el| if el.name == "sheets" el.each_element do |sheet| if @default_sheet == sheet.attributes['name'] sheet_found = true end end end end end if ! sheet_found raise RangeError, "sheet '#{@default_sheet}' not found" end end def process_zipfile(zipfilename, zip, path='') @sheet_files = [] Zip::ZipFile.open(zipfilename) {|zf| zf.entries.each {|entry| #entry.extract if entry.to_s.end_with?('workbook.xml') open(@tmpdir+'/'+@file_nr.to_s+'_roo_workbook.xml','wb') {|f| f << zip.read(entry) } end if entry.to_s.end_with?('sharedStrings.xml') open(@tmpdir+'/'+@file_nr.to_s+'_roo_sharedStrings.xml','wb') {|f| f << zip.read(entry) } end if entry.to_s =~ /sheet([0-9]+).xml$/ nr = $1 # entry.extract("xaxa_sheet#{nr}.xml") open(@tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}",'wb') {|f| f << zip.read(entry) } @sheet_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}" end } } return end def extract_content(zipfilename) Zip::ZipFile.open(@filename) do |zip| process_zipfile(zipfilename,zip) end end def set_value(row,col,value,sheet=nil) sheet = @default_value unless sheet @cell[sheet]["#{row},#{col}"] = value end def set_type(row,col,type,sheet=nil) sheet = @default_value unless sheet @cell_type[sheet]["#{row},#{col}"] = type end def read_shared_strings(doc) doc.each_element do |sst| if sst.name == 'sst' sst.each_element do |si| if si.name == 'si' si.each_element do |elem| if elem.name == 't' @shared_table << elem.text end end end end end end end end # class