lib/roo/excelx.rb in roo-1.1.0 vs lib/roo/excelx.rb in roo-1.2.0

- old
+ new

@@ -11,31 +11,92 @@ self[-str.length,str.length] == str end end class Excelx < GenericSpreadsheet + FORMATS = { + 'General' => :float, + '0' => :float, + '0.00' => :float, + '#,##0' => :float, + '#,##0.00' => :float, + '0%' => :percentage, + '0.00%' => :percentage, + '0.00E+00' => :float, + '# ?/?' => :float, #??? TODO: + '# ??/??' => :float, #??? TODO: + 'mm-dd-yy' => :date, + 'd-mmm-yy' => :date, + 'd-mmm' => :date, + 'mmm-yy' => :date, + 'h:mm AM/PM' => :date, + 'h:mm:ss AM/PM' => :date, + 'h:mm' => :time, + 'h:mm:ss' => :time, + 'm/d/yy h:mm' => :date, + '#,##0 ;(#,##0)' => :float, + '#,##0 ;[Red](#,##0)' => :float, + '#,##0.00;(#,##0.00)' => :float, + '#,##0.00;[Red](#,##0.00)' => :float, + 'mm:ss' => :time, + '[h]:mm:ss' => :time, + 'mmss.0' => :time, + '##0.0E+0' => :float, + '@' => :float, + #-- zusaetzliche Formate, die nicht standardmaessig definiert sind: + "yyyy\\-mm\\-dd" => :date, + 'dd/mm/yy' => :date, + 'hh:mm:ss' => :time, + } + STANDARD_FORMATS = { + 0 => 'General', + 1 => '0', + 2 => '0.00', + 3 => '#,##0', + 4 => '#,##0.00', + 9 => '0%', + 10 => '0.00%', + 11 => '0.00E+00', + 12 => '# ?/?', + 13 => '# ??/??', + 14 => 'mm-dd-yy', + 15 => 'd-mmm-yy', + 16 => 'd-mmm', + 17 => 'mmm-yy', + 18 => 'h:mm AM/PM', + 19 => 'h:mm:ss AM/PM', + 20 => 'h:mm', + 21 => 'h:mm:ss', + 22 => 'm/d/yy h:mm', + 37 => '#,##0 ;(#,##0)', + 38 => '#,##0 ;[Red](#,##0)', + 39 => '#,##0.00;(#,##0.00)', + 40 => '#,##0.00;[Red](#,##0.00)', + 45 => 'mm:ss', + 46 => '[h]:mm:ss', + 47 => 'mmss.0', + 48 => '##0.0E+0', + 49 => '@', + } @@nr = 0 # initialization and opening of a spreadsheet file # values for packed: :zip def initialize(filename, packed=nil, file_warning = :error) #, create = false) super() @file_warning = file_warning @tmpdir = "oo_"+$$.to_s + @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP'] unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end filename = open_from_uri(filename) if filename[0,7] == "http://" filename = unzip(filename) if packed and packed == :zip - #if File.extname(filename).downcase != ".xlsx" - # warn "are you sure, this is an Excel-xlsx file?" - #end - #@file_warning = :error - file_type_check(filename,'.xlsx','an Excel-xlsx') - @cells_read = Hash.new - @filename = filename begin + file_type_check(filename,'.xlsx','an Excel-xlsx') + @cells_read = Hash.new + @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @@nr += 1 @file_nr = @@nr @@ -48,10 +109,17 @@ file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')) @sharedstring_doc = REXML::Document.new file file.close read_shared_strings(@sharedstring_doc) end + @styles_table = [] + if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')) + file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')) + @styles_doc = REXML::Document.new file + file.close + read_styles(@styles_doc) + end @sheet_doc = [] @sheet_files.each_with_index do |item, i| file = File.new(item) @sheet_doc[i] = REXML::Document.new file file.close @@ -73,10 +141,12 @@ @last_row = Hash.new @first_column = Hash.new @last_column = Hash.new @header_line = 1 @excelx_type = Hash.new + @excelx_value = Hash.new + @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt end # Returns the content of a spreadsheet-cell. # (1,1) is the upper left corner. # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the @@ -149,20 +219,40 @@ else @cell_type[sheet]["#{row},#{col}"] end end - # returns the internal type of a excel cell + # returns the internal type of an excel cell # * :numeric_or_formula # * :string + # Note: this is only available within the Excelx class def excelx_type(row,col,sheet=nil) - sheet = @default_sheet unless sheet + sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) - :numeric_or_formula + return @excelx_type[sheet]["#{row},#{col}"] end + # returns the internal value of an excelx cell + # Note: this is only available within the Excelx class + def excelx_value(row,col,sheet=nil) + sheet = @default_sheet unless sheet + read_cells(sheet) unless @cells_read[sheet] + row,col = normalize(row,col) + return @excelx_value[sheet]["#{row},#{col}"] + end + + # returns the internal format of an excel cell + def excelx_format(row,col,sheet=nil) + sheet = @default_sheet unless sheet + read_cells(sheet) unless @cells_read[sheet] + row,col = normalize(row,col) + s = @s_attribute[sheet]["#{row},#{col}"] + result = attribute2format(s) + result + end + # returns an array of sheet names in the spreadsheet def sheets return_sheets = [] @workbook_doc.each_element do |workbook| workbook.each_element do |el| @@ -202,11 +292,14 @@ end private # helper function to set the internal representation of cells - def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,excelx_type=nil) + def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v, + excelx_type=nil, + excelx_value=nil, + s_attribute=nil) key = "#{y},#{x+i}" @cell_type[sheet] = {} unless @cell_type[sheet] @cell_type[sheet][key] = vt @formula[sheet] = {} unless @formula[sheet] @formula[sheet][key] = formula if formula @@ -227,12 +320,17 @@ else @cell[sheet][key] = v end @excelx_type[sheet] = {} unless @excelx_type[sheet] @excelx_type[sheet][key] = excelx_type + @excelx_value[sheet] = {} unless @excelx_value[sheet] + @excelx_value[sheet][key] = excelx_value + @s_attribute[sheet] = {} unless @s_attribute[sheet] + @s_attribute[sheet][key] = s_attribute end + # splits a coordinate like "AA12" into the parts "AA" (String) and 12 (Fixnum) def split_coord(s) letter = "" number = 0 i = 0 while i<s.length and "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".include?(s[i,1]) @@ -255,131 +353,52 @@ y = number return x,y end # read all cells in the selected sheet - def ALTER_ANSATZ_read_cells(sheet=nil) - sheet = @default_sheet unless sheet - sheet_found = false - raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil - raise RangeError unless self.sheets.include? sheet - n = self.sheets.index(sheet) - @sheet_doc[n].each_element do |worksheet| - worksheet.each_element do |elem| - if elem.name == 'sheetData' - elem.each_element do |sheetdata| - if sheetdata.name == 'row' - sheetdata.each_element do |row| - if row.name == 'c' - if row.attributes['t'] == 's' - tmp_type = :shared - end - if row.attributes['s'] == '2' #or - #row.attributes['s'] == '1' # 2008-07-26 - tmp_type = :time - elsif row.attributes['s'] == '1' and row.attributes['t'] == nil # and ergaenzt 2008-07-03 - tmp_type = :formula - end - formula = nil - f_element_found = false - row.each_element do |cell| - if cell.name == 'f' - f_element_found = true - formula = cell.text - end - if cell.name == 'v' - if tmp_type == :formula and f_element_found == false - #if cell.text.to_f < 1.0 # 2008-07-26 - tmp_type = :time - #else # 2008-07-26 - #tmp_type = :date #2008-07-26 - #end #2008-07-26 - end - if tmp_type == :time #2008-07-26 - if cell.text.to_f >= 1.0 # 2008-07-26 - tmp_type = :date # 2008-07-26 - end # 2008-07-26 - end # 2008-07-26 - if tmp_type == :shared - vt = :string - str_v = @shared_table[cell.text.to_i] - elsif tmp_type == :date - vt = :date - v = cell.text - elsif tmp_type == :time - vt = :time - v = cell.text - elsif tmp_type == :formula - vt = :formula - v = cell.text.to_f #TODO: !!!! - else - vt = :float - v = cell.text - end - x,y = split_coordinate(row.attributes['r']) - tr=nil #TODO: ???s - set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v) - end - end - end - end - end - end - end - end + def format2type(format) + if FORMATS.has_key? format + FORMATS[format] + else + :float end - sheet_found = true #TODO: - if !sheet_found - raise RangeError - end - @cells_read[sheet] = true end -# read all cells in the selected sheet + # read all cells in the selected sheet def read_cells(sheet=nil) sheet = @default_sheet unless sheet sheet_found = false raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil - raise RangeError unless self.sheets.include? sheet + raise RangeError unless self.sheets.include? sheet n = self.sheets.index(sheet) @sheet_doc[n].each_element do |worksheet| worksheet.each_element do |elem| if elem.name == 'sheetData' elem.each_element do |sheetdata| if sheetdata.name == 'row' sheetdata.each_element do |row| if row.name == 'c' if row.attributes['t'] == 's' tmp_type = :shared + else + s_attribute = row.attributes['s'] + format = attribute2format(s_attribute) + tmp_type = format2type(format) end - if row.attributes['s'] == '2' #or - #row.attributes['s'] == '1' # 2008-07-26 - tmp_type = :time - elsif row.attributes['s'] == '1' and row.attributes['t'] == nil # and ergaenzt 2008-07-03 - tmp_type = :formula - end formula = nil - f_element_found = false row.each_element do |cell| if cell.name == 'f' - f_element_found = true formula = cell.text end if cell.name == 'v' - if tmp_type == :formula and f_element_found == false - #if cell.text.to_f < 1.0 # 2008-07-26 - tmp_type = :time - #else # 2008-07-26 - #tmp_type = :date #2008-07-26 - #end #2008-07-26 - end if tmp_type == :time #2008-07-26 if cell.text.to_f >= 1.0 # 2008-07-26 tmp_type = :date # 2008-07-26 end # 2008-07-26 end # 2008-07-26 - excelx_type = :numeric_or_formula + excelx_type = [:numeric_or_formula,format] + excelx_value = cell.text if tmp_type == :shared vt = :string str_v = @shared_table[cell.text.to_i] excelx_type = :string elsif tmp_type == :date @@ -395,11 +414,11 @@ vt = :float v = cell.text end x,y = split_coordinate(row.attributes['r']) tr=nil #TODO: ???s - set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type) + set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute) end end end end end @@ -411,11 +430,11 @@ if !sheet_found raise RangeError end @cells_read[sheet] = true end - + # Checks if the default_sheet exists. If not an RangeError exception is # raised def check_default_sheet sheet_found = false raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil @@ -433,11 +452,11 @@ if ! sheet_found raise RangeError, "sheet '#{@default_sheet}' not found" end end - + # extracts all needed files from the zip file def process_zipfile(zipfilename, zip, path='') @sheet_files = [] Zip::ZipFile.open(zipfilename) {|zf| zf.entries.each {|entry| #entry.extract @@ -449,39 +468,47 @@ if entry.to_s.end_with?('sharedStrings.xml') open(@tmpdir+'/'+@file_nr.to_s+'_roo_sharedStrings.xml','wb') {|f| f << zip.read(entry) } end + if entry.to_s.end_with?('styles.xml') + open(@tmpdir+'/'+@file_nr.to_s+'_roo_styles.xml','wb') {|f| + f << zip.read(entry) + } + end if entry.to_s =~ /sheet([0-9]+).xml$/ nr = $1 - # entry.extract("xaxa_sheet#{nr}.xml") open(@tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}",'wb') {|f| f << zip.read(entry) } @sheet_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}" end } } return end + # extract files from the zip file def extract_content(zipfilename) Zip::ZipFile.open(@filename) do |zip| process_zipfile(zipfilename,zip) end end + # sets the value of a cell def set_value(row,col,value,sheet=nil) sheet = @default_value unless sheet @cell[sheet]["#{row},#{col}"] = value end + # sets the type of a cell def set_type(row,col,type,sheet=nil) sheet = @default_value unless sheet @cell_type[sheet]["#{row},#{col}"] = type end + # read the shared strings xml document def read_shared_strings(doc) doc.each_element do |sst| if sst.name == 'sst' sst.each_element do |si| if si.name == 'si' @@ -492,8 +519,55 @@ end end end end end + end + + # read the styles elements of an excelx document + def read_styles(doc) + @numFmts = [] + @cellXfs = [] + doc.each_element do |e1| + if e1.name == "styleSheet" + e1.each_element do |e2| + if e2.name == "numFmts" + e2.each_element do |e3| + if e3.name == 'numFmt' + numFmtId = e3.attributes['numFmtId'] + formatCode = e3.attributes['formatCode'] + @numFmts << [numFmtId, formatCode] + end + end + elsif e2.name == "cellXfs" + e2.each_element do |e3| + if e3.name == 'xf' + numFmtId = e3.attributes['numFmtId'] + # p numFmtId + @cellXfs << [numFmtId] + end + end + end + end + end + end + end + + # convert internal excelx attribute to a format + def attribute2format(s) + result = nil + @numFmts.each {|nf| + if nf.first == @cellXfs[s.to_i].first + result = nf[1] + break + end + } + unless result + id = @cellXfs[s.to_i].first.to_i + if STANDARD_FORMATS.has_key? id + result = STANDARD_FORMATS[id] + end + end + result end end # class