lib/roo/excelx.rb in roo-1.2.3 vs lib/roo/excelx.rb in roo-1.3.5

- old
+ new

@@ -1,8 +1,6 @@ - -require 'rubygems' -require 'rexml/document' +require 'xml' require 'fileutils' require 'zip/zipfilesystem' require 'date' class String @@ -100,30 +98,31 @@ end @@nr += 1 @file_nr = @@nr extract_content(@filename) file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml")) - @workbook_doc = REXML::Document.new file + @workbook_doc = XML::Parser.io(file).parse file.close @shared_table = [] if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')) file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml')) - @sharedstring_doc = REXML::Document.new file + @sharedstring_doc = XML::Parser.io(file).parse file.close read_shared_strings(@sharedstring_doc) end @styles_table = [] + @style_definitions = Array.new { |h,k| h[k] = {} } if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')) file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml')) - @styles_doc = REXML::Document.new file + @styles_doc = XML::Parser.io(file).parse file.close read_styles(@styles_doc) end @sheet_doc = [] @sheet_files.each_with_index do |item, i| file = File.new(item) - @sheet_doc[i] = REXML::Document.new file + @sheet_doc[i] = XML::Parser.io(file).parse file.close end ensure #if ENV["roo_local"] != "thomas-p" FileUtils::rm_r(@tmpdir) @@ -161,11 +160,10 @@ elsif celltype(row,col,sheet) == :datetime date_part,time_part = @cell[sheet][[row,col]].split(' ') yyyy,mm,dd = date_part.split('-') hh,mi,ss = time_part.split(':') return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i) - end @cell[sheet][[row,col]] end # Returns the formula at (row,col). @@ -187,10 +185,37 @@ sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] row,col = normalize(row,col) formula(row,col) != nil end + + class Font + attr_accessor :bold, :italic, :underline + + def bold? + @bold == true + end + + def italic? + @italic == true + end + + def underline? + @underline == true + end + end + + # Given a cell, return the cell's style + def font(row, col, sheet=nil) + sheet = @default_sheet unless sheet + read_cells(sheet) unless @cells_read[sheet] + row,col = normalize(row,col) + s_attribute = @s_attribute[sheet][[row,col]] + s_attribute ||= 0 + s_attribute = s_attribute.to_i + @style_definitions[s_attribute] + end # set a cell to a certain value # (this will not be saved back to the spreadsheet file!) def set(row,col,value,sheet=nil) #:nodoc: sheet = @default_sheet unless sheet @@ -258,22 +283,15 @@ end # returns an array of sheet names in the spreadsheet def sheets return_sheets = [] - @workbook_doc.each_element do |workbook| - workbook.each_element do |el| - if el.name == "sheets" - el.each_element do |sheet| - return_sheets << sheet.attributes['name'] - end - end - end + @workbook_doc.find("//*[local-name()='sheet']").each do |sheet| + return_sheets << sheet.attributes.to_h['name'] end return_sheets end - # shows the internal representation of all cells # for debugging purposes def to_s(sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] @@ -374,86 +392,66 @@ sheet = @default_sheet unless sheet sheet_found = false raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil raise RangeError unless self.sheets.include? sheet n = self.sheets.index(sheet) - @sheet_doc[n].each_element do |worksheet| - worksheet.each_element do |elem| - if elem.name == 'sheetData' - elem.each_element do |sheetdata| - if sheetdata.name == 'row' - sheetdata.each_element do |row| - if row.name == 'c' - if row.attributes['t'] == 's' - tmp_type = :shared - else - s_attribute = row.attributes['s'] - format = attribute2format(s_attribute) - tmp_type = format2type(format) - end - formula = nil - row.each_element do |cell| -# puts "cell.name: #{cell.name}" if cell.text.include? "22606.5120" -# puts "cell.text: #{cell.text}" if cell.text.include? "22606.5120" - if cell.name == 'f' - formula = cell.text - end - if cell.name == 'v' - #puts "tmp_type: #{tmp_type}" if cell.text.include? "22606.5120" - #puts cell.name - if tmp_type == :time or tmp_type == :datetime #2008-07-26 - #p cell.text - # p cell.text.to_f if cell.text.include? "22606.5120" - if cell.text.to_f >= 1.0 # 2008-07-26 - # puts ">= 1.0" if cell.text.include? "22606.5120" - # puts "cell.text.to_f: #{cell.text.to_f}" if cell.text.include? "22606.5120" - #puts "cell.text.to_f.floor: #{cell.text.to_f.floor}" if cell.text.include? "22606.5120" - if (cell.text.to_f - cell.text.to_f.floor).abs > 0.000001 #TODO: - # puts "abs ist groesser" if cell.text.include? "22606.5120" - # @cell[sheet][key] = DateTime.parse(tr.attributes['date-value']) - tmp_type = :datetime - - else - #puts ":date" - tmp_type = :date # 2008-07-26 - end - else - #puts "<1.0" - end # 2008-07-26 - end # 2008-07-26 - excelx_type = [:numeric_or_formula,format] - excelx_value = cell.text - if tmp_type == :shared - vt = :string - str_v = @shared_table[cell.text.to_i] - excelx_type = :string - elsif tmp_type == :date - vt = :date - v = cell.text - elsif tmp_type == :time - vt = :time - v = cell.text - elsif tmp_type == :datetime - vt = :datetime - v = cell.text - elsif tmp_type == :formula - vt = :formula - v = cell.text.to_f #TODO: !!!! - else - vt = :float - v = cell.text - end - #puts "vt: #{vt}" if cell.text.include? "22606.5120" - x,y = split_coordinate(row.attributes['r']) - tr=nil #TODO: ???s - set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute) - end - end - end + @sheet_doc[n].find("//*[local-name()='c']").each do |c| + s_attribute = c.attributes.to_h['s'].to_i # should be here + if (c.attributes.to_h['t'] == 's') + tmp_type = :shared + elsif (c.attributes.to_h['t'] == 'b') + tmp_type = :boolean + else + # s_attribute = c.attributes.to_h['s'].to_i # was here + format = attribute2format(s_attribute) + tmp_type = format2type(format) + end + formula = nil + c.each_element do |cell| + if cell.name == 'f' + formula = cell.content + end + if cell.name == 'v' + if tmp_type == :time or tmp_type == :datetime + if cell.content.to_f >= 1.0 + if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001 + tmp_type = :datetime + else + tmp_type = :date end - end + else + end end + excelx_type = [:numeric_or_formula,format] + excelx_value = cell.content + if tmp_type == :shared + vt = :string + str_v = @shared_table[cell.content.to_i] + excelx_type = :string + elsif tmp_type == :boolean + vt = :boolean + cell.content.to_i == 1 ? v = 'TRUE' : v = 'FALSE' + elsif tmp_type == :date + vt = :date + v = cell.content + elsif tmp_type == :time + vt = :time + v = cell.content + elsif tmp_type == :datetime + vt = :datetime + v = cell.content + elsif tmp_type == :formula + vt = :formula + v = cell.content.to_f #TODO: !!!! + else + vt = :float + v = cell.content + end + #puts "vt: #{vt}" if cell.text.include? "22606.5120" + x,y = split_coordinate(c.attributes.to_h['r']) + tr=nil #TODO: ???s + set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute) end end end sheet_found = true #TODO: if !sheet_found @@ -465,21 +463,13 @@ # Checks if the default_sheet exists. If not an RangeError exception is # raised def check_default_sheet sheet_found = false raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil - @workbook_doc.each_element do |workbook| - workbook.each_element do |el| - if el.name == "sheets" - el.each_element do |sheet| - if @default_sheet == sheet.attributes['name'] - sheet_found = true - end - end - end - end - end + + sheet_found = true if sheets.include?(@default_sheet) + if ! sheet_found raise RangeError, "sheet '#{@default_sheet}' not found" end end @@ -535,49 +525,64 @@ @cell_type[sheet][[row,col]] = type end # read the shared strings xml document def read_shared_strings(doc) - doc.each_element do |sst| - if sst.name == 'sst' - sst.each_element do |si| - if si.name == 'si' - si.each_element do |elem| - if elem.name == 't' - @shared_table << elem.text - end + doc.find("//*[local-name()='si']").each do |si| + shared_table_entry = '' + si.each_element do |elem| + if (elem.name == 'r') + elem.each_element do |r_elem| + if (r_elem.name == 't') + shared_table_entry << r_elem.content end end end + if (elem.name == 't') + shared_table_entry = elem.content + end end + @shared_table << shared_table_entry end end # read the styles elements of an excelx document def read_styles(doc) @numFmts = [] @cellXfs = [] - doc.each_element do |e1| - if e1.name == "styleSheet" - e1.each_element do |e2| - if e2.name == "numFmts" - e2.each_element do |e3| - if e3.name == 'numFmt' - numFmtId = e3.attributes['numFmtId'] - formatCode = e3.attributes['formatCode'] - @numFmts << [numFmtId, formatCode] + fonts = [] + + doc.find("//*[local-name()='numFmt']").each do |numFmt| + numFmtId = numFmt.attributes.to_h['numFmtId'] + formatCode = numFmt.attributes.to_h['formatCode'] + @numFmts << [numFmtId, formatCode] + end + doc.find("//*[local-name()='fonts']").each do |fonts_el| + fonts_el.each_element do |font_el| + if font_el.name == 'font' + font = Excelx::Font.new + font_el.each_element do |font_sub_el| + case font_sub_el.name + when 'b' + font.bold = true + when 'i' + font.italic = true + when 'u' + font.underline = true end - end - elsif e2.name == "cellXfs" - e2.each_element do |e3| - if e3.name == 'xf' - numFmtId = e3.attributes['numFmtId'] - @cellXfs << [numFmtId] - end - end end + fonts << font end end + end + + doc.find("//*[local-name()='cellXfs']").each do |xfs| + xfs.each do |xf| + numFmtId = xf.attributes.to_h['numFmtId'] + @cellXfs << [numFmtId] + fontId = xf.attributes.to_h['fontId'].to_i + @style_definitions << fonts[fontId] + end end end # convert internal excelx attribute to a format def attribute2format(s)