require 'date' require 'nokogiri' class Roo::Excelx < Roo::Base module Format EXCEPTIONAL_FORMATS = { 'h:mm am/pm' => :date, 'h:mm:ss am/pm' => :date, } STANDARD_FORMATS = { 0 => 'General', 1 => '0', 2 => '0.00', 3 => '#,##0', 4 => '#,##0.00', 9 => '0%', 10 => '0.00%', 11 => '0.00E+00', 12 => '# ?/?', 13 => '# ??/??', 14 => 'mm-dd-yy', 15 => 'd-mmm-yy', 16 => 'd-mmm', 17 => 'mmm-yy', 18 => 'h:mm AM/PM', 19 => 'h:mm:ss AM/PM', 20 => 'h:mm', 21 => 'h:mm:ss', 22 => 'm/d/yy h:mm', 37 => '#,##0 ;(#,##0)', 38 => '#,##0 ;[Red](#,##0)', 39 => '#,##0.00;(#,##0.00)', 40 => '#,##0.00;[Red](#,##0.00)', 45 => 'mm:ss', 46 => '[h]:mm:ss', 47 => 'mmss.0', 48 => '##0.0E+0', 49 => '@', } def to_type(format) format = format.to_s.downcase if type = EXCEPTIONAL_FORMATS[format] type elsif format.include?('#') :float elsif format.include?('d') || format.include?('y') if format.include?('h') || format.include?('s') :datetime else :date end elsif format.include?('h') || format.include?('s') :time elsif format.include?('%') :percentage else :float end end module_function :to_type end # initialization and opening of a spreadsheet file # values for packed: :zip def initialize(filename, options = {}, deprecated_file_warning = :error) if Hash === options packed = options[:packed] file_warning = options[:file_warning] || :error else warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprected. Use an options hash instead.' packed = options file_warning = deprecated_file_warning end file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed) make_tmpdir do |tmpdir| filename = download_uri(filename, tmpdir) if uri?(filename) filename = unzip(filename, tmpdir) if packed == :zip @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @comments_files = Array.new extract_content(tmpdir, @filename) @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml")) @shared_table = [] if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml')) @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml')) read_shared_strings(@sharedstring_doc) end @styles_table = [] @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} } if File.exist?(File.join(tmpdir, 'roo_styles.xml')) @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml')) read_styles(@styles_doc) end @sheet_doc = @sheet_files.map do |item| load_xml(item) end @comments_doc = @comments_files.map do |item| load_xml(item) end end super(filename, options) @formula = Hash.new @excelx_type = Hash.new @excelx_value = Hash.new @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt @comment = Hash.new @comments_read = Hash.new end def method_missing(m,*args) # is method name a label name read_labels if @label.has_key?(m.to_s) sheet ||= @default_sheet read_cells(sheet) row,col = label(m.to_s) cell(row,col) else # call super for methods like #a1 super end end # Returns the content of a spreadsheet-cell. # (1,1) is the upper left corner. # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the # cell at the first line and first row. def cell(row, col, sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) if celltype(row,col,sheet) == :date yyyy,mm,dd = @cell[sheet][[row,col]].split('-') return Date.new(yyyy.to_i,mm.to_i,dd.to_i) elsif celltype(row,col,sheet) == :datetime date_part,time_part = @cell[sheet][[row,col]].split(' ') yyyy,mm,dd = date_part.split('-') hh,mi,ss = time_part.split(':') return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i) end @cell[sheet][[row,col]] end # Returns the formula at (row,col). # Returns nil if there is no formula. # The method #formula? checks if there is a formula. def formula(row,col,sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) @formula[sheet][[row,col]] && @formula[sheet][[row,col]] end alias_method :formula?, :formula # returns each formula in the selected sheet as an array of elements # [row, col, formula] def formulas(sheet=nil) sheet ||= @default_sheet read_cells(sheet) if @formula[sheet] @formula[sheet].each.collect do |elem| [elem[0][0], elem[0][1], elem[1]] end else [] end end class Font attr_accessor :bold, :italic, :underline def bold? @bold == true end def italic? @italic == true end def underline? @underline == true end end # Given a cell, return the cell's style def font(row, col, sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) s_attribute = @s_attribute[sheet][[row,col]] s_attribute ||= 0 s_attribute = s_attribute.to_i @style_definitions[s_attribute] end # returns the type of a cell: # * :float # * :string, # * :date # * :percentage # * :formula # * :time # * :datetime def celltype(row,col,sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) if @formula[sheet][[row,col]] return :formula else @cell_type[sheet][[row,col]] end end # returns the internal type of an excel cell # * :numeric_or_formula # * :string # Note: this is only available within the Excelx class def excelx_type(row,col,sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) return @excelx_type[sheet][[row,col]] end # returns the internal value of an excelx cell # Note: this is only available within the Excelx class def excelx_value(row,col,sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) return @excelx_value[sheet][[row,col]] end # returns the internal format of an excel cell def excelx_format(row,col,sheet=nil) sheet ||= @default_sheet read_cells(sheet) row,col = normalize(row,col) s = @s_attribute[sheet][[row,col]] attribute2format(s).to_s end # returns an array of sheet names in the spreadsheet def sheets @workbook_doc.xpath("//xmlns:sheet").map do |sheet| sheet['name'] end end # shows the internal representation of all cells # for debugging purposes def to_s(sheet=nil) sheet ||= @default_sheet read_cells(sheet) @cell[sheet].inspect end # returns the row,col values of the labelled cell # (nil,nil) if label is not defined def label(labelname) read_labels if @label.empty? || !@label.has_key?(labelname) return nil,nil,nil else return @label[labelname][1].to_i, Roo::Base.letter_to_number(@label[labelname][2]), @label[labelname][0] end end # Returns an array which all labels. Each element is an array with # [labelname, [row,col,sheetname]] def labels # sheet ||= @default_sheet # read_cells(sheet) read_labels @label.map do |label| [ label[0], # name [ label[1][1].to_i, # row Roo::Base.letter_to_number(label[1][2]), # column label[1][0], # sheet ] ] end end # returns the comment at (row/col) # nil if there is no comment def comment(row,col,sheet=nil) sheet ||= @default_sheet #read_cells(sheet) read_comments(sheet) unless @comments_read[sheet] row,col = normalize(row,col) return nil unless @comment[sheet] @comment[sheet][[row,col]] end # true, if there is a comment def comment?(row,col,sheet=nil) sheet ||= @default_sheet # read_cells(sheet) read_comments(sheet) unless @comments_read[sheet] row,col = normalize(row,col) comment(row,col) != nil end # returns each comment in the selected sheet as an array of elements # [row, col, comment] def comments(sheet=nil) sheet ||= @default_sheet read_comments(sheet) unless @comments_read[sheet] if @comment[sheet] @comment[sheet].each.collect do |elem| [elem[0][0],elem[0][1],elem[1]] end else [] end end private # helper function to set the internal representation of cells def set_cell_values(sheet,x,y,i,v,value_type,formula, excelx_type=nil, excelx_value=nil, s_attribute=nil) key = [y,x+i] @cell_type[sheet] ||= {} @cell_type[sheet][key] = value_type @formula[sheet] ||= {} @formula[sheet][key] = formula if formula @cell[sheet] ||= {} @cell[sheet][key] = case @cell_type[sheet][key] when :float v.to_f when :string v when :date (base_date+v.to_i).strftime("%Y-%m-%d") when :datetime (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S") when :percentage v.to_f when :time v.to_f*(24*60*60) else v end @excelx_type[sheet] ||= {} @excelx_type[sheet][key] = excelx_type @excelx_value[sheet] ||= {} @excelx_value[sheet][key] = excelx_value @s_attribute[sheet] ||= {} @s_attribute[sheet][key] = s_attribute end # read all cells in the selected sheet def read_cells(sheet=nil) sheet ||= @default_sheet validate_sheet!(sheet) return if @cells_read[sheet] @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c| s_attribute = c['s'].to_i # should be here # c: # 22606 # , format: , tmp_type: float value_type = case c['t'] when 's' :shared when 'b' :boolean # 2011-02-25 BEGIN when 'str' :string # 2011-02-25 END # 2011-09-15 BEGIN when 'inlineStr' :inlinestr # 2011-09-15 END else format = attribute2format(s_attribute) Format.to_type(format) end formula = nil c.children.each do |cell| case cell.name when 'is' cell.children.each do |is| if is.name == 't' inlinestr_content = is.content value_type = :string v = inlinestr_content excelx_type = :string y, x = Roo::Base.split_coordinate(c['r']) excelx_value = inlinestr_content #cell.content set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute) end end when 'f' formula = cell.content when 'v' if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0 value_type = if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001 :datetime else :date end end excelx_type = [:numeric_or_formula,format.to_s] excelx_value = cell.content v = case value_type when :shared value_type = :string excelx_type = :string @shared_table[cell.content.to_i] when :boolean (cell.content.to_i == 1 ? 'TRUE' : 'FALSE') when :date cell.content when :time cell.content when :datetime cell.content when :formula cell.content.to_f #TODO: !!!! when :string excelx_type = :string cell.content else value_type = :float cell.content end y, x = Roo::Base.split_coordinate(c['r']) set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute) end end end @cells_read[sheet] = true # begin comments =begin Datei xl/comments1.xml Kommentar fuer B4 Kommentar fuer B5 =end =begin if @comments_doc[self.sheets.index(sheet)] read_comments(sheet) end =end #end comments end # Reads all comments from a sheet def read_comments(sheet=nil) sheet ||= @default_sheet validate_sheet!(sheet) n = self.sheets.index(sheet) return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment| ref = comment.attributes['ref'].to_s row,col = Roo::Base.split_coordinate(ref) comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text| @comment[sheet] ||= {} @comment[sheet][[row,col]] = text.text end end @comments_read[sheet] = true end def read_labels @label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name| # "Sheet1!$C$5" sheet, coordinates = defined_name.text.split('!$', 2) col,row = coordinates.split('$') [defined_name['name'], [sheet,row,col]] end] end # Extracts all needed files from the zip file def process_zipfile(tmpdir, zipfilename, zip, path='') @sheet_files = [] Zip::ZipFile.open(zipfilename) {|zf| zf.entries.each {|entry| if entry.to_s.end_with?('workbook.xml') open(tmpdir+'/'+'roo_workbook.xml','wb') {|f| f << zip.read(entry) } end # if entry.to_s.end_with?('sharedStrings.xml') # at least one application creates this file with another (incorrect?) # casing. It doesn't hurt, if we ignore here the correct casing - there # won't be both names in the archive. # Changed the casing of all the following filenames. if entry.to_s.downcase.end_with?('sharedstrings.xml') open(tmpdir+'/'+'roo_sharedStrings.xml','wb') {|f| f << zip.read(entry) } end if entry.to_s.downcase.end_with?('styles.xml') open(tmpdir+'/'+'roo_styles.xml','wb') {|f| f << zip.read(entry) } end if entry.to_s.downcase =~ /sheet([0-9]+).xml$/ nr = $1 open(tmpdir+'/'+"roo_sheet#{nr}",'wb') {|f| f << zip.read(entry) } @sheet_files[nr.to_i-1] = tmpdir+'/'+"roo_sheet#{nr}" end if entry.to_s.downcase =~ /comments([0-9]+).xml$/ nr = $1 open(tmpdir+'/'+"roo_comments#{nr}",'wb') {|f| f << zip.read(entry) } @comments_files[nr.to_i-1] = tmpdir+'/'+"roo_comments#{nr}" end } } # return end # extract files from the zip file def extract_content(tmpdir, zipfilename) Zip::ZipFile.open(@filename) do |zip| process_zipfile(tmpdir, zipfilename,zip) end end # read the shared strings xml document def read_shared_strings(doc) doc.xpath("/xmlns:sst/xmlns:si").each do |si| shared_table_entry = '' si.children.each do |elem| if elem.name == 'r' and elem.children elem.children.each do |r_elem| if r_elem.name == 't' shared_table_entry << r_elem.content end end end if elem.name == 't' shared_table_entry = elem.content end end @shared_table << shared_table_entry end end # read the styles elements of an excelx document def read_styles(doc) @cellXfs = [] @numFmts = Hash[doc.xpath("//xmlns:numFmt").map do |numFmt| [numFmt['numFmtId'], numFmt['formatCode']] end] fonts = doc.xpath("//xmlns:fonts/xmlns:font").map do |font_el| Font.new.tap do |font| font.bold = !font_el.xpath('./xmlns:b').empty? font.italic = !font_el.xpath('./xmlns:i').empty? font.underline = !font_el.xpath('./xmlns:u').empty? end end doc.xpath("//xmlns:cellXfs").each do |xfs| xfs.children.each do |xf| @cellXfs << xf['numFmtId'] @style_definitions << fonts[xf['fontId'].to_i] end end end # convert internal excelx attribute to a format def attribute2format(s) id = @cellXfs[s.to_i] @numFmts[id] || Format::STANDARD_FORMATS[id.to_i] end def base_date @base_date ||= read_base_date end # Default to 1900 (minus one day due to excel quirk) but use 1904 if # it's set in the Workbook's workbookPr # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx def read_base_date base_date = Date.new(1899,12,30) @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr| if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i base_date = Date.new(1904,01,01) end end base_date end end # class