lib/roo/excelx.rb in roo-1.11.2 vs lib/roo/excelx.rb in roo-1.12.0

- old
+ new

@@ -1,10 +1,9 @@ -require 'fileutils' require 'date' require 'nokogiri' -class Roo::Excelx < Roo::GenericSpreadsheet +class Roo::Excelx < Roo::Base module Format EXCEPTIONAL_FORMATS = { 'h:mm am/pm' => :date, 'h:mm:ss am/pm' => :date, } @@ -64,60 +63,51 @@ module_function :to_type end # initialization and opening of a spreadsheet file # values for packed: :zip - def initialize(filename, packed=nil, file_warning = :error) #, create = false) + def initialize(filename, options = {}, deprecated_file_warning = :error) + if Hash === options + packed = options[:packed] + file_warning = options[:file_warning] || :error + else + warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprected. Use an options hash instead.' + packed = options + file_warning = deprecated_file_warning + end + file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed) make_tmpdir do |tmpdir| - filename = open_from_uri(filename, tmpdir) if uri?(filename) + filename = download_uri(filename, tmpdir) if uri?(filename) filename = unzip(filename, tmpdir) if packed == :zip - @cells_read = Hash.new @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @comments_files = Array.new extract_content(tmpdir, @filename) - @workbook_doc = File.open(File.join(tmpdir, "roo_workbook.xml")) do |file| - Nokogiri::XML(file) - end + @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml")) @shared_table = [] if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml')) - @sharedstring_doc = File.open(File.join(tmpdir, 'roo_sharedStrings.xml')) do |file| - Nokogiri::XML(file) - end + @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml')) read_shared_strings(@sharedstring_doc) end @styles_table = [] @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} } if File.exist?(File.join(tmpdir, 'roo_styles.xml')) - @styles_doc = File.open(File.join(tmpdir, 'roo_styles.xml')) do |file| - Nokogiri::XML(file) - end + @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml')) read_styles(@styles_doc) end @sheet_doc = @sheet_files.map do |item| - File.open(item) do |file| - Nokogiri::XML(file) - end + load_xml(item) end @comments_doc = @comments_files.map do |item| - File.open(item) do |file| - Nokogiri::XML(file) - end + load_xml(item) end end - @default_sheet = self.sheets.first - @cell = Hash.new - @cell_type = Hash.new + super(filename, options) @formula = Hash.new - @first_row = Hash.new - @last_row = Hash.new - @first_column = Hash.new - @last_column = Hash.new - @header_line = 1 @excelx_type = Hash.new @excelx_value = Hash.new @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt @comment = Hash.new @comments_read = Hash.new @@ -126,11 +116,11 @@ def method_missing(m,*args) # is method name a label name read_labels if @label.has_key?(m.to_s) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = label(m.to_s) cell(row,col) else # call super for methods like #a1 super @@ -141,11 +131,11 @@ # (1,1) is the upper left corner. # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the # cell at the first line and first row. def cell(row, col, sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = normalize(row,col) if celltype(row,col,sheet) == :date yyyy,mm,dd = @cell[sheet][[row,col]].split('-') return Date.new(yyyy.to_i,mm.to_i,dd.to_i) elsif celltype(row,col,sheet) == :datetime @@ -160,32 +150,21 @@ # Returns the formula at (row,col). # Returns nil if there is no formula. # The method #formula? checks if there is a formula. def formula(row,col,sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = normalize(row,col) - if @formula[sheet][[row,col]] == nil - return nil - else - return @formula[sheet][[row,col]] - end + @formula[sheet][[row,col]] && @formula[sheet][[row,col]] end + alias_method :formula?, :formula - # true, if there is a formula - def formula?(row,col,sheet=nil) - sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] - row,col = normalize(row,col) - formula(row,col) != nil - end - # returns each formula in the selected sheet as an array of elements # [row, col, formula] def formulas(sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) if @formula[sheet] @formula[sheet].each.collect do |elem| [elem[0][0], elem[0][1], elem[1]] end else @@ -210,11 +189,11 @@ end # Given a cell, return the cell's style def font(row, col, sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = normalize(row,col) s_attribute = @s_attribute[sheet][[row,col]] s_attribute ||= 0 s_attribute = s_attribute.to_i @style_definitions[s_attribute] @@ -228,11 +207,11 @@ # * :formula # * :time # * :datetime def celltype(row,col,sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = normalize(row,col) if @formula[sheet][[row,col]] return :formula else @cell_type[sheet][[row,col]] @@ -243,28 +222,28 @@ # * :numeric_or_formula # * :string # Note: this is only available within the Excelx class def excelx_type(row,col,sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = normalize(row,col) return @excelx_type[sheet][[row,col]] end # returns the internal value of an excelx cell # Note: this is only available within the Excelx class def excelx_value(row,col,sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = normalize(row,col) return @excelx_value[sheet][[row,col]] end # returns the internal format of an excel cell def excelx_format(row,col,sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) row,col = normalize(row,col) s = @s_attribute[sheet][[row,col]] attribute2format(s).to_s end @@ -277,11 +256,11 @@ # shows the internal representation of all cells # for debugging purposes def to_s(sheet=nil) sheet ||= @default_sheet - read_cells(sheet) unless @cells_read[sheet] + read_cells(sheet) @cell[sheet].inspect end # returns the row,col values of the labelled cell # (nil,nil) if label is not defined @@ -289,45 +268,45 @@ read_labels if @label.empty? || !@label.has_key?(labelname) return nil,nil,nil else return @label[labelname][1].to_i, - Roo::GenericSpreadsheet.letter_to_number(@label[labelname][2]), + Roo::Base.letter_to_number(@label[labelname][2]), @label[labelname][0] end end # Returns an array which all labels. Each element is an array with # [labelname, [row,col,sheetname]] def labels # sheet ||= @default_sheet - # read_cells(sheet) unless @cells_read[sheet] + # read_cells(sheet) read_labels @label.map do |label| [ label[0], # name [ label[1][1].to_i, # row - Roo::GenericSpreadsheet.letter_to_number(label[1][2]), # column + Roo::Base.letter_to_number(label[1][2]), # column label[1][0], # sheet ] ] end end # returns the comment at (row/col) # nil if there is no comment def comment(row,col,sheet=nil) sheet ||= @default_sheet - #read_cells(sheet) unless @cells_read[sheet] + #read_cells(sheet) read_comments(sheet) unless @comments_read[sheet] row,col = normalize(row,col) return nil unless @comment[sheet] @comment[sheet][[row,col]] end # true, if there is a comment def comment?(row,col,sheet=nil) sheet ||= @default_sheet - # read_cells(sheet) unless @cells_read[sheet] + # read_cells(sheet) read_comments(sheet) unless @comments_read[sheet] row,col = normalize(row,col) comment(row,col) != nil end @@ -363,13 +342,13 @@ when :float v.to_f when :string v when :date - (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d") + (base_date+v.to_i).strftime("%Y-%m-%d") when :datetime - (DateTime.new(1899,12,30)+v.to_f).strftime("%Y-%m-%d %H:%M:%S") + (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S") when :percentage v.to_f when :time v.to_f*(24*60*60) else @@ -385,10 +364,12 @@ # read all cells in the selected sheet def read_cells(sheet=nil) sheet ||= @default_sheet validate_sheet!(sheet) + return if @cells_read[sheet] + @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c| s_attribute = c['s'].to_i # should be here # c: <c r="A5" s="2"> # <v>22606</v> # </c>, format: , tmp_type: float @@ -418,11 +399,11 @@ if is.name == 't' inlinestr_content = is.content value_type = :string v = inlinestr_content excelx_type = :string - y, x = Roo::GenericSpreadsheet.split_coordinate(c['r']) + y, x = Roo::Base.split_coordinate(c['r']) excelx_value = inlinestr_content #cell.content set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute) end end when 'f' @@ -459,11 +440,11 @@ cell.content else value_type = :float cell.content end - y, x = Roo::GenericSpreadsheet.split_coordinate(c['r']) + y, x = Roo::Base.split_coordinate(c['r']) set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute) end end end @cells_read[sheet] = true @@ -517,11 +498,11 @@ validate_sheet!(sheet) n = self.sheets.index(sheet) return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment| ref = comment.attributes['ref'].to_s - row,col = Roo::GenericSpreadsheet.split_coordinate(ref) + row,col = Roo::Base.split_coordinate(ref) comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text| @comment[sheet] ||= {} @comment[sheet][[row,col]] = text.text end end @@ -633,8 +614,25 @@ # convert internal excelx attribute to a format def attribute2format(s) id = @cellXfs[s.to_i] @numFmts[id] || Format::STANDARD_FORMATS[id.to_i] + end + + def base_date + @base_date ||= read_base_date + end + + # Default to 1900 (minus one day due to excel quirk) but use 1904 if + # it's set in the Workbook's workbookPr + # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx + def read_base_date + base_date = Date.new(1899,12,30) + @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr| + if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i + base_date = Date.new(1904,01,01) + end + end + base_date end end # class