lib/roo/excel.rb in roo-1.2.3 vs lib/roo/excel.rb in roo-1.3.5

- old
+ new

@@ -1,18 +1,35 @@ -require 'rubygems' -gem 'parseexcel', '>= 0.5.2' -require 'parseexcel' -CHARGUESS = false -require 'charguess' if CHARGUESS +require 'spreadsheet' +CHARGUESS = begin + require 'charguess' + true +rescue LoadError => e + false +end -module Spreadsheet # :nodoc - module ParseExcel - class Worksheet - include Enumerable - attr_reader :min_row, :max_row, :min_col, :max_col - end +# ruby-spreadsheet has a font object so we're extending it +# with our own functionality but still providing full access +# to the user for other font information +module ExcelFontExtensions + def bold?(*args) + #From ruby-spreadsheet doc: 100 <= weight <= 1000, bold => 700, normal => 400 + case weight + when 700 + true + else + false + end end + + def italic? + italic + end + + def underline? + underline != :none + end + end # Class for handling Excel-Spreadsheets class Excel < GenericSpreadsheet @@ -35,11 +52,11 @@ file_type_check(filename,'.xls','an Excel') @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end - @workbook = Spreadsheet::ParseExcel.parse(filename) + @workbook = Spreadsheet.open(filename) @default_sheet = nil # no need to set default_sheet if there is only one sheet in the document if self.sheets.size == 1 @default_sheet = self.sheets.first end @@ -55,29 +72,18 @@ @last_row = Hash.new @first_column = Hash.new @last_column = Hash.new @header_line = 1 @cells_read = Hash.new + @fonts = Hash.new end # returns an array of sheet names in the spreadsheet def sheets result = [] - #0.upto(@workbook.worksheets.size - 1) do |i| # spreadsheet - 0.upto(@workbook.sheet_count - 1) do |i| # parseexcel - # TODO: is there a better way to do conversion? - if CHARGUESS - encoding = CharGuess::guess(@workbook.worksheet(i).name) - encoding = 'unicode' unless encoding - - - result << Iconv.new('utf-8',encoding).iconv( - @workbook.worksheet(i).name - ) - else - result << platform_specific_iconv(@workbook.worksheet(i).name) - end + @workbook.worksheets.each do |worksheet| + result << normalize_string(worksheet.name) end return result end # returns the content of a cell. The upper left corner is (1,1) or ('A',1) @@ -167,10 +173,18 @@ # returns NO formulas in excel spreadsheets def formulas(sheet=nil) raise EXCEL_NO_FORMULAS end + # Given a cell, return the cell's font + def font(row, col, sheet=nil) + sheet = @default_sheet unless sheet + read_cells(sheet) unless @cells_read[sheet] + row,col = normalize(row,col) + @fonts[sheet][[row,col]] + end + # shows the internal representation of all cells # mainly for debugging purposes def to_s(sheet=nil) sheet = @default_sheet unless sheet read_cells(sheet) unless @cells_read[sheet] @@ -216,18 +230,14 @@ end # converts name of a sheet to index (0,1,2,..) def sheet_no(name) return name-1 if name.kind_of?(Fixnum) - 0.upto(@workbook.sheet_count - 1) do |i| - #0.upto(@workbook.worksheets.size - 1) do |i| - # TODO: is there a better way to do conversion? - return i if name == platform_specific_iconv( - @workbook.worksheet(i).name) - #Iconv.new('utf-8','unicode').iconv( - # @workbook.worksheet(i).name - # ) + i = 0 + @workbook.worksheets.each do |worksheet| + return i if name == normalize_string(worksheet.name) + i += 1 end raise StandardError, "sheet '#{name}' not found" end def empty_row?(row) @@ -247,11 +257,20 @@ content = true end } ! content end - + + def normalize_string(value) + value = every_second_null?(value) ? remove_every_second_null(value) : value + if CHARGUESS && encoding = CharGuess::guess(value) + Iconv.new('utf-8', encoding) + else + platform_specific_iconv(value) + end + end + def platform_specific_iconv(value) case RUBY_PLATFORM.downcase when /darwin/ result = Iconv.new('utf-8','utf-8').iconv(value) when /solaris/ @@ -289,23 +308,26 @@ end result end # helper function to set the internal representation of cells - def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v) + def set_cell_values(sheet,row,col,i,v,vt,formula,tr,font) #key = "#{y},#{x+i}" - key = [y,x+i] + key = [row,col+i] @cell_type[sheet] = {} unless @cell_type[sheet] @cell_type[sheet][key] = vt @formula[sheet] = {} unless @formula[sheet] @formula[sheet][key] = formula if formula @cell[sheet] = {} unless @cell[sheet] + @fonts[sheet] = {} unless @fonts[sheet] + @fonts[sheet][key] = font + case vt # @cell_type[sheet][key] when :float @cell[sheet][key] = v.to_f when :string - @cell[sheet][key] = str_v + @cell[sheet][key] = v when :date @cell[sheet][key] = v when :datetime @cell[sheet][key] = DateTime.new(v.year,v.month,v.day,v.hour,v.min,v.sec) when :percentage @@ -326,66 +348,93 @@ if @cells_read[sheet] raise "sheet #{sheet} already read" end worksheet = @workbook.worksheet(sheet_no(sheet)) - skip = 0 - x =1 - y=1 - i=0 - worksheet.each(skip) { |row_par| - if row_par - x =1 - row_par.each do # |void| - cell = row_par.at(x-1) - if cell - case cell.type - when :numeric - vt = :float - v = cell.to_f - when :text - vt = :string - str_v = cell.to_s('utf-8') - when :date - if cell.to_s.to_f < 1.0 - vt = :time - f = cell.to_s.to_f*24.0*60.0*60.0 - secs = f.round - h = (secs / 3600.0).floor - secs = secs - 3600*h - m = (secs / 60.0).floor - secs = secs - 60*m - s = secs - v = h*3600+m*60+s - else - if cell.datetime.hour != 0 or - cell.datetime.min != 0 or - cell.datetime.sec != 0 or - cell.datetime.msec != 0 - vt = :datetime - v = cell.datetime - else - vt = :date - v = cell.date - v = sprintf("%04d-%02d-%02d",v.year,v.month,v.day) - end - end - else - vt = cell.type.to_s.downcase.to_sym - v = nil - end # case - formula = tr = nil #TODO:??? - set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v) - end # if cell - - x += 1 + row_index=1 + worksheet.each(0) do |row| + (0..row.size).each do |cell_index| + cell = row.at(cell_index) + next if cell.nil? #skip empty cells + next if cell.class == Spreadsheet::Formula + if date_or_time?(row, cell_index) + vt, v = read_cell_date_or_time(row, cell_index) + else + vt, v = read_cell(row, cell_index) end - end - y += 1 - } + formula = tr = nil #TODO:??? + col_index = cell_index + 1 + font = row.format(cell_index).font + font.extend(ExcelFontExtensions) + set_cell_values(sheet,row_index,col_index,0,v,vt,formula,tr,font) + end #row + row_index += 1 + end # worksheet @cells_read[sheet] = true end - + + # Test the cell to see if it's a valid date/time. + def date_or_time?(row, idx) + format = row.format(idx) + if format.date_or_time? + cell = row.at(idx) + true if Float(cell) > 0 rescue false + else + false + end + end + private :date_or_time? + + # Read the date-time cell and convert to, + # the date-time values for Roo + def read_cell_date_or_time(row, idx) + cell = row.at(idx).to_s.to_f + if cell < 1.0 + value_type = :time + f = cell*24.0*60.0*60.0 + secs = f.round + h = (secs / 3600.0).floor + secs = secs - 3600*h + m = (secs / 60.0).floor + secs = secs - 60*m + s = secs + value = h*3600+m*60+s + else + datetime = row.datetime(idx) + if datetime.hour != 0 or + datetime.min != 0 or + datetime.sec != 0 + value_type = :datetime + value = datetime + else + value_type = :date + value = row.date(idx) + value = sprintf("%04d-%02d-%02d",value.year,value.month,value.day) + end + end + return value_type, value + end + private :read_cell_date_or_time + + # Read the cell and based on the class, + # return the values for Roo + def read_cell(row, idx) + cell = row.at(idx) + case cell + when Float, Integer, Fixnum, Bignum + value_type = :float + value = cell.to_f + when String, TrueClass, FalseClass + value_type = :string + value = cell.to_s + else + value_type = cell.class.to_s.downcase.to_sym + value = nil + end # case + return value_type, value + end + private :read_cell + #TODO: testing only # def inject_null_characters(str) # if str.class != String # return str # end