lib/roo/excel.rb in roo-1.2.3 vs lib/roo/excel.rb in roo-1.3.5
- old
+ new
@@ -1,18 +1,35 @@
-require 'rubygems'
-gem 'parseexcel', '>= 0.5.2'
-require 'parseexcel'
-CHARGUESS = false
-require 'charguess' if CHARGUESS
+require 'spreadsheet'
+CHARGUESS = begin
+ require 'charguess'
+ true
+rescue LoadError => e
+ false
+end
-module Spreadsheet # :nodoc
- module ParseExcel
- class Worksheet
- include Enumerable
- attr_reader :min_row, :max_row, :min_col, :max_col
- end
+# ruby-spreadsheet has a font object so we're extending it
+# with our own functionality but still providing full access
+# to the user for other font information
+module ExcelFontExtensions
+ def bold?(*args)
+ #From ruby-spreadsheet doc: 100 <= weight <= 1000, bold => 700, normal => 400
+ case weight
+ when 700
+ true
+ else
+ false
+ end
end
+
+ def italic?
+ italic
+ end
+
+ def underline?
+ underline != :none
+ end
+
end
# Class for handling Excel-Spreadsheets
class Excel < GenericSpreadsheet
@@ -35,11 +52,11 @@
file_type_check(filename,'.xls','an Excel')
@filename = filename
unless File.file?(@filename)
raise IOError, "file #{@filename} does not exist"
end
- @workbook = Spreadsheet::ParseExcel.parse(filename)
+ @workbook = Spreadsheet.open(filename)
@default_sheet = nil
# no need to set default_sheet if there is only one sheet in the document
if self.sheets.size == 1
@default_sheet = self.sheets.first
end
@@ -55,29 +72,18 @@
@last_row = Hash.new
@first_column = Hash.new
@last_column = Hash.new
@header_line = 1
@cells_read = Hash.new
+ @fonts = Hash.new
end
# returns an array of sheet names in the spreadsheet
def sheets
result = []
- #0.upto(@workbook.worksheets.size - 1) do |i| # spreadsheet
- 0.upto(@workbook.sheet_count - 1) do |i| # parseexcel
- # TODO: is there a better way to do conversion?
- if CHARGUESS
- encoding = CharGuess::guess(@workbook.worksheet(i).name)
- encoding = 'unicode' unless encoding
-
-
- result << Iconv.new('utf-8',encoding).iconv(
- @workbook.worksheet(i).name
- )
- else
- result << platform_specific_iconv(@workbook.worksheet(i).name)
- end
+ @workbook.worksheets.each do |worksheet|
+ result << normalize_string(worksheet.name)
end
return result
end
# returns the content of a cell. The upper left corner is (1,1) or ('A',1)
@@ -167,10 +173,18 @@
# returns NO formulas in excel spreadsheets
def formulas(sheet=nil)
raise EXCEL_NO_FORMULAS
end
+ # Given a cell, return the cell's font
+ def font(row, col, sheet=nil)
+ sheet = @default_sheet unless sheet
+ read_cells(sheet) unless @cells_read[sheet]
+ row,col = normalize(row,col)
+ @fonts[sheet][[row,col]]
+ end
+
# shows the internal representation of all cells
# mainly for debugging purposes
def to_s(sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
@@ -216,18 +230,14 @@
end
# converts name of a sheet to index (0,1,2,..)
def sheet_no(name)
return name-1 if name.kind_of?(Fixnum)
- 0.upto(@workbook.sheet_count - 1) do |i|
- #0.upto(@workbook.worksheets.size - 1) do |i|
- # TODO: is there a better way to do conversion?
- return i if name == platform_specific_iconv(
- @workbook.worksheet(i).name)
- #Iconv.new('utf-8','unicode').iconv(
- # @workbook.worksheet(i).name
- # )
+ i = 0
+ @workbook.worksheets.each do |worksheet|
+ return i if name == normalize_string(worksheet.name)
+ i += 1
end
raise StandardError, "sheet '#{name}' not found"
end
def empty_row?(row)
@@ -247,11 +257,20 @@
content = true
end
}
! content
end
-
+
+ def normalize_string(value)
+ value = every_second_null?(value) ? remove_every_second_null(value) : value
+ if CHARGUESS && encoding = CharGuess::guess(value)
+ Iconv.new('utf-8', encoding)
+ else
+ platform_specific_iconv(value)
+ end
+ end
+
def platform_specific_iconv(value)
case RUBY_PLATFORM.downcase
when /darwin/
result = Iconv.new('utf-8','utf-8').iconv(value)
when /solaris/
@@ -289,23 +308,26 @@
end
result
end
# helper function to set the internal representation of cells
- def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v)
+ def set_cell_values(sheet,row,col,i,v,vt,formula,tr,font)
#key = "#{y},#{x+i}"
- key = [y,x+i]
+ key = [row,col+i]
@cell_type[sheet] = {} unless @cell_type[sheet]
@cell_type[sheet][key] = vt
@formula[sheet] = {} unless @formula[sheet]
@formula[sheet][key] = formula if formula
@cell[sheet] = {} unless @cell[sheet]
+ @fonts[sheet] = {} unless @fonts[sheet]
+ @fonts[sheet][key] = font
+
case vt # @cell_type[sheet][key]
when :float
@cell[sheet][key] = v.to_f
when :string
- @cell[sheet][key] = str_v
+ @cell[sheet][key] = v
when :date
@cell[sheet][key] = v
when :datetime
@cell[sheet][key] = DateTime.new(v.year,v.month,v.day,v.hour,v.min,v.sec)
when :percentage
@@ -326,66 +348,93 @@
if @cells_read[sheet]
raise "sheet #{sheet} already read"
end
worksheet = @workbook.worksheet(sheet_no(sheet))
- skip = 0
- x =1
- y=1
- i=0
- worksheet.each(skip) { |row_par|
- if row_par
- x =1
- row_par.each do # |void|
- cell = row_par.at(x-1)
- if cell
- case cell.type
- when :numeric
- vt = :float
- v = cell.to_f
- when :text
- vt = :string
- str_v = cell.to_s('utf-8')
- when :date
- if cell.to_s.to_f < 1.0
- vt = :time
- f = cell.to_s.to_f*24.0*60.0*60.0
- secs = f.round
- h = (secs / 3600.0).floor
- secs = secs - 3600*h
- m = (secs / 60.0).floor
- secs = secs - 60*m
- s = secs
- v = h*3600+m*60+s
- else
- if cell.datetime.hour != 0 or
- cell.datetime.min != 0 or
- cell.datetime.sec != 0 or
- cell.datetime.msec != 0
- vt = :datetime
- v = cell.datetime
- else
- vt = :date
- v = cell.date
- v = sprintf("%04d-%02d-%02d",v.year,v.month,v.day)
- end
- end
- else
- vt = cell.type.to_s.downcase.to_sym
- v = nil
- end # case
- formula = tr = nil #TODO:???
- set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v)
- end # if cell
-
- x += 1
+ row_index=1
+ worksheet.each(0) do |row|
+ (0..row.size).each do |cell_index|
+ cell = row.at(cell_index)
+ next if cell.nil? #skip empty cells
+ next if cell.class == Spreadsheet::Formula
+ if date_or_time?(row, cell_index)
+ vt, v = read_cell_date_or_time(row, cell_index)
+ else
+ vt, v = read_cell(row, cell_index)
end
- end
- y += 1
- }
+ formula = tr = nil #TODO:???
+ col_index = cell_index + 1
+ font = row.format(cell_index).font
+ font.extend(ExcelFontExtensions)
+ set_cell_values(sheet,row_index,col_index,0,v,vt,formula,tr,font)
+ end #row
+ row_index += 1
+ end # worksheet
@cells_read[sheet] = true
end
-
+
+ # Test the cell to see if it's a valid date/time.
+ def date_or_time?(row, idx)
+ format = row.format(idx)
+ if format.date_or_time?
+ cell = row.at(idx)
+ true if Float(cell) > 0 rescue false
+ else
+ false
+ end
+ end
+ private :date_or_time?
+
+ # Read the date-time cell and convert to,
+ # the date-time values for Roo
+ def read_cell_date_or_time(row, idx)
+ cell = row.at(idx).to_s.to_f
+ if cell < 1.0
+ value_type = :time
+ f = cell*24.0*60.0*60.0
+ secs = f.round
+ h = (secs / 3600.0).floor
+ secs = secs - 3600*h
+ m = (secs / 60.0).floor
+ secs = secs - 60*m
+ s = secs
+ value = h*3600+m*60+s
+ else
+ datetime = row.datetime(idx)
+ if datetime.hour != 0 or
+ datetime.min != 0 or
+ datetime.sec != 0
+ value_type = :datetime
+ value = datetime
+ else
+ value_type = :date
+ value = row.date(idx)
+ value = sprintf("%04d-%02d-%02d",value.year,value.month,value.day)
+ end
+ end
+ return value_type, value
+ end
+ private :read_cell_date_or_time
+
+ # Read the cell and based on the class,
+ # return the values for Roo
+ def read_cell(row, idx)
+ cell = row.at(idx)
+ case cell
+ when Float, Integer, Fixnum, Bignum
+ value_type = :float
+ value = cell.to_f
+ when String, TrueClass, FalseClass
+ value_type = :string
+ value = cell.to_s
+ else
+ value_type = cell.class.to_s.downcase.to_sym
+ value = nil
+ end # case
+ return value_type, value
+ end
+ private :read_cell
+
#TODO: testing only
# def inject_null_characters(str)
# if str.class != String
# return str
# end