require 'fileutils'
require 'zip/zipfilesystem'
require 'date'
require 'rubygems'
require 'nokogiri'
if RUBY_VERSION < '1.9.0'
class String
def end_with?(str)
self[-str.length,str.length] == str
end
end
end
class Excelx < GenericSpreadsheet
FORMATS = {
'General' => :float,
'0' => :float,
'0.00' => :float,
'#,##0' => :float,
'#,##0.00' => :float,
'0%' => :percentage,
'0.00%' => :percentage,
'0.00E+00' => :float,
'# ?/?' => :float, #??? TODO:
'# ??/??' => :float, #??? TODO:
'mm-dd-yy' => :date,
'd-mmm-yy' => :date,
'd-mmm' => :date,
'mmm-yy' => :date,
'h:mm AM/PM' => :date,
'h:mm:ss AM/PM' => :date,
'h:mm' => :time,
'h:mm:ss' => :time,
'm/d/yy h:mm' => :date,
'#,##0 ;(#,##0)' => :float,
'#,##0 ;[Red](#,##0)' => :float,
'#,##0.00;(#,##0.00)' => :float,
'#,##0.00;[Red](#,##0.00)' => :float,
'mm:ss' => :time,
'[h]:mm:ss' => :time,
'mmss.0' => :time,
'##0.0E+0' => :float,
'@' => :float,
#-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
"yyyy\\-mm\\-dd" => :date,
'dd/mm/yy' => :date,
'hh:mm:ss' => :time,
"dd/mm/yy\\ hh:mm" => :datetime,
'dd/mmm/yy' => :date, # 2011-05-21
'yyyy-mm-dd' => :date, # 2011-09-16
# was used in a spreadsheet file from a windows phone
}
STANDARD_FORMATS = {
0 => 'General',
1 => '0',
2 => '0.00',
3 => '#,##0',
4 => '#,##0.00',
9 => '0%',
10 => '0.00%',
11 => '0.00E+00',
12 => '# ?/?',
13 => '# ??/??',
14 => 'mm-dd-yy',
15 => 'd-mmm-yy',
16 => 'd-mmm',
17 => 'mmm-yy',
18 => 'h:mm AM/PM',
19 => 'h:mm:ss AM/PM',
20 => 'h:mm',
21 => 'h:mm:ss',
22 => 'm/d/yy h:mm',
37 => '#,##0 ;(#,##0)',
38 => '#,##0 ;[Red](#,##0)',
39 => '#,##0.00;(#,##0.00)',
40 => '#,##0.00;[Red](#,##0.00)',
45 => 'mm:ss',
46 => '[h]:mm:ss',
47 => 'mmss.0',
48 => '##0.0E+0',
49 => '@',
}
@@nr = 0
# initialization and opening of a spreadsheet file
# values for packed: :zip
def initialize(filename, packed=nil, file_warning = :error) #, create = false)
super()
@file_warning = file_warning
file_type_check(filename,'.xlsx','an Excel-xlsx',packed)
@tmpdir = GenericSpreadsheet.next_tmpdir
@tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP']
unless File.exists?(@tmpdir)
FileUtils::mkdir(@tmpdir)
end
filename = open_from_uri(filename) if filename[0,7] == "http://"
filename = unzip(filename) if packed and packed == :zip
@cells_read = Hash.new
@filename = filename
unless File.file?(@filename)
FileUtils::rm_r(@tmpdir)
raise IOError, "file #{@filename} does not exist"
end
@@nr += 1
@file_nr = @@nr
@comments_files = Array.new
extract_content(@filename)
file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
@workbook_doc = Nokogiri::XML(file)
file.close
@shared_table = []
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
@sharedstring_doc = Nokogiri::XML(file)
file.close
read_shared_strings(@sharedstring_doc)
end
@styles_table = []
@style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
@styles_doc = Nokogiri::XML(file)
file.close
read_styles(@styles_doc)
end
@sheet_doc = []
@sheet_files.each_with_index do |item, i|
file = File.new(item)
@sheet_doc[i] = Nokogiri::XML(file)
file.close
end
@comments_doc = []
@comments_files.each_with_index do |item, i|
file = File.new(item)
@comments_doc[i] = Nokogiri::XML(file)
file.close
end
FileUtils::rm_r(@tmpdir)
@default_sheet = self.sheets.first
@cell = Hash.new
@cell_type = Hash.new
@formula = Hash.new
@first_row = Hash.new
@last_row = Hash.new
@first_column = Hash.new
@last_column = Hash.new
@header_line = 1
@excelx_type = Hash.new
@excelx_value = Hash.new
@s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
@label = Hash.new
@labels_read = false
@comment = Hash.new
@comments_read = Hash.new
end
def method_missing(m,*args)
# is method name a label name
read_labels unless @labels_read
if @label.has_key?(m.to_s)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = label(m.to_s)
cell(row,col)
else
# call super for methods like #a1
super
end
end
# Returns the content of a spreadsheet-cell.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
# cell at the first line and first row.
def cell(row, col, sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
if celltype(row,col,sheet) == :date
yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
elsif celltype(row,col,sheet) == :datetime
date_part,time_part = @cell[sheet][[row,col]].split(' ')
yyyy,mm,dd = date_part.split('-')
hh,mi,ss = time_part.split(':')
return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
end
@cell[sheet][[row,col]]
end
# Returns the formula at (row,col).
# Returns nil if there is no formula.
# The method #formula? checks if there is a formula.
def formula(row,col,sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
if @formula[sheet][[row,col]] == nil
return nil
else
return @formula[sheet][[row,col]]
end
end
# true, if there is a formula
def formula?(row,col,sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
formula(row,col) != nil
end
# returns each formula in the selected sheet as an array of elements
# [row, col, formula]
def formulas(sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
if @formula[sheet]
@formula[sheet].each.collect do |elem|
[elem[0][0], elem[0][1], elem[1]]
end
else
[]
end
end
class Font
attr_accessor :bold, :italic, :underline
def bold?
@bold == true
end
def italic?
@italic == true
end
def underline?
@underline == true
end
end
# Given a cell, return the cell's style
def font(row, col, sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
s_attribute = @s_attribute[sheet][[row,col]]
s_attribute ||= 0
s_attribute = s_attribute.to_i
@style_definitions[s_attribute]
end
# set a cell to a certain value
# (this will not be saved back to the spreadsheet file!)
def set(row,col,value,sheet=nil) #:nodoc:
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
set_value(row,col,value,sheet)
if value.class == Fixnum
set_type(row,col,:float,sheet)
elsif value.class == String
set_type(row,col,:string,sheet)
elsif value.class == Float
set_type(row,col,:string,sheet)
else
raise ArgumentError, "Type for "+value.to_s+" not set"
end
end
# returns the type of a cell:
# * :float
# * :string,
# * :date
# * :percentage
# * :formula
# * :time
# * :datetime
def celltype(row,col,sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
if @formula[sheet][[row,col]]
return :formula
else
@cell_type[sheet][[row,col]]
end
end
# returns the internal type of an excel cell
# * :numeric_or_formula
# * :string
# Note: this is only available within the Excelx class
def excelx_type(row,col,sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
return @excelx_type[sheet][[row,col]]
end
# returns the internal value of an excelx cell
# Note: this is only available within the Excelx class
def excelx_value(row,col,sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
return @excelx_value[sheet][[row,col]]
end
# returns the internal format of an excel cell
def excelx_format(row,col,sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
s = @s_attribute[sheet][[row,col]]
result = attribute2format(s).to_s
result
end
# returns an array of sheet names in the spreadsheet
def sheets
return_sheets = []
@workbook_doc.xpath("//*[local-name()='sheet']").each do |sheet|
return_sheets << sheet['name']
end
return_sheets
end
# shows the internal representation of all cells
# for debugging purposes
def to_s(sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
@cell[sheet].inspect
end
# returns the row,col values of the labelled cell
# (nil,nil) if label is not defined
def label(labelname)
read_labels unless @labels_read
unless @label.size > 0
return nil,nil,nil
end
if @label.has_key? labelname
return @label[labelname][1].to_i,
GenericSpreadsheet.letter_to_number(@label[labelname][2]),
@label[labelname][0]
else
return nil,nil,nil
end
end
# Returns an array which all labels. Each element is an array with
# [labelname, [sheetname,row,col]]
def labels
# sheet = @default_sheet unless sheet
# read_cells(sheet) unless @cells_read[sheet]
read_labels unless @labels_read
result = []
@label.each do |label|
result << [ label[0], # name
[ label[1][1].to_i, # row
GenericSpreadsheet.letter_to_number(label[1][2]), # column
label[1][0], # sheet
] ]
end
result
end
# returns the comment at (row/col)
# nil if there is no comment
def comment(row,col,sheet=nil)
sheet = @default_sheet unless sheet
#read_cells(sheet) unless @cells_read[sheet]
read_comments(sheet) unless @comments_read[sheet]
row,col = normalize(row,col)
return nil unless @comment[sheet]
@comment[sheet][[row,col]]
end
# true, if there is a comment
def comment?(row,col,sheet=nil)
sheet = @default_sheet unless sheet
# read_cells(sheet) unless @cells_read[sheet]
read_comments(sheet) unless @comments_read[sheet]
row,col = normalize(row,col)
comment(row,col) != nil
end
# returns each comment in the selected sheet as an array of elements
# [row, col, comment]
def comments(sheet=nil)
sheet = @default_sheet unless sheet
read_comments(sheet) unless @comments_read[sheet]
if @comment[sheet]
@comment[sheet].each.collect do |elem|
[elem[0][0],elem[0][1],elem[1]]
end
else
[]
end
end
private
# helper function to set the internal representation of cells
def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
excelx_type=nil,
excelx_value=nil,
s_attribute=nil)
key = [y,x+i]
@cell_type[sheet] = {} unless @cell_type[sheet]
@cell_type[sheet][key] = vt
@formula[sheet] = {} unless @formula[sheet]
@formula[sheet][key] = formula if formula
@cell[sheet] = {} unless @cell[sheet]
case @cell_type[sheet][key]
when :float
@cell[sheet][key] = v.to_f
when :string
@cell[sheet][key] = str_v
when :date
@cell[sheet][key] = (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d")
when :datetime
@cell[sheet][key] = (DateTime.new(1899,12,30)+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
when :percentage
@cell[sheet][key] = v.to_f
when :time
@cell[sheet][key] = v.to_f*(24*60*60)
else
@cell[sheet][key] = v
end
@excelx_type[sheet] = {} unless @excelx_type[sheet]
@excelx_type[sheet][key] = excelx_type
@excelx_value[sheet] = {} unless @excelx_value[sheet]
@excelx_value[sheet][key] = excelx_value
@s_attribute[sheet] = {} unless @s_attribute[sheet]
@s_attribute[sheet][key] = s_attribute
end
def format2type(format)
format = format.to_s # weil von Typ Nokogiri::XML::Attr
if FORMATS.has_key? format
FORMATS[format]
else
:float
end
end
# read all cells in the selected sheet
def read_cells(sheet=nil)
sheet = @default_sheet unless sheet
sheet_found = false
raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
raise RangeError unless self.sheets.include? sheet
n = self.sheets.index(sheet)
@sheet_doc[n].xpath("//*[local-name()='c']").each do |c|
s_attribute = c['s'].to_i # should be here
# c:
# 22606
# , format: , tmp_type: float
if c['t'] == 's'
tmp_type = :shared
elsif c['t'] == 'b'
tmp_type = :boolean
# 2011-02-25 BEGIN
elsif c['t'] == 'str'
tmp_type = :string
# 2011-02-25 END
# 2011-09-15 BEGIN
elsif c['t'] == 'inlineStr'
tmp_type = :inlinestr
# 2011-09-15 END
else
s_attribute = c['s'].to_i
format = attribute2format(s_attribute)
tmp_type = format2type(format)
end
formula = nil
c.children.each do |cell|
# 2011-09-15 BEGIN
if cell.name == 'is'
cell.children.each do |is|
if is.name == 't'
inlinestr_content = is.content
vt = :string
str_v = inlinestr_content
excelx_type = :string
y, x = GenericSpreadsheet.split_coordinate(c['r'])
v = nil
tr=nil #TODO: ???s
excelx_value = inlinestr_content #cell.content
set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
end
end
end
# 2011-09-15 END
if cell.name == 'f'
formula = cell.content
end
if cell.name == 'v'
if tmp_type == :time or tmp_type == :datetime
if cell.content.to_f >= 1.0
if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
tmp_type = :datetime
else
tmp_type = :date
end
else
end
end
excelx_type = [:numeric_or_formula,format.to_s]
excelx_value = cell.content
if tmp_type == :shared
vt = :string
str_v = @shared_table[cell.content.to_i]
excelx_type = :string
elsif tmp_type == :boolean
vt = :boolean
cell.content.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
elsif tmp_type == :date
vt = :date
v = cell.content
elsif tmp_type == :time
vt = :time
v = cell.content
elsif tmp_type == :datetime
vt = :datetime
v = cell.content
elsif tmp_type == :formula
vt = :formula
v = cell.content.to_f #TODO: !!!!
# 2011-02-25 BEGIN
elsif tmp_type == :string
vt = :string
str_v = cell.content
excelx_type = :string
# 2011-02-25 END
else
vt = :float
v = cell.content
end
y, x = GenericSpreadsheet.split_coordinate(c['r'])
tr=nil #TODO: ???s
set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
end
end
end
sheet_found = true #TODO:
if !sheet_found
raise RangeError
end
@cells_read[sheet] = true
# begin comments
=begin
Datei xl/comments1.xml
Kommentar fuer B4
Kommentar fuer B5
=end
=begin
if @comments_doc[self.sheets.index(sheet)]
read_comments(sheet)
end
=end
#end comments
end
# Reads all comments from a sheet
def read_comments(sheet=nil)
sheet = @default_sheet unless sheet
#sheet_found = false
raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
raise RangeError unless self.sheets.include? sheet
n = self.sheets.index(sheet)
return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
@comments_doc[n].xpath("//*[local-name()='comments']").each do |comment|
comment.children.each do |cc|
if cc.name == 'commentList'
cc.children.each do |commentlist|
if commentlist.name == 'comment'
ref = commentlist.attributes['ref'].to_s
row,col = GenericSpreadsheet.split_coordinate(ref)
commentlist.children.each do |clc|
if clc.name == 'text'
clc.children.each do |text|
if text.name == 'r'
text.children.each do |r|
if r.name == 't'
comment = r.text
@comment[sheet] = Hash.new unless @comment[sheet]
@comment[sheet][[row,col]] = comment
end
end
end
end
end
end
end
end
end
end
end
@comments_read[sheet] = true
end
def read_labels
@workbook_doc.xpath("//*[local-name()='definedName']").each do |defined_name|
# "Sheet1!$C$5"
sheet = defined_name.text.split('!').first
coordinates = defined_name.text.split('!')[1]
dummy,col,row = coordinates.split('$')
@label[defined_name['name']] = [sheet,row,col]
end
@labels_read = true
end
# Checks if the default_sheet exists. If not an RangeError exception is
# raised
def check_default_sheet
sheet_found = false
raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
sheet_found = true if sheets.include?(@default_sheet)
if ! sheet_found
raise RangeError, "sheet '#{@default_sheet}' not found"
end
end
# Extracts all needed files from the zip file
def process_zipfile(zipfilename, zip, path='')
@sheet_files = []
Zip::ZipFile.open(zipfilename) {|zf|
zf.entries.each {|entry|
if entry.to_s.end_with?('workbook.xml')
open(@tmpdir+'/'+@file_nr.to_s+'_roo_workbook.xml','wb') {|f|
f << zip.read(entry)
}
end
# if entry.to_s.end_with?('sharedStrings.xml')
# at least one application creates this file with another (incorrect?)
# casing. It doesn't hurt, if we ignore here the correct casing - there
# won't be both names in the archive.
# Changed the casing of all the following filenames.
if entry.to_s.downcase.end_with?('sharedstrings.xml')
open(@tmpdir+'/'+@file_nr.to_s+'_roo_sharedStrings.xml','wb') {|f|
f << zip.read(entry)
}
end
if entry.to_s.downcase.end_with?('styles.xml')
open(@tmpdir+'/'+@file_nr.to_s+'_roo_styles.xml','wb') {|f|
f << zip.read(entry)
}
end
if entry.to_s.downcase =~ /sheet([0-9]+).xml$/
nr = $1
open(@tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}",'wb') {|f|
f << zip.read(entry)
}
@sheet_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}"
end
if entry.to_s.downcase =~ /comments([0-9]+).xml$/
nr = $1
open(@tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}",'wb') {|f|
f << zip.read(entry)
}
@comments_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_comments#{nr}"
end
}
}
# return
end
# extract files from the zip file
def extract_content(zipfilename)
Zip::ZipFile.open(@filename) do |zip|
process_zipfile(zipfilename,zip)
end
end
# sets the value of a cell
def set_value(row,col,value,sheet=nil)
sheet = @default_value unless sheet
@cell[sheet][[row,col]] = value
end
# sets the type of a cell
def set_type(row,col,type,sheet=nil)
sheet = @default_value unless sheet
@cell_type[sheet][[row,col]] = type
end
# read the shared strings xml document
def read_shared_strings(doc)
doc.xpath("//*[local-name()='si']").each do |si|
shared_table_entry = ''
si.children.each do |elem|
if elem.name == 'r' and elem.children
elem.children.each do |r_elem|
if r_elem.name == 't'
shared_table_entry << r_elem.content
end
end
end
if elem.name == 't'
shared_table_entry = elem.content
end
end
@shared_table << shared_table_entry
end
end
# read the styles elements of an excelx document
def read_styles(doc)
@numFmts = []
@cellXfs = []
fonts = []
doc.xpath("//*[local-name()='numFmt']").each do |numFmt|
numFmtId = numFmt.attributes['numFmtId']
formatCode = numFmt.attributes['formatCode']
@numFmts << [numFmtId, formatCode]
end
doc.xpath("//*[local-name()='fonts']").each do |fonts_el|
fonts_el.children.each do |font_el|
if font_el == 'font'
font = Excelx::Font.new
font_el.each_element do |font_sub_el|
case font_sub_el.name
when 'b'
font.bold = true
when 'i'
font.italic = true
when 'u'
font.underline = true
end
end
fonts << font
end
end
end
doc.xpath("//*[local-name()='cellXfs']").each do |xfs|
xfs.children.each do |xf|
numFmtId = xf['numFmtId']
@cellXfs << [numFmtId]
fontId = xf['fontId'].to_i
@style_definitions << fonts[fontId]
end
end
end
# convert internal excelx attribute to a format
def attribute2format(s)
result = nil
@numFmts.each {|nf|
# to_s weil das eine Nokogiri::XML::Attr und das
# andere ein String ist
if nf.first.to_s == @cellXfs[s.to_i].first
result = nf[1]
break
end
}
unless result
id = @cellXfs[s.to_i].first.to_i
if STANDARD_FORMATS.has_key? id
result = STANDARD_FORMATS[id]
end
end
result
end
end # class