lib/roo/excelx.rb in roo-1.2.3 vs lib/roo/excelx.rb in roo-1.3.5
- old
+ new
@@ -1,8 +1,6 @@
-
-require 'rubygems'
-require 'rexml/document'
+require 'xml'
require 'fileutils'
require 'zip/zipfilesystem'
require 'date'
class String
@@ -100,30 +98,31 @@
end
@@nr += 1
@file_nr = @@nr
extract_content(@filename)
file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
- @workbook_doc = REXML::Document.new file
+ @workbook_doc = XML::Parser.io(file).parse
file.close
@shared_table = []
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
- @sharedstring_doc = REXML::Document.new file
+ @sharedstring_doc = XML::Parser.io(file).parse
file.close
read_shared_strings(@sharedstring_doc)
end
@styles_table = []
+ @style_definitions = Array.new { |h,k| h[k] = {} }
if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
- @styles_doc = REXML::Document.new file
+ @styles_doc = XML::Parser.io(file).parse
file.close
read_styles(@styles_doc)
end
@sheet_doc = []
@sheet_files.each_with_index do |item, i|
file = File.new(item)
- @sheet_doc[i] = REXML::Document.new file
+ @sheet_doc[i] = XML::Parser.io(file).parse
file.close
end
ensure
#if ENV["roo_local"] != "thomas-p"
FileUtils::rm_r(@tmpdir)
@@ -161,11 +160,10 @@
elsif celltype(row,col,sheet) == :datetime
date_part,time_part = @cell[sheet][[row,col]].split(' ')
yyyy,mm,dd = date_part.split('-')
hh,mi,ss = time_part.split(':')
return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
-
end
@cell[sheet][[row,col]]
end
# Returns the formula at (row,col).
@@ -187,10 +185,37 @@
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
row,col = normalize(row,col)
formula(row,col) != nil
end
+
+ class Font
+ attr_accessor :bold, :italic, :underline
+
+ def bold?
+ @bold == true
+ end
+
+ def italic?
+ @italic == true
+ end
+
+ def underline?
+ @underline == true
+ end
+ end
+
+ # Given a cell, return the cell's style
+ def font(row, col, sheet=nil)
+ sheet = @default_sheet unless sheet
+ read_cells(sheet) unless @cells_read[sheet]
+ row,col = normalize(row,col)
+ s_attribute = @s_attribute[sheet][[row,col]]
+ s_attribute ||= 0
+ s_attribute = s_attribute.to_i
+ @style_definitions[s_attribute]
+ end
# set a cell to a certain value
# (this will not be saved back to the spreadsheet file!)
def set(row,col,value,sheet=nil) #:nodoc:
sheet = @default_sheet unless sheet
@@ -258,22 +283,15 @@
end
# returns an array of sheet names in the spreadsheet
def sheets
return_sheets = []
- @workbook_doc.each_element do |workbook|
- workbook.each_element do |el|
- if el.name == "sheets"
- el.each_element do |sheet|
- return_sheets << sheet.attributes['name']
- end
- end
- end
+ @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
+ return_sheets << sheet.attributes.to_h['name']
end
return_sheets
end
-
# shows the internal representation of all cells
# for debugging purposes
def to_s(sheet=nil)
sheet = @default_sheet unless sheet
read_cells(sheet) unless @cells_read[sheet]
@@ -374,86 +392,66 @@
sheet = @default_sheet unless sheet
sheet_found = false
raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
raise RangeError unless self.sheets.include? sheet
n = self.sheets.index(sheet)
- @sheet_doc[n].each_element do |worksheet|
- worksheet.each_element do |elem|
- if elem.name == 'sheetData'
- elem.each_element do |sheetdata|
- if sheetdata.name == 'row'
- sheetdata.each_element do |row|
- if row.name == 'c'
- if row.attributes['t'] == 's'
- tmp_type = :shared
- else
- s_attribute = row.attributes['s']
- format = attribute2format(s_attribute)
- tmp_type = format2type(format)
- end
- formula = nil
- row.each_element do |cell|
-# puts "cell.name: #{cell.name}" if cell.text.include? "22606.5120"
-# puts "cell.text: #{cell.text}" if cell.text.include? "22606.5120"
- if cell.name == 'f'
- formula = cell.text
- end
- if cell.name == 'v'
- #puts "tmp_type: #{tmp_type}" if cell.text.include? "22606.5120"
- #puts cell.name
- if tmp_type == :time or tmp_type == :datetime #2008-07-26
- #p cell.text
- # p cell.text.to_f if cell.text.include? "22606.5120"
- if cell.text.to_f >= 1.0 # 2008-07-26
- # puts ">= 1.0" if cell.text.include? "22606.5120"
- # puts "cell.text.to_f: #{cell.text.to_f}" if cell.text.include? "22606.5120"
- #puts "cell.text.to_f.floor: #{cell.text.to_f.floor}" if cell.text.include? "22606.5120"
- if (cell.text.to_f - cell.text.to_f.floor).abs > 0.000001 #TODO:
- # puts "abs ist groesser" if cell.text.include? "22606.5120"
- # @cell[sheet][key] = DateTime.parse(tr.attributes['date-value'])
- tmp_type = :datetime
-
- else
- #puts ":date"
- tmp_type = :date # 2008-07-26
- end
- else
- #puts "<1.0"
- end # 2008-07-26
- end # 2008-07-26
- excelx_type = [:numeric_or_formula,format]
- excelx_value = cell.text
- if tmp_type == :shared
- vt = :string
- str_v = @shared_table[cell.text.to_i]
- excelx_type = :string
- elsif tmp_type == :date
- vt = :date
- v = cell.text
- elsif tmp_type == :time
- vt = :time
- v = cell.text
- elsif tmp_type == :datetime
- vt = :datetime
- v = cell.text
- elsif tmp_type == :formula
- vt = :formula
- v = cell.text.to_f #TODO: !!!!
- else
- vt = :float
- v = cell.text
- end
- #puts "vt: #{vt}" if cell.text.include? "22606.5120"
- x,y = split_coordinate(row.attributes['r'])
- tr=nil #TODO: ???s
- set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
- end
- end
- end
+ @sheet_doc[n].find("//*[local-name()='c']").each do |c|
+ s_attribute = c.attributes.to_h['s'].to_i # should be here
+ if (c.attributes.to_h['t'] == 's')
+ tmp_type = :shared
+ elsif (c.attributes.to_h['t'] == 'b')
+ tmp_type = :boolean
+ else
+ # s_attribute = c.attributes.to_h['s'].to_i # was here
+ format = attribute2format(s_attribute)
+ tmp_type = format2type(format)
+ end
+ formula = nil
+ c.each_element do |cell|
+ if cell.name == 'f'
+ formula = cell.content
+ end
+ if cell.name == 'v'
+ if tmp_type == :time or tmp_type == :datetime
+ if cell.content.to_f >= 1.0
+ if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
+ tmp_type = :datetime
+ else
+ tmp_type = :date
end
- end
+ else
+ end
end
+ excelx_type = [:numeric_or_formula,format]
+ excelx_value = cell.content
+ if tmp_type == :shared
+ vt = :string
+ str_v = @shared_table[cell.content.to_i]
+ excelx_type = :string
+ elsif tmp_type == :boolean
+ vt = :boolean
+ cell.content.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
+ elsif tmp_type == :date
+ vt = :date
+ v = cell.content
+ elsif tmp_type == :time
+ vt = :time
+ v = cell.content
+ elsif tmp_type == :datetime
+ vt = :datetime
+ v = cell.content
+ elsif tmp_type == :formula
+ vt = :formula
+ v = cell.content.to_f #TODO: !!!!
+ else
+ vt = :float
+ v = cell.content
+ end
+ #puts "vt: #{vt}" if cell.text.include? "22606.5120"
+ x,y = split_coordinate(c.attributes.to_h['r'])
+ tr=nil #TODO: ???s
+ set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
end
end
end
sheet_found = true #TODO:
if !sheet_found
@@ -465,21 +463,13 @@
# Checks if the default_sheet exists. If not an RangeError exception is
# raised
def check_default_sheet
sheet_found = false
raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
- @workbook_doc.each_element do |workbook|
- workbook.each_element do |el|
- if el.name == "sheets"
- el.each_element do |sheet|
- if @default_sheet == sheet.attributes['name']
- sheet_found = true
- end
- end
- end
- end
- end
+
+ sheet_found = true if sheets.include?(@default_sheet)
+
if ! sheet_found
raise RangeError, "sheet '#{@default_sheet}' not found"
end
end
@@ -535,49 +525,64 @@
@cell_type[sheet][[row,col]] = type
end
# read the shared strings xml document
def read_shared_strings(doc)
- doc.each_element do |sst|
- if sst.name == 'sst'
- sst.each_element do |si|
- if si.name == 'si'
- si.each_element do |elem|
- if elem.name == 't'
- @shared_table << elem.text
- end
+ doc.find("//*[local-name()='si']").each do |si|
+ shared_table_entry = ''
+ si.each_element do |elem|
+ if (elem.name == 'r')
+ elem.each_element do |r_elem|
+ if (r_elem.name == 't')
+ shared_table_entry << r_elem.content
end
end
end
+ if (elem.name == 't')
+ shared_table_entry = elem.content
+ end
end
+ @shared_table << shared_table_entry
end
end
# read the styles elements of an excelx document
def read_styles(doc)
@numFmts = []
@cellXfs = []
- doc.each_element do |e1|
- if e1.name == "styleSheet"
- e1.each_element do |e2|
- if e2.name == "numFmts"
- e2.each_element do |e3|
- if e3.name == 'numFmt'
- numFmtId = e3.attributes['numFmtId']
- formatCode = e3.attributes['formatCode']
- @numFmts << [numFmtId, formatCode]
+ fonts = []
+
+ doc.find("//*[local-name()='numFmt']").each do |numFmt|
+ numFmtId = numFmt.attributes.to_h['numFmtId']
+ formatCode = numFmt.attributes.to_h['formatCode']
+ @numFmts << [numFmtId, formatCode]
+ end
+ doc.find("//*[local-name()='fonts']").each do |fonts_el|
+ fonts_el.each_element do |font_el|
+ if font_el.name == 'font'
+ font = Excelx::Font.new
+ font_el.each_element do |font_sub_el|
+ case font_sub_el.name
+ when 'b'
+ font.bold = true
+ when 'i'
+ font.italic = true
+ when 'u'
+ font.underline = true
end
- end
- elsif e2.name == "cellXfs"
- e2.each_element do |e3|
- if e3.name == 'xf'
- numFmtId = e3.attributes['numFmtId']
- @cellXfs << [numFmtId]
- end
- end
end
+ fonts << font
end
end
+ end
+
+ doc.find("//*[local-name()='cellXfs']").each do |xfs|
+ xfs.each do |xf|
+ numFmtId = xf.attributes.to_h['numFmtId']
+ @cellXfs << [numFmtId]
+ fontId = xf.attributes.to_h['fontId'].to_i
+ @style_definitions << fonts[fontId]
+ end
end
end
# convert internal excelx attribute to a format
def attribute2format(s)