lib/roo/excelx.rb in roo-1.13.2 vs lib/roo/excelx.rb in roo-2.0.0beta1
- old
+ new
@@ -1,10 +1,20 @@
require 'date'
require 'nokogiri'
-require 'spreadsheet'
+require 'roo/link'
+require 'roo/utils'
+require 'zip/filesystem'
class Roo::Excelx < Roo::Base
+ autoload :Workbook, 'roo/excelx/workbook'
+ autoload :SharedStrings, 'roo/excelx/shared_strings'
+ autoload :Styles, 'roo/excelx/styles'
+
+ autoload :Relationships, 'roo/excelx/relationships'
+ autoload :Comments, 'roo/excelx/comments'
+ autoload :SheetDoc, 'roo/excelx/sheet_doc'
+
module Format
EXCEPTIONAL_FORMATS = {
'h:mm am/pm' => :date,
'h:mm:ss am/pm' => :date,
}
@@ -44,11 +54,11 @@
format = format.to_s.downcase
if type = EXCEPTIONAL_FORMATS[format]
type
elsif format.include?('#')
:float
- elsif format.include?('d') || format.include?('y')
+ elsif !format.match(/d+(?![\]])/).nil? || format.include?('y')
if format.include?('h') || format.include?('s')
:datetime
else
:date
end
@@ -62,144 +72,318 @@
end
module_function :to_type
end
- # initialization and opening of a spreadsheet file
- # values for packed: :zip
- def initialize(filename, options = {}, deprecated_file_warning = :error)
- if Hash === options
- packed = options[:packed]
- file_warning = options[:file_warning] || :error
- else
- warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprecated. Use an options hash instead.'
- packed = options
- file_warning = deprecated_file_warning
+ class Cell
+ attr_reader :type, :formula, :value, :excelx_type, :excelx_value, :style, :hyperlink, :coordinate
+ attr_writer :value
+
+ def initialize(value, type, formula, excelx_type, excelx_value, style, hyperlink, base_date, coordinate)
+ @type = type
+ @formula = formula
+ @base_date = base_date if [:date, :datetime].include?(@type)
+ @excelx_type = excelx_type
+ @excelx_value = excelx_value
+ @style = style
+ @value = type_cast_value(value)
+ @value = Roo::Link.new(hyperlink, @value.to_s) if hyperlink
+ @coordinate = coordinate
end
- file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
- make_tmpdir do |tmpdir|
- filename = download_uri(filename, tmpdir) if uri?(filename)
- filename = unzip(filename, tmpdir) if packed == :zip
- @filename = filename
- unless File.file?(@filename)
- raise IOError, "file #{@filename} does not exist"
+ def type
+ if @formula
+ :formula
+ elsif @value.is_a?(Roo::Link)
+ :link
+ else
+ @type
end
- @comments_files = Array.new
- @rels_files = Array.new
- extract_content(tmpdir, @filename)
- @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml"))
- @shared_table = []
- if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml'))
- @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml'))
- read_shared_strings(@sharedstring_doc)
+ end
+
+ class Coordinate
+ attr_accessor :row, :column
+
+ def initialize(row, column)
+ @row, @column = row, column
end
- @styles_table = []
- @style_definitions = Array.new # TODO: ??? { |h,k| h[k] = {} }
- if File.exist?(File.join(tmpdir, 'roo_styles.xml'))
- @styles_doc = load_xml(File.join(tmpdir, 'roo_styles.xml'))
- read_styles(@styles_doc)
+ end
+
+ private
+
+ def type_cast_value(value)
+ case @type
+ when :float, :percentage
+ value.to_f
+ when :date
+ yyyy,mm,dd = (@base_date+value.to_i).strftime("%Y-%m-%d").split('-')
+ Date.new(yyyy.to_i,mm.to_i,dd.to_i)
+ when :datetime
+ create_datetime_from((@base_date+value.to_f.round(6)).strftime("%Y-%m-%d %H:%M:%S.%N"))
+ when :time
+ value.to_f*(24*60*60)
+ when :string
+ value
+ else
+ value
end
- @sheet_doc = load_xmls(@sheet_files)
- @comments_doc = load_xmls(@comments_files)
- @rels_doc = load_xmls(@rels_files)
end
- super(filename, options)
- @formula = Hash.new
- @excelx_type = Hash.new
- @excelx_value = Hash.new
- @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
- @comment = Hash.new
- @comments_read = Hash.new
- @hyperlink = Hash.new
- @hyperlinks_read = Hash.new
+
+ def create_datetime_from(datetime_string)
+ date_part,time_part = round_time_from(datetime_string).split(' ')
+ yyyy,mm,dd = date_part.split('-')
+ hh,mi,ss = time_part.split(':')
+ DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
+ end
+
+ def round_time_from(datetime_string)
+ date_part,time_part = datetime_string.split(' ')
+ yyyy,mm,dd = date_part.split('-')
+ hh,mi,ss = time_part.split(':')
+ Time.new(yyyy.to_i, mm.to_i, dd.to_i, hh.to_i, mi.to_i, ss.to_r).round(0).strftime("%Y-%m-%d %H:%M:%S")
+ end
end
- def method_missing(m,*args)
- # is method name a label name
- read_labels
- if @label.has_key?(m.to_s)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = label(m.to_s)
- cell(row,col)
+ class Sheet
+ def initialize(name, rels_path, sheet_path, comments_path, styles, shared_strings, workbook, options = {})
+ @name = name
+ @rels = Relationships.new(rels_path)
+ @comments = Comments.new(comments_path)
+ @styles = styles
+ @sheet = SheetDoc.new(sheet_path, @rels, @styles, shared_strings, workbook, options)
+ end
+
+ def cells
+ @cells ||= @sheet.cells(@rels)
+ end
+
+ def present_cells
+ @present_cells ||= cells.select {|key, cell| cell && cell.value }
+ end
+
+ # Yield each row as array of Excelx::Cell objects
+ # accepts options max_rows (int) (offset by 1 for header)
+ # and pad_cells (boolean)
+ def each_row(options = {}, &block)
+ row_count = 0
+ @sheet.each_row_streaming do |row|
+ break if options[:max_rows] && row_count == options[:max_rows] + 1
+ block.call(cells_for_row_element(row, options)) if block_given?
+ row_count += 1
+ end
+ end
+
+ def row(row_number)
+ first_column.upto(last_column).map do |col|
+ cells[[row_number,col]]
+ end.map {|cell| cell && cell.value }
+ end
+
+ def column(col_number)
+ first_row.upto(last_row).map do |row|
+ cells[[row,col_number]]
+ end.map {|cell| cell && cell.value }
+ end
+
+ # returns the number of the first non-empty row
+ def first_row
+ @first_row ||= present_cells.keys.map {|row, _| row }.min
+ end
+
+ def last_row
+ @last_row ||= present_cells.keys.map {|row, _| row }.max
+ end
+
+ # returns the number of the first non-empty column
+ def first_column
+ @first_column ||= present_cells.keys.map {|_, col| col }.min
+ end
+
+ # returns the number of the last non-empty column
+ def last_column
+ @last_column ||= present_cells.keys.map {|_, col| col }.max
+ end
+
+ def excelx_format(key)
+ cell = cells[key]
+ @styles.style_format(cell.style).to_s if cell
+ end
+
+ def hyperlinks
+ @hyperlinks ||= @sheet.hyperlinks(@rels)
+ end
+
+ def comments
+ @comments.comments
+ end
+
+ def dimensions
+ @sheet.dimensions
+ end
+
+ private
+
+ # Take an xml row and return an array of Excelx::Cell objects
+ # optionally pad array to header width(assumed 1st row).
+ # takes option pad_cells (boolean) defaults false
+ def cells_for_row_element(row_element, options = {})
+ return [] unless row_element
+ cell_col = 0
+ cells = []
+ @sheet.each_cell(row_element) do |cell|
+ cells.concat(pad_cells(cell, cell_col)) if options[:pad_cells]
+ cells << cell
+ cell_col = cell.coordinate.column
+ end
+ cells
+ end
+
+ def pad_cells(cell, last_column)
+ pad = []
+ (cell.coordinate.column - 1 - last_column).times { pad << nil }
+ pad
+ end
+ end
+
+ ExceedsMaxError = Class.new(StandardError)
+
+ # initialization and opening of a spreadsheet file
+ # values for packed: :zip
+ # optional cell_max (int) parameter for early aborting attempts to parse
+ # enormous documents.
+ def initialize(filename, options = {})
+ packed = options[:packed]
+ file_warning = options.fetch(:file_warning, :error)
+ cell_max = options.delete(:cell_max)
+ sheet_options = {}
+ sheet_options[:expand_merged_ranges] = (options[:expand_merged_ranges] || false)
+
+ file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed)
+
+ @tmpdir = make_tmpdir(filename.split('/').last, options[:tmpdir_root])
+ @filename = local_filename(filename, @tmpdir, packed)
+ @comments_files = []
+ @rels_files = []
+ process_zipfile(@tmpdir, @filename)
+
+ @sheet_names = workbook.sheets.map do |sheet|
+ unless options[:only_visible_sheets] && sheet['state'] == 'hidden'
+ sheet['name']
+ end
+ end.compact
+ @sheets = []
+ @sheets_by_name = Hash[@sheet_names.map.with_index do |sheet_name, n|
+ @sheets[n] = Sheet.new(sheet_name, @rels_files[n], @sheet_files[n], @comments_files[n], styles, shared_strings, workbook, sheet_options)
+ [sheet_name, @sheets[n]]
+ end]
+
+ if cell_max
+ cell_count = ::Roo::Utils.num_cells_in_range(sheet_for(options.delete(:sheet)).dimensions)
+ raise ExceedsMaxError.new("Excel file exceeds cell maximum: #{cell_count} > #{cell_max}") if cell_count > cell_max
+ end
+
+ super
+ end
+
+ def method_missing(method,*args)
+ if label = workbook.defined_names[method.to_s]
+ safe_send(sheet_for(label.sheet).cells[label.key], :value)
else
# call super for methods like #a1
super
end
end
+ def sheets
+ @sheet_names
+ end
+
+ def sheet_for(sheet)
+ sheet ||= default_sheet
+ validate_sheet!(sheet)
+ @sheets_by_name[sheet]
+ end
+
# Returns the content of a spreadsheet-cell.
# (1,1) is the upper left corner.
# (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
# cell at the first line and first row.
def cell(row, col, sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = normalize(row,col)
- if celltype(row,col,sheet) == :date
- yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
- return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
- elsif celltype(row,col,sheet) == :datetime
- date_part,time_part = @cell[sheet][[row,col]].split(' ')
- yyyy,mm,dd = date_part.split('-')
- hh,mi,ss = time_part.split(':')
- return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
+ key = normalize(row,col)
+ safe_send(sheet_for(sheet).cells[key], :value)
+ end
+
+ def row(rownumber,sheet=nil)
+ sheet_for(sheet).row(rownumber)
+ end
+
+ # returns all values in this column as an array
+ # column numbers are 1,2,3,... like in the spreadsheet
+ def column(column_number,sheet=nil)
+ if column_number.is_a?(::String)
+ column_number = ::Roo::Utils.letter_to_number(column_number)
end
- @cell[sheet][[row,col]]
+ sheet_for(sheet).column(column_number)
end
+ # returns the number of the first non-empty row
+ def first_row(sheet=nil)
+ sheet_for(sheet).first_row
+ end
+
+ # returns the number of the last non-empty row
+ def last_row(sheet=nil)
+ sheet_for(sheet).last_row
+ end
+
+ # returns the number of the first non-empty column
+ def first_column(sheet=nil)
+ sheet_for(sheet).first_column
+ end
+
+ # returns the number of the last non-empty column
+ def last_column(sheet=nil)
+ sheet_for(sheet).last_column
+ end
+
+ # set a cell to a certain value
+ # (this will not be saved back to the spreadsheet file!)
+ def set(row,col,value, sheet = nil) #:nodoc:
+ key = normalize(row,col)
+ cell_type = cell_type_by_value(value)
+ sheet_for(sheet).cells[key] = Cell.new(value, cell_type, nil, cell_type, value, nil, nil, nil, Cell::Coordinate.new(row, col))
+ end
+
+
# Returns the formula at (row,col).
# Returns nil if there is no formula.
# The method #formula? checks if there is a formula.
def formula(row,col,sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = normalize(row,col)
- @formula[sheet][[row,col]] && @formula[sheet][[row,col]]
+ key = normalize(row,col)
+ safe_send(sheet_for(sheet).cells[key], :formula)
end
- alias_method :formula?, :formula
- # returns each formula in the selected sheet as an array of elements
- # [row, col, formula]
- def formulas(sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- if @formula[sheet]
- @formula[sheet].each.collect do |elem|
- [elem[0][0], elem[0][1], elem[1]]
- end
- else
- []
- end
+ # Predicate methods really should return a boolean
+ # value. Hopefully no one was relying on the fact that this
+ # previously returned either nil/formula
+ def formula?(*args)
+ !!formula(*args)
end
- class Font
- attr_accessor :bold, :italic, :underline
-
- def bold?
- @bold == true
+ # returns each formula in the selected sheet as an array of tuples in following format
+ # [[row, col, formula], [row, col, formula],...]
+ def formulas(sheet=nil)
+ sheet_for(sheet).cells.select {|_, cell| cell.formula }.map do |(x, y), cell|
+ [x, y, cell.formula]
end
-
- def italic?
- @italic == true
- end
-
- def underline?
- @underline == true
- end
end
# Given a cell, return the cell's style
def font(row, col, sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = normalize(row,col)
- s_attribute = @s_attribute[sheet][[row,col]]
- s_attribute ||= 0
- s_attribute = s_attribute.to_i
- @style_definitions[s_attribute]
+ key = normalize(row,col)
+ definition_index = safe_send(sheet_for(sheet).cells[key], :style)
+ styles.definitions[definition_index] if definition_index
end
# returns the type of a cell:
# * :float
# * :string,
@@ -207,468 +391,252 @@
# * :percentage
# * :formula
# * :time
# * :datetime
def celltype(row,col,sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = normalize(row,col)
- if @formula[sheet][[row,col]]
- return :formula
- else
- @cell_type[sheet][[row,col]]
- end
+ key = normalize(row, col)
+ safe_send(sheet_for(sheet).cells[key], :type)
end
# returns the internal type of an excel cell
# * :numeric_or_formula
# * :string
# Note: this is only available within the Excelx class
def excelx_type(row,col,sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = normalize(row,col)
- return @excelx_type[sheet][[row,col]]
+ key = normalize(row,col)
+ safe_send(sheet_for(sheet).cells[key], :excelx_type)
end
# returns the internal value of an excelx cell
# Note: this is only available within the Excelx class
def excelx_value(row,col,sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = normalize(row,col)
- return @excelx_value[sheet][[row,col]]
+ key = normalize(row,col)
+ safe_send(sheet_for(sheet).cells[key], :excelx_value)
end
# returns the internal format of an excel cell
def excelx_format(row,col,sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- row,col = normalize(row,col)
- s = @s_attribute[sheet][[row,col]]
- attribute2format(s).to_s
+ key = normalize(row,col)
+ sheet_for(sheet).excelx_format(key)
end
- # returns an array of sheet names in the spreadsheet
- def sheets
- @workbook_doc.xpath("//xmlns:sheet").map do |sheet|
- sheet['name']
- end
+ def empty?(row,col,sheet=nil)
+ sheet = sheet_for(sheet)
+ key = normalize(row,col)
+ cell = sheet.cells[key]
+ !cell || !cell.value || (cell.type == :string && cell.value.empty?) \
+ || (row < sheet.first_row || row > sheet.last_row || col < sheet.first_column || col > sheet.last_column)
end
# shows the internal representation of all cells
# for debugging purposes
def to_s(sheet=nil)
- sheet ||= @default_sheet
- read_cells(sheet)
- @cell[sheet].inspect
+ sheet_for(sheet).cells.inspect
end
# returns the row,col values of the labelled cell
# (nil,nil) if label is not defined
- def label(labelname)
- read_labels
- if @label.empty? || !@label.has_key?(labelname)
- return nil,nil,nil
+ def label(name)
+ labels = workbook.defined_names
+ if labels.empty? || !labels.key?(name)
+ [nil,nil,nil]
else
- return @label[labelname][1].to_i,
- Roo::Base.letter_to_number(@label[labelname][2]),
- @label[labelname][0]
+ [labels[name].row,
+ labels[name].col,
+ labels[name].sheet]
end
end
# Returns an array which all labels. Each element is an array with
# [labelname, [row,col,sheetname]]
def labels
- # sheet ||= @default_sheet
- # read_cells(sheet)
- read_labels
- @label.map do |label|
- [ label[0], # name
- [ label[1][1].to_i, # row
- Roo::Base.letter_to_number(label[1][2]), # column
- label[1][0], # sheet
+ @labels ||= workbook.defined_names.map do |name, label|
+ [ name,
+ [ label.row,
+ label.col,
+ label.sheet,
] ]
end
end
def hyperlink?(row,col,sheet=nil)
- hyperlink(row, col, sheet) != nil
+ !!hyperlink(row, col, sheet)
end
# returns the hyperlink at (row/col)
# nil if there is no hyperlink
def hyperlink(row,col,sheet=nil)
- sheet ||= @default_sheet
- read_hyperlinks(sheet) unless @hyperlinks_read[sheet]
- row,col = normalize(row,col)
- return nil unless @hyperlink[sheet]
- @hyperlink[sheet][[row,col]]
+ key = normalize(row,col)
+ sheet_for(sheet).hyperlinks[key]
end
# returns the comment at (row/col)
# nil if there is no comment
def comment(row,col,sheet=nil)
- sheet ||= @default_sheet
- #read_cells(sheet)
- read_comments(sheet) unless @comments_read[sheet]
- row,col = normalize(row,col)
- return nil unless @comment[sheet]
- @comment[sheet][[row,col]]
+ key = normalize(row,col)
+ sheet_for(sheet).comments[key]
end
# true, if there is a comment
def comment?(row,col,sheet=nil)
- sheet ||= @default_sheet
- # read_cells(sheet)
- read_comments(sheet) unless @comments_read[sheet]
- row,col = normalize(row,col)
- comment(row,col) != nil
+ !!comment(row,col,sheet)
end
- # returns each comment in the selected sheet as an array of elements
- # [row, col, comment]
def comments(sheet=nil)
- sheet ||= @default_sheet
- read_comments(sheet) unless @comments_read[sheet]
- if @comment[sheet]
- @comment[sheet].each.collect do |elem|
- [elem[0][0],elem[0][1],elem[1]]
- end
- else
- []
+ sheet_for(sheet).comments.map do |(x, y), comment|
+ [x, y, comment]
end
end
+ # Yield an array of Excelx::Cell
+ # Takes options for sheet, pad_cells, and max_rows
+ def each_row_streaming(options={})
+ sheet_for(options.delete(:sheet)).each_row(options) { |row| yield row }
+ end
+
private
- def load_xmls(paths)
- paths.compact.map do |item|
- load_xml(item)
+ def clean_sheet(sheet)
+ @sheets_by_name[sheet].cells.each_pair do |coord, value|
+ next unless value.value.is_a?(::String)
+
+ @sheets_by_name[sheet].cells[coord].value = sanitize_value(value.value)
end
+
+ @cleaned[sheet] = true
end
- # helper function to set the internal representation of cells
- def set_cell_values(sheet,x,y,i,v,value_type,formula,
- excelx_type=nil,
- excelx_value=nil,
- s_attribute=nil)
- key = [y,x+i]
- @cell_type[sheet] ||= {}
- @cell_type[sheet][key] = value_type
- @formula[sheet] ||= {}
- @formula[sheet][key] = formula if formula
- @cell[sheet] ||= {}
- @cell[sheet][key] =
- case @cell_type[sheet][key]
- when :float
- v.to_f
- when :string
- v
- when :date
- (base_date+v.to_i).strftime("%Y-%m-%d")
- when :datetime
- (base_date+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
- when :percentage
- v.to_f
- when :time
- v.to_f*(24*60*60)
- else
- v
- end
+ # Internal: extracts the worksheet_ids from the workbook.xml file. xlsx
+ # documents require a workbook.xml file, so a if the file is missing
+ # it is not a valid xlsx file. In these cases, an ArgumentError is
+ # raised.
+ #
+ # wb - a Zip::Entry for the workbook.xml file.
+ # path - A String for Zip::Entry's destination path.
+ #
+ # Examples
+ #
+ # extract_worksheet_ids(<Zip::Entry>, 'tmpdir/roo_workbook.xml')
+ # # => ["rId1", "rId2", "rId3"]
+ #
+ # Returns an Array of Strings.
+ def extract_worksheet_ids(entries, path)
+ wb = entries.find { |e| e.name[/workbook.xml$/] }
+ fail ArgumentError 'missing required workbook file' if wb.nil?
- @cell[sheet][key] = Spreadsheet::Link.new(@hyperlink[sheet][key], @cell[sheet][key].to_s) if hyperlink?(y,x+i)
- @excelx_type[sheet] ||= {}
- @excelx_type[sheet][key] = excelx_type
- @excelx_value[sheet] ||= {}
- @excelx_value[sheet][key] = excelx_value
- @s_attribute[sheet] ||= {}
- @s_attribute[sheet][key] = s_attribute
+ wb.extract(path)
+ workbook_doc = Roo::Utils.load_xml(path).remove_namespaces!
+ workbook_doc.xpath('//sheet').map{ |s| s.attributes['id'].value }
end
- # read all cells in the selected sheet
- def read_cells(sheet=nil)
- sheet ||= @default_sheet
- validate_sheet!(sheet)
- return if @cells_read[sheet]
+ # Internal
+ #
+ # wb_rels - A Zip::Entry for the workbook.xml.rels file.
+ # path - A String for the Zip::Entry's destination path.
+ #
+ # Examples
+ #
+ # extract_worksheets(<Zip::Entry>, 'tmpdir/roo_workbook.xml.rels')
+ # # => {
+ # "rId1"=>"worksheets/sheet1.xml",
+ # "rId2"=>"worksheets/sheet2.xml",
+ # "rId3"=>"worksheets/sheet3.xml"
+ # }
+ #
+ # Returns a Hash.
+ def extract_worksheet_rels(entries, path)
+ wb_rels = entries.find { |e| e.name[/workbook.xml.rels$/] }
+ fail ArgumentError 'missing required workbook file' if wb_rels.nil?
- @sheet_doc[sheets.index(sheet)].xpath("/xmlns:worksheet/xmlns:sheetData/xmlns:row/xmlns:c").each do |c|
- s_attribute = c['s'].to_i # should be here
- # c: <c r="A5" s="2">
- # <v>22606</v>
- # </c>, format: , tmp_type: float
- value_type =
- case c['t']
- when 's'
- :shared
- when 'b'
- :boolean
- # 2011-02-25 BEGIN
- when 'str'
- :string
- # 2011-02-25 END
- # 2011-09-15 BEGIN
- when 'inlineStr'
- :inlinestr
- # 2011-09-15 END
- else
- format = attribute2format(s_attribute)
- Format.to_type(format)
- end
- formula = nil
- c.children.each do |cell|
- case cell.name
- when 'is'
- cell.children.each do |is|
- if is.name == 't'
- inlinestr_content = is.content
- value_type = :string
- v = inlinestr_content
- excelx_type = :string
- y, x = Roo::Base.split_coordinate(c['r'])
- excelx_value = inlinestr_content #cell.content
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
- end
- end
- when 'f'
- formula = cell.content
- when 'v'
- if [:time, :datetime].include?(value_type) && cell.content.to_f >= 1.0
- value_type =
- if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001
- :datetime
- else
- :date
- end
- end
- excelx_type = [:numeric_or_formula,format.to_s]
- excelx_value = cell.content
- v =
- case value_type
- when :shared
- value_type = :string
- excelx_type = :string
- @shared_table[cell.content.to_i]
- when :boolean
- (cell.content.to_i == 1 ? 'TRUE' : 'FALSE')
- when :date
- cell.content
- when :time
- cell.content
- when :datetime
- cell.content
- when :formula
- cell.content.to_f #TODO: !!!!
- when :string
- excelx_type = :string
- cell.content
- else
- value_type = :float
- cell.content
- end
- y, x = Roo::Base.split_coordinate(c['r'])
- set_cell_values(sheet,x,y,0,v,value_type,formula,excelx_type,excelx_value,s_attribute)
- end
- end
+ wb_rels.extract(path)
+ rels_doc = Roo::Utils.load_xml(path).remove_namespaces!
+ worksheet_type ='http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet'
+
+ relationships = rels_doc.xpath('//Relationship').select do |relationship|
+ relationship.attributes['Type'].value == worksheet_type
end
- @cells_read[sheet] = true
- # begin comments
-=begin
-Datei xl/comments1.xml
- <?xml version="1.0" encoding="UTF-8" standalone="yes" ?>
- <comments xmlns="http://schemas.openxmlformats.org/spreadsheetml/2006/main">
- <authors>
- <author />
- </authors>
- <commentList>
- <comment ref="B4" authorId="0">
- <text>
- <r>
- <rPr>
- <sz val="10" />
- <rFont val="Arial" />
- <family val="2" />
- </rPr>
- <t>Kommentar fuer B4</t>
- </r>
- </text>
- </comment>
- <comment ref="B5" authorId="0">
- <text>
- <r>
- <rPr>
- <sz val="10" />
- <rFont val="Arial" />
- <family val="2" />
- </rPr>
- <t>Kommentar fuer B5</t>
- </r>
- </text>
- </comment>
- </commentList>
- </comments>
-=end
-=begin
- if @comments_doc[self.sheets.index(sheet)]
- read_comments(sheet)
- end
-=end
- #end comments
- end
- # Reads all comments from a sheet
- def read_comments(sheet=nil)
- sheet ||= @default_sheet
- validate_sheet!(sheet)
- n = self.sheets.index(sheet)
- return unless @comments_doc[n] #>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
- @comments_doc[n].xpath("//xmlns:comments/xmlns:commentList/xmlns:comment").each do |comment|
- ref = comment.attributes['ref'].to_s
- row,col = Roo::Base.split_coordinate(ref)
- comment.xpath('./xmlns:text/xmlns:r/xmlns:t').each do |text|
- @comment[sheet] ||= {}
- @comment[sheet][[row,col]] = text.text
- end
+ relationships.inject({}) do |hash, relationship|
+ attributes = relationship.attributes
+ id = attributes['Id'];
+ hash[id.value] = attributes['Target'].value
+ hash
end
- @comments_read[sheet] = true
end
- # Reads all hyperlinks from a sheet
- def read_hyperlinks(sheet=nil)
- sheet ||= @default_sheet
- validate_sheet!(sheet)
- n = self.sheets.index(sheet)
- if rels_doc = @rels_doc[n]
- rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r|
- [r.attribute('Id').text, r]
- end]
- @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h|
- if rel_element = rels[h.attribute('id').text]
- row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s)
- @hyperlink[sheet] ||= {}
- @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text
- end
- end
+ def extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
+ sheet_ids.each_with_index do |id, i|
+ name = sheets[id]
+ entry = entries.find { |entry| entry.name =~ /#{name}$/ }
+ path = "#{tmpdir}/roo_sheet#{i + 1}"
+ @sheet_files << path
+ entry.extract(path)
end
- @hyperlinks_read[sheet] = true
end
- def read_labels
- @label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name|
- # "Sheet1!$C$5"
- sheet, coordinates = defined_name.text.split('!$', 2)
- col,row = coordinates.split('$')
- [defined_name['name'], [sheet,row,col]]
- end]
- end
-
# Extracts all needed files from the zip file
- def process_zipfile(tmpdir, zipfilename, zip, path='')
+ def process_zipfile(tmpdir, zipfilename)
@sheet_files = []
- Roo::ZipFile.open(zipfilename) {|zf|
- zf.entries.each {|entry|
- entry_name = entry.to_s.downcase
+ entries = Zip::File.open(zipfilename).to_a.sort_by(&:name)
- path =
- if entry_name.end_with?('workbook.xml')
- "#{tmpdir}/roo_workbook.xml"
- elsif entry_name.end_with?('sharedstrings.xml')
- "#{tmpdir}/roo_sharedStrings.xml"
- elsif entry_name.end_with?('styles.xml')
- "#{tmpdir}/roo_styles.xml"
- elsif entry_name =~ /sheet([0-9]+).xml$/
- nr = $1
- @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}"
- elsif entry_name =~ /comments([0-9]+).xml$/
- nr = $1
- @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}"
- elsif entry_name =~ /sheet([0-9]+).xml.rels$/
- nr = $1
- @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}"
- end
- if path
- extract_file(zip, entry, path)
- end
- }
- }
- end
+ # NOTE: When Google or Numbers 3.1 exports to xlsx, the worksheet filenames
+ # are not in order. With Numbers 3.1, the first sheet is always
+ # sheet.xml, not sheet1.xml. With Google, the order of the worksheets is
+ # independent of a worksheet's filename (i.e. sheet6.xml can be the
+ # first worksheet).
+ #
+ # workbook.xml lists the correct order of worksheets and
+ # workbook.xml.rels lists the filenames for those worksheets.
+ #
+ # workbook.xml:
+ # <sheet state="visible" name="IS" sheetId="1" r:id="rId3"/>
+ # <sheet state="visible" name="BS" sheetId="2" r:id="rId4"/>
+ # workbook.xml.rel:
+ # <Relationship Id="rId4" Target="worksheets/sheet5.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
+ # <Relationship Id="rId3" Target="worksheets/sheet4.xml" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet"/>
+ sheet_ids = extract_worksheet_ids(entries, "#{tmpdir}/roo_workbook.xml")
+ sheets = extract_worksheet_rels(entries, "#{tmpdir}/roo_workbook.xml.rels")
+ extract_sheets_in_order(entries, sheet_ids, sheets, tmpdir)
- def extract_file(source_zip, entry, destination_path)
- open(destination_path,'wb') {|f|
- f << source_zip.read(entry)
- }
- end
-
- # extract files from the zip file
- def extract_content(tmpdir, zipfilename)
- Roo::ZipFile.open(@filename) do |zip|
- process_zipfile(tmpdir, zipfilename,zip)
- end
- end
-
- # read the shared strings xml document
- def read_shared_strings(doc)
- doc.xpath("/xmlns:sst/xmlns:si").each do |si|
- shared_table_entry = ''
- si.children.each do |elem|
- if elem.name == 'r' and elem.children
- elem.children.each do |r_elem|
- if r_elem.name == 't'
- shared_table_entry << r_elem.content
- end
- end
- end
- if elem.name == 't'
- shared_table_entry = elem.content
- end
+ entries.each do |entry|
+ path =
+ case entry.name.downcase
+ when /sharedstrings.xml$/
+ "#{tmpdir}/roo_sharedStrings.xml"
+ when /styles.xml$/
+ "#{tmpdir}/roo_styles.xml"
+ when /comments([0-9]+).xml$/
+ # FIXME: Most of the time, The order of the comment files are the same
+ # the sheet order, i.e. sheet1.xml's comments are in comments1.xml.
+ # In some situations, this isn't true. The true location of a
+ # sheet's comment file is in the sheet1.xml.rels file. SEE
+ # ECMA-376 12.3.3 in "Ecma Office Open XML Part 1".
+ nr = Regexp.last_match[1].to_i
+ @comments_files[nr - 1] = "#{tmpdir}/roo_comments#{nr}"
+ when /sheet([0-9]+).xml.rels$/
+ # FIXME: Roo seems to use sheet[\d].xml.rels for hyperlinks only, but
+ # it also stores the location for sharedStrings, comments,
+ # drawings, etc.
+ nr = Regexp.last_match[1].to_i
+ @rels_files[nr - 1] = "#{tmpdir}/roo_rels#{nr}"
end
- @shared_table << shared_table_entry
- end
- end
- # read the styles elements of an excelx document
- def read_styles(doc)
- @cellXfs = []
-
- @numFmts = Hash[doc.xpath("//xmlns:numFmt").map do |numFmt|
- [numFmt['numFmtId'], numFmt['formatCode']]
- end]
- fonts = doc.xpath("//xmlns:fonts/xmlns:font").map do |font_el|
- Font.new.tap do |font|
- font.bold = !font_el.xpath('./xmlns:b').empty?
- font.italic = !font_el.xpath('./xmlns:i').empty?
- font.underline = !font_el.xpath('./xmlns:u').empty?
- end
+ entry.extract(path) if path
end
-
- doc.xpath("//xmlns:cellXfs").each do |xfs|
- xfs.children.each do |xf|
- @cellXfs << xf['numFmtId']
- @style_definitions << fonts[xf['fontId'].to_i]
- end
- end
end
- # convert internal excelx attribute to a format
- def attribute2format(s)
- id = @cellXfs[s.to_i]
- @numFmts[id] || Format::STANDARD_FORMATS[id.to_i]
+ def styles
+ @styles ||= Styles.new(File.join(@tmpdir, 'roo_styles.xml'))
end
- def base_date
- @base_date ||= read_base_date
+ def shared_strings
+ @shared_strings ||= SharedStrings.new(File.join(@tmpdir, 'roo_sharedStrings.xml'))
end
- # Default to 1900 (minus one day due to excel quirk) but use 1904 if
- # it's set in the Workbook's workbookPr
- # http://msdn.microsoft.com/en-us/library/ff530155(v=office.12).aspx
- def read_base_date
- base_date = Date.new(1899,12,30)
- @workbook_doc.xpath("//xmlns:workbookPr").map do |workbookPr|
- if workbookPr["date1904"] && workbookPr["date1904"] =~ /true|1/i
- base_date = Date.new(1904,01,01)
- end
- end
- base_date
+ def workbook
+ @workbook ||= Workbook.new(File.join(@tmpdir, "roo_workbook.xml"))
end
-end # class
+ def safe_send(object, method, *args)
+ object.send(method, *args) if object && object.respond_to?(method)
+ end
+end