lib/roo/excelx.rb in roo-1.12.2 vs lib/roo/excelx.rb in roo-1.13.0

- old
+ new

@@ -1,7 +1,8 @@ require 'date' require 'nokogiri' +require 'spreadsheet' class Roo::Excelx < Roo::Base module Format EXCEPTIONAL_FORMATS = { 'h:mm am/pm' => :date, @@ -68,11 +69,11 @@ def initialize(filename, options = {}, deprecated_file_warning = :error) if Hash === options packed = options[:packed] file_warning = options[:file_warning] || :error else - warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprected. Use an options hash instead.' + warn 'Supplying `packed` or `file_warning` as separate arguments to `Roo::Excelx.new` is deprecated. Use an options hash instead.' packed = options file_warning = deprecated_file_warning end file_type_check(filename,'.xlsx','an Excel-xlsx', file_warning, packed) @@ -82,10 +83,11 @@ @filename = filename unless File.file?(@filename) raise IOError, "file #{@filename} does not exist" end @comments_files = Array.new + @rels_files = Array.new extract_content(tmpdir, @filename) @workbook_doc = load_xml(File.join(tmpdir, "roo_workbook.xml")) @shared_table = [] if File.exist?(File.join(tmpdir, 'roo_sharedStrings.xml')) @sharedstring_doc = load_xml(File.join(tmpdir, 'roo_sharedStrings.xml')) @@ -101,18 +103,23 @@ load_xml(item) end @comments_doc = @comments_files.compact.map do |item| load_xml(item) end + @rels_doc = @rels_files.map do |item| + load_xml(item) + end end super(filename, options) @formula = Hash.new @excelx_type = Hash.new @excelx_value = Hash.new @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt @comment = Hash.new @comments_read = Hash.new + @hyperlink = Hash.new + @hyperlinks_read = Hash.new end def method_missing(m,*args) # is method name a label name read_labels @@ -288,10 +295,24 @@ label[1][0], # sheet ] ] end end + def hyperlink?(row,col,sheet=nil) + hyperlink(row, col, sheet) != nil + end + + # returns the hyperlink at (row/col) + # nil if there is no hyperlink + def hyperlink(row,col,sheet=nil) + sheet ||= @default_sheet + read_hyperlinks(sheet) unless @hyperlinks_read[sheet] + row,col = normalize(row,col) + return nil unless @hyperlink[sheet] + @hyperlink[sheet][[row,col]] + end + # returns the comment at (row/col) # nil if there is no comment def comment(row,col,sheet=nil) sheet ||= @default_sheet #read_cells(sheet) @@ -352,10 +373,12 @@ when :time v.to_f*(24*60*60) else v end + + @cell[sheet][key] = Spreadsheet::Link.new(@hyperlink[sheet][key], @cell[sheet][key]) if hyperlink?(y,x+i) @excelx_type[sheet] ||= {} @excelx_type[sheet][key] = excelx_type @excelx_value[sheet] ||= {} @excelx_value[sheet][key] = excelx_value @s_attribute[sheet] ||= {} @@ -507,10 +530,30 @@ end end @comments_read[sheet] = true end + # Reads all hyperlinks from a sheet + def read_hyperlinks(sheet=nil) + sheet ||= @default_sheet + validate_sheet!(sheet) + n = self.sheets.index(sheet) + if rels_doc = @rels_doc[n] + rels = Hash[rels_doc.xpath("/xmlns:Relationships/xmlns:Relationship").map do |r| + [r.attribute('Id').text, r] + end] + @sheet_doc[n].xpath("/xmlns:worksheet/xmlns:hyperlinks/xmlns:hyperlink").each do |h| + if rel_element = rels[h.attribute('id').text] + row,col = Roo::Base.split_coordinate(h.attributes['ref'].to_s) + @hyperlink[sheet] ||= {} + @hyperlink[sheet][[row,col]] = rel_element.attribute('Target').text + end + end + end + @hyperlinks_read[sheet] = true + end + def read_labels @label ||= Hash[@workbook_doc.xpath("//xmlns:definedName").map do |defined_name| # "Sheet1!$C$5" sheet, coordinates = defined_name.text.split('!$', 2) col,row = coordinates.split('$') @@ -521,46 +564,39 @@ # Extracts all needed files from the zip file def process_zipfile(tmpdir, zipfilename, zip, path='') @sheet_files = [] Roo::ZipFile.open(zipfilename) {|zf| zf.entries.each {|entry| - if entry.to_s.end_with?('workbook.xml') - open(tmpdir+'/'+'roo_workbook.xml','wb') {|f| - f << zip.read(entry) - } + entry_name = entry.to_s.downcase + + path = + if entry_name.end_with?('workbook.xml') + "#{tmpdir}/roo_workbook.xml" + elsif entry_name.end_with?('sharedstrings.xml') + "#{tmpdir}/roo_sharedStrings.xml" + elsif entry_name.end_with?('styles.xml') + "#{tmpdir}/roo_styles.xml" + elsif entry_name =~ /sheet([0-9]+).xml$/ + nr = $1 + @sheet_files[nr.to_i-1] = "#{tmpdir}/roo_sheet#{nr}" + elsif entry_name =~ /comments([0-9]+).xml$/ + nr = $1 + @comments_files[nr.to_i-1] = "#{tmpdir}/roo_comments#{nr}" + elsif entry_name =~ /sheet([0-9]+).xml.rels$/ + nr = $1 + @rels_files[nr.to_i-1] = "#{tmpdir}/roo_rels#{nr}" + end + if path + extract_file(zip, entry, path) end - # if entry.to_s.end_with?('sharedStrings.xml') - # at least one application creates this file with another (incorrect?) - # casing. It doesn't hurt, if we ignore here the correct casing - there - # won't be both names in the archive. - # Changed the casing of all the following filenames. - if entry.to_s.downcase.end_with?('sharedstrings.xml') - open(tmpdir+'/'+'roo_sharedStrings.xml','wb') {|f| - f << zip.read(entry) - } - end - if entry.to_s.downcase.end_with?('styles.xml') - open(tmpdir+'/'+'roo_styles.xml','wb') {|f| - f << zip.read(entry) - } - end - if entry.to_s.downcase =~ /sheet([0-9]+).xml$/ - nr = $1 - open(tmpdir+'/'+"roo_sheet#{nr}",'wb') {|f| - f << zip.read(entry) - } - @sheet_files[nr.to_i-1] = tmpdir+'/'+"roo_sheet#{nr}" - end - if entry.to_s.downcase =~ /comments([0-9]+).xml$/ - nr = $1 - open(tmpdir+'/'+"roo_comments#{nr}",'wb') {|f| - f << zip.read(entry) - } - @comments_files[nr.to_i-1] = tmpdir+'/'+"roo_comments#{nr}" - end } } - # return + end + + def extract_file(source_zip, entry, destination_path) + open(destination_path,'wb') {|f| + f << source_zip.read(entry) + } end # extract files from the zip file def extract_content(tmpdir, zipfilename) Roo::ZipFile.open(@filename) do |zip|