require 'forwardable' require 'roo/excelx/extractor' module Roo class Excelx class SheetDoc < Excelx::Extractor extend Forwardable delegate [:styles, :workbook, :shared_strings, :base_date] => :@shared def initialize(path, relationships, shared, options = {}) super(path) @shared = shared @options = options @relationships = relationships end def cells(relationships) @cells ||= extract_cells(relationships) end def hyperlinks(relationships) @hyperlinks ||= extract_hyperlinks(relationships) end # Get the dimensions for the sheet. # This is the upper bound of cells that might # be parsed. (the document may be sparse so cell count is only upper bound) def dimensions @dimensions ||= extract_dimensions end # Yield each row xml element to caller def each_row_streaming(&block) Roo::Utils.each_element(@path, 'row', &block) end # Yield each cell as Excelx::Cell to caller for given # row xml def each_cell(row_xml) return [] unless row_xml row_xml.children.each do |cell_element| # If you're sure you're not going to need this hyperlinks you can discard it hyperlinks = unless @options[:no_hyperlinks] key = ::Roo::Utils.ref_to_key(cell_element['r']) hyperlinks(@relationships)[key] end yield cell_from_xml(cell_element, hyperlinks) end end private def cell_value_type(type, format) case type when 's'.freeze :shared when 'b'.freeze :boolean when 'str'.freeze :string when 'inlineStr'.freeze :inlinestr else Excelx::Format.to_type(format) end end # Internal: Creates a cell based on an XML clell.. # # cell_xml - a Nokogiri::XML::Element. e.g. # # 22606 # # hyperlink - a String for the hyperlink for the cell or nil when no # hyperlink is present. # # Examples # # cells_from_xml(, nil) # # => # # Returns a type of . def cell_from_xml(cell_xml, hyperlink) coordinate = extract_coordinate(cell_xml['r']) return Excelx::Cell::Empty.new(coordinate) if cell_xml.children.empty? # NOTE: This is error prone, to_i will silently turn a nil into a 0. # This works by coincidence because Format[0] is General. style = cell_xml['s'].to_i format = styles.style_format(style) value_type = cell_value_type(cell_xml['t'], format) formula = nil cell_xml.children.each do |cell| case cell.name when 'is' content_arr = cell.search('t').map(&:content) unless content_arr.empty? return Excelx::Cell.create_cell(:string, content_arr.join(''), formula, style, hyperlink, coordinate) end when 'f' formula = cell.content when 'v' return create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate) end end Excelx::Cell::Empty.new(coordinate) end def create_cell_from_value(value_type, cell, formula, format, style, hyperlink, base_date, coordinate) # NOTE: format.to_s can replace excelx_type as an argument for # Cell::Time, Cell::DateTime, Cell::Date or Cell::Number, but # it will break some brittle tests. excelx_type = [:numeric_or_formula, format.to_s] # NOTE: There are only a few situations where value != cell.content # 1. when a sharedString is used. value = sharedString; # cell.content = id of sharedString # 2. boolean cells: value = 'TRUE' | 'FALSE'; cell.content = '0' | '1'; # But a boolean cell should use TRUE|FALSE as the formatted value # and use a Boolean for it's value. Using a Boolean value breaks # Roo::Base#to_csv. # 3. formula case value_type when :shared value = shared_strings.use_html?(cell.content.to_i) ? shared_strings.to_html[cell.content.to_i] : shared_strings[cell.content.to_i] Excelx::Cell.create_cell(:string, value, formula, style, hyperlink, coordinate) when :boolean, :string value = cell.content Excelx::Cell.create_cell(value_type, value, formula, style, hyperlink, coordinate) when :time, :datetime cell_content = cell.content.to_f # NOTE: A date will be a whole number. A time will have be > 1. And # in general, a datetime will have decimals. But if the cell is # using a custom format, it's possible to be interpreted incorrectly. # cell_content.to_i == cell_content && standard_style?=> :date # # Should check to see if the format is standard or not. If it's a # standard format, than it's a date, otherwise, it is a datetime. # @styles.standard_style?(style_id) # STANDARD_STYLES.keys.include?(style_id.to_i) cell_type = if cell_content < 1.0 :time elsif (cell_content - cell_content.floor).abs > 0.000001 :datetime else :date end Excelx::Cell.create_cell(cell_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) when :date Excelx::Cell.create_cell(value_type, cell.content, formula, excelx_type, style, hyperlink, base_date, coordinate) else Excelx::Cell.create_cell(:number, cell.content, formula, excelx_type, style, hyperlink, coordinate) end end def extract_coordinate(coordinate) row, column = ::Roo::Utils.split_coordinate(coordinate) Excelx::Coordinate.new(row, column) end def extract_hyperlinks(relationships) return {} unless (hyperlinks = doc.xpath('/worksheet/hyperlinks/hyperlink')) Hash[hyperlinks.map do |hyperlink| if hyperlink.attribute('id') && (relationship = relationships[hyperlink.attribute('id').text]) [::Roo::Utils.ref_to_key(hyperlink.attributes['ref'].to_s), relationship.attribute('Target').text] end end.compact] end def expand_merged_ranges(cells) # Extract merged ranges from xml merges = {} doc.xpath('/worksheet/mergeCells/mergeCell').each do |mergecell_xml| tl, br = mergecell_xml['ref'].split(/:/).map { |ref| ::Roo::Utils.ref_to_key(ref) } for row in tl[0]..br[0] do for col in tl[1]..br[1] do next if row == tl[0] && col == tl[1] merges[[row, col]] = tl end end end # Duplicate value into all cells in merged range merges.each do |dst, src| cells[dst] = cells[src] end end def extract_cells(relationships) extracted_cells = Hash[doc.xpath('/worksheet/sheetData/row/c').map do |cell_xml| key = ::Roo::Utils.ref_to_key(cell_xml['r']) [key, cell_from_xml(cell_xml, hyperlinks(relationships)[key])] end] expand_merged_ranges(extracted_cells) if @options[:expand_merged_ranges] extracted_cells end def extract_dimensions Roo::Utils.each_element(@path, 'dimension') do |dimension| return dimension.attributes['ref'].value end end end end end