require 'rubygems' require 'rexml/document' require 'fileutils' require 'zip/zipfilesystem' require 'date' class Fixnum def as_letter "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[self-1,1] end end class Openoffice def initialize(filename) @cells_read = false @filename = filename @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end extract_content file = File.new(File.join(@tmpdir, "roo_content.xml")) @doc = REXML::Document.new file file.close @cell = Hash.new @cell_type = Hash.new if DateTime.now > Date.new(2007,5,31) FileUtils::rm_r(@tmpdir) end @default_sheet = nil end # return the content of a spreadsheet-cell # (1,1) is the upper left corner # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the # cell at first line, first row def cell(row,col) read_cells unless @cells_read row,col = normalize(row,col) @cell["#{row},#{col}"] end # returns the open-office type of a cell def celltype(row,col) read_cells unless @cells_read row,col = normalize(row,col) @cell_type["#{row},#{col}"] end # returns an array of sheets in the spreadsheet def sheets return_sheets = [] oo_document_count = 0 @doc.each_element do |oo_document| oo_document_count += 1 oo_element_count = 0 oo_document.each_element do |oo_element| oo_element_count += 1 if oo_element.name == "body" oo_element.each_element do |be| if be.name == "spreadsheet" be.each_element do |se| if se.name == "table" return_sheets << se.attributes['name'] end end end end end end end return_sheets end # set the working sheet in the document def default_sheet=(s) @default_sheet = s end # version of the openoffice document def officeversion read_cells unless @cells_read @officeversion end # shows the internal representation of all cells # mainly for debugging purposes def to_s read_cells unless @cells_read @cell.inspect end # returns all values in this row as an array # row numbers are 1,2,3,... like in the spreadsheet def row(rownumber) read_cells unless @cells_read result = [] @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i y = y.to_i if y == rownumber result[x-1,rownumber] = value end } result end # returns the number of the last non-empty row def last_row read_cells unless @cells_read result = 0 @cell.each_pair {|key,value| y,x = key.split(',') y = y.to_i result = [result, y].max if value } result end # returns the number of the last non-empty column def last_column read_cells unless @cells_read result = 0 @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i result = [result, x].max if value } result end # returns the number of the first non-empty row def first_row read_cells unless @cells_read result = 999_999 # more than a spreadsheet can hold @cell.each_pair {|key,value| y,x = key.split(',') y = y.to_i result = [result, y].min if value } result end # returns the number of the first non-empty column def first_column read_cells unless @cells_read result = 999_999 # more than a spreadsheet can hold @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i result = [result, x].min if value } result end def first_column_as_letter number_to_letter(first_column) end def last_column_as_letter number_to_letter(last_column) end def first_row_as_letter number_to_letter(first_row) end def last_row_as_letter number_to_letter(last_row) end def as_letter(n) number_to_letter(last_row) end def number_to_letter(n) letters="" while n > 0 num = n%26 letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num-1,1] + letters n = n.div(26) end letters end def empty?(row, col) read_cells unless @cells_read return true unless cell(row, col) return true if celltype(row, col) == "string" && cell(row, col).empty? false end private # read all cells in the selected sheet def read_cells oo_document_count = 0 @doc.each_element do |oo_document| @officeversion = oo_document.attributes['version'] oo_document_count += 1 oo_element_count = 0 oo_document.each_element do |oo_element| oo_element_count += 1 # p oo_element.name if oo_element.name == "body" # puts "Body gefunden " oo_element.each_element do |be| # p be.name if be.name == "spreadsheet" be.each_element do |se| # p se if se.name == "table" if se.attributes['name']==@default_sheet x=1 y=1 # puts "table gefunden" #se.each_element se.each_element do |te| # p te.name if te.name == "table-column" # p te.attributes rep = te.attributes["number-columns-repeated"] # p "rep = "+rep.to_s elsif te.name == "table-row" if te.attributes['number-rows-repeated'] skip_y = te.attributes['number-rows-repeated'].to_i y = y + skip_y - 1 # minus 1 because this line will be counted as a line element end # p te te.each_element do |tr| # p tr if tr.name == 'table-cell' skip = tr.attributes['number-columns-repeated'] if skip x += (skip.to_i - 1) end vt = tr.attributes['value-type'] v = tr.attributes['value'] # puts "#{vt} #{v}" @cell_type["#{y},#{x}"] = vt if @cell_type["#{y},#{x}"] == 'float' @cell["#{y},#{x}"] = v.to_f elsif @cell_type["#{y},#{x}"] == 'string' tr.each_element do |str| if str.name == 'p' @cell["#{y},#{x}"] = str.text end end elsif @cell_type["#{y},#{x}"] == 'date' @cell["#{y},#{x}"] = tr.attributes['date-value'] else @cell["#{y},#{x}"] = v end x += 1 end end y += 1 x = 1 end end end # sheet end end end end end end end @cells_read = true end def process_zipfile(zip, path='') if zip.file.file? path if path == "content.xml" open(@tmpdir+'/'+'roo_content.xml','w') {|f| f << zip.read(path) } end else unless path.empty? path += '/' end zip.dir.foreach(path) do |filename| process_zipfile(zip, path+filename) end end end def extract_content Zip::ZipFile.open(@filename) do |zip| process_zipfile(zip) end end def Openoffice.letter_to_number(letters) result = 0 while letters && letters.length > 0 character = letters[0,1].upcase num = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".index(character)+1 result = result * 26 + num letters = letters[1..-1] end result end # converts cell coordinate to numeric values of row,col def normalize(row,col) if row.class == String if col.class == Fixnum # ('A',1): # ('B', 5) -> (5, 2) row, col = col, row else raise FormatError end end if col.class == String col = Openoffice.letter_to_number(col) end return row,col end end