require 'rubygems' require 'rexml/document' # require 'matrix' require 'fileutils' require 'zip/zipfilesystem' require 'date' class Openoffice def initialize(filename) @cells_read = false @filename = filename @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) #TODO: end extract_content file = File.new(@tmpdir+"/"+"roo_content.xml") # TODO: @doc = REXML::Document.new file @cell = Hash.new @cell_type = Hash.new if DateTime.now < Date.new(2007,6,1) FileUtils::rm_r(@tmpdir) end @default_sheet = nil end # return the content of a spreadsheet-cell # (1,1) is the upper left corner # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the # cell at first line, first row def cell(row,col) if row.class == String if col.class == Fixnum # ('A',1): # ('B', 5) -> (5, 2) row, col = col, row else raise FormatError end end if col.class == String col = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".index(col.upcase)+1 end read_cells unless @cells_read @cell["#{row},#{col}"] end # returns the open-office type of a cell def celltype(row,col) read_cells unless @cells_read @cell_type["#{row},#{col}"] end # returns an array of sheets in the spreadsheet def sheets return_sheets = [] oo_document_count = 0 @doc.each_element do |oo_document| oo_document_count += 1 oo_element_count = 0 oo_document.each_element do |oo_element| oo_element_count += 1 if oo_element.name == "body" oo_element.each_element do |be| if be.name == "spreadsheet" be.each_element do |se| if se.name == "table" return_sheets << se.attributes['name'] end end end end end end end return_sheets end # set the working sheet in the document def default_sheet=(s) @default_sheet = s end # version of the openoffice document def officeversion read_cells unless @cells_read @officeversion end def to_s read_cells unless @cells_read @cell.inspect end # returns all values in this row as an array # row numbers are 1,2,3,... like in the spreadsheet def row(rownumber) read_cells unless @cells_read result = [] @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i y = y.to_i if y == rownumber result[x-1,rownumber] = value end } result end # returns the number of the last non-empty row def last_row read_cells unless @cells_read result = 0 @cell.each_pair {|key,value| y,x = key.split(',') y = y.to_i result = [result, y].max } result end # returns the number of the last non-empty column def last_column read_cells unless @cells_read result = 0 @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i result = [result, x].max } result end # returns the number of the first non-empty row def first_row read_cells unless @cells_read result = 999_999 # more than a spreadsheet can hold @cell.each_pair {|key,value| y,x = key.split(',') y = y.to_i result = [result, y].min } result end # returns the number of the first non-empty column def first_column read_cells unless @cells_read result = 999_999 # more than a spreadsheet can hold @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i result = [result, x].min } result end def first_column_as_letter number_to_letter(first_column) end def last_column_as_letter number_to_letter(last_column) end private def number_to_letter(n) "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[n-1,1] end # read all cells in the selected sheet def read_cells oo_document_count = 0 @doc.each_element do |oo_document| @officeversion = oo_document.attributes['version'] oo_document_count += 1 oo_element_count = 0 oo_document.each_element do |oo_element| oo_element_count += 1 # p oo_element.name if oo_element.name == "body" # puts "Body gefunden " oo_element.each_element do |be| # p be.name if be.name == "spreadsheet" be.each_element do |se| # p se if se.name == "table" if se.attributes['name']==@default_sheet x=1 y=1 # puts "table gefunden" #se.each_element se.each_element do |te| # p te.name if te.name == "table-column" # p te.attributes rep = te.attributes["number-columns-repeated"] # p "rep = "+rep.to_s elsif te.name == "table-row" if te.attributes['number-rows-repeated'] skip_y = te.attributes['number-rows-repeated'].to_i y = y + skip_y - 1 # minus 1 because this line will be counted as a line element end # p te te.each_element do |tr| # p tr if tr.name == 'table-cell' skip = tr.attributes['number-columns-repeated'] if skip x += (skip.to_i - 1) end vt = tr.attributes['value-type'] v = tr.attributes['value'] # puts "#{vt} #{v}" @cell_type["#{y},#{x}"] = vt if @cell_type["#{y},#{x}"] == 'float' @cell["#{y},#{x}"] = v.to_f elsif @cell_type["#{y},#{x}"] == 'string' tr.each_element do |str| if str.name == 'p' @cell["#{y},#{x}"] = str.text end end elsif @cell_type["#{y},#{x}"] == 'date' @cell["#{y},#{x}"] = tr.attributes['date-value'] else @cell["#{y},#{x}"] = v end x += 1 end end y += 1 x = 1 end end # p se.attributes['name'] # return_sheets << se.attributes['name'] end # richtiges sheet end end end end end end # puts oo_element_count.to_s+" oo_element_count " end # puts oo_document_count.to_s+" oo_document_count " # p @cell @cells_read = true end def process_zipfile(zip, path='') if zip.file.file? path # puts %{#{path}: "#{zip.read(path)}"} # puts %{#{path}:} if path == "content.xml" open(@tmpdir+'/'+'roo_content.xml','w') {|f| f << zip.read(path) } end else unless path.empty? path += '/' # puts path end zip.dir.foreach(path) do |filename| process_zipfile(zip, path+filename) end end end def extract_content Zip::ZipFile.open(@filename) do |zip| process_zipfile(zip) end end end