# frozen_string_literal: true

require 'rubygems'
require 'rexml/document'
require 'matrix'
require 'fileutils'
require 'zip/zipfilesystem'

# Read-only access to the cells of an OpenDocument spreadsheet (.ods).
#
# The archive's content.xml is extracted and parsed once in the constructor.
# Cells of the sheet selected with #default_sheet= are read lazily on the
# first call to #cell or #celltype and cached until another sheet is chosen.
#
# Rows and columns are 1-based; columns may also be given as spreadsheet
# letters ("A", "B", ..., "Z", "AA", ...).
class Openoffice
  # Scratch file content.xml is copied to, relative to the current working
  # directory.
  # NOTE(review): two instances used from the same directory share this file;
  # the location is kept unchanged for compatibility.
  CONTENT_FILE = 'roo_content.xml'
  private_constant :CONTENT_FILE

  # Opens +filename+, extracts its content.xml and parses it.
  #
  # filename - path of the .ods archive.
  def initialize(filename = nil)
    @filename = filename
    @cell = {}
    @cell_type = {}
    @cells_read = false

    scratch_dir = "/tmp/oo_#{Process.pid}"
    # TODO: the per-process scratch directory is created but not used yet.
    FileUtils.mkdir(scratch_dir) unless File.exist?(scratch_dir)

    extract_content
    # Block form closes the file handle as soon as the document is parsed.
    @doc = File.open(CONTENT_FILE) { |file| REXML::Document.new(file) }
  end

  # Returns the content of the cell at +row+/+col+ in the default sheet.
  #
  # row - 1-based row number.
  # col - 1-based column number, or a column name such as "A" or "AB".
  #
  # Floats are returned as Float, strings as String, dates as the raw
  # 'date-value' attribute; any other type yields the raw 'value' attribute.
  # Returns nil for empty cells.
  # Raises ArgumentError if +col+ is a String that is not a column name.
  def cell(row, col)
    col = column_number(col)
    read_cells unless @cells_read
    @cell["#{row},#{col}"]
  end

  # Returns the 'value-type' attribute ("float", "string", "date", ...) of the
  # cell at +row+/+col+ in the default sheet, or nil for empty cells.
  # Accepts the same column forms as #cell.
  def celltype(row, col)
    col = column_number(col)
    read_cells unless @cells_read
    @cell_type["#{row},#{col}"]
  end

  # Reads all cells of the selected sheet into the cache.
  def read_cells
    each_table do |table|
      read_table(table) if table.attributes['name'] == @default_sheet
    end
    @cells_read = true
  end

  # Returns the names of all sheets in this document, in document order.
  def sheets
    names = []
    each_table { |table| names << table.attributes['name'] }
    names
  end

  # Selects the sheet that #cell and #celltype read from. Cells cached for a
  # previously selected sheet are discarded so they cannot leak into the new
  # one.
  def default_sheet=(s)
    return if s == @default_sheet

    @default_sheet = s
    @cell = {}
    @cell_type = {}
    @cells_read = false
  end

  private

  # Converts a column name ("A" => 1, "Z" => 26, "AA" => 27, ...) into its
  # 1-based number; non-String arguments are returned unchanged.
  def column_number(col)
    return col unless col.is_a?(String)
    raise ArgumentError, "invalid column name: #{col.inspect}" unless col =~ /\A[A-Za-z]+\z/

    offset = 'A'.ord - 1
    col.upcase.each_char.inject(0) { |number, letter| number * 26 + (letter.ord - offset) }
  end

  # Returns the direct child elements of +parent+ whose local name is +name+.
  def children_named(parent, name)
    found = []
    parent.each_element { |child| found << child if child.name == name }
    found
  end

  # Yields every body/spreadsheet/table element of the parsed document.
  def each_table(&block)
    @doc.each_element do |document|
      children_named(document, 'body').each do |body|
        children_named(body, 'spreadsheet').each do |spreadsheet|
          children_named(spreadsheet, 'table').each(&block)
        end
      end
    end
  end

  # Returns how many grid positions +element+ stands for according to its
  # repeat +attribute+; a missing or invalid attribute counts as 1.
  def repeat_count(element, attribute)
    count = element.attributes[attribute].to_i
    count < 1 ? 1 : count
  end

  # Stores the cells of every row of +table+ in the cache.
  def read_table(table)
    row = 1
    children_named(table, 'table-row').each do |row_element|
      repeat = repeat_count(row_element, 'number-rows-repeated')
      cells = row_cells(row_element)
      # Runs of empty rows only advance the row counter.
      repeat.times { |offset| store_row(row + offset, cells) } unless cells.empty?
      row += repeat
    end
  end

  # Returns [column, type, value] triples for the non-empty cells of
  # +row_element+, with repeated cells expanded to their real columns.
  def row_cells(row_element)
    cells = []
    col = 1
    row_element.each_element do |cell_element|
      case cell_element.name
      when 'table-cell'
        repeat = repeat_count(cell_element, 'number-columns-repeated')
        type, value = cell_content(cell_element)
        # Empty cells read back as nil anyway, so they are not stored.
        unless type.nil? && value.nil?
          repeat.times { |offset| cells << [col + offset, type, value] }
        end
        col += repeat
      when 'covered-table-cell'
        # Cells hidden by a merged neighbour still occupy grid columns.
        col += repeat_count(cell_element, 'number-columns-repeated')
      end
    end
    cells
  end

  # Returns the [type, value] pair described by a table-cell element.
  def cell_content(cell_element)
    type = cell_element.attributes['value-type']
    raw = cell_element.attributes['value']
    value =
      case type
      when 'float' then raw.to_f
      when 'string' then paragraph_text(cell_element)
      when 'date' then cell_element.attributes['date-value']
      else raw
      end
    [type, value]
  end

  # Returns the text of the last 'p' child of +cell_element+, or nil when
  # there is none.
  def paragraph_text(cell_element)
    text = nil
    cell_element.each_element { |child| text = child.text if child.name == 'p' }
    text
  end

  # Writes the [column, type, value] triples in +cells+ into row +row+.
  def store_row(row, cells)
    cells.each do |col, type, value|
      key = "#{row},#{col}"
      @cell_type[key] = type
      @cell[key] = value
    end
  end

  # Walks the archive recursively from +path+ and copies the entry named
  # "content.xml" to CONTENT_FILE.
  def process_zipfile(zip, path = '')
    if zip.file.file?(path)
      File.open(CONTENT_FILE, 'w') { |f| f << zip.read(path) } if path == 'content.xml'
    else
      path += '/' unless path.empty?
      zip.dir.foreach(path) { |filename| process_zipfile(zip, path + filename) }
    end
  end

  # Opens the archive at @filename and extracts its content.xml.
  def extract_content
    Zip::ZipFile.open(@filename) { |zip| process_zipfile(zip) }
  end
end