require 'rubygems' require 'rexml/document' require 'fileutils' require 'zip/zipfilesystem' require 'date' require 'llip' require 'base64' #require 'lib/roo/spreadsheetparser' class Openoffice @@nr = 0 # initialization and opening of a spreasheet file # file will be created if 'create' is true # and the file does not exist def initialize(filename, create = false) if filename[-4..-1] != ".ods" warn "are you sure, this is an openoffice file?" end if create and ! File.exists?(filename) self.create_openoffice(filename) end @cells_read = false @filename = filename @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end @@nr += 1 @file_nr = @@nr extract_content file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_content.xml")) @doc = REXML::Document.new file file.close @cell = Hash.new @cell_type = Hash.new @formula = Hash.new if ENV["roo_local"] != "thomas-p" FileUtils::rm_r(@tmpdir) end @default_sheet = nil @first_column = @last_column = nil @first_row = @last_row = nil trap('INT') { FileUtils::rm_r(@tmpdir) } end # creates a new empty openoffice-spreadsheet file def create_openoffice(filename) #TODO: a better way for creating the file contents # now you have to call mkbase64...rb to create an include file with all # the empty files in an openoffice zip-file load 'base64include.rb' # puts @@empty_spreadsheet f = File.open(filename,'w') f.print(Base64.decode64(@@empty_spreadsheet)) f.close end # reopens and read a spreadsheet document if false def reload @cells_read = false @tmpdir = "oo_"+$$.to_s unless File.exists?(@tmpdir) FileUtils::mkdir(@tmpdir) end extract_content file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_content.xml")) @doc = REXML::Document.new file file.close @cell = Hash.new @cell_type = Hash.new FileUtils::rm_r(@tmpdir) @default_sheet = nil @first_column = @last_column = nil @first_row = @last_row = nil end end def reload default_sheet = @default_sheet initialize(@filename) self.default_sheet = default_sheet @first_row = @last_row = @first_column = @last_column = nil end # returns the content of a spreadsheet-cell # (1,1) is the upper left corner # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the # cell at first line, first row def cell(row,col) read_cells unless @cells_read row,col = normalize(row,col) if celltype(row,col) == :date yyyy,mm,dd = @cell["#{row},#{col}"].split('-') return Date.new(yyyy.to_i,mm.to_i,dd.to_i) end @cell["#{row},#{col}"] end # returns the formula at (row,col) # nil if there is no formula def formula(row,col) read_cells unless @cells_read row,col = normalize(row,col) if @formula["#{row},#{col}"] == nil return nil else return @formula["#{row},#{col}"]["oooc:".length..-1] end end # true, if there is a formula def formula?(row,col) read_cells unless @cells_read row,col = normalize(row,col) formula(row,col) != nil end # set a cell to a certain value # (this will not be saved back to the spreadsheet file!) def set(row,col,value) read_cells unless @cells_read row,col = normalize(row,col) set_value(row,col,value) if value.class == Fixnum set_type(row,col,:float) elsif value.class == String set_type(row,col,:string) elsif value.class == Float set_type(row,col,:string) else raise ArgumentError, "Typ fuer "+value.to_s+" nicht gesetzt" end end # returns the open-office type of a cell def celltype(row,col) read_cells unless @cells_read row,col = normalize(row,col) if @formula["#{row},#{col}"] return :formula else @cell_type["#{row},#{col}"] end end # returns an array of sheet names in the spreadsheet def sheets return_sheets = [] oo_document_count = 0 @doc.each_element do |oo_document| oo_document_count += 1 oo_element_count = 0 oo_document.each_element do |oo_element| oo_element_count += 1 if oo_element.name == "body" oo_element.each_element do |be| if be.name == "spreadsheet" be.each_element do |se| if se.name == "table" return_sheets << se.attributes['name'] end end end end end end end return_sheets end # set the working sheet in the document def default_sheet=(s) @default_sheet = s @first_row = @last_row = @first_column = @last_column = nil @cells_read = false @cell = Hash.new @cell_type = Hash.new @formula = Hash.new end alias set_default_sheet default_sheet= # version of the openoffice document # at 2007 this is always "1.0" def officeversion read_cells(:ignore_default_sheet => true) unless @cells_read @officeversion end # shows the internal representation of all cells # mainly for debugging purposes def to_s read_cells unless @cells_read @cell.inspect end # returns all values in this row as an array # row numbers are 1,2,3,... like in the spreadsheet def row(rownumber) read_cells unless @cells_read result = [] tmp_arr = [] @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i y = y.to_i if y == rownumber tmp_arr[x] = value end } result = tmp_arr[1..-1] while result[-1] == nil result = result[0..-2] end result end # returns the number of the last non-empty row def last_row read_cells unless @cells_read if @last_row return @last_row end impossible_value = 0 result = impossible_value @cell.each_pair {|key,value| y,x = key.split(',') y = y.to_i result = [result, y].max if value } result = nil if result == impossible_value @last_row = result result end # returns the number of the last non-empty column def last_column read_cells unless @cells_read if @last_column return @last_column end impossible_value = 0 result = impossible_value @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i result = [result, x].max if value } result = nil if result == impossible_value @last_column = result result end # returns the number of the first non-empty row def first_row read_cells unless @cells_read if @first_row return @first_row end impossible_value = 999_999 # more than a spreadsheet can hold result = impossible_value @cell.each_pair {|key,value| y,x = key.split(',') y = y.to_i result = [result, y].min if value } result = nil if result == impossible_value @first_row = result result end # returns the number of the first non-empty column def first_column read_cells unless @cells_read if @first_column return @first_column end impossible_value = 999_999 # more than a spreadsheet can hold result = impossible_value @cell.each_pair {|key,value| y,x = key.split(',') x = x.to_i result = [result, x].min if value } result = nil if result == impossible_value @first_column = result result end # first non-empty column as a letter def first_column_as_letter Openoffice.number_to_letter(first_column) end # last non-empty column as a letter def last_column_as_letter Openoffice.number_to_letter(last_column) end # true if cell is empty def empty?(row, col) read_cells unless @cells_read return true unless cell(row, col) return true if celltype(row, col) == :string && cell(row, col).empty? false end # save spreadsheet def save 42 end # evaluate the formula at this cell # experimental: DO NOT USE THIS! def solve(row,col) parser = SpreadsheetParser.new visitor = Visitor.new #puts cell(row,col) puts formula(row,col) formula = formula(row,col)[1..-1] # .downcase puts formula #eval formula #parser.parse(formula) parser.parse(formula).accept(visitor) end # returns each formula in the selected sheet as an array of elements # [row, col, formula] def formulas theformulas = Array.new read_cells unless @cells_read first_row.upto(last_row) {|row| first_column.upto(last_column) {|col| if formula?(row,col) f = [row, col, formula(row,col)] theformulas << f end } } theformulas end # returns a rectangular area (default: all cells) as yaml-output # you can add additional attributes with the prefix parameter like: # oo.to_yaml({"file"=>"flightdata_2007-06-26", "sheet" => "1"}) def to_yaml(prefix={}, from_row=nil, from_column=nil, to_row=nil, to_column=nil) result = "--- \n" (from_row||first_row).upto(to_row||last_row) do |row| (from_column||first_column).upto(to_column||last_column) do |col| unless self.empty?(row,col) result << "cell_#{row}_#{col}: \n" prefix.each {|k,v| result << " #{k}: #{v} \n" } result << " row: #{row} \n" result << " col: #{col} \n" result << " celltype: #{self.celltype(row,col)} \n" result << " value: #{self.cell(row,col)} \n" end end end result end private # read all cells in the selected sheet def read_cells(*args) if :ignore_default_sheet == false raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil end oo_document_count = 0 @doc.each_element do |oo_document| @officeversion = oo_document.attributes['version'] oo_document_count += 1 oo_element_count = 0 oo_document.each_element do |oo_element| oo_element_count += 1 if oo_element.name == "body" oo_element.each_element do |be| if be.name == "spreadsheet" be.each_element do |se| if se.name == "table" if se.attributes['name']==@default_sheet x=1 y=1 se.each_element do |te| if te.name == "table-column" rep = te.attributes["number-columns-repeated"] elsif te.name == "table-row" if te.attributes['number-rows-repeated'] skip_y = te.attributes['number-rows-repeated'].to_i y = y + skip_y - 1 # minus 1 because this line will be counted as a line element end te.each_element do |tr| if tr.name == 'table-cell' skip = tr.attributes['number-columns-repeated'] formula = tr.attributes['formula'] vt = tr.attributes['value-type'] v = tr.attributes['value'] if vt == 'string' tr.each_element do |str| if str.name == 'p' v = str.text end end end if skip if v == nil x += (skip.to_i - 1) else 0.upto(skip.to_i-1) do |i| @cell_type["#{y},#{x+i}"] = Openoffice.oo_type_2_roo_type(vt) @formula["#{y},#{x+i}"] = formula if formula if @cell_type["#{y},#{x+i}"] == :float @cell["#{y},#{x+i}"] = v.to_f elsif @cell_type["#{y},#{x+i}"] == :string @cell["#{y},#{x+i}"] = v elsif @cell_type["#{y},#{x+i}"] == :date @cell["#{y},#{x+i}"] = tr.attributes['date-value'] else @cell["#{y},#{x+i}"] = v end end x += 1 end end # if skip @formula["#{y},#{x}"] = formula if formula @cell_type["#{y},#{x}"] = Openoffice.oo_type_2_roo_type(vt) if @cell_type["#{y},#{x}"] == :float @cell["#{y},#{x}"] = v.to_f elsif @cell_type["#{y},#{x}"] == :string tr.each_element do |str| if str.name == 'p' @cell["#{y},#{x}"] = str.text end end elsif @cell_type["#{y},#{x}"] == :date @cell["#{y},#{x}"] = tr.attributes['date-value'] else @cell["#{y},#{x}"] = v end x += 1 end end y += 1 x = 1 end end end # sheet end end end end end end end @cells_read = true end def process_zipfile(zip, path='') #p path if zip.file.file? path if path == "content.xml" open(@tmpdir+'/'+@file_nr.to_s+'_roo_content.xml','w') {|f| f << zip.read(path) } end else unless path.empty? path += '/' end zip.dir.foreach(path) do |filename| process_zipfile(zip, path+filename) end end end def extract_content Zip::ZipFile.open(@filename) do |zip| process_zipfile(zip) end end # converts cell coordinate to numeric values of row,col def normalize(row,col) if row.class == String if col.class == Fixnum # ('A',1): # ('B', 5) -> (5, 2) row, col = col, row else raise ArgumentError end end if col.class == String col = Openoffice.letter_to_number(col) end return row,col end # convert a number to something like this: 'AB' def Openoffice.number_to_letter(n) letters="" while n > 0 num = n%26 letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num-1,1] + letters n = n.div(26) end letters end # convert letters like 'AB' to a number def Openoffice.letter_to_number(letters) result = 0 while letters && letters.length > 0 character = letters[0,1].upcase num = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".index(character)+1 result = result * 26 + num letters = letters[1..-1] end result end def set_value(row,col,value) @cell["#{row},#{col}"] = value end def set_type(row,col,type) @cell_type["#{row},#{col}"] = type end A_ROO_TYPE = { "float" => :float, "string" => :string, "date" => :date, "percentage" => :percentage, } def Openoffice.oo_type_2_roo_type(ootype) return A_ROO_TYPE[ootype] end end # class