lib/roo/openoffice.rb in roo-0.0.3 vs lib/roo/openoffice.rb in roo-0.1.0

- old
+ new

@@ -2,10 +2,11 @@
 require 'rubygems'
 require 'rexml/document'
 # require 'matrix'
 require 'fileutils'
 require 'zip/zipfilesystem'
+require 'date'

 class Openoffice

   def initialize(filename)
     @cells_read = false
@@ -17,11 +18,13 @@
     extract_content
     file = File.new(@tmpdir+"/"+"roo_content.xml") # TODO:
     @doc = REXML::Document.new file
     @cell = Hash.new
     @cell_type = Hash.new
-    FileUtils::rm_r(@tmpdir)
+    if DateTime.now < Date.new(2007,6,1)
+      FileUtils::rm_r(@tmpdir)
+    end
     @default_sheet = nil
   end

   # return the content of a spreadsheet-cell
   # (1,1) is the upper left corner
@@ -52,54 +55,45 @@


   # returns an array of sheets in the spreadsheet
   def sheets
     return_sheets = []
-    # p valid_xml?(doc)
     oo_document_count = 0
     @doc.each_element do |oo_document|
       oo_document_count += 1
-      #p oo_document
       oo_element_count = 0
       oo_document.each_element do |oo_element|
         oo_element_count += 1
-        # p oo_element.name
         if oo_element.name == "body"
-          # puts "Body gefunden "
           oo_element.each_element do |be|
-            # p be.name
             if be.name == "spreadsheet"
               be.each_element do |se|
-                # p se
                 if se.name == "table"
-                  # puts "table gefunden"
-                  #se.each_element
-                  # p se.attributes['name']
                   return_sheets << se.attributes['name']
                 end
               end
             end
           end
         end
       end
-      # puts oo_element_count.to_s+" oo_element_count "
     end
-    # puts oo_document_count.to_s+" oo_document_count "
     return_sheets
   end

   # set the working sheet in the document
   def default_sheet=(s)
     @default_sheet = s
   end

+  # version of the openoffice document
   def officeversion
     read_cells unless @cells_read
     @officeversion
   end

   def to_s
+    read_cells unless @cells_read
     @cell.inspect
   end

   # returns all values in this row as an array
   # row numbers are 1,2,3,... like in the spreadsheet
@@ -114,13 +108,73 @@
         result[x-1,rownumber] = value
       end
     }
     result
   end
+
+  # returns the number of the last non-empty row
+  def last_row
+    read_cells unless @cells_read
+    result = 0
+    @cell.each_pair {|key,value|
+      y,x = key.split(',')
+      y = y.to_i
+      result = [result, y].max
+    }
+    result
+  end
+  # returns the number of the last non-empty column
+  def last_column
+    read_cells unless @cells_read
+    result = 0
+    @cell.each_pair {|key,value|
+      y,x = key.split(',')
+      x = x.to_i
+      result = [result, x].max
+    }
+    result
+  end
+
+  # returns the number of the first non-empty row
+  def first_row
+    read_cells unless @cells_read
+    result = 999_999 # more than a spreadsheet can hold
+    @cell.each_pair {|key,value|
+      y,x = key.split(',')
+      y = y.to_i
+      result = [result, y].min
+    }
+    result
+  end
+
+  # returns the number of the first non-empty column
+  def first_column
+    read_cells unless @cells_read
+    result = 999_999 # more than a spreadsheet can hold
+    @cell.each_pair {|key,value|
+      y,x = key.split(',')
+      x = x.to_i
+      result = [result, x].min
+    }
+    result
+  end
+
+  def first_column_as_letter
+    number_to_letter(first_column)
+  end
+
+  def last_column_as_letter
+    number_to_letter(last_column)
+  end
+

   private

+  def number_to_letter(n)
+    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[n-1,1]
+  end
+
   # read all cells in the selected sheet
   def read_cells
     oo_document_count = 0
     @doc.each_element do |oo_document|
       @officeversion = oo_document.attributes['version']
@@ -148,9 +202,13 @@
                     if te.name == "table-column"
                       # p te.attributes
                       rep = te.attributes["number-columns-repeated"]
                       # p "rep = "+rep.to_s
                     elsif te.name == "table-row"
+                      if te.attributes['number-rows-repeated']
+                        skip_y = te.attributes['number-rows-repeated'].to_i
+                        y = y + skip_y - 1 # minus 1 because this line will be counted as a line element
+                      end
                       # p te
                       te.each_element do |tr|
                         # p tr
                         if tr.name == 'table-cell'
                           skip = tr.attributes['number-columns-repeated']