openoffice.rb in roo-0.1.0

- old
+ new

@@ -2,10 +2,11 @@
 require 'rubygems'
 require 'rexml/document'
 # require 'matrix'
 require 'fileutils'
 require 'zip/zipfilesystem'
+require 'date'
 
 class Openoffice
 
   def initialize(filename)
     @cells_read = false
@@ -17,11 +18,13 @@
     extract_content
     file = File.new(@tmpdir+"/"+"roo_content.xml") # TODO:
     @doc = REXML::Document.new file
     @cell = Hash.new
     @cell_type = Hash.new
-    FileUtils::rm_r(@tmpdir)
+    if DateTime.now < Date.new(2007,6,1)
+      FileUtils::rm_r(@tmpdir)
+    end
     @default_sheet = nil
   end
 
   # return the content of a spreadsheet-cell
   # (1,1) is the upper left corner
@@ -52,54 +55,45 @@
 
 
   # Returns an array holding the name of every sheet in the spreadsheet.
   #
   # Walks the parsed document (@doc) level by level: each top-level element,
   # its "body" children, their "spreadsheet" children and finally every
   # "table" element, whose 'name' attribute is appended to the result in the
   # order the tables are visited.
   def sheets
     return_sheets = []
-    # p valid_xml?(doc)
     oo_document_count = 0 # only incremented below; nothing in this method reads it
     @doc.each_element do |oo_document|
       oo_document_count += 1
-      #p oo_document
       oo_element_count = 0 # likewise a leftover counter for the removed debug output
       oo_document.each_element do |oo_element|
         oo_element_count += 1
-        # p oo_element.name
         if oo_element.name == "body"
-          # puts "Body found "
           oo_element.each_element do |be| # be: child element of the body
-            # p be.name
             if be.name == "spreadsheet"
               be.each_element do |se| # se: child element of the spreadsheet
-                # p se
                 if se.name == "table"
-                  # puts "table found"
-                  #se.each_element
-                  # p se.attributes['name']
                   return_sheets << se.attributes['name'] # one entry per table element
                 end
               end
             end
           end
         end
       end
-      # puts oo_element_count.to_s+" oo_element_count "
     end
-    # puts oo_document_count.to_s+" oo_document_count "
     return_sheets
   end
 
   # Sets the working sheet in the document by storing s in @default_sheet.
   # NOTE(review): s is presumably one of the names returned by #sheets -- confirm against read_cells.
   def default_sheet=(s)
     @default_sheet = s
   end
 
+  # version of the openoffice document (the 'version' attribute that read_cells stores in @officeversion)
   def officeversion
     read_cells unless @cells_read # read the document first if that has not happened yet
     @officeversion
   end
 
   # String form of the spreadsheet: the inspect output of the @cell hash.
   def to_s
+    read_cells unless @cells_read # make sure the cells are loaded before dumping them
     @cell.inspect
   end
 
   # returns all values in this row as an array
   # row numbers are 1,2,3,... like in the spreadsheet
@@ -114,13 +108,73 @@
         result[x-1,rownumber] = value
       end
     }
     result
   end
+  
+  # Number of the last non-empty row.
+  #
+  # Loads the cells on demand, then takes the largest row number found in
+  # the "y,x" keys of @cell. Yields 0 when @cell has no entries.
+  def last_row
+    read_cells unless @cells_read
+    row_numbers = @cell.keys.map { |coords| coords.split(',').first.to_i }
+    # Seed with 0 so an empty sheet gives 0 instead of nil.
+    [0, *row_numbers].max
+  end
 
+  # Number of the last non-empty column.
+  #
+  # Loads the cells on demand, then takes the largest column number found in
+  # the "y,x" keys of @cell. Yields 0 when @cell has no entries.
+  def last_column
+    read_cells unless @cells_read
+    column_numbers = @cell.keys.map { |coords| coords.split(',')[1].to_i }
+    # Seed with 0 so an empty sheet gives 0 instead of nil.
+    [0, *column_numbers].max
+  end
+
+  # Number of the first non-empty row.
+  #
+  # Loads the cells on demand, then takes the smallest row number found in
+  # the "y,x" keys of @cell. The minimum is seeded with 999_999, so that
+  # value comes back when @cell has no entries and is also the largest
+  # result this method can report.
+  def first_row
+    read_cells unless @cells_read
+    row_numbers = @cell.keys.map { |coords| coords.split(',').first.to_i }
+    [999_999, *row_numbers].min
+  end
+
+  # Number of the first non-empty column.
+  #
+  # Loads the cells on demand, then takes the smallest column number found
+  # in the "y,x" keys of @cell. The minimum is seeded with 999_999, so that
+  # value comes back when @cell has no entries and is also the largest
+  # result this method can report.
+  def first_column
+    read_cells unless @cells_read
+    column_numbers = @cell.keys.map { |coords| coords.split(',')[1].to_i }
+    [999_999, *column_numbers].min
+  end
+
+  # Letter of the first non-empty column: the result of first_column passed
+  # through number_to_letter.
+  def first_column_as_letter
+    column_number = first_column
+    number_to_letter(column_number)
+  end
+
+  # Letter of the last non-empty column: the result of last_column passed
+  # through number_to_letter.
+  def last_column_as_letter
+    column_number = last_column
+    number_to_letter(column_number)
+  end
+
 private
 
+  # Converts a 1-based column number into its spreadsheet column label:
+  # 1 => "A", 26 => "Z", 27 => "AA", 52 => "AZ", 703 => "AAA".
+  #
+  # Labels are bijective base-26 (there is no zero digit), hence the n - 1
+  # before each divmod. Numbers below 1 have no label and give "".
+  def number_to_letter(n)
+    label = ""
+    while n > 0
+      n, digit = (n - 1).divmod(26)
+      label = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[digit, 1] + label
+    end
+    label
+  end
+
   # read all cells in the selected sheet
   def read_cells
     oo_document_count = 0
     @doc.each_element do |oo_document|
       @officeversion = oo_document.attributes['version']
@@ -148,9 +202,13 @@
                     if te.name == "table-column"
                       # p te.attributes
                       rep = te.attributes["number-columns-repeated"]
                       # p "rep = "+rep.to_s
                     elsif te.name == "table-row"
+                      if te.attributes['number-rows-repeated']
+                        skip_y = te.attributes['number-rows-repeated'].to_i 
+                        y = y + skip_y - 1 # minus 1 because this line will be counted as a line element
+                      end
                       # p te
                       te.each_element do |tr|
                         # p tr
                         if tr.name == 'table-cell'
                            skip = tr.attributes['number-columns-repeated']