require 'rubygems'
require 'rexml/document'
# require 'matrix'
require 'fileutils'
require 'zip/zipfilesystem'
require 'date'

class Openoffice

  def initialize(filename)
    @cells_read = false
    @filename = filename
    @tmpdir = "oo_"+$$.to_s
    unless File.exists?(@tmpdir)
      FileUtils::mkdir(@tmpdir) #TODO:
    end
    extract_content
    file = File.new(@tmpdir+"/"+"roo_content.xml") # TODO:
    @doc = REXML::Document.new file
    @cell = Hash.new
    @cell_type = Hash.new
    if DateTime.now < Date.new(2007,6,1)
      FileUtils::rm_r(@tmpdir)
    end
    @default_sheet = nil
  end

  # return the content of a spreadsheet-cell
  # (1,1) is the upper left corner
  # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
  # cell at first line, first row
  def cell(row,col)
    if row.class == String
      if col.class == Fixnum
        # ('A',1):
        # ('B', 5) -> (5, 2)
        row, col = col, row
      else
        raise FormatError
      end
    end
    if col.class == String
      col = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".index(col.upcase)+1
    end
    read_cells unless @cells_read
    @cell["#{row},#{col}"]
  end

  # returns the open-office type of a cell
  def celltype(row,col)
    read_cells unless @cells_read
    @cell_type["#{row},#{col}"]
  end


  # returns an array of sheets in the spreadsheet
  def sheets
    return_sheets = []
    oo_document_count = 0
    @doc.each_element do |oo_document|
      oo_document_count += 1
      oo_element_count = 0
      oo_document.each_element do |oo_element|
        oo_element_count += 1
        if oo_element.name == "body"
          oo_element.each_element do |be|
            if be.name == "spreadsheet"
              be.each_element do |se|
                if se.name == "table"
                  return_sheets << se.attributes['name']
                end
              end
            end
          end
        end
      end
    end
    return_sheets
  end

  # set the working sheet in the document
  def default_sheet=(s)
    @default_sheet = s
  end

  # version of the openoffice document 
  def officeversion
    read_cells unless @cells_read
    @officeversion
  end

  def to_s
    read_cells unless @cells_read
    @cell.inspect
  end

  # returns all values in this row as an array
  # row numbers are 1,2,3,... like in the spreadsheet
  def row(rownumber)
    read_cells unless @cells_read
    result = []
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      x = x.to_i
      y = y.to_i
      if y == rownumber
        result[x-1,rownumber] = value
      end
    }
    result
  end
  
  # returns the number of the last non-empty row
  def last_row
    read_cells unless @cells_read
    result = 0
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      y = y.to_i
      result = [result, y].max
    }
    result
  end

  # returns the number of the last non-empty column
  def last_column
    read_cells unless @cells_read
    result = 0
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      x = x.to_i
      result = [result, x].max
    }
    result
  end

  # returns the number of the first non-empty row
  def first_row
    read_cells unless @cells_read
    result = 999_999 # more than a spreadsheet can hold
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      y = y.to_i
      result = [result, y].min
    }
    result
  end

  # returns the number of the first non-empty column
  def first_column
    read_cells unless @cells_read
    result = 999_999 # more than a spreadsheet can hold
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      x = x.to_i
      result = [result, x].min
    }
    result
  end

  def first_column_as_letter
    number_to_letter(first_column)
  end

  def last_column_as_letter
    number_to_letter(last_column)
  end

private

  def number_to_letter(n)
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[n-1,1]
  end

  # read all cells in the selected sheet
  def read_cells
    oo_document_count = 0
    @doc.each_element do |oo_document|
      @officeversion = oo_document.attributes['version']
      oo_document_count += 1
      oo_element_count = 0
      oo_document.each_element do |oo_element|
        oo_element_count += 1
        # p oo_element.name
        if oo_element.name == "body"
          # puts "Body gefunden "
          oo_element.each_element do |be|
            # p be.name
            if be.name == "spreadsheet"
              be.each_element do |se|
                # p se
                if se.name == "table"
                  if se.attributes['name']==@default_sheet

                  x=1
                  y=1
                  # puts "table gefunden"
                  #se.each_element
                  se.each_element do |te|
                    # p te.name
                    if te.name == "table-column"
                      # p te.attributes
                      rep = te.attributes["number-columns-repeated"]
                      # p "rep = "+rep.to_s
                    elsif te.name == "table-row"
                      if te.attributes['number-rows-repeated']
                        skip_y = te.attributes['number-rows-repeated'].to_i 
                        y = y + skip_y - 1 # minus 1 because this line will be counted as a line element
                      end
                      # p te
                      te.each_element do |tr|
                        # p tr
                        if tr.name == 'table-cell'
                           skip = tr.attributes['number-columns-repeated']
                           if skip
                             x += (skip.to_i - 1)
                           end
                           vt = tr.attributes['value-type']
                           v  = tr.attributes['value']
                           # puts "#{vt} #{v}"
                           @cell_type["#{y},#{x}"] = vt
                           if @cell_type["#{y},#{x}"] == 'float'
                             @cell["#{y},#{x}"] = v.to_f
                           elsif @cell_type["#{y},#{x}"] == 'string'
                             tr.each_element do |str|
                               if str.name == 'p'
                                 @cell["#{y},#{x}"] = str.text
                               end
                             end
                           elsif @cell_type["#{y},#{x}"] == 'date'
                             @cell["#{y},#{x}"] = tr.attributes['date-value']
                           else
                             @cell["#{y},#{x}"] = v
                           end
                           x += 1
                        end
                      end
                      y += 1
                      x = 1
                    end
                  end
                  # p se.attributes['name']
     #             return_sheets << se.attributes['name']
                end # richtiges sheet
                end
              end
            end
          end
        end
      end
      # puts oo_element_count.to_s+" oo_element_count "
    end
    # puts oo_document_count.to_s+" oo_document_count "
    # p @cell
    @cells_read = true
  end

  def process_zipfile(zip, path='')
    if zip.file.file? path
      # puts %{#{path}: "#{zip.read(path)}"}
      # puts %{#{path}:}
     
      if path == "content.xml"
        open(@tmpdir+'/'+'roo_content.xml','w') {|f|
          f << zip.read(path) 
        }
      end 
    else
      unless path.empty?
        path += '/' 
        # puts path
      end 
      zip.dir.foreach(path) do |filename|
        process_zipfile(zip, path+filename)
      end 
    end 
  end


  def extract_content
    Zip::ZipFile.open(@filename) do |zip|
      process_zipfile(zip)
    end
  end
    
end