require 'rubygems'
require 'rexml/document'
require 'fileutils'
require 'zip/zipfilesystem'
require 'date'

class Fixnum
  def as_letter
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[self-1,1]
  end
end

class Openoffice

  def initialize(filename)
    @cells_read = false
    @filename = filename
    @tmpdir = "oo_"+$$.to_s
    unless File.exists?(@tmpdir)
      FileUtils::mkdir(@tmpdir) 
    end
    extract_content
    file = File.new(File.join(@tmpdir, "roo_content.xml")) 
    @doc = REXML::Document.new file
    file.close
    @cell = Hash.new
    @cell_type = Hash.new
    if DateTime.now > Date.new(2007,5,31)
      FileUtils::rm_r(@tmpdir)
    end
    @default_sheet = nil
  end

  # return the content of a spreadsheet-cell
  # (1,1) is the upper left corner
  # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
  # cell at first line, first row
  def cell(row,col)
    read_cells unless @cells_read
    row,col = normalize(row,col)
    @cell["#{row},#{col}"]
  end

  # returns the open-office type of a cell
  def celltype(row,col)
    read_cells unless @cells_read
    row,col = normalize(row,col)
    @cell_type["#{row},#{col}"]
  end


  # returns an array of sheets in the spreadsheet
  def sheets
    return_sheets = []
    oo_document_count = 0
    @doc.each_element do |oo_document|
      oo_document_count += 1
      oo_element_count = 0
      oo_document.each_element do |oo_element|
        oo_element_count += 1
        if oo_element.name == "body"
          oo_element.each_element do |be|
            if be.name == "spreadsheet"
              be.each_element do |se|
                if se.name == "table"
                  return_sheets << se.attributes['name']
                end
              end
            end
          end
        end
      end
    end
    return_sheets
  end

  # set the working sheet in the document
  def default_sheet=(s)
    @default_sheet = s
  end

  # version of the openoffice document 
  def officeversion
    read_cells unless @cells_read
    @officeversion
  end

  # shows the internal representation of all cells
  # mainly for debugging purposes
  def to_s
    read_cells unless @cells_read
    @cell.inspect
  end

  # returns all values in this row as an array
  # row numbers are 1,2,3,... like in the spreadsheet
  def row(rownumber)
    read_cells unless @cells_read
    result = []
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      x = x.to_i
      y = y.to_i
      if y == rownumber
        result[x-1,rownumber] = value
      end
    }
    result
  end
  
  # returns the number of the last non-empty row
  def last_row
    read_cells unless @cells_read
    result = 0
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      y = y.to_i
      result = [result, y].max if value
    }
    result
  end

  # returns the number of the last non-empty column
  def last_column
    read_cells unless @cells_read
    result = 0
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      x = x.to_i
      result = [result, x].max if value
    }
    result
  end

  # returns the number of the first non-empty row
  def first_row
    read_cells unless @cells_read
    result = 999_999 # more than a spreadsheet can hold
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      y = y.to_i
      result = [result, y].min if value
    }
    result
  end

  # returns the number of the first non-empty column
  def first_column
    read_cells unless @cells_read
    result = 999_999 # more than a spreadsheet can hold
    @cell.each_pair {|key,value|
      y,x = key.split(',')
      x = x.to_i
      result = [result, x].min if value
    }
    result
  end

  def first_column_as_letter
    number_to_letter(first_column)
  end

  def last_column_as_letter
    number_to_letter(last_column)
  end

  def first_row_as_letter
    number_to_letter(first_row)
  end

  def last_row_as_letter
    number_to_letter(last_row)
  end

  def as_letter(n)
    number_to_letter(last_row)
  end

  def number_to_letter(n)
    letters=""
    while n > 0
      num = n%26
      letters = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num-1,1] + letters
      n = n.div(26)
     end
     letters
  end

  def empty?(row, col)
    read_cells unless @cells_read
    return true unless cell(row, col)
    return true if celltype(row, col) == "string" && cell(row, col).empty?
    false
  end

private

  # read all cells in the selected sheet
  def read_cells
    oo_document_count = 0
    @doc.each_element do |oo_document|
      @officeversion = oo_document.attributes['version']
      oo_document_count += 1
      oo_element_count = 0
      oo_document.each_element do |oo_element|
        oo_element_count += 1
        # p oo_element.name
        if oo_element.name == "body"
          # puts "Body gefunden "
          oo_element.each_element do |be|
            # p be.name
            if be.name == "spreadsheet"
              be.each_element do |se|
                # p se
                if se.name == "table"
                  if se.attributes['name']==@default_sheet

                  x=1
                  y=1
                  # puts "table gefunden"
                  #se.each_element
                  se.each_element do |te|
                    # p te.name
                    if te.name == "table-column"
                      # p te.attributes
                      rep = te.attributes["number-columns-repeated"]
                      # p "rep = "+rep.to_s
                    elsif te.name == "table-row"
                      if te.attributes['number-rows-repeated']
                        skip_y = te.attributes['number-rows-repeated'].to_i 
                        y = y + skip_y - 1 # minus 1 because this line will be counted as a line element
                      end
                      # p te
                      te.each_element do |tr|
                        # p tr
                        if tr.name == 'table-cell'
                           skip = tr.attributes['number-columns-repeated']
                           if skip
                             x += (skip.to_i - 1)
                           end
                           vt = tr.attributes['value-type']
                           v  = tr.attributes['value']
                           # puts "#{vt} #{v}"
                           @cell_type["#{y},#{x}"] = vt
                           if @cell_type["#{y},#{x}"] == 'float'
                             @cell["#{y},#{x}"] = v.to_f
                           elsif @cell_type["#{y},#{x}"] == 'string'
                             tr.each_element do |str|
                               if str.name == 'p'
                                 @cell["#{y},#{x}"] = str.text
                               end
                             end
                           elsif @cell_type["#{y},#{x}"] == 'date'
                             @cell["#{y},#{x}"] = tr.attributes['date-value']
                           else
                             @cell["#{y},#{x}"] = v
                           end
                           x += 1
                        end
                      end
                      y += 1
                      x = 1
                    end
                  end
                end # sheet 
                end
              end
            end
          end
        end
      end
    end
    @cells_read = true
  end

  def process_zipfile(zip, path='')
    if zip.file.file? path
      if path == "content.xml"
        open(@tmpdir+'/'+'roo_content.xml','w') {|f|
          f << zip.read(path) 
        }
      end 
    else
      unless path.empty?
        path += '/' 
      end 
      zip.dir.foreach(path) do |filename|
        process_zipfile(zip, path+filename)
      end 
    end 
  end


  def extract_content
    Zip::ZipFile.open(@filename) do |zip|
      process_zipfile(zip)
    end
  end

  def Openoffice.letter_to_number(letters)
    result = 0
    while letters && letters.length > 0
      character = letters[0,1].upcase
      num = "ABCDEFGHIJKLMNOPQRSTUVWXYZ".index(character)+1
      result = result * 26 + num
      letters = letters[1..-1]
    end
    result
  end

  # converts cell coordinate to numeric values of row,col
  def normalize(row,col)
    if row.class == String
      if col.class == Fixnum
        # ('A',1):
        # ('B', 5) -> (5, 2)
        row, col = col, row
      else
        raise FormatError
      end
    end
    if col.class == String
      col = Openoffice.letter_to_number(col)
    end 
    return row,col
  end 

end