require 'rubygems'
gem 'libxml-ruby', '>= 0.8.3'
require 'xml'
require 'fileutils'
require 'zip/zipfilesystem'
require 'date'

class  String
  def end_with?(str)
    self[-str.length,str.length] == str
  end
end

class Excelx < GenericSpreadsheet
  FORMATS = {
    'General' => :float,
    '0' => :float,
    '0.00' => :float,
    '#,##0' => :float,
    '#,##0.00' => :float,
    '0%' => :percentage,
    '0.00%' => :percentage,
    '0.00E+00' => :float,
    '# ?/?' => :float, #??? TODO:
    '# ??/??' => :float, #??? TODO:
    'mm-dd-yy' => :date,
    'd-mmm-yy' => :date,
    'd-mmm' => :date,
    'mmm-yy' => :date,
    'h:mm AM/PM' => :date,
    'h:mm:ss AM/PM' => :date,
    'h:mm' => :time,
    'h:mm:ss' => :time,
    'm/d/yy h:mm' => :date,
    '#,##0 ;(#,##0)' => :float,
    '#,##0 ;[Red](#,##0)' => :float,
    '#,##0.00;(#,##0.00)' => :float,
    '#,##0.00;[Red](#,##0.00)' => :float,
    'mm:ss' => :time,
    '[h]:mm:ss' => :time,
    'mmss.0' => :time,
    '##0.0E+0' => :float,
    '@' => :float,
    #-- zusaetzliche Formate, die nicht standardmaessig definiert sind:
    "yyyy\\-mm\\-dd" => :date,
    'dd/mm/yy' => :date,
    'hh:mm:ss' => :time,
    "dd/mm/yy\\ hh:mm" => :datetime,
  }
  STANDARD_FORMATS = { 
    0 => 'General',
    1 => '0',
    2 => '0.00',
    3 => '#,##0',
    4 => '#,##0.00',
    9 => '0%',
    10 => '0.00%',
    11 => '0.00E+00',
    12 => '# ?/?',
    13 => '# ??/??',
    14 => 'mm-dd-yy',
    15 => 'd-mmm-yy',
    16 => 'd-mmm',
    17 => 'mmm-yy',
    18 => 'h:mm AM/PM',
    19 => 'h:mm:ss AM/PM',
    20 => 'h:mm',
    21 => 'h:mm:ss',
    22 => 'm/d/yy h:mm',
    37 => '#,##0 ;(#,##0)',
    38 => '#,##0 ;[Red](#,##0)',
    39 => '#,##0.00;(#,##0.00)',
    40 => '#,##0.00;[Red](#,##0.00)',
    45 => 'mm:ss',
    46 => '[h]:mm:ss',
    47 => 'mmss.0',
    48 => '##0.0E+0',
    49 => '@',
  }
  @@nr = 0

  # initialization and opening of a spreadsheet file
  # values for packed: :zip
  def initialize(filename, packed=nil, file_warning = :error) #, create = false)
    super()
    @file_warning = file_warning
    @tmpdir = "oo_"+$$.to_s
    @tmpdir = File.join(ENV['ROO_TMP'], @tmpdir) if ENV['ROO_TMP'] 
    unless File.exists?(@tmpdir)
      FileUtils::mkdir(@tmpdir)
    end
    filename = open_from_uri(filename) if filename[0,7] == "http://"
    filename = unzip(filename) if packed and packed == :zip
    begin
      file_type_check(filename,'.xlsx','an Excel-xlsx')
      @cells_read = Hash.new
      @filename = filename
      unless File.file?(@filename)
        raise IOError, "file #{@filename} does not exist"
      end
      @@nr += 1
      @file_nr = @@nr
      extract_content(@filename)
      file = File.new(File.join(@tmpdir, @file_nr.to_s+"_roo_workbook.xml"))
      @workbook_doc = XML::Parser.io(file).parse
      file.close
      @shared_table = []
      if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
        file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_sharedStrings.xml'))
        @sharedstring_doc = XML::Parser.io(file).parse
        file.close
        read_shared_strings(@sharedstring_doc)
      end
      @styles_table = []
      @style_definitions = Array.new { |h,k| h[k] = {} }
      if File.exist?(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
        file = File.new(File.join(@tmpdir, @file_nr.to_s+'_roo_styles.xml'))
        @styles_doc = XML::Parser.io(file).parse
        file.close
        read_styles(@styles_doc)
      end
      @sheet_doc = []
      @sheet_files.each_with_index do |item, i|
        file = File.new(item)
        @sheet_doc[i] = XML::Parser.io(file).parse
        file.close
      end
    ensure
      #if ENV["roo_local"] != "thomas-p"
      FileUtils::rm_r(@tmpdir)
      #end
    end
    @default_sheet = nil
    # no need to set default_sheet if there is only one sheet in the document
    if self.sheets.size == 1
      @default_sheet = self.sheets.first
    end
    @cell = Hash.new
    @cell_type = Hash.new
    @formula = Hash.new
    @first_row = Hash.new
    @last_row = Hash.new
    @first_column = Hash.new
    @last_column = Hash.new
    @header_line = 1
    @excelx_type = Hash.new
    @excelx_value = Hash.new
    @s_attribute = Hash.new # TODO: ggf. wieder entfernen nur lokal benoetigt
  end

  # Returns the content of a spreadsheet-cell.
  # (1,1) is the upper left corner.
  # (1,1), (1,'A'), ('A',1), ('a',1) all refers to the
  # cell at the first line and first row.
  def cell(row, col, sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    if celltype(row,col,sheet) == :date
      yyyy,mm,dd = @cell[sheet][[row,col]].split('-')
      return Date.new(yyyy.to_i,mm.to_i,dd.to_i)
    elsif celltype(row,col,sheet) == :datetime
      date_part,time_part = @cell[sheet][[row,col]].split(' ')
      yyyy,mm,dd = date_part.split('-')
      hh,mi,ss = time_part.split(':')
      return DateTime.civil(yyyy.to_i,mm.to_i,dd.to_i,hh.to_i,mi.to_i,ss.to_i)
      
    end
    @cell[sheet][[row,col]]
  end

  # Returns the formula at (row,col).
  # Returns nil if there is no formula.
  # The method #formula? checks if there is a formula.
  def formula(row,col,sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    if @formula[sheet][[row,col]] == nil
      return nil
    else
      return @formula[sheet][[row,col]]
    end
  end

  # true, if there is a formula
  def formula?(row,col,sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    formula(row,col) != nil
  end
  
  class Font
    attr_accessor :bold, :italic, :underline
    
    def bold?
      @bold == true
    end
        
    def italic? 
     @italic == true
    end
    
    def underline?
      @underline == true
    end    
  end
  
  # Given a cell, return the cell's style
  def font(row, col, sheet=nil)
   sheet = @default_sheet unless sheet
   read_cells(sheet) unless @cells_read[sheet]
   row,col = normalize(row,col)
   s_attribute = @s_attribute[sheet][[row,col]]
   s_attribute ||= 0
   s_attribute = s_attribute.to_i
   @style_definitions[s_attribute]
  end 

  # set a cell to a certain value
  # (this will not be saved back to the spreadsheet file!)
  def set(row,col,value,sheet=nil) #:nodoc:
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    set_value(row,col,value,sheet)
    if value.class == Fixnum
      set_type(row,col,:float,sheet)
    elsif value.class == String
      set_type(row,col,:string,sheet)
    elsif value.class == Float
      set_type(row,col,:string,sheet)
    else
      raise ArgumentError, "Type for "+value.to_s+" not set"
    end
  end

  # returns the type of a cell:
  # * :float
  # * :string,
  # * :date
  # * :percentage
  # * :formula
  # * :time
  # * :datetime
  def celltype(row,col,sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    if @formula[sheet][[row,col]]
      return :formula
    else
      @cell_type[sheet][[row,col]]
    end
  end

  # returns the internal type of an excel cell
  # * :numeric_or_formula
  # * :string  
  # Note: this is only available within the Excelx class 
  def excelx_type(row,col,sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    return @excelx_type[sheet][[row,col]]
  end
  
  # returns the internal value of an excelx cell
  # Note: this is only available within the Excelx class 
  def excelx_value(row,col,sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    return @excelx_value[sheet][[row,col]]
  end
  
  # returns the internal format of an excel cell
  def excelx_format(row,col,sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    row,col = normalize(row,col)
    s = @s_attribute[sheet][[row,col]]
    result = attribute2format(s)
    result
  end
  
  # returns an array of sheet names in the spreadsheet
  def sheets
    return_sheets = []
    @workbook_doc.find("//*[local-name()='sheet']").each do |sheet|
      return_sheets << sheet.attributes.to_h['name']
    end
    return_sheets
  end
  # shows the internal representation of all cells
  # for debugging purposes
  def to_s(sheet=nil)
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    @cell[sheet].inspect
  end

  # returns each formula in the selected sheet as an array of elements
  # [row, col, formula]
  def formulas(sheet=nil)
    theformulas = Array.new
    sheet = @default_sheet unless sheet
    read_cells(sheet) unless @cells_read[sheet]
    first_row(sheet).upto(last_row(sheet)) {|row|
      first_column(sheet).upto(last_column(sheet)) {|col|
        if formula?(row,col,sheet)
          f = [row, col, formula(row,col,sheet)]
          theformulas << f
        end
      }
    }
    theformulas
  end

  private

  # helper function to set the internal representation of cells
  def set_cell_values(sheet,x,y,i,v,vt,formula,tr,str_v,
      excelx_type=nil,
      excelx_value=nil,
      s_attribute=nil)
    key = [y,x+i]
    @cell_type[sheet] = {} unless @cell_type[sheet]
    @cell_type[sheet][key] = vt
    @formula[sheet] = {} unless @formula[sheet]
    @formula[sheet][key] = formula  if formula
    @cell[sheet]    = {} unless @cell[sheet]
    case @cell_type[sheet][key]
    when :float
      @cell[sheet][key] = v.to_f
    when :string
      @cell[sheet][key] = str_v
    when :date
      @cell[sheet][key] = (Date.new(1899,12,30)+v.to_i).strftime("%Y-%m-%d") 
    when :datetime
      @cell[sheet][key] = (DateTime.new(1899,12,30)+v.to_f).strftime("%Y-%m-%d %H:%M:%S")
    when :percentage
      @cell[sheet][key] = v.to_f
    when :time
      @cell[sheet][key] = v.to_f*(24*60*60)
    else
      @cell[sheet][key] = v
    end
    @excelx_type[sheet] = {} unless @excelx_type[sheet]
    @excelx_type[sheet][key] = excelx_type
    @excelx_value[sheet] = {} unless @excelx_value[sheet]
    @excelx_value[sheet][key] = excelx_value
    @s_attribute[sheet] = {} unless @s_attribute[sheet]
    @s_attribute[sheet][key] = s_attribute
  end

  # splits a coordinate like "AA12" into the parts "AA" (String) and 12 (Fixnum)
  def split_coord(s)
    letter = ""
    number = 0
    i = 0
    while i<s.length and "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz".include?(s[i,1])
      letter += s[i,1]
      i+=1
    end
    while i<s.length and "0123456789".include?(s[i,1])
      number = number*10 + s[i,1].to_i
      i+=1
    end
    if letter=="" or number==0
      raise ArgumentError
    end
    return letter,number
  end

  def split_coordinate(str)
    letter,number = split_coord(str)
    x = GenericSpreadsheet.letter_to_number(letter)
    y = number
    return x,y
  end

  # read all cells in the selected sheet
  def format2type(format)
    if FORMATS.has_key? format
      FORMATS[format]
    else
      :float
    end
  end

  # read all cells in the selected sheet
  def read_cells(sheet=nil)
    sheet = @default_sheet unless sheet
    sheet_found = false
    raise ArgumentError, "Error: sheet '#{sheet||'nil'}' not valid" if @default_sheet == nil and sheet==nil
    raise RangeError unless self.sheets.include? sheet
    n = self.sheets.index(sheet)
    @sheet_doc[n].find("//*[local-name()='c']").each do |c|
       s_attribute = c.attributes.to_h['s'].to_i   # should be here
       if (c.attributes.to_h['t'] == 's')
         tmp_type = :shared
       elsif (c.attributes.to_h['t'] == 'b')
         tmp_type = :boolean
       else
       #  s_attribute = c.attributes.to_h['s'].to_i     # was here
         format = attribute2format(s_attribute)
         tmp_type = format2type(format)
       end
      formula = nil
      c.each_element do |cell|
        if cell.name == 'f'
          formula = cell.content
        end
        if cell.name == 'v'
          if tmp_type == :time or tmp_type == :datetime
            if cell.content.to_f >= 1.0 
              if (cell.content.to_f - cell.content.to_f.floor).abs > 0.000001 
                tmp_type = :datetime 
              else
                tmp_type = :date
              end
            else
            end 
          end
          excelx_type = [:numeric_or_formula,format]
          excelx_value = cell.content
          if tmp_type == :shared
            vt = :string
            str_v = @shared_table[cell.content.to_i]
            excelx_type = :string
          elsif tmp_type == :boolean
            vt = :boolean
            cell.content.to_i == 1 ? v = 'TRUE' : v = 'FALSE'
          elsif tmp_type == :date
            vt = :date
            v = cell.content
          elsif tmp_type == :time
            vt = :time
            v = cell.content
          elsif tmp_type == :datetime
            vt = :datetime
            v = cell.content
          elsif tmp_type == :formula
            vt = :formula
            v = cell.content.to_f #TODO: !!!!
          else
            vt = :float
            v = cell.content
          end
          #puts "vt: #{vt}" if cell.text.include? "22606.5120"
          x,y = split_coordinate(c.attributes.to_h['r'])
          tr=nil #TODO: ???s
          set_cell_values(sheet,x,y,0,v,vt,formula,tr,str_v,excelx_type,excelx_value,s_attribute)
        end
      end
    end
    sheet_found = true #TODO:
    if !sheet_found
      raise RangeError
    end
    @cells_read[sheet] = true
  end
  
  # Checks if the default_sheet exists. If not an RangeError exception is
  # raised
  def check_default_sheet
    sheet_found = false
    raise ArgumentError, "Error: default_sheet not set" if @default_sheet == nil
    
    sheet_found = true if sheets.include?(@default_sheet)
    
    if ! sheet_found
      raise RangeError, "sheet '#{@default_sheet}' not found"
    end
  end

  # extracts all needed files from the zip file
  def process_zipfile(zipfilename, zip, path='')
    @sheet_files = []
    Zip::ZipFile.open(zipfilename) {|zf|
      zf.entries.each {|entry|
        #entry.extract
        if entry.to_s.end_with?('workbook.xml')
          open(@tmpdir+'/'+@file_nr.to_s+'_roo_workbook.xml','wb') {|f|
            f << zip.read(entry)
          }
        end
        if entry.to_s.end_with?('sharedStrings.xml')
          open(@tmpdir+'/'+@file_nr.to_s+'_roo_sharedStrings.xml','wb') {|f|
            f << zip.read(entry)
          }
        end
        if entry.to_s.end_with?('styles.xml')
          open(@tmpdir+'/'+@file_nr.to_s+'_roo_styles.xml','wb') {|f|
            f << zip.read(entry)
          }
        end
        if entry.to_s =~ /sheet([0-9]+).xml$/
          nr = $1
          open(@tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}",'wb') {|f|
            f << zip.read(entry)
          }
          @sheet_files[nr.to_i-1] = @tmpdir+'/'+@file_nr.to_s+"_roo_sheet#{nr}"
        end
      }
    }
    return
  end

  # extract files from the zip file
  def extract_content(zipfilename)
    Zip::ZipFile.open(@filename) do |zip|
      process_zipfile(zipfilename,zip)
    end
  end

  # sets the value of a cell
  def set_value(row,col,value,sheet=nil)
    sheet = @default_value unless sheet
    @cell[sheet][[row,col]] = value
  end

  # sets the type of a cell
  def set_type(row,col,type,sheet=nil)
    sheet = @default_value unless sheet
    @cell_type[sheet][[row,col]] = type
  end

  # read the shared strings xml document
  def read_shared_strings(doc)
    doc.find("//*[local-name()='si']").each do |si|
      shared_table_entry = ''
      si.each_element do |elem|
        if (elem.name == 'r')
          elem.each_element do |r_elem|
            if (r_elem.name == 't')
              shared_table_entry << r_elem.content
            end
          end
        end
        if (elem.name == 't')
          shared_table_entry = elem.content
        end
      end
      @shared_table << shared_table_entry
    end
  end

  # read the styles elements of an excelx document
  def read_styles(doc)
    @numFmts = []
    @cellXfs = []
    fonts = []
    
    doc.find("//*[local-name()='numFmt']").each do |numFmt|
      numFmtId = numFmt.attributes.to_h['numFmtId']
      formatCode = numFmt.attributes.to_h['formatCode']
      @numFmts << [numFmtId, formatCode]
    end
    doc.find("//*[local-name()='fonts']").each do |fonts_el|
      fonts_el.each_element do |font_el|
        if font_el.name == 'font'
          font = Excelx::Font.new
          font_el.each_element do |font_sub_el|
            case font_sub_el.name
              when 'b'
                font.bold = true
              when 'i'
                font.italic = true
              when 'u'
                font.underline = true
              end
          end
          fonts << font
        end
      end
    end
    
    doc.find("//*[local-name()='cellXfs']").each do |xfs|
        xfs.each do |xf|
          numFmtId = xf.attributes.to_h['numFmtId']
          @cellXfs << [numFmtId]
          fontId = xf.attributes.to_h['fontId'].to_i
          @style_definitions << fonts[fontId]
        end
    end
  end

  # convert internal excelx attribute to a format
  def attribute2format(s)
    result = nil
    @numFmts.each {|nf|
      if nf.first == @cellXfs[s.to_i].first
        result = nf[1]
        break
      end
    }
    unless result
      id = @cellXfs[s.to_i].first.to_i
      if STANDARD_FORMATS.has_key? id
        result = STANDARD_FORMATS[id]
      end
    end
    result
  end

end # class