# A data frame with named columns and, optionally, named rows. It supports
# calculations (usually on the columns) and can transpose its matrix,
# keeping the transposed matrix stored until the object is tainted.
class DataFrame
  
  class << self
    
    # Builds a data frame from CSV data; the first CSV row supplies the labels.
    # The source can be given in several ways:
    #   DataFrame.from_csv(csv_contents)
    #   DataFrame.from_csv(filename)
    #   DataFrame.from_csv(url)
    # Options are passed on to the CSV parser. Custom FasterCSV converters
    # must be registered before calling this method:
    #   FasterCSV::Converters[:special] = lambda{|f| f == 'foo' ? 'bar' : 'foo'}
    #   DataFrame.from_csv('http://example.com/my_special_url.csv', :converters => :special)
    # That converter returns 'bar' where 'foo' was found and 'foo' everywhere else.
    # Returns the new frame, or nil when no labels or rows could be read.
    def from_csv(obj, opts={})
      header, body = infer_csv_contents(obj, opts)
      inferred_name = infer_name_from_contents(obj, opts)
      if header && body
        frame = new(*header)
        frame.import(body)
        frame.name = inferred_name
        frame
      end
    end
    
    protected
    
      # Only works for names sources, urls and files
      def infer_name_from_contents(obj, opts={})
        begin
          File.split(obj).last.split('.')[0..-2].join('.').titleize
        rescue
          nil
        end
      end
      
      # Resolves +obj+ to CSV text and parses it. The text is taken, in order,
      # from an existing file, from whatever +open+ can read (for example a
      # URL when open-uri is loaded), or from +obj+ itself when it is a String.
      # +opts+ is merged over default_csv_opts and passed to the parser.
      # Returns [labels, rows], where labels is the first parsed row (nil for
      # empty input) and trailing empty rows are removed; returns nil when no
      # contents could be obtained.
      def infer_csv_contents(obj, opts={})
        contents = File.read(obj) if File.exist?(obj)
        begin
          # NOTE(review): Kernel#open treats a leading "|" as a command on
          # Ruby versions that still support it; do not pass untrusted input.
          open(obj) {|f| contents = f.read} unless contents
        rescue
          # Best effort only: fall through and treat obj as raw CSV text.
          nil
        end
        contents ||= obj if obj.is_a?(String)
        return nil unless contents
        table = FCSV.parse(contents, default_csv_opts.merge(opts))
        labels = table.shift
        # table.last is nil once the table is empty (header-only or empty
        # input), so check it before asking whether the row is empty.
        table.pop while table.last && table.last.empty?
        [labels, table]
      end
      
      # Parser options applied to every CSV load unless overridden by the caller.
      def default_csv_opts
        {:converters => :all}
      end
  end
  
  # Include the methods from arff.rb
  include ARFF
  
  # Adds many rows at once by passing each element of +rows+ to add_item.
  # +rows+ is expected to be an array of arrays; nothing is validated here.
  # Returns whatever +rows.each+ returns (the collection itself for an Array).
  def import(rows)
    rows.each { |row| add_item(row) }
  end
  
  # Short summary: the number of rows and the column labels.
  def inspect
    row_count = rows.size
    label_list = labels.inspect
    "DataFrame rows: #{row_count} labels: #{label_list}"
  end
  
  # The column labels of the data items, in column order. The constructor
  # stores them after converting each one with to_underscore_sym.
  # Also available as +variables+.
  attr_reader :labels
  alias :variables :labels
  
  # The items (rows) stored in the frame; the constructor sets this to a
  # TransposableArray.
  attr_reader :items
  
  # An optional name, useful for arff files. from_csv assigns the name it
  # infers from the source.
  attr_accessor :name
  
  # Creates an empty frame. Every label is converted with to_underscore_sym
  # and the row container starts out as an empty TransposableArray.
  def initialize(*labels)
    @items = TransposableArray.new
    @labels = labels.map { |label| label.to_underscore_sym }
  end
  
  # Appends one row to the frame and returns the result of the append.
  # Also available as +add+.
  def add_item(item)
    items << item
  end
  alias :add :add_item
  
  # The optional row labels; created as an empty array on first access.
  def row_labels
    @row_labels = [] unless @row_labels
    @row_labels
  end
  
  # Replaces the row labels.
  # Raises ArgumentError unless +ary+ is an Array.
  def row_labels=(ary)
    unless ary.kind_of?(Array)
      raise ArgumentError, "Row labels must be an array"
    end
    @row_labels = ary
  end
  
  # Returns the values of the column labelled +sym+, read from the
  # transposed rows, or nil when there is no such label.
  def render_column(sym)
    position = @labels.index(sym)
    position ? @items.transpose[position] : nil
  end
  
  # The rows as an array of arrays. This is an alias for items, so it
  # returns the very same row container.
  alias :rows :items
  
  # The columns keyed by label, as a Dictionary when that class is defined
  # and a Hash otherwise.
  # The result is cached; call columns(true) to discard the cache and rebuild.
  def columns(reset=false)
    @columns = nil if reset
    unless @columns
      store = defined?(Dictionary) ? Dictionary.new : Hash.new
      @items.transpose.each_with_index do |col, position|
        store[@labels[position]] = col
      end
      @columns = store
    end
    @columns
  end
  # to_hash and to_dictionary are aliases for columns: same cached result,
  # same optional reset argument.
  alias :to_hash :columns
  alias :to_dictionary :columns
  
  # Returns the row whose row label is +sym+, or nil when no row label matches.
  def render_row(sym)
    position = row_labels.index(sym)
    @items[position] if position
  end
  
  # Dynamic lookup, tried in this order: a column label returns that column,
  # a row label returns that row, anything the row container responds to is
  # forwarded to it (with arguments and block), and everything else falls
  # through to the inherited handler.
  def method_missing(sym, *args, &block)
    return render_column(sym) if labels.include?(sym)
    return render_row(sym) if row_labels.include?(sym)
    return @items.send(sym, *args, &block) if @items.respond_to?(sym)
    super
  end
  
  # Removes each of the given columns in turn and returns the frame itself.
  def drop!(*labels)
    labels.each { |label| drop_one!(label) }
    self
  end
  
  # Removes the column +label+ from every row and from the label list.
  # Returns the frame, or nil when there is no such label.
  def drop_one!(label)
    i = self.labels.index(label)
    return nil unless i
    self.items.each { |item| item.delete_at(i) }
    self.labels.delete_at(i)
    # The rows were edited in place, which the row container cannot notice.
    # Taint it, as append! does, so a stored transpose is not reused.
    self.items.taint if self.items.respond_to?(:taint)
    self
  end
  protected :drop_one!
  
  # Replaces the contents of +column+ and returns the frame.
  # With +values+, row i receives values[i]. Without it, the block is called
  # with each current value and its result becomes the new value.
  # Raises ArgumentError for an unknown column (via validate_column) or when
  # neither values nor a block is given.
  def replace!(column, values=nil, &block)
    column = validate_column(column)
    unless values
      raise ArgumentError, "Must provide either replacement values or a block" unless block
      # Read the column explicitly: dispatching on the column name would call
      # a real method when a column is named like one (e.g. :name).
      # map (not map!) leaves the array handed back by render_column intact.
      values = render_column(column).map { |e| block.call(e) }
    end
    replace_column(column, values)
    self
  end
  
  # Writes values[i] into +column+ of row i, for every row.
  # The rows are then placed in a fresh TransposableArray, so the frame keeps
  # its row container type and nothing computed from the old contents is
  # carried over. Returns the new row container.
  # Raises ArgumentError for an unknown column (via validate_column).
  def replace_column(column, values)
    column = validate_column(column)
    index = self.labels.index(column)
    rebuilt = TransposableArray.new
    self.items.each_with_index do |item, i|
      item[index] = values[i]
      rebuilt << item
    end
    @items = rebuilt
  end
  protected :replace_column
  
  # Converts +column+ to a symbol and returns it if it names an existing column.
  # Raises ArgumentError otherwise.
  def validate_column(column)
    column = column.to_sym
    return column if self.labels.include?(column)
    raise ArgumentError, "Must provide the name of an existing column.  Provided #{column.inspect}, needed to provide one of #{self.labels.inspect}"
  end
  protected :validate_column
  
  # Keeps only the rows for which the block returns a true value, and
  # returns the frame. Each row is passed through cast_row first, so +as+
  # may be Array (the default), Hash, OpenStruct or another class, or a
  # Symbol/String naming one. Note, don't try this with a symbol or string
  # unless you have facets available.
  def filter!(as=Array, &block)
    as = infer_class(as)
    # Collect into a TransposableArray so the frame keeps its row container
    # type instead of being downgraded to a plain Array.
    kept = TransposableArray.new
    self.items.each do |row|
      kept << row if block.call(cast_row(row, as))
    end
    @items = kept
    # The row set changed, so any cached columns (possibly shared with the
    # frame this one was cloned from) no longer describe it.
    @columns = nil
    self
  end
  
  # Non-destructive filter!: clones the frame, filters the clone and returns it.
  def filter(as=Array, &block)
    clone.filter!(as, &block)
  end
  
  # Resolves a Symbol or String to a constant via classify and constantize.
  # Any other object is returned unchanged.
  def infer_class(obj)
    resolved = obj.is_a?(Symbol) ? obj.to_s.classify.constantize : obj
    resolved.is_a?(String) ? resolved.classify.constantize : resolved
  end
  protected :infer_class
  
  # Presents +row+ in the form requested by +as+:
  #   Array      -> the row itself
  #   Hash       -> a hash of label => value
  #   OpenStruct -> an OpenStruct with one attribute per label
  #   otherwise  -> as.new(*row)
  def cast_row(row, as)
    # Checked first so the default path never has to reference OpenStruct.
    return row if as == Array
    if as == Hash || (defined?(OpenStruct) && as == OpenStruct)
      by_label = {}
      self.labels.each_with_index { |label, i| by_label[label] = row[i] }
      # Build the struct through its constructor: OpenStruct#table is
      # protected and cannot be written to from here.
      return as == Hash ? by_label : OpenStruct.new(by_label)
    end
    as.new(*row)
  end
  protected :cast_row
  
  # Creates a new data frame, only with the specified columns (kept in their
  # original order). Labels in +cols+ that the frame does not have are
  # ignored. The receiver is left unchanged.
  def subset_from_columns(*cols)
    new_labels = self.labels.select { |label| cols.include?(label) }
    new_data_frame = DataFrame.new(*self.labels)
    # Copy each row first: drop! deletes values in place, which would
    # otherwise strip the same columns from this frame's own rows.
    new_data_frame.import(self.items.map { |row| row.dup })
    new_data_frame.drop!(*(self.labels - new_labels))
    new_data_frame
  end
  
  # A weird name. For each listed column this appends one new column per
  # category, named "<column>_<category>", holding true/false per row.
  # Hashes may be mixed in with the column names and are merged into the
  # options. With :allow_overlap a row is marked when the category is among
  # all_categories of its value; otherwise when its category_map entry
  # equals the category.
  def j_binary_ize!(*columns)
    # Separate the option hashes from the column names.
    options = {}
    columns.each { |arg| options.merge!(arg) if arg.is_a?(Hash) }
    columns.delete_if { |arg| arg.is_a?(Hash) }

    # Generate the new columns.
    columns.each do |col|
      values = render_column(col.to_underscore_sym)
      values.categories.each do |category|
        full_name = "#{col}_#{category}".to_sym
        flags =
          if options[:allow_overlap]
            memberships = values.map { |e| values.all_categories(e) }
            memberships.map { |found| found.include?(category) }
          else
            values.category_map.map { |e| e == category }
          end
        append!(full_name, flags)
      end
    end
  end
  
  # Adds a unique column to the table. The name is normalized with
  # to_underscore_sym before it is checked and stored. When +value+ is an
  # Array, row i receives value[i]; otherwise every row receives +value+.
  # Raises ArgumentError when the normalized name is already a label.
  def append!(column_name, value=nil)
    # Compare the normalized label, since that is what gets stored; checking
    # the raw name would let "Foo Bar" through next to :foo_bar.
    label = column_name.to_underscore_sym
    raise ArgumentError, "Can't have duplicate column names" if self.labels.include?(label)
    self.labels << label
    if value.is_a?(Array)
      self.items.each_with_index { |item, i| item << value[i] }
    else
      self.items.each { |item| item << value }
    end
    # The rows were changed in place, so the row container doesn't know it
    # has been changed; taint it so a stored transpose is not reused.
    self.items.taint if self.items.respond_to?(:taint)
  end
  
  # Returns a filtered copy of the frame. +hash+ maps column names to an
  # accepted value, or to an Array or Range of accepted values; a row is
  # kept only if it matches every entry. Keys that are not labels are skipped.
  def filter_by_category(hash)
    copy = dup
    hash.each_pair do |raw_key, wanted|
      column = raw_key.to_underscore_sym
      next unless labels.include?(column)
      accepted = (wanted.is_a?(Array) || wanted.is_a?(Range)) ? wanted : [wanted]
      copy.filter!(:hash) { |row| accepted.include?(row[column]) }
    end
    copy
  end

  # In-place filter_by_category: each entry of +hash+ maps a column name to
  # an accepted value, or to an Array or Range of accepted values, and rows
  # that do not match are removed from this frame. Keys that are not labels
  # are skipped. Returns the result of iterating +hash+.
  def filter_by_category!(hash)
    hash.each_pair do |raw_key, wanted|
      column = raw_key.to_underscore_sym
      next unless labels.include?(column)
      accepted = (wanted.is_a?(Array) || wanted.is_a?(Range)) ? wanted : [wanted]
      filter!(:hash) { |row| accepted.include?(row[column]) }
    end
  end
    
end