require File.join(File.dirname(__FILE__), "/spec_helper")

describe DataFrame do
  
  # Shared fixture: each example starts with the label list in @labels and a
  # DataFrame built from those labels in @df.
  before(:each) do
    @labels = %w[these are the labels].map { |name| name.to_sym }
    @df = DataFrame.new(*@labels)
  end
  
  # The labels handed to the constructor are expected back from #labels.
  it "should initialize with labels" do
    DataFrame.new(*@labels).labels.should eql(@labels)
  end
  
  # A freshly built frame is expected to hold an empty TransposableArray.
  it "should initialize with an empty items list" do
    rows = @df.items
    rows.should be_kind_of(TransposableArray)
    rows.should be_empty
  end
  
  # After a single add_item call, items is expected to contain just that row.
  it "should be able to add an item" do
    row = [1, 2, 3, 4]
    @df.add_item(row)
    @df.items.should eql([row])
  end

  # Checks that plain arrays respond to the statistics helpers #std and #cor
  # (presumably supplied by just_enumerable_stats, going by the example name).
  it "should use just_enumerable_stats" do
    sample = [1, 2, 3]
    sample.std.should eql(1)
    correlate = lambda { sample.cor([2, 3, 5]) }
    correlate.should_not raise_error
  end

  context "column and row operations" do
    # Loads three four-value rows into the shared frame before every example.
    before(:each) do
      [[1, 2, 3, 4], [5, 6, 7, 8], [9, 10, 11, 12]].each do |row|
        @df.add_item(row)
      end
    end

    # Calling a label as a method is expected to return that column's values.
    it "should have a method for every label, the column in the data frame" do
      first_column = @df.these
      first_column.should eql([1, 5, 9])
    end

    it "should make columns easily computable" do
      expected_std = [1, 5, 9].std
      @df.these.std.should eql(expected_std)
    end

    # Indexing, <<, last and map are exercised directly on the frame.
    it "should defer unknown methods to the items in the data frame" do
      @df[0].should eql([1, 2, 3, 4])
      appended = [13, 14, 15, 16]
      @df << appended
      @df.last.should eql(appended)
      row_totals = @df.map { |row| row.sum }
      row_totals.should eql([10, 26, 42, 58])
    end

    it "should allow optional row labels" do
      no_labels = []
      @df.row_labels.should eql(no_labels)
    end

    it "should have a setter for row labels" do
      names = [:other, :things, :here]
      @df.row_labels = names
      @df.row_labels.should eql([:other, :things, :here])
    end

    # Once row labels are set, a row label called as a method is expected to
    # return that row.
    it "should be able to access rows by their labels" do
      names = [:other, :things, :here]
      @df.row_labels = names
      last_row = @df.here
      last_row.should eql([9, 10, 11, 12])
    end

    it "should make rows easily computable" do
      names = [:other, :things, :here]
      @df.row_labels = names
      total = @df.here.sum
      total.should eql(42)
    end
  end
  
  # import takes a list of rows; each row is then read back by its row label.
  it "should be able to import more than one row at a time" do
    twos, threes, fours = [2] * 4, [3] * 4, [4] * 4
    @df.import([twos, threes, fours])
    @df.row_labels = [:twos, :threes, :fours]
    @df.twos.should eql([2, 2, 2, 2])
    @df.threes.should eql([3, 3, 3, 3])
    @df.fours.should eql([4, 4, 4, 4])
  end
  
  context "csv" do
    # Builds a frame from CSV text with a header row. The assertions expect the
    # header names back as lower-cased symbols and the numeric cells as integers.
    it "should compute easily from csv" do
      csv_text = %{X,Y,month,day,FFMC,DMC,DC,ISI,temp,RH,wind,rain,area
7,5,mar,fri,86.2,26.2,94.3,5.1,8.2,51,6.7,0,0
7,4,oct,tue,90.6,35.4,669.1,6.7,18,33,0.9,0,0
}
      expected_labels = [
        :x, :y, :month, :day, :ffmc, :dmc, :dc,
        :isi, :temp, :rh, :wind, :rain, :area
      ]

      @df = DataFrame.from_csv(csv_text)
      @df.labels.should eql(expected_labels)
      @df.x.should eql([7, 7])
      @df.area.should eql([0, 0])
    end
  end
  
  # Dropping one column is expected to remove its value from every row and its
  # name from the label list.
  it "should be able to remove a column" do
    @df = DataFrame.new :twos, :threes, :fours
    @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
    @df.drop!(:twos)
    # Guard against a vacuous pass: the per-row check below proves nothing if
    # the rows are missing.
    @df.items.size.should eql(4)
    # Iterate with each rather than all?: all? stops at the first falsy block
    # value, so whether later rows get checked would depend on what `should`
    # happens to return.
    @df.items.each { |row| row.should eql([3, 4]) }
    @df.labels.should eql([:threes, :fours])
  end
  
  # Dropping several columns in one call is expected to leave only the
  # remaining column in every row and in the label list.
  it "should be able to remove more than one column at a time" do
    @df = DataFrame.new :twos, :threes, :fours
    @df.import([[2,3,4], [2,3,4], [2,3,4], [2,3,4]])
    @df.drop!(:twos, :fours)
    # Guard against a vacuous pass: the per-row check below proves nothing if
    # the rows are missing.
    @df.items.size.should eql(4)
    # Iterate with each rather than all?: all? stops at the first falsy block
    # value, so whether later rows get checked would depend on what `should`
    # happens to return.
    @df.items.each { |row| row.should eql([3]) }
    @df.labels.should eql([:threes])
  end
  
  # columns is indexed by label and is expected to yield that column's values.
  it "should offer a hash-like structure of columns" do
    [[1, 2, 3, 4], [5, 6, 7, 8]].each { |row| @df.add(row) }
    by_label = @df.columns
    by_label[:these].should eql([1, 5])
    by_label[:are].should eql([2, 6])
    by_label[:the].should eql([3, 7])
    by_label[:labels].should eql([4, 8])
  end
  
  # rows and items are expected to return equal collections.
  it "should alias items with rows" do
    [[1, 2, 3, 4], [5, 6, 7, 8]].each { |row| @df.add(row) }
    expected_rows = @df.items
    @df.rows.should eql(expected_rows)
  end
  
  # to_hash is expected to produce one entry per label, each holding that
  # column's values. Order is not asserted, only membership and count.
  it "should be able to export a hash" do
    @df.add [1,2,3,4]
    @df.add [5, 6, 7, 8]
    hash = @df.to_hash
    values = [[1,5],[2,6],[3,7],[4,8]]
    hash.keys.size.should eql(@labels.size)
    # Iterate with each rather than all?: all? stops at the first falsy block
    # value, so whether later entries get checked would depend on what `should`
    # happens to return.
    hash.keys.each { |label| @labels.should include(label) }
    hash.values.size.should eql(@labels.size)
    hash.values.each { |column| values.should include(column) }
  end
  
  # Both labels and variables are expected to return the constructor's labels.
  it "should use variables like labels" do
    [@df.labels, @df.variables].each do |names|
      names.should eql(@labels)
    end
  end
  
  context "replace!" do
    # Two rows of data plus a reusable doubling lambda for the examples below.
    before(:each) do
      [[1, 2, 3, 4], [5, 6, 7, 8]].each { |row| @df.add(row) }
      @doubler = lambda { |value| value * 2 }
    end

    # An unknown column name must raise ArgumentError; a known one must not.
    it "should only replace columns that actually exist" do
      replace_missing = lambda { @df.replace!(:not_a_column, &@doubler) }
      replace_existing = lambda { @df.replace!(:these, &@doubler) }

      replace_missing.should raise_error(
        ArgumentError, /Must provide the name of an existing column./)
      replace_existing.should_not raise_error
    end

    it "should be able to replace a column with a block" do
      @df.replace!(:these) { |value| value * 2 }
      doubled = [2, 10]
      @df.these.should eql(doubled)
    end

    it "should be able to replace a column with an array" do
      replacement = [5, 9]
      @df.replace!(:these, replacement)
      @df.these.should eql(replacement)
    end
  end
  
  context "filter!" do
    # Two rows; every example below keeps only the second one.
    before(:each) do
      [[1, 2, 3, 4], [5, 6, 7, 8]].each { |row| @df.add(row) }
    end

    it "should be able to filter a data frame with a block using an OpenStruct for each row" do
      survivors = [[5, 6, 7, 8]]
      @df.filter!(:open_struct) { |record| record.these == 5 }
      @df.items.should eql(survivors)
    end

    it "should be able to filter a data frame with a block using a Hash for each row" do
      survivors = [[5, 6, 7, 8]]
      @df.filter!(:hash) { |record| record[:these] == 5 }
      @df.items.should eql(survivors)
    end

    # Four-member Struct passed to filter! as the row wrapper in the next
    # example. Ruby scopes constants lexically, so assigning one inside this
    # block makes S4 visible outside the context as well.
    S4 = Struct.new(:one, :two, :three, :four)
    it "should be able to filter a data frame with a block using another class that uses the row as input" do
      survivors = [[5, 6, 7, 8]]
      @df.filter!(S4) { |record| record.one == 5 }
      @df.items.should eql(survivors)
    end

    # With no argument the block receives the row itself.
    it "should be able to filter a data frame with a block using an array for each row" do
      survivors = [[5, 6, 7, 8]]
      @df.filter! { |values| values.first == 5 }
      @df.items.should eql(survivors)
    end

    it "should be able to do fancy things with the row as the filter" do
      survivors = [[5, 6, 7, 8]]
      @df.filter! { |values| values.sum > 10 }
      @df.items.should eql(survivors)
    end

    # The non-bang form is expected to return a filtered frame and leave the
    # receiver's rows untouched.
    it "should be able to generate a new data frame with filter" do
      filtered = @df.filter(:open_struct) { |record| record.these == 5 }
      filtered.items.should eql([[5, 6, 7, 8]])
      @df.items.should eql([[1, 2, 3, 4], [5, 6, 7, 8]])
    end

  end
  
  context "filter_by_category" do

    # Builds a two-column frame with one row per day of July 2009. Weather is
    # :fair on days where day % 3 == 1 and :good otherwise.
    #
    # Dates are built with Date.new(year, month, day) rather than
    # Date.parse("07/15/2009"). Ruby 1.8 read that string month-first, but
    # Ruby 1.9+ reads slash-separated dates day-first, so "07/15/2009" raises
    # ArgumentError and "07/1/2009" silently becomes 7 January.
    before do
      @df = DataFrame.new(:weather, :date)

      (1..31).each do |day|
        @df.add [(day % 3 == 1) ? :fair : :good, Date.new(2009, 7, day)]
      end

      @d1 = Date.new(2009, 7, 15)
      @d2 = Date.new(2009, 7, 31)

    end

    # The returned frame holds only :good rows; the receiver still has :fair.
    it "should be able to filter by category" do
      filtered = @df.filter_by_category(:weather => :good)
      filtered.weather.uniq.should eql([:good])
      @df.weather.uniq.should be_include(:fair)
    end

    # A Range value keeps rows inside the range, both endpoints included.
    it "should be able to manage ranges for filter values" do
      filtered = @df.filter_by_category(:date => (@d1..@d2))
      filtered.date.should_not be_include(Date.new(2009, 7, 1))
      filtered.date.should_not be_include(Date.new(2009, 7, 14))
      filtered.date.should be_include(Date.new(2009, 7, 15))
      filtered.date.should be_include(Date.new(2009, 7, 31))
      @df.date.should be_include(Date.new(2009, 7, 1))
    end

    # An Array value keeps rows whose value is one of the listed elements.
    it "should be able to take an array of values to filter with" do
      filtered = @df.filter_by_category(:date => [@d1, @d2])
      filtered.date.should_not be_include(Date.new(2009, 7, 1))
      filtered.date.should be_include(Date.new(2009, 7, 15))
      filtered.date.should be_include(Date.new(2009, 7, 31))
    end

    # The bang form filters the receiver in place.
    it "should have a destructive version" do
      @df.filter_by_category!(:date => [@d1, @d2])
      @df.date.should_not be_include(Date.new(2009, 7, 1))
      @df.date.should be_include(Date.new(2009, 7, 15))
      @df.date.should be_include(Date.new(2009, 7, 31))
    end

  end
  
  context "subset_from_columns" do
    before(:each) do
      [[1, 2, 3, 4], [5, 6, 7, 8]].each { |row| @df.add(row) }
    end

    # The subset is expected to be a different frame that carries only the
    # requested labels and the matching values of each row.
    it "should be able to create a subset of columns" do
      subset = @df.subset_from_columns(:these, :labels)
      subset.should_not eql(@df)
      subset.labels.should eql([:these, :labels])
      subset.items.should eql([[1, 4], [5, 8]])
      subset.these.should eql([1, 5])
    end
  end
  
  # For a column of distinct symbols, j_binary_ize! is expected to add one
  # boolean column per value, named <column>_<value>, and keep the original.
  it "should be able to j_binary_ize! a column, taking its categories and creating a column for each" do
    words = [:many, :fine, :things, :are, :available]
    frame = DataFrame.new(:observations)
    words.each { |word| frame.add [word] }
    frame.j_binary_ize!(:observations)
    frame.observations_many.should eql([true, false, false, false, false])
    frame.observations_fine.should eql([false, true, false, false, false])
    frame.observations_things.should eql([false, false, true, false, false])
    frame.observations_are.should eql([false, false, false, true, false])
    frame.observations_available.should eql([false, false, false, false, true])
    frame.observations.should eql([:many, :fine, :things, :are, :available])
  end
  
  # Numeric column with two block-defined categories whose conditions both
  # accept 3. Without :allow_overlap the expectations place 3 only in :small.
  it "should be able to j_binary_ize! a more normal column" do
    frame = DataFrame.new(:observations)
    frame.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |n| [n] })
    column = frame.observations
    column.add_category(:small) { |n| n <= 3 }
    frame.observations.add_category(:large) { |n| n >= 3 }
    frame.j_binary_ize!(:observations)
    expected_small = [true, true, true, false, false, false, true, true, true]
    expected_large = [false, false, false, true, true, true, false, false, false]
    frame.observations_small.should eql(expected_small)
    frame.observations_large.should eql(expected_large)
  end
  
  # Same categories as the non-overlapping case, but with :allow_overlap the
  # value 3 is expected to be flagged in both :small and :large.
  it "should be able to j_binary_ize with non-adjacent sets (sets that allow a value to have more than one category)" do
    frame = DataFrame.new(:observations)
    frame.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |n| [n] })
    column = frame.observations
    column.add_category(:small) { |n| n <= 3 }
    frame.observations.add_category(:large) { |n| n >= 3 }
    frame.j_binary_ize!(:observations, :allow_overlap => true)
    expected_small = [true, true, true, false, false, false, true, true, true]
    expected_large = [false, false, true, true, true, true, true, false, false]
    frame.observations_small.should eql(expected_small)
    frame.observations_large.should eql(expected_large)
  end
  
  # Runs j_binary_ize! twice with different category sets. The columns from the
  # first run are expected to survive next to those from the second.
  it "should be able to hold multiple ideas of a columns categories by resetting the category and re-running j_binary_ize" do
    frame = DataFrame.new(:observations)
    frame.import([1, 2, 3, 4, 5, 4, 3, 2, 1].map { |n| [n] })
    column = frame.observations
    column.add_category(:small) { |n| n <= 3 }
    frame.observations.add_category(:large) { |n| n >= 3 }
    frame.j_binary_ize!(:observations, :allow_overlap => true)

    # Second pass: replace the categories with odd/even and binarize again.
    is_odd = lambda { |n| n.odd? }
    is_even = lambda { |n| n.even? }
    frame.observations.set_categories(:odd => is_odd, :even => is_even)
    frame.j_binary_ize!(:observations)

    frame.observations_small.should eql([true, true, true, false, false, false, true, true, true])
    frame.observations_large.should eql([false, false, true, true, true, true, true, false, false])
    frame.observations.should eql([1, 2, 3, 4, 5, 4, 3, 2, 1])
    frame.observations_even.should eql([false, true, false, true, false, true, false, true, false])
    frame.observations_odd.should eql([true, false, true, false, true, false, true, false, true])
  end
  
  context "append!" do

    # Two rows, so every appended column is expected to hold two values.
    before(:each) do
      [[1, 2, 3, 4], [5, 6, 7, 8]].each { |row| @df.add(row) }
    end

    it "should be able to append an array of values to the data frame" do
      @df.append!(:new_column, [5, 5])
      appended = @df.new_column
      appended.should eql([5, 5])
    end

    # A non-array value is expected to be repeated once per row.
    it "should be able to append a default value to the data frame" do
      @df.append!(:new_column, :value)
      appended = @df.new_column
      appended.should eql([:value, :value])
    end

    it "should use nil as the default value" do
      @df.append!(:new_column)
      appended = @df.new_column
      appended.should eql([nil, nil])
    end
  end
end