describe Daru::Core::GroupBy do
  # Shared fixture: an eight-row frame, groupings over one, two and three
  # vectors, and the indexes that the examples below compare results against.
  before do
    columns = {
      a: ['foo', 'bar', 'foo', 'bar', 'foo', 'bar', 'foo', 'foo'],
      b: ['one', 'one', 'two', 'three', 'two', 'two', 'one', 'three'],
      c: [1, 2, 3, 1, 3, 6, 3, 8],
      d: [11, 22, 33, 44, 55, 66, 77, 88]
    }
    @df = Daru::DataFrame.new(columns, order: [:a, :b, :c, :d])

    # sl / dl / tl: single, double and triple layer groupings.
    @sl_group = @df.group_by(:a)
    @dl_group = @df.group_by([:a, :b])
    @tl_group = @df.group_by([:a, :b, :c])

    @sl_index = Daru::Index.new(%w[bar foo])

    dl_tuples = [
      %w[bar one],
      %w[bar three],
      %w[bar two],
      %w[foo one],
      %w[foo three],
      %w[foo two]
    ]
    @dl_multi_index = Daru::MultiIndex.from_tuples(dl_tuples)

    tl_tuples = [
      ['bar', 'one', 2],
      ['bar', 'three', 1],
      ['bar', 'two', 6],
      ['foo', 'one', 1],
      ['foo', 'one', 3],
      ['foo', 'three', 8],
      ['foo', 'two', 3]
    ]
    @tl_multi_index = Daru::MultiIndex.from_tuples(tl_tuples)
  end

  # Grouping on a vector that contains nil entries.
  context 'with nil values' do
    before do
      values_with_gaps = [11, nil, 33, nil, nil, 66, 77, 88]
      @df[:w_nils] = Daru::Vector.new(values_with_gaps)
    end

    it 'groups by nil values' do
      nil_rows = @df.group_by(:w_nils).groups[[nil]]

      expect(nil_rows).to eq([1, 3, 4])
    end

    it "uses a multi-index when nils are part of the grouping keys" do
      grouping = @df.group_by(:a, :w_nils)

      # multi_indexed_grouping? is invoked through send, as in the rest of
      # this example's history, rather than as a regular public call.
      expect(grouping.send(:multi_indexed_grouping?)).to be true
    end
  end

  # Construction of a GroupBy: the `groups` mapping and the re-indexed `df`.
  context "#initialize" do
    let(:df_emp) do
      Daru::DataFrame.new(
        employee: %w[John Jane Mark John Jane Mark],
        month: %w[June June June July July July],
        salary: [1000, 500, 700, 1200, 600, 600]
      )
    end

    let(:employee_grp) { df_emp.group_by(:employee).df }
    let(:mi_single) do
      tuples = [
        ['Jane', 1], ['Jane', 4],
        ['John', 0], ['John', 3],
        ['Mark', 2], ['Mark', 5]
      ]
      Daru::MultiIndex.from_tuples(tuples)
    end

    let(:emp_month_grp) { df_emp.group_by([:employee, :month]).df }
    let(:mi_double) do
      tuples = [
        ['Jane', 'July', 4], ['Jane', 'June', 1],
        ['John', 'July', 3], ['John', 'June', 0],
        ['Mark', 'July', 5], ['Mark', 'June', 2]
      ]
      Daru::MultiIndex.from_tuples(tuples)
    end

    let(:emp_month_salary_grp) do
      df_emp.group_by([:employee, :month, :salary]).df
    end
    let(:mi_triple) do
      tuples = [
        ['Jane', 'July', 600, 4],
        ['Jane', 'June', 500, 1],
        ['John', 'July', 1200, 3],
        ['John', 'June', 1000, 0],
        ['Mark', 'July', 600, 5],
        ['Mark', 'June', 700, 2]
      ]
      Daru::MultiIndex.from_tuples(tuples)
    end

    it "groups by a single tuple" do
      expected_groups = {
        ['bar'] => [1, 3, 5],
        ['foo'] => [0, 2, 4, 6, 7]
      }

      expect(@sl_group.groups).to eq(expected_groups)
    end

    it "returns dataframe with MultiIndex, groups by single layer hierarchy" do
      expected_frame = Daru::DataFrame.new(
        {
          month: %w[June July June July June July],
          salary: [500, 600, 1000, 1200, 700, 600]
        },
        index: mi_single
      )

      expect(employee_grp).to eq(expected_frame)
    end

    it "returns dataframe with MultiIndex, groups by double layer hierarchy" do
      expected_frame = Daru::DataFrame.new(
        { salary: [600, 500, 1200, 1000, 600, 700] },
        index: mi_double
      )

      expect(emp_month_grp).to eq(expected_frame)
    end

    it "returns dataframe with MultiIndex, groups by triple layer hierarchy" do
      # Every vector is a grouping key here, so the expected frame has no
      # vectors left and carries only the index.
      expected_frame = Daru::DataFrame.new({}, index: mi_triple)

      expect(emp_month_salary_grp).to eq(expected_frame)
    end

    it "groups by a double layer hierarchy" do
      expected_groups = {
        ['foo', 'one']   => [0, 6],
        ['bar', 'one']   => [1],
        ['foo', 'two']   => [2, 4],
        ['bar', 'three'] => [3],
        ['bar', 'two']   => [5],
        ['foo', 'three'] => [7]
      }

      expect(@dl_group.groups).to eq(expected_groups)
    end

    it "groups by a triple layer hierarchy" do
      expected_groups = {
        ['bar', 'one', 2]   => [1],
        ['bar', 'three', 1] => [3],
        ['bar', 'two', 6]   => [5],
        ['foo', 'one', 1]   => [0],
        ['foo', 'one', 3]   => [6],
        ['foo', 'three', 8] => [7],
        ['foo', 'two', 3]   => [2, 4]
      }

      expect(@tl_group.groups).to eq(expected_groups)
    end

    it "raises error if a non-existent vector is passed as args" do
      grouping_on_unknown_vector = -> { @df.group_by([:a, :ted]) }

      expect(&grouping_on_unknown_vector).to raise_error
    end
  end

  # Number of rows in each group.
  context "#size" do
    it "returns a vector containing the size of each group" do
      expected_sizes = Daru::Vector.new([1, 1, 1, 2, 1, 2], index: @dl_multi_index)

      expect(@dl_group.size).to eq(expected_sizes)
    end

    it "returns an empty vector if given an empty dataframe" do
      empty_frame = Daru::DataFrame.new({ a: [], b: [] })
      sizes = empty_frame.group_by(:a).size

      expect(sizes).to eq(Daru::Vector.new([]))
    end
  end

  # Fetching the rows of one group by its full key.
  context "#get_group" do
    it "returns the whole sub-group for single layer grouping" do
      bar_rows = Daru::DataFrame.new(
        {
          a: %w[bar bar bar],
          b: %w[one three two],
          c: [2, 1, 6],
          d: [22, 44, 66]
        },
        index: [1, 3, 5]
      )

      expect(@sl_group.get_group(['bar'])).to eq(bar_rows)
    end

    it "returns the whole sub-group for double layer grouping" do
      bar_one_rows = Daru::DataFrame.new(
        {
          a: ['bar'],
          b: ['one'],
          c: [2],
          d: [22]
        },
        index: [1]
      )

      expect(@dl_group.get_group(['bar', 'one'])).to eq(bar_one_rows)
    end

    it "returns the whole sub-group for triple layer grouping" do
      foo_two_three_rows = Daru::DataFrame.new(
        {
          a: %w[foo foo],
          b: %w[two two],
          c: [3, 3],
          d: [33, 55]
        },
        index: [2, 4]
      )

      expect(@tl_group.get_group(['foo', 'two', 3])).to eq(foo_two_three_rows)
    end

    it "raises error for incomplete specification" do
      # One key component given for a grouping built on three vectors.
      partial_lookup = -> { @tl_group.get_group(['foo']) }

      expect(&partial_lookup).to raise_error
    end

    it "raises error for over specification" do
      # Two key components given for a grouping built on one vector.
      excessive_lookup = -> { @sl_group.get_group(['bar', 'one']) }

      expect(&excessive_lookup).to raise_error
    end
  end

  # Block form of each_group.
  context '#each_group' do
    it 'enumerates groups' do
      collected = []
      @dl_group.each_group do |group|
        collected << group
      end

      first_group = Daru::DataFrame.new(
        {
          a: ['bar'],
          b: ['one'],
          c: [2],
          d: [22]
        },
        index: [1]
      )

      expect(collected.count).to eq(6)
      expect(collected).to all(be_a(Daru::DataFrame))
      expect(collected.first).to eq(first_group)
    end
  end

  # Block-less form of each_group: the return value is enumerated instead.
  context '#each_group without block' do
    it 'enumerates groups' do
      groups_enum = @dl_group.each_group

      last_group = Daru::DataFrame.new(
        {
          a: %w[foo foo],
          b: %w[two two],
          c: [3, 3],
          d: [33, 55]
        },
        index: [2, 4]
      )

      expect(groups_enum.count).to eq(6)
      expect(groups_enum).to all(be_a(Daru::DataFrame))
      expect(groups_enum.to_a.last).to eq(last_group)
    end
  end

  # First row of every group, keeping the original row indexes.
  context '#first' do
    it 'gets the first row from each group' do
      leading_rows = Daru::DataFrame.new(
        {
          a: ['bar', 'bar', 'bar', 'foo', 'foo', 'foo'],
          b: ['one', 'three', 'two', 'one', 'three', 'two'],
          c: [2, 1, 6, 1, 8, 3],
          d: [22, 44, 66, 11, 88, 33]
        },
        index: [1, 3, 5, 0, 7, 2]
      )

      expect(@dl_group.first).to eq(leading_rows)
    end
  end

  # Last row of every group, keeping the original row indexes.
  context '#last' do
    it 'gets the last row from each group' do
      trailing_rows = Daru::DataFrame.new(
        {
          a: ['bar', 'bar', 'bar', 'foo', 'foo', 'foo'],
          b: ['one', 'three', 'two', 'one', 'three', 'two'],
          c: [2, 1, 6, 3, 8, 3],
          d: [22, 44, 66, 77, 88, 55]
        },
        index: [1, 3, 5, 6, 7, 4]
      )

      expect(@dl_group.last).to eq(trailing_rows)
    end
  end

  # Per-group mean of the vectors that are not grouping keys.
  context "#mean" do
    it "computes mean of the numeric columns of a single layer group" do
      expected_means = Daru::DataFrame.new(
        { c: [3.0, 3.6], d: [44.0, 52.8] },
        index: @sl_index
      )

      expect(@sl_group.mean).to eq(expected_means)
    end

    it "computes mean of the numeric columns of a double layer group" do
      expected_means = Daru::DataFrame.new(
        {
          c: [2, 1, 6, 2, 8, 3],
          d: [22, 44, 66, 44, 88, 44]
        },
        index: @dl_multi_index
      )

      expect(@dl_group.mean).to eq(expected_means)
    end

    it "computes mean of the numeric columns of a triple layer group" do
      expected_means = Daru::DataFrame.new(
        { d: [22, 44, 66, 11, 77, 88, 44] },
        index: @tl_multi_index
      )

      expect(@tl_group.mean).to eq(expected_means)
    end
  end

  # Per-group sum of the vectors that are not grouping keys.
  context "#sum" do
    it "calculates the sum of the numeric columns of a single layer group" do
      expected_sums = Daru::DataFrame.new(
        { c: [9, 18], d: [132, 264] },
        index: @sl_index
      )

      expect(@sl_group.sum).to eq(expected_sums)
    end

    it "calculates the sum of the numeric columns of a double layer group" do
      expected_sums = Daru::DataFrame.new(
        {
          c: [2, 1, 6, 4, 8, 6],
          d: [22, 44, 66, 88, 88, 88]
        },
        index: @dl_multi_index
      )

      expect(@dl_group.sum).to eq(expected_sums)
    end

    it "calculates the sum of the numeric columns of a triple layer group" do
      expected_sums = Daru::DataFrame.new(
        { d: [22, 44, 66, 11, 77, 88, 88] },
        index: @tl_multi_index
      )

      expect(@tl_group.sum).to eq(expected_sums)
    end
  end

  # Smoke check for each listed aggregate: at every grouping depth the result
  # must be indexed by the group keys. The aggregated values themselves are
  # not verified here.
  #
  # The method name is interpolated into the description so that the four
  # generated examples are distinguishable in reports and failure output.
  [:median, :std, :max, :min].each do |numeric_method|
    it "##{numeric_method} indexes its result by the group keys" do
      expect(@sl_group.send(numeric_method).index).to eq @sl_index
      expect(@dl_group.send(numeric_method).index).to eq @dl_multi_index
      expect(@tl_group.send(numeric_method).index).to eq @tl_multi_index
    end
  end

  context "#product" do
    # TODO: write these examples. They are declared without a block so RSpec
    # reports them as pending instead of counting empty bodies as passing.
    it "calculates product for single layer groups"

    it "calculates product for double layer groups"

    it "calculates product for triple layer groups"
  end

  # Per-group element counts for the vectors that are not grouping keys.
  context "#count" do
    it "counts the number of elements in a single layer group" do
      expected_counts = Daru::DataFrame.new(
        {
          b: [3, 5],
          c: [3, 5],
          d: [3, 5]
        },
        index: @sl_index
      )

      expect(@sl_group.count).to eq(expected_counts)
    end

    it "counts the number of elements in a double layer group" do
      expected_counts = Daru::DataFrame.new(
        {
          c: [1, 1, 1, 2, 1, 2],
          d: [1, 1, 1, 2, 1, 2]
        },
        index: @dl_multi_index
      )

      expect(@dl_group.count).to eq(expected_counts)
    end

    it "counts the number of elements in a triple layer group" do
      expected_counts = Daru::DataFrame.new(
        { d: [1, 1, 1, 1, 1, 1, 2] },
        index: @tl_multi_index
      )

      expect(@tl_group.count).to eq(expected_counts)
    end
  end

  context "#std" do
    # TODO: write these examples. They are declared without a block so RSpec
    # reports them as pending instead of counting empty bodies as passing.
    it "calculates sample standard deviation for single layer groups"

    it "calculates sample standard deviation for double layer groups"

    it "calculates sample standard deviation for triple layer groups"
  end

  context "#max" do
    # TODO: write these examples. They are declared without a block so RSpec
    # reports them as pending instead of counting empty bodies as passing.
    it "calculates max value for single layer groups"

    it "calculates max value for double layer groups"

    it "calculates max value for triple layer groups"
  end

  context "#min" do
    # TODO: write these examples. They are declared without a block so RSpec
    # reports them as pending instead of counting empty bodies as passing.
    it "calculates min value for single layer groups"

    it "calculates min value for double layer groups"

    it "calculates min value for triple layer groups"
  end

  context "#median" do
    # TODO: write these examples. They are declared without a block so RSpec
    # reports them as pending instead of counting empty bodies as passing.
    it "calculates median for single layer groups"

    it "calculates median for double layer groups"

    it "calculates median for triple layer groups"
  end

  # First n rows of every group, keeping the original row indexes.
  context "#head" do
    it "returns first n rows of each single layer group" do
      expected_rows = Daru::DataFrame.new(
        {
          a: %w[bar bar foo foo],
          b: %w[one three one two],
          c: [2, 1, 1, 3],
          d: [22, 44, 11, 33]
        },
        index: [1, 3, 0, 2]
      )

      expect(@sl_group.head(2)).to eq(expected_rows)
    end

    it "returns first n rows of each double layer group" do
      expected_rows = Daru::DataFrame.new(
        {
          a: %w[bar bar bar foo foo foo foo foo],
          b: %w[one three two one one three two two],
          c: [2, 1, 6, 1, 3, 8, 3, 3],
          d: [22, 44, 66, 11, 77, 88, 33, 55]
        },
        index: [1, 3, 5, 0, 6, 7, 2, 4]
      )

      expect(@dl_group.head(2)).to eq(expected_rows)
    end

    it "returns first n rows of each triple layer group" do
      expected_rows = Daru::DataFrame.new(
        {
          a: %w[bar bar bar foo foo foo foo],
          b: %w[one three two one one three two],
          c: [2, 1, 6, 1, 3, 8, 3],
          d: [22, 44, 66, 11, 77, 88, 33]
        },
        index: [1, 3, 5, 0, 6, 7, 2]
      )

      expect(@tl_group.head(1)).to eq(expected_rows)
    end
  end

  # Last n rows of every group, keeping the original row indexes.
  context "#tail" do
    it "returns last n rows of each single layer group" do
      expected_rows = Daru::DataFrame.new(
        {
          a: %w[bar foo],
          b: %w[two three],
          c: [6, 8],
          d: [66, 88]
        },
        index: [5, 7]
      )

      expect(@sl_group.tail(1)).to eq(expected_rows)
    end

    it "returns last n rows of each double layer group" do
      expected_rows = Daru::DataFrame.new(
        {
          a: %w[bar bar bar foo foo foo foo foo],
          b: %w[one three two one one three two two],
          c: [2, 1, 6, 1, 3, 8, 3, 3],
          d: [22, 44, 66, 11, 77, 88, 33, 55]
        },
        index: [1, 3, 5, 0, 6, 7, 2, 4]
      )

      expect(@dl_group.tail(2)).to eq(expected_rows)
    end

    it "returns last n rows of each triple layer group" do
      expected_rows = Daru::DataFrame.new(
        {
          a: %w[bar bar bar foo foo foo foo],
          b: %w[one three two one one three two],
          c: [2, 1, 6, 1, 3, 8, 3],
          d: [22, 44, 66, 11, 77, 88, 55]
        },
        index: [1, 3, 5, 0, 6, 7, 4]
      )

      expect(@tl_group.tail(1)).to eq(expected_rows)
    end
  end

  context "#[]" do
    # Placeholder: no examples exist for #[] yet. The description keeps the
    # pending entry identifiable in reports instead of an anonymous example.
    pending "is not covered by any example yet"
  end

  # Folding the rows of each group into one value per group.
  context "#reduce" do
    it "returns a vector that concatenates strings in a group" do
      # Appends the row's :b value to the running string for the group.
      append_b = ->(joined, row) { joined + row[:b] }
      expected_strings = Daru::Vector.new(
        ['onethreetwo', 'onetwotwoonethree'],
        index: @sl_index
      )

      expect(@sl_group.reduce('', &append_b)).to eq(expected_strings)
    end

    it "works with multi-indexes" do
      append_b = ->(joined, row) { joined + row[:b] }
      expected_strings = Daru::Vector.new(
        ['one', 'three', 'two', 'oneone', 'three', 'twotwo'],
        index: @dl_multi_index
      )

      expect(@dl_group.reduce('', &append_b)).to eq(expected_strings)
    end
  end

  # group_by called with no arguments is compared against the grouping on :a,
  # which is the first vector of the fixture frame.
  context 'groups by first vector if no vector mentioned' do
    subject do
      @df.group_by
    end

    it { is_expected.to be_a(Daru::Core::GroupBy) }
    its(:groups) { is_expected.to eq(@sl_group.groups) }
    its(:size) { is_expected.to eq(@sl_group.size) }
  end

  # Summing groups of a frame whose row index is a non-contiguous set of
  # integers.
  context 'group and sum with numeric indices' do
    let(:df) do
      Daru::DataFrame.new({ g: %w[a a a], num: [1, 2, 3] }, index: [2, 12, 23])
    end

    subject do
      df.group_by([:g]).sum
    end

    it { is_expected.to eq(Daru::DataFrame.new({ num: [6] }, index: ['a'])) }
  end

  # Grouping on every vector of a frame whose rows carry nils in different
  # positions.
  context 'when dataframe tuples contain nils in mismatching positions' do

    let(:df){
      Daru::DataFrame.new(
        {
          'string1' => ["Color", "Color", "Color", "Color", nil, "Color", "Color", " Black and White"],
          'string2' => ["Test", "test2", nil, "test3", nil, "test", "test3", "test5"],
          'num' => [1, nil, 3, 4, 5, 6, 7, nil]
        }
      )
    }

    it 'groups by without errors' do
      # Bare negative matcher on purpose: negating one specific error class
      # would let any other exception pass the example, and RSpec warns about
      # that form as a potential false positive.
      expect { df.group_by(df.vectors.map(&:to_s)) }.not_to raise_error
    end
  end
  
  # Aggregation with per-vector symbols and lambdas, on grouped frames and on
  # the MultiIndex-ed frames those aggregations return.
  context '#aggregate' do
    let(:dataframe) do
      Daru::DataFrame.new({
        employee: %w[John Jane Mark John Jane Mark],
        month: %w[June June June July July July],
        salary: [1000, 500, 700, 1200, 600, 600]
      })
    end

    context 'group and aggregate sum for particular single vector' do
      subject do
        dataframe.group_by([:employee]).aggregate(salary: :sum)
      end

      it {
        is_expected.to eq(
          Daru::DataFrame.new(
            { salary: [1100, 2200, 1300] },
            index: %w[Jane John Mark]
          )
        )
      }
    end

    context 'group and aggregate sum and lambda function for vectors' do
      subject do
        dataframe.group_by([:employee]).aggregate(
          salary: :sum,
          month: ->(vec) { vec.to_a.join('/') }
        )
      end

      it {
        is_expected.to eq(
          Daru::DataFrame.new(
            {
              salary: [1100, 2200, 1300],
              month: ['June/July', 'June/July', 'June/July']
            },
            index: %w[Jane John Mark],
            order: [:salary, :month]
          )
        )
      }
    end

    context 'group and aggregate sum and lambda functions on dataframe' do
      subject do
        dataframe.group_by([:employee]).aggregate(
          salary: :sum,
          month: ->(vec) { vec.to_a.join('/') },
          mean_salary: ->(df) { df.salary.mean },
          periods: ->(df) { df.size }
        )
      end

      it {
        is_expected.to eq(
          Daru::DataFrame.new(
            {
              salary: [1100, 2200, 1300],
              month: ['June/July', 'June/July', 'June/July'],
              mean_salary: [550.0, 1100.0, 650.0],
              periods: [2, 2, 2]
            },
            index: %w[Jane John Mark],
            order: [:salary, :month, :mean_salary, :periods]
          )
        )
      }
    end

    context 'group_by and aggregate on mixed MultiIndex' do
      let(:df) do
        Daru::DataFrame.new(
          name: %w[Ram Krishna Ram Krishna Krishna],
          visited: %w[Hyderabad Delhi Mumbai Raipur Banglore]
        )
      end

      # Same data without the first row, so 'Ram' occurs only once.
      let(:df_mixed) do
        Daru::DataFrame.new(
          name: %w[Krishna Ram Krishna Krishna],
          visited: %w[Delhi Mumbai Raipur Banglore]
        )
      end

      it 'group_by' do
        grouped_index = Daru::MultiIndex.from_tuples(
          [
            ['Krishna', 1], ['Krishna', 3], ['Krishna', 4],
            ['Ram', 0], ['Ram', 2]
          ]
        )
        expected_frame = Daru::DataFrame.new(
          { visited: %w[Delhi Raipur Banglore Hyderabad Mumbai] },
          index: grouped_index
        )

        expect(df.group_by(:name).df).to eq(expected_frame)
      end

      it 'group_by and aggregate' do
        joined = df.group_by(:name).aggregate(
          visited: ->(vec) { vec.to_a.join(',') }
        )
        expected_frame = Daru::DataFrame.new(
          { visited: ['Delhi,Raipur,Banglore', 'Hyderabad,Mumbai'] },
          index: %w[Krishna Ram]
        )

        expect(joined).to eq(expected_frame)
      end

      it 'group_by and aggregate when anyone index is not multiple times' do
        joined = df_mixed.group_by(:name).aggregate(
          visited: ->(vec) { vec.to_a.join(',') }
        )
        expected_frame = Daru::DataFrame.new(
          { visited: ['Delhi,Raipur,Banglore', 'Mumbai'] },
          index: %w[Krishna Ram]
        )

        expect(joined).to eq(expected_frame)
      end
    end

    let(:spending_df) do
      spending_rows = [
        [2010, 'dev', 50, 1],
        [2010, 'dev', 150, 1],
        [2010, 'dev', 200, 1],
        [2011, 'dev', 50, 1],
        [2012, 'dev', 150, 1],

        [2011, 'office', 300, 1],

        [2010, 'market', 50, 1],
        [2011, 'market', 500, 1],
        [2012, 'market', 500, 1],
        [2012, 'market', 300, 1],

        [2012, 'R&D', 10, 1]
      ]
      Daru::DataFrame.rows(
        spending_rows,
        order: [:year, :category, :spending, :nb_spending]
      )
    end

    let(:multi_index_year_category) do
      Daru::MultiIndex.from_tuples(
        [
          [2010, 'dev'], [2010, 'market'],
          [2011, 'dev'], [2011, 'market'], [2011, 'office'],
          [2012, 'R&D'], [2012, 'dev'], [2012, 'market']
        ]
      )
    end

    context 'group_by and aggregate on multiple elements' do
      it 'does aggregate' do
        totals = spending_df.group_by([:year, :category]).aggregate(spending: :sum)
        expected_totals = Daru::DataFrame.new(
          { spending: [400, 50, 50, 500, 300, 10, 150, 800] },
          index: multi_index_year_category
        )

        expect(totals).to eq(expected_totals)
      end

      it 'works as older methods' do
        older_way = spending_df.group_by([:year, :category]).sum

        newer_way = spending_df.group_by([:year, :category]).aggregate(
          spending: :sum,
          nb_spending: :sum
        )
        expect(newer_way).to eq(older_way)

        contrived_way = spending_df.group_by([:year, :category]).aggregate(
          spending: :sum,
          nb_spending_lambda: ->(df) { df[:nb_spending].sum }
        )
        # The return value is discarded; the comparison below is made on
        # contrived_way itself after this call.
        contrived_way.rename_vectors(nb_spending_lambda: :nb_spending)
        expect(contrived_way).to eq(older_way)
      end

      context 'can aggregate on MultiIndex' do
        let(:multi_indexed_aggregated_df) do
          spending_df.group_by([:year, :category]).aggregate(spending: :sum)
        end
        let(:index_year) { Daru::Index.new([2010, 2011, 2012]) }
        let(:index_category) { Daru::Index.new(['dev', 'market', 'office', 'R&D']) }

        it 'aggregates by default on the last layer of MultiIndex' do
          per_year = multi_indexed_aggregated_df.aggregate(spending: :sum)
          expected_frame = Daru::DataFrame.new(
            { spending: [450, 850, 960] },
            index: index_year
          )

          expect(per_year).to eq(expected_frame)
        end

        it 'can aggregate on the first layer of MultiIndex' do
          per_category = multi_indexed_aggregated_df.aggregate({ spending: :sum }, 0)
          expected_frame = Daru::DataFrame.new(
            { spending: [600, 1350, 300, 10] },
            index: index_category
          )

          expect(per_category).to eq(expected_frame)
        end

        it 'does coercion: when one layer is remaining, MultiIndex is coerced in Index that does not aggregate anymore' do
          df_with_simple_index = multi_indexed_aggregated_df.aggregate(spending: :sum)
          aggregated_again = df_with_simple_index.aggregate(spending: :sum)

          expect(aggregated_again).to eq(df_with_simple_index)
        end
      end
    end
  end
end