require 'spec_helper'

describe DbClustering::DistanceMetrics::PearsonCorrelation, type: :model do

  # Give every example its own metric instance; the expect_* helpers in this
  # spec read it through @pearson_correlation.
  before { @pearson_correlation = described_class.new }


  # Expected distances are spelled as 1.0 - r so the reference correlation
  # value r stays visible in each assertion.
  describe "#distance" do
    context "using array object" do
      it "works with 6 dimensional examples" do
        a1 = [-100, -50, 0, 10, 20, 30]
        a2 = [-100, -50, 0, 20, 30, 40]

        expect_distance(a1, a2, 1.0 - 0.9978980816987033)

        # Each row is [index in a1 to overwrite, new value, correlation
        # expected afterwards]. The overwrites accumulate from row to row.
        [
          [0, 100, -0.33178189173568795],
          [1, 50, -0.8531546818010307],
          [3, 20, -0.8501701979323958],
          [4, 30, -0.8251789485121429],
          [5, 40, -0.777119205197422]
        ].each do |index, value, correlation|
          a1[index] = value
          expect_distance(a1, a2, 1.0 - correlation)
        end
      end
    end
  end

  # Checks #correlation against precomputed reference values. Every example
  # starts from two similar inputs and then overwrites entries of a1 one step
  # at a time, asserting the expected correlation after each step.
  describe "#correlation" do

    context "using array object" do
      it "works with 6 dimensional examples" do
        a1 = [-100, -50, 0, 10, 20, 30]
        a2 = [-100, -50, 0, 20, 30, 40]

        expect_correlation(a1, a2, 0.9978980816987033)

        a1[0] = 100
        expect_correlation(a1, a2, -0.33178189173568795)

        a1[1] = 50
        expect_correlation(a1, a2, -0.8531546818010307)

        a1[3] = 20
        expect_correlation(a1, a2, -0.8501701979323958)

        a1[4] = 30
        expect_correlation(a1, a2, -0.8251789485121429)

        a1[5] = 40
        expect_correlation(a1, a2, -0.777119205197422)
      end

      it "works with 10 dimensional example" do
        a1 = [-100, -75, -50, -25, 0, 10, 30, 50, 70, 90]
        a2 = [-100, -75, -50, -25, 0, 20, 40, 60, 80, 100]

        expect_correlation(a1, a2, 0.9991021273387496)

        a1[0] = 100
        expect_correlation(a1, a2, 0.47082800718062534)

        a1[1] = 75
        expect_correlation(a1, a2, 0.1556331759412803)

        a1[2] = 50
        expect_correlation(a1, a2, -0.030429030972509225)

        a1[3] = 25
        expect_correlation(a1, a2, -0.11043152607484653)

        a1[5] = 20
        expect_correlation(a1, a2, -0.10683599418231368)

        a1[6] = 40
        expect_correlation(a1, a2, -0.09061095797872151)

        a1[7] = 60
        expect_correlation(a1, a2, -0.061965254978689745)

        a1[8] = 80
        expect_correlation(a1, a2, -0.022715542521212734)

        a1[9] = 100
        expect_correlation(a1, a2, 0.024246432248443597)
      end

      # Both vectors hold 201 entries: (-100..0) contributes 101 values and
      # the second range contributes 100 (indices 101..200).
      it "works with 201 dimensional example" do
        a1 = (-100..0).to_a + (-9..90).to_a
        a2 = (-100..0).to_a + (1..100).to_a

        expect_correlation(a1, a2, 0.9989178188722178)

        a1[0] = 100
        expect_correlation(a1, a2, 0.9655259356163942)

        a1[1] = 99
        expect_correlation(a1, a2, 0.9331992252857959)

        a1[2] = 98
        expect_correlation(a1, a2, 0.9018830671823298)

        a1[3] = 97
        expect_correlation(a1, a2, 0.871527012471479)

        # Finish flipping the sign of the negative half (indices 4..99).
        (4..99).each { |i| a1[i] = 100 - i }
        expect_correlation(a1, a2, -0.14729260459452256)

        a1[101] = 1
        expect_correlation(a1, a2, -0.147683155760824)

        a1[102] = 2
        expect_correlation(a1, a2, -0.14803962444596394)

        a1[103] = 3
        expect_correlation(a1, a2, -0.14836161254154293)

        a1[104] = 4
        expect_correlation(a1, a2, -0.14864872717684907)

        # After this loop a1[i] == a2[i].abs for every index. Since a2 is
        # symmetric around zero, the expected correlation is 0.
        (5..100).each { |i| a1[100 + i] = i }
        expect_correlation(a1, a2, 0.0)
      end
    end

    context "using hash object" do
      # Keys d, e, f exist only in a1 and j, k, l only in a2. The six shared
      # keys carry the same values as the 6 dimensional array example and the
      # expected results are identical, so the unshared keys are presumably
      # ignored by the metric -- confirm against the Vector/metric code.
      it "works with 6 dimensional examples" do
        a1 = {a: -100, b: -50, c: 0, d: 100, e: 100, f: 100, g: 10, h: 20, i: 30}
        a2 = {a: -100, b: -50, c: 0, g: 20, h: 30, i: 40, j: -100, k: -100, l: -100}

        expect_correlation(a1, a2, 0.9978980816987033)

        a1[:a] = 100
        expect_correlation(a1, a2, -0.33178189173568795)

        a1[:b] = 50
        expect_correlation(a1, a2, -0.8531546818010307)

        a1[:g] = 20
        expect_correlation(a1, a2, -0.8501701979323958)

        a1[:h] = 30
        expect_correlation(a1, a2, -0.8251789485121429)

        a1[:i] = 40
        expect_correlation(a1, a2, -0.777119205197422)
      end
    end

  end

  # Wraps both raw objects in DbClustering::Models::Vector and expects the
  # metric's #correlation for the pair to be within 0.001 of +correlation+.
  def expect_correlation(object1, object2, correlation)
    first, second = [object1, object2].map { |raw| DbClustering::Models::Vector.new(object: raw) }
    actual = @pearson_correlation.correlation(first, second)

    expect(actual).to be_within(0.001).of(correlation)
  end

  # Wraps both raw objects in DbClustering::Models::Vector and expects the
  # metric's #distance for the pair to be within 0.001 of +distance+.
  def expect_distance(object1, object2, distance)
    first, second = [object1, object2].map { |raw| DbClustering::Models::Vector.new(object: raw) }
    actual = @pearson_correlation.distance(first, second)

    expect(actual).to be_within(0.001).of(distance)
  end

end