require 'compsci/fit'
require 'minitest/autorun'

include CompSci

describe Fit do
  before do
    @xs = [1, 2, 5, 10, 20, 50, 100, 200, 500]
  end

  describe "sigma" do
    it "must answer correctly" do
      Fit.sigma([1, 2, 3]).must_equal 6
      Fit.sigma([1, 2, 3]) { |n| n ** 2 }.must_equal 14
    end
  end

  describe "error" do
    it "must calculate r^2" do
      Fit.error([[1, 1], [2, 2], [3, 3]]) { |x| x }.must_equal 1.0
      Fit.error([[1, 1], [2, 2], [3, 4]]) { |x| x }.must_be_close_to 0.785
    end
  end

  # y = a
  describe "constant" do
    # note, this test can possibly fail depending on the uniformity of
    # rand's output for our sample
    it "must accept constant data" do
      [0, 1, 10, 100, 1000, 9999].each { |a|
        ys = @xs.map { |x| a + (rand - 0.5) }
        y_bar, variance = Fit.constant(@xs, ys)
        var_val = variance / ys.size
        y_bar.must_be_close_to a, 0.3
        var_val.must_be_close_to 0.1, 0.09
      }
    end
  end

  # y = a + b*ln(x)
  describe "logarithmic" do
    it "must accept logarithmic data" do
      [-9999, -2000, -500, -0.01, 0.01, 500, 2000, 9999].each { |a|
        [-9999, -2000, -500, -0.01, 0.01, 500, 2000, 9999].each { |b|
          ary = Fit.logarithmic(@xs, @xs.map { |x| a + b * Math.log(x) })
          ary[0].must_be_close_to a
          ary[1].must_be_close_to b
          ary[2].must_equal 1.0
        }
      }
    end
  end

  # y = a + bx
  describe "linear" do
    it "must accept linear data" do
      [-9999, -2000, -500, -0.01, 0.01, 500, 2000, 9999].each { |a|
        [-9999, -2000, -500, -0.01, 0.01, 500, 2000, 9999].each { |b|
          ary = Fit.linear(@xs, @xs.map { |x| a + b * x })
          ary[0].must_be_close_to a
          ary[1].must_be_close_to b
          ary[2].must_equal 1.0
        }
      }
    end

    # test that b is near 0; (1 - b) is similar magnitude to r2 in terms of
    # threshold
    # here's the deal: r2 is usually pretty low, but sometimes it is up over
    # 0.5, if rand() is being less than uniform in our sample
    # so, accept a wide range for r2
    # and let's check against 1 - b
    #
    # note, this test can possibly fail depending on the uniformity of
    # rand's output for our sample
    #
    it "must accept constant data" do
      r2s = []
      [0, 1, 10, 100, 1000, 9999].each { |a|
        ys = @xs.map { |x| a + (rand - 0.5) }
        ary = Fit.linear(@xs, ys)
        ary[0].must_be_close_to a, 0.4
        ary[1].must_be_close_to 0, 0.05
        r2s << ary[2]
      }
      mean_r2 = Fit.sigma(r2s) / r2s.size
      mean_r2.must_be_close_to 0.15, 0.15
    end

    it "must reject nonlinear data" do
      skip "investigate further"
      # this should be quite un-linear; expect r2 below 0.8
      #
      # ACTUALLY
      #
      # the r2 for fit_linear is mostly about the relative fit of a sloped
      # line compared to zero slope (i.e. y_bar)
      #
      # this is why a linear r2 close to 1.0 is the wrong test for fit_constant
      # because the relative fit of the sloped line (slope near 0) doesn't
      # "explain" much relative to y_bar
      #
      # in the case where y = x^3, a linear fit may still have a high r2,
      # because the error for the y_bar predictor is astronomical.  A super
      # steep slope fits (relative to the zero slope mean) pretty well.

      # this calls into question how useful r2 is, as we need it to be a
      # threshold value due to noise, yet even a terrible fit like trying to
      # match x^3 is hard to distinguish from noise
      #

      a = -50
      b = 1.3
      ys = @xs.map { |x| a + b * x**2 + x**3 }
      ary = Fit.linear(@xs, ys)
      if ary[2] > 0.85
        puts
        puts "fit_linear: #{ary.inspect}"
        puts "y = %0.2f + %0.2f(x) (r2 = %0.3f)" % ary
        puts
        col1, col2 = 5, 15
        puts "x".ljust(col1, ' ') + "y".ljust(col2, ' ') + "predicted"
        puts '---'.ljust(col1, ' ') + '---'.ljust(col2, ' ') + '---'
        @xs.zip(ys).each { |(x,y)|
          puts x.to_s.ljust(col1, ' ') + y.to_s.ljust(col2, ' ') +
               "%0.2f" % (ary[0] + ary[1] * x)
        }
        # ary[2].must_be :<, 0.8
        ary[2].must_be :<, 0.9
      end
    end
  end

  # y = ae^(bx)
  describe "exponential" do
    it "must accept exponential data" do
      [0.001, 7.5, 500, 1000, 5000, 9999].each { |a|
        [-1.4, -1.1, -0.1, 0.01, 0.5, 0.75].each { |b|
          ary = Fit.exponential(@xs, @xs.map { |x| a * Math::E**(b * x) })
          ary[0].must_be_close_to a
          ary[1].must_be_close_to b
          ary[2].must_equal 1.0
        }
      }
    end
  end

  # y = ax^b
  describe "power" do
    it "must accept power data" do
      [0.01, 7.5, 500, 1000, 5000, 9999].each { |a|
        [-114, -100, -10, -0.5, -0.1, 0.1, 0.75, 10, 50, 60].each { |b|
          next if b == -114 # Fit.error warning: Bignum out of Float range
          ary = Fit.power(@xs, @xs.map { |x| a * x**b })
          ary[0].must_be_close_to a
          ary[1].must_be_close_to b
          ary[2].must_equal 1.0
        }
      }
    end
  end
end