module Selector
  # Information-gain scoring of a binary feature against a two-class split.
  # Intended to be mixed in; all methods are plain instance methods.
  module IG
    # Information gain: entropy of the class split minus the expected entropy
    # after partitioning on the feature.
    #
    # The parameter meanings below are inferred from their names (confirm with
    # callers): +pos+/+neg+ are the class totals, +tp+/+fp+ the number of
    # positive/negative items in which the feature is present.
    #
    # @param pos [Numeric] total count of the positive class
    # @param neg [Numeric] total count of the negative class
    # @param tp [Numeric] positive items having the feature
    # @param fp [Numeric] negative items having the feature
    # @return [Float] the information gain in bits
    # @raise [ZeroDivisionError] for integer counts when pos + neg is zero
    def information_gain(pos, neg, tp, fp)
      fn = pos - tp # positive items lacking the feature
      tn = neg - fp # negative items lacking the feature
      p_word = (tp + fp).quo(pos + neg)

      e(pos, neg) - (p_word * e(tp, fp) + (1 - p_word) * e(fn, tn))
    end

    # Binary entropy (in bits) of a split with counts +x+ and +y+.
    #
    # An empty split (x + y == 0) carries no uncertainty and yields 0.0
    # instead of dividing by zero.
    def e(x, y)
      total = x + y
      return 0.0 if total.zero?

      -xlx(x.quo(total)) - xlx(y.quo(total))
    end

    # x * log2(x), with the conventional limit value 0.0 at x == 0
    # (a direct evaluation would give 0 * -Infinity = NaN).
    def xlx(x)
      return 0.0 if x.zero?

      x * Math.log2(x)
    end
  end
  # Bi-Normal Separation scoring built on the standard normal distribution.
  # Intended to be mixed in; all methods are plain instance methods.
  module BNS
    SQR2 = Math.sqrt(2)
    SQR2PI = Math.sqrt(2.0 * Math::PI)

    # Difference between the inverse normal CDF of the true-positive rate
    # (tp / pos) and that of the false-positive rate (fp / neg).
    #
    # The method name keeps its historical spelling for existing callers;
    # +bi_normal_separation+ is provided as a correctly spelled alias.
    #
    # NOTE(review): rates of exactly 0 or 1 map to 0.0 through cdf_inverse
    # rather than being clamped to a small epsilon; confirm that this is the
    # intended handling for features with tp == 0 or fp == 0.
    #
    # @param pos [Numeric] total count of the positive class
    # @param neg [Numeric] total count of the negative class
    # @param tp [Numeric] positive items having the feature
    # @param fp [Numeric] negative items having the feature
    # @return [Float]
    # @raise [ZeroDivisionError] for integer counts when pos or neg is zero
    def bi_normal_seperation(pos, neg, tp, fp)
      false_positive_rate = fp.quo(neg)
      true_positive_rate = tp.quo(pos)
      cdf_inverse(true_positive_rate) - cdf_inverse(false_positive_rate)
    end
    alias bi_normal_separation bi_normal_seperation

    # Standard normal cumulative distribution function.
    #
    # Written with erfc so that small lower-tail probabilities keep their
    # relative precision (1 + erf(z) cancels badly for very negative z).
    def cdf(z)
      0.5 * Math.erfc(-z / SQR2)
    end

    # Inverse standard normal cumulative distribution function, following
    # Peter J. Acklam's rational approximation:
    # http://home.online.no/~pjacklam/notes/invnorm

    # Coefficients of the rational approximations. Index 0 is padding so the
    # indices match the 1-based numbering of the reference description.
    A = [0, -3.969683028665376e+01, 2.209460984245205e+02, -2.759285104469687e+02, 1.383577518672690e+02, -3.066479806614716e+01, 2.506628277459239e+00].freeze
    B = [0, -5.447609879822406e+01, 1.615858368580409e+02, -1.556989798598866e+02, 6.680131188771972e+01, -1.328068155288572e+01].freeze
    C = [0, -7.784894002430293e-03, -3.223964580411365e-01, -2.400758277161838e+00, -2.549732539343734e+00, 4.374664141464968e+00, 2.938163982698783e+00].freeze
    D = [0, 7.784695709041462e-03, 3.224671290700398e-01, 2.445134137142996e+00, 3.754408661907416e+00].freeze
    # Break-points between the tail regions and the central region.
    P_LOW  = 0.02425
    P_HIGH = 1.0 - P_LOW

    # Returns x such that cdf(x) is approximately +p+.
    #
    # Values of +p+ outside [0, 1] and the median 0.5 return 0.0 immediately.
    # p of exactly 0 or 1 matches none of the three regions and therefore
    # also returns 0.0 (not -/+ Infinity).
    #
    # @param p [Numeric] probability
    # @return [Float]
    def cdf_inverse(p)
      return 0.0 if p < 0 || p > 1 || p == 0.5

      x = 0.0

      if 0.0 < p && p < P_LOW
        # Rational approximation for lower region.
        q = Math.sqrt(-2.0 * Math.log(p))
        x = (((((C[1]*q+C[2])*q+C[3])*q+C[4])*q+C[5])*q+C[6]) /
            ((((D[1]*q+D[2])*q+D[3])*q+D[4])*q+1.0)
      elsif P_LOW <= p && p <= P_HIGH
        # Rational approximation for central region.
        q = p - 0.5
        r = q * q
        x = (((((A[1]*r+A[2])*r+A[3])*r+A[4])*r+A[5])*r+A[6])*q /
            (((((B[1]*r+B[2])*r+B[3])*r+B[4])*r+B[5])*r+1.0)
      elsif P_HIGH < p && p < 1.0
        # Rational approximation for upper region.
        q = Math.sqrt(-2.0 * Math.log(1.0 - p))
        x = -(((((C[1]*q+C[2])*q+C[3])*q+C[4])*q+C[5])*q+C[6]) /
             ((((D[1]*q+D[2])*q+D[3])*q+D[4])*q+1.0)
      end

      if 0 < p && p < 1
        # Refinement step from the reference algorithm (one step of Halley's
        # method). It must be driven by the residual cdf(x) - p; feeding in
        # cdf(p) instead would pull x away from the true quantile.
        err = cdf(x) - p
        u = err * SQR2PI * Math.exp((x**2.0) / 2.0)
        x -= u / (1.0 + x * u / 2.0)
      end
      x
    end
  end
end