require 'mathn'

require 'darkext/array'
require 'darkext/numeric'
require 'darkext/symbol'

class Array
  # Finds the mean of the array.
  # The divisor is converted to a Float so integer arrays are not truncated.
  def mean
    sum / size.to_f
  end
  alias :average :mean
  alias :ave :mean

  # Finds the median of the array.
  # Returns nil for an empty array. For an even number of elements the two
  # middle values of the sorted array are averaged (via #mean); for an odd
  # number the single middle value is returned as-is.
  def median
    return nil if empty?
    sorted = sort
    mid = size / 2
    # Even size: the middle pair starts one slot before the midpoint.
    size.even? ? sorted[mid - 1, 2].mean : sorted[mid]
  end

  # Generates a histogram hash for the array: each distinct element maps to
  # the number of times it occurs. Keys are inserted in sorted order.
  # The bins argument is accepted but currently unused.
  def histogram(bins = nil)
    sort.inject({}) do |counts, x|
      counts[x] = counts[x].to_i + 1
      counts
    end
  end

  # Finds the mode of the array.
  # Always returns an Array: every element tied for the highest count,
  # or [] when the array is empty.
  def mode
    counts = histogram
    highest = counts.values.max
    counts.keys.select { |x| counts[x] == highest }
  end

  # Variance
  # Population variance: sum of squared deviations divided by n.
  def population_variance
    sum_of_squares.to_f / size.to_f
  end

  # Sample variance: sum of squared deviations divided by n - 1.
  def sample_variance
    sum_of_squares.to_f / (size - 1).to_f
  end

  # Standard deviation
  # Square root of the (absolute) population variance.
  def population_deviation
    Math.sqrt(population_variance.abs)
  end

  # Square root of the (absolute) sample variance.
  def sample_deviation
    Math.sqrt(sample_variance.abs)
  end

  # Randomly samples n elements, with replacement, and always returns an
  # Array of n elements.
  # NOTE: this replaces Ruby's built-in Array#sample, which samples without
  # replacement and returns a bare element when called without arguments.
  def sample(n = 1)
    (1..n).map { self[rand(size)] }
  end

  # Generates a confidence interval around the mean.
  # Options (all optional):
  # * :percent - confidence level, defaults to 0.95
  # * :rho     - value the z-score is multiplied by, defaults to 1
  # * :type    - :center (two-sided, default), :upper or :lower
  # Returns [low, high] for :center, a one-element Array for :upper or
  # :lower, and [] for any other :type. The caller's hash is not modified.
  def ci(opts = { })
    opts = { :percent => 0.95, :rho => 1, :type => :center }.merge(opts)
    m = mean
    # A two-sided interval splits the excluded probability over both tails.
    tails = (opts[:type] == :center ? 2 : 1)
    z = Darkext::Statistics.zscore((1 - opts[:percent]) / tails)
    margin = ((z * opts[:rho]) / Math.sqrt(size)).abs
    case opts[:type]
    when :center
      [m - margin, m + margin]
    when :upper
      [m + margin]
    when :lower
      [m - margin]
    else
      []
    end
  end

  # Standardizes the array: returns a copy in which every value has the mean
  # subtracted and is divided by the sample deviation.
  def standardize
    # dup (not clone) so that a frozen receiver still yields a mutable copy.
    dup.standardize!
  end

  # Destructive standardize
  def standardize!
    m = mean.to_f
    rho = sample_deviation.to_f
    map! { |v| (v.to_f - m) / rho }
  end

  # Sum of the squared deviations of every element from the mean.
  def sum_of_squares
    m = mean
    inject(0) { |acc, v| acc + (v - m) ** 2 }
  end

  # Normalize the Array: returns a copy with every value divided by the mean.
  def normalize
    # dup (not clone) so that a frozen receiver still yields a mutable copy.
    dup.normalize!
  end

  # Normalize the Array destructive
  def normalize!
    m = mean.to_f
    map! { |v| v / m }
  end
end

module Darkext
  module Statistics
    # Finds the probability of a z-score: 0.5 plus (for positive z) or
    # minus (otherwise) half of erf(|z| / sqrt(2)).
    def self.prob(z)
      half = Math.erf(z.abs / Math.sqrt(2)) / 2
      z > 0 ? 0.5 + half : 0.5 - half
    end

    # Finds the zscore of a probability by bisecting the interval [-6, 6]
    # until it is narrower than epsilon.
    # Returns -1 when p lies outside [0, 1].
    def self.zscore(p, epsilon = 0.00000000000001)
      return -1 if p > 1 || p < 0
      lo, hi = -6.0, 6.0
      z = 0.5
      while (hi - lo) > epsilon
        if prob(z) > p
          hi = z
        else
          lo = z
        end
        mid = (hi + lo) * 0.5
        # Float resolution exhausted: the midpoint no longer moves, so a
        # too-small epsilon can never be reached. Stop instead of spinning.
        break if mid == z
        z = mid
      end
      z
    end

    # Finds a two tail p-val for a high/low array
    # r holds the low value followed by the high value. Each is converted to
    # a z-score as (x - mu) / (rho / sqrt(n)) before looking up its
    # probability. mu defaults to r.mean (Array#mean).
    def self.p_val(r, n = 30, rho = 1, mu = r.mean)
      std_error = rho / Math.sqrt(n)
      low, high = r.map { |x| prob((x - mu) / std_error) }
      1 - (high - low)
    end

    module Regression
      # Do a least squares linear regression on the two sets of x's and y's
      # Returns a hash containing many relevant values
      # * n (:n)
      # * B_1 (:b_1)
      # * B_0 (:b_0)
      # * predicted values (:predicted)
      # * residuals (:residuals)
      # * SSE (:ss_e)
      # * SST (:ss_t)
      # * R^2 (:r_2)
      # * R (:r), carrying the sign of the slope
      # * unbiased estimator (:estimator), i.e. SSE / (n - 2)
      # * the equation as a lambda (:equation)
      # Raises an argument error if the arguments are not the same size
      #
      # All divisions are done in floating point, so integer inputs are not
      # truncated. Uses Array#mean and Array#sum_of_squares.
      def self.least_squares(xs,ys)
        raise ArgumentError, "Arguments must be of equal size" if xs.size != ys.size
        n = xs.size
        n_f = n.to_f
        sum_x = xs.sum
        sum_y = ys.sum
        sum_xy = xs.zip(ys).inject(0) { |acc, (x, y)| acc + x * y }
        sum_xx = xs.inject(0) { |acc, x| acc + x * x }
        b_1 = (sum_xy - (sum_y * sum_x) / n_f) / (sum_xx - (sum_x * sum_x) / n_f)
        b_0 = ys.mean - b_1 * xs.mean
        equation = lambda { |x| b_0 + b_1 * x }
        predicted = xs.map(&equation)
        residuals = ys.zip(predicted).map { |y, y_hat| y - y_hat }
        ss_e = residuals.inject(0) { |acc, e| acc + e * e }
        ss_t = ys.sum_of_squares
        estimator = ss_e / (n - 2).to_f
        r_2 = 1 - (ss_e / ss_t.to_f)
        # Rounding can leave r_2 a hair below zero; clamp before the root.
        r = r_2 < 0 ? 0.0 : Math.sqrt(r_2)
        # The square root loses the direction of the relationship.
        r = -r if b_1 < 0
        {:n => n,
          :b_1 => b_1,
          :b_0 => b_0,
          :predicted => predicted,
          :residuals => residuals,
          :ss_e => ss_e,
          :ss_t => ss_t,
          :estimator => estimator,
          :equation => equation,
          :r_2 => r_2,
          :r => r}
      end
    end
  end
end