# This is a namespaced version of the gem, in case you can create a
# container for your data and only include these methods there. 
# Example:
class Object
  
  # Simpler way to handle a random number between to values
  def rand_between(a, b)
    return rand_in_floats(a, b) if a.is_a?(Float) or b.is_a?(Float)
    range = (a - b).abs + 1
    rand(range) + [a,b].min
  end
  
  # Handles non-integers
  def rand_in_floats(a, b)
    range = (a - b).abs
    (rand * range) + [a,b].min
  end
  
end

module JustEnumerableStats #:nodoc:
  module Stats
    
    # To keep max and min DRY.
    def block_sorter(a, b, &block)
      if block
        val = yield(a, b)
      elsif default_block
        val = default_block.call(a, b)
      else
        val = a <=> b
      end
    end
    protected :block_sorter

    # Returns the max, using an optional block.
    def max(&block)
      self.inject do |best, e|
        val = block_sorter(best, e, &block)
        best = val > 0 ? best : e
      end
    end

    # Returns the first index of the max value
    def max_index(&block)
      self.index(max(&block))
    end

    # Min of any number of items
    def min(&block)
      self.inject do |best, e|
        val = block_sorter(best, e, &block)
        best = val < 0 ? best : e
      end
    end

    # Returns the first index of the min value
    def min_index(&block)
      self.index(min(&block))
    end

    # The block called to filter the values in the object.
    def default_block
      @default_stat_block 
    end

    # Allows me to setup a block for a series of operations.  Example:
    # a = [1,2,3]
    # a.sum # => 6.0
    # a.default_block = lambda{|e| 1 / e}
    # a.sum # => 1.0
    def default_block=(block)
      @default_stat_block = block
    end

    # Provides zero in the right class (Numeric or Float)
    def zero
      any? {|e| e.is_a?(Float)} ? 0.0 : 0
    end
    protected :zero

    # Provides one in the right class (Numeric or Float)
    def one
      any? {|e| e.is_a?(Float)} ? 1.0 : 1
    end
    protected :one

    # Adds up the list.  Uses a block or default block if present.
    def sum
      sum = zero
      if block_given?
        each{|i| sum += yield(i)}
      elsif default_block
        each{|i| sum += default_block[*i]}
      else
        each{|i| sum += i}
      end
      sum
    end

    # The arithmetic mean, uses a block or default block.
    def average(&block)
      sum(&block)/size
    end
    alias :mean :average
    alias :avg :average

    # The variance, uses a block or default block.
    def variance(&block)
      m = mean(&block)
      sum_of_differences = if block_given?
        sum{ |i| j=yield(i); (m - j) ** 2 }
      elsif default_block
        sum{ |i| j=default_block[*i]; (m - j) ** 2 }
      else
        sum{ |i| (m - i) ** 2 }
      end
      sum_of_differences / (size - 1)
    end
    alias :var :variance

    # The standard deviation.  Uses a block or default block.
    def standard_deviation(&block)
      Math::sqrt(variance(&block))
    end
    alias :std :standard_deviation

    # The slow way is to iterate up to the middle point.  A faster way is to
    # use the index, when available.  If a block is supplied, always iterate
    # to the middle point. 
    def median(ratio=0.5, &block)
      return iterate_midway(ratio, &block) if block_given?
      begin
        mid1, mid2 = middle_two
        sorted = new_sort
        med1, med2 = sorted[mid1], sorted[mid2]
        return med1 if med1 == med2
        return med1 + ((med2 - med1) * ratio)
      rescue
        iterate_midway(ratio, &block)
      end
    end

    def middle_two
      mid2 = size.div(2)
      mid1 = (size % 2 == 0) ? mid2 - 1 : mid2
      return mid1, mid2
    end
    protected :middle_two

    def median_position
      middle_two.last
    end
    protected :median_position

    def first_half(&block)
      fh = self[0..median_position].dup
    end
    protected :first_half

    def second_half(&block)
      # Total crap, but it's the way R does things, and this will most likely
      # only be used to feed R some numbers to plot, if at all. 
      sh = size <= 5 ? self[median_position..-1].dup : self[median_position - 1..-1].dup
    end
    protected :second_half

    # An iterative version of median
    def iterate_midway(ratio, &block)
      mid1, mid2, last_value, j, sorted, sort1, sort2 = middle_two, nil, 0, new_sort, nil, nil

      if block_given?
        sorted.each do |i|
          last_value = yield(i)
          j += 1
          sort1 = last_value if j == mid1
          sort2 = last_value if j == mid2
          break if j >= mid2
        end
      elsif default_block
        sorted.each do |i|
          last_value = default_block[*i]
          j += 1
          sort1 = last_value if j == mid1
          sort2 = last_value if j == mid2
          break if j >= mid2
        end
      else
        sorted.each do |i|
          last_value = i
          sort1 = last_value if j == mid1
          sort2 = last_value if j == mid2
          j += 1
          break if j >= mid2
        end
      end
      return med1 if med1 == med2
      return med1 + ((med2 - med1) * ratio)
    end
    protected :iterate_midway

    # Takes the range_class and returns its map.
    # Example:
    # require 'mathn'
    # a = [1,2,3]
    # a
    # range_class = FixedRange, a.min, a.max, 1/4
    # a.categories
    # => [1, 5/4, 3/2, 7/4, 2, 9/4, 5/2, 11/4, 3]
    # For non-numeric values, returns a unique set, 
    # ordered if possible.
    def categories
      if @categories
        @categories
      elsif self.is_numeric?
        self.range_instance.map
      else
        self.uniq.sort rescue self.uniq
      end
    end

    def is_numeric?
      self.all? {|e| e.is_a?(Numeric)}
    end

    # Just an array of [min, max] to comply with R uses of the work.  Use
    # range_as_range if you want a real Range. 
    def range(&block)
      [min(&block), max(&block)]
    end

    # Useful for setting a real range class (FixedRange).
    def set_range_class(klass, *args)
      @range_class = klass
      @range_class_args = args
      self.range_class
    end

    # Takes a hash of arrays for categories
    # If Facets happens to be loaded on the computer, this keeps the order
    # of the categories straight. 
    def set_range(hash)
      if defined?(Dictionary)
        @range_hash = Dictionary.new
        @range_hash.merge!(hash)
        @categories = @range_hash.keys
      else
        @categories = hash.keys
        @range_hash = hash
      end
      @categories
    end

    # The hash of lambdas that are used to categorize the enumerable.
    attr_reader :range_hash

    # The arguments needed to instantiate the custom-defined range class.
    attr_reader :range_class_args

    # Splits the values in two, <= the value and > the value.
    def dichotomize(split_value, first_label, second_label)
      set_range({
        first_label => lambda{|e| e <= split_value},
        second_label => lambda{|e| e > split_value}
      })
    end

    # Counts each element where the block evaluates to true
    # Example:
    # a = [1,2,3]
    # a.count_if {|e| e % 2 == 0}
    def count_if(&block)
      self.inject(0) do |s, e|
        s += 1 if block.call(e)
        s
      end
    end

    # Returns a Hash or Dictionary (if available) for each category with a
    # value as the set of matching values as an array. 
    # Because this is supposed to be lean (just enumerables), but this is an
    # expensive call, I'm going to cache it and offer a parameter to reset
    # the cache.  So, call category_values(true) if you need to reset the
    # cache. 
    def category_values(reset=false)
      @category_values = nil if reset
      return @category_values if @category_values
      container = defined?(Dictionary) ? Dictionary.new : Hash.new
      if self.range_hash
        @category_values = self.categories.inject(container) do |cont, cat|
          cont[cat] = self.find_all &self.range_hash[cat]
          cont
        end
      else
        @category_values = self.categories.inject(container) do |cont, cat|
          cont[cat] = self.find_all {|e| e == cat}
          cont
        end
      end
    end

    # When creating a range, what class will it be?  Defaults to Range, but
    # other classes are sometimes useful. 
    def range_class
      @range_class ||= Range
    end

    # Actually instantiates the range, instead of producing a min and max array.
    def range_as_range(&block)
      if @range_class_args and not @range_class_args.empty?
        self.range_class.new(*@range_class_args)
      else
        self.range_class.new(min(&block), max(&block))
      end
    end
    alias :range_instance :range_as_range

    # I don't pass the block to the sort, because a sort block needs to look
    # something like: {|x,y| x <=> y}.  To get around this, set the default 
    # block on the object.
    def new_sort(&block)
      if block_given?
        map { |i| yield(i) }.sort.dup
      elsif default_block
        map { |i| default_block[*i] }.sort.dup
      else
        sort().dup
      end
    end

    # Doesn't overwrite things like Matrix#rank
    def rank(&block)

      sorted = new_sort(&block)

      if block_given?
        map { |i| sorted.index(yield(i)) + 1 }
      elsif default_block
        map { |i| sorted.index(default_block[*i]) + 1 }
      else
        map { |i| sorted.index(i) + 1 }
      end

    end unless defined?(rank)

    # Given values like [10,5,5,1]
    # Rank should produce something like [4,2,2,1]
    # And order should produce something like [4,2,3,1]
    # The trick is that rank skips as many as were duplicated, so there
    # could not be a 3 in the rank from the example above. 
    def order(&block)
      hold = []
      rank(&block).each do |x|
        while hold.include?(x) do
          x += 1
        end
        hold << x
      end
      hold
    end

    # First quartile: nth_split_by_m(1, 4)
    # Third quartile: nth_split_by_m(3, 4)
    # Median: nth_split_by_m(1, 2)
    # Doesn't match R, and it's silly to try to.
    # def nth_split_by_m(n, m)
    #   sorted  = new_sort
    #   dividers = m - 1
    #   if size % m == dividers # Divides evenly
    #     # Because we have a 0-based list, we get the floor
    #     i = ((size / m.to_f) * n).floor
    #     j = i
    #   else
    #     # This reflects R's approach, which I don't think I agree with.
    #     i = (((size / m.to_f) * n) - 1)
    #     i = i > (size / m.to_f) ? i.floor : i.ceil
    #     j = i + 1
    #   end
    #   sorted[i] + ((n / m.to_f) * (sorted[j] - sorted[i]))
    # end
    def quantile(&block)
      [
        min(&block), 
        first_half(&block).median(0.25, &block), 
        median(&block), 
        second_half(&block).median(0.75, &block), 
        max(&block)
      ]
    end

    # The cummulative sum.  Example:
    # [1,2,3].cum_sum # => [1, 3, 6]
    def cum_sum(sorted=false, &block)
      sum = zero
      obj = sorted ? self.new_sort : self
      if block_given?
        obj.map { |i| sum += yield(i) }
      elsif default_block
        obj.map { |i| sum += default_block[*i] }
      else
        obj.map { |i| sum += i }
      end
    end
    alias :cumulative_sum :cum_sum

    # The cummulative product.  Example:
    # [1,2,3].cum_prod # => [1.0, 2.0, 6.0]
    def cum_prod(sorted=false, &block)
      prod = one
      obj = sorted ? self.new_sort : self
      if block_given?
        obj.map { |i| prod *= yield(i) }
      elsif default_block
        obj.map { |i| prod *= default_block[*i] }
      else
        obj.map { |i| prod *= i }
      end
    end
    alias :cumulative_product :cum_prod

    # Used to preprocess the list
    def morph_list(&block)
      if block
        self.map{ |e| block.call(e) }
      elsif self.default_block
        self.map{ |e| self.default_block.call(e) }
      else
        self
      end
    end
    protected :morph_list

    # Example:
    # [1,2,3,0,5].cum_max # => [1,2,3,3,5]
    def cum_max(&block)
      morph_list(&block).inject([]) do |list, e|
        found = (list | [e]).max
        list << (found ? found : e)
      end
    end
    alias :cumulative_max :cum_max

    # Example: 
    # [1,2,3,0,5].cum_min # => [1,1,1,0,0]
    def cum_min(&block)
      morph_list(&block).inject([]) do |list, e|
        found = (list | [e]).min
        list << (found ? found : e)
      end
    end
    alias :cumulative_min :cum_min

    # Multiplies the values:
    # >> product(1,2,3)
    # => 6.0
    def product
      self.inject(one) {|sum, a| sum *= a}
    end

    # There are going to be a lot more of these kinds of things, so pay
    # attention. 
    def to_pairs(other, &block)
      n = [self.size, other.size].min
      (0...n).map {|i| block.call(self[i], other[i]) }
    end

    # Finds the tanimoto coefficient: the intersection set size / union set
    # size.  This is used to find the distance between two vectors.
    # >> [1,2,3].cor([2,3,5])
    # => 0.981980506061966
    # >> [1,2,3].tanimoto_pairs([2,3,5])
    # => 0.5
    def tanimoto_pairs(other)
      intersect(other).size / union(other).size.to_f
    end
    alias :tanimoto_correlation :tanimoto_pairs

    # Sometimes it just helps to have things spelled out.  These are all
    # part of the Array class. This means, you have methods that you can't
    # run on some kinds of enumerables. 

    # All of the left and right hand sides, excluding duplicates.
    # "The union of x and y"
    def union(other)
      other = other.to_a unless other.is_a?(Array)
      self | other
    end

    # What's shared on the left and right hand sides
    # "The intersection of x and y"
    def intersect(other)
      other = other.to_a unless other.is_a?(Array)
      self & other
    end

    # Everything on the left hand side except what's shared on the right
    # hand side. 
    # "The relative compliment of y in x"
    def compliment(other)
      other = other.to_a unless other.is_a?(Array)
      self - other
    end

    # Everything but what's shared
    def exclusive_not(other)
      other = other.to_a unless other.is_a?(Array)
      (self | other) - (self & other)
    end

    # Finds the cartesian product, excluding duplicates items and self-
    # referential pairs.  Yields the block value if given. 
    def cartesian_product(other, &block)
      x,y = self.uniq.dup, other.uniq.dup
      pairs = x.inject([]) do |cp, i|
        cp | y.map{|b| i == b ? nil : [i,b]}.compact
      end
      return pairs unless block_given?
      pairs.map{|p| yield p.first, p.last}
    end
    alias :cp :cartesian_product
    alias :permutations :cartesian_product

    # Sigma of pairs.  Returns a single float, or whatever object is sent in.
    # Example: [1,2,3].sigma_pairs([4,5,6], 0) {|x, y| x + y}
    # returns 21 instead of 21.0.
    def sigma_pairs(other, z=zero, &block)
      self.to_pairs(other,&block).inject(z) {|sum, i| sum += i}
    end

    # Returns the Euclidian distance between all points of a set of enumerables
    def euclidian_distance(other)
      Math.sqrt(self.sigma_pairs(other) {|a, b| (a - b) ** 2})
    end

    # Returns a random integer in the range for any number of lists.  This
    # is a way to get a random vector that is tenable based on the sample
    # data.  For example, given two sets of numbers: 
    # 
    # a = [1,2,3]; b = [8,8,8]
    # 
    # rand_in_pair_range will return a value >= 1 and <= 8 in the first
    # place, >= 2 and <= 8 in the second place, and >= 3 and <= 8 in the
    # last place. 
    # Works for integers.  Rethink this for floats.  May consider setting up
    # FixedRange for floats.  O(n*5)
    def rand_in_range(*args)
      min = self.min_of_lists(*args)
      max = self.max_of_lists(*args)
      (0...size).inject([]) do |ary, i|
        ary << rand_between(min[i], max[i])
      end
    end

    # Finds the correlation between two enumerables.
    # Example: [1,2,3].cor [2,3,5]
    # returns 0.981980506061966
    def correlation(other)
      n = [self.size, other.size].min
      sum_of_products_of_pairs = self.sigma_pairs(other) {|a, b| a * b}
      self_sum = self.sum
      other_sum = other.sum
      sum_of_squared_self_scores = self.sum { |e| e * e }
      sum_of_squared_other_scores = other.sum { |e| e * e }

      numerator = (n * sum_of_products_of_pairs) - (self_sum * other_sum)
      self_denominator = ((n * sum_of_squared_self_scores) - (self_sum ** 2))
      other_denominator = ((n * sum_of_squared_other_scores) - (other_sum ** 2))
      denominator = Math.sqrt(self_denominator * other_denominator)
      return numerator / denominator
    end
    alias :cor :correlation

    # Transposes arrays of arrays and yields a block on the value.
    # The regular Array#transpose ignores blocks
    def yield_transpose(*enums, &block)
      enums.unshift(self)
      n = enums.map{ |x| x.size}.min
      block ||= lambda{|e| e}
      (0...n).map { |i| block.call enums.map{ |x| x[i] } }
    end

    # Returns the max of two or more enumerables.
    # >> [1,2,3].max_of_lists([0,5,6], [0,2,9])
    # => [1, 5, 9]
    def max_of_lists(*enums)
      yield_transpose(*enums) {|e| e.max}
    end

    # Returns the min of two or more enumerables.
    # >> [1,2,3].min_of_lists([4,5,6], [0,2,9])
    # => [0, 2, 3]
    def min_of_lists(*enums)
      yield_transpose(*enums) {|e| e.min}
    end

  end
end