require 'forwardable'

# Generate values from a categorical probability distribution in constant time.
# Uses Vose's Alias Method to construct a table of probabilities and aliases.
# To generate a value, we pick an entry in the distribution at random, and then pick
# either the value or the alias, using a biased coin flip.
#
# Table construction is O(n), index generation is O(1).
# 
# See http://www.keithschwarz.com/darts-dice-coins for a detailed explanation.

class CategoricalDistribution
  extend Forwardable
  include Enumerable


  # :call-seq:
  #   CategoricalDistribution.new(p={})
  #   CategoricalDistribution.new(p=[])
  #   CategoricalDistribution.new(p=[], values=[])
  #
  # Constructs a categorical random distribution from a set of probabilities
  # and, optionally, values. 
  #
  # These can be passed as an array of probabilities and an array of values,
  # or as a hash of values to probabilities.
  #
  # If necessary, probabilities will be normalized so that they sum to one.
  # If no values are provided, they will default to the respective indices of
  # each probability.
  #
  # Raises an +ArgumentError+ if any probability is negative.
  #
  #   CategoricalDistribution.new                                    
  #     #=> {}
  #
  #   CategoricalDistribution.new([Rational(1, 5), Rational(4, 5)])
  #     #=> {0 => (1/5), 1 => (4/5)}
  #
  #   CategoricalDistribution.new([1, 1, 1])
  #     #=> {0 => (1/3), 1 => (1/3), 2 => (1/3)}
  #
  #   CategoricalDistribution.new([1, 1, 1], [:a, :b, :c])
  #     #=> {:a => (1/3), :b => (1/3), :c => (1/3)}
  #
  #   CategoricalDistribution.new({a: 1, b: 2, c: 3})
  #     #=> {:a => (1/6), :b => (1/3), :c => (1/2)}

  def initialize(p={}, values=nil)
    if p.respond_to?(:keys) && p.respond_to?(:values)
      values, p = p.keys, p.values 
    else
      values ||= p.each_index.to_a
    end

    raise ArgumentError, 'probabilities must be positive' if p.any? { |value| value < 0 }

    sum = p.reduce(:+)
    p.map! { |value| Rational(value, sum) } unless sum == 1

    @p = Hash[[values, p].transpose].freeze

    n = p.size
    @prob = Array.new(n, Rational(1))
    @alias = Array.new(n)

    p.map! { |value| value * n }

    small, large = p.each_index.partition { |i| p[i] < 1 }

    until small.empty?
      l = small.pop
      g = large.pop

      @prob[l] = p[l]
      @alias[l] = g

      p[g] -= 1 - p[l]
      (p[g] < 1 ? small : large) << g
    end
  end


  # :call-seq:
  #   distribution.rand              -> obj or nil
  #   distribution.rand(random: rng) -> obj or nil
  #
  # Return a value from the probability distribution, or +nil+ if it is empty.
  #
  # The optional rng argument will be used as the random number generator.
  #
  #   distribution = CategoricalDistribution.new({a: 1, b: 2, c: 3})
  #   distribution.rand      #=> :b
  #   distribution.rand      #=> :c
  #   distribution.rand      #=> :c

  def rand(random: Random)
    return if @prob.empty?

    i = random.rand(@prob.size)
    random.rand <= @prob[i] ? @p.keys[i] : @p.keys[@alias[i]]
  end


  # :call-seq:
  #   distribution.each { |item| block }  -> nil
  #   distribution.each                   -> Enumerator
  #
  # Calls the given block infinitely many times, passing a value from the
  # distribution as a parameter.
  #
  # An Enumerator is returned if no block is given.
  #
  #   distribution = CategoricalDistribution.new([1, 2, 3])
  #   distribution.each { |x| print x, " -- " }
  #
  # produces:
  #
  #  1 -- 2 -- 2 -- 0 -- 1 -- ... 
  
  def each(random: Random)
    return enum_for(:each) unless block_given?

    loop { yield rand } unless empty?
  end


  # Returns the distribution's values and their probabilities, as a hash of
  # objects to Rationals which sum to one, or an empty hash if the distribution is empty.
  #
  #   CategoricalDistribution.new([1, 1, 1]).probabilities   
  #     #=> {0 => (1/3), 1 => (1/3), 2 => (1/3)}
  #
  #   CategoricalDistribution.new.probabilities
  #     #=> {}

  def probabilities
    @p
  end


  # :call_seq:
  #   distribution == other_distribution    ->    bool
  #
  # Equality --- Two probability distributions are equal if they contain the
  # same values and if each value has the same probability as the corresponding
  # value in other_distribution.
  #
  #   CategoricalDistribution.new([1, 2])            == CategoricalDistribution.new([1, 2, 3])
  #     #=> false
  #
  #   CategoricalDistribution.new([1, 2, 3])         == CategoricalDistribution.new([1, 2, 3])
  #     #=> true
  #
  #   CategoricalDistribution.new([1, 2, 3])         == CategoricalDistribution.new([1, 1, 1])
  #     #=> false
  #
  #   CategoricalDistribution.new([1, 2], [:a, :b])  == CategoricalDistribution.new([1, 2], [:a, :c])
  #     #=> false

  def ==(other)
    self.probabilities == other.probabilities
  end


  # :call_seq:
  #   distribution.eql?(other)  -> true or false
  #
  # Returns true if self and other are the same object, or are both probability
  # distributions with the same values and probabilities.

  def eql?(other)
    self.equal?(other) || 
      (self.class == other.class && self.probabilities == other.probabilities)
  end


  def_delegators :@p, :empty?, :hash, :length, :size, :to_s

  ##
  # :method: empty?
  # :call_seq:
  #   distribution.empty?    -> true or false
  #
  # Returns true if self contains no values.
  #
  #   distribution = CategoricalDistribution.new
  #   distribution.empty?   #=> true

  ##
  # :method: hash
  # :call_seq:
  #   distribution.hash    -> fixnum
  #
  # Compute a hash-code for this probability distribution.
  #
  # Two distributions with the same values and probabilities will have the same
  # hash code (and will compare using #eql?).
  #
  # See also Hash#hash.

  ##
  # :method: length
  # :call_seq:
  #   distribution.length  -> int
  #
  # Returns the number of values in self. May be zero.
  #
  #   CategoricalDistribution.new([1, 2, 3, 4]).length    #=> 4
  #   CategoricalDistribution.new.length                  #=> 0
end