class NaiveBayesClassifier
  attr_reader :num_categories, :prior_token_count, :prior_category_counts
  attr_accessor :category_names

  # Parameters
  # ----------
  # num_categories: number of categories we want to classify.
  # prior_category_counts: array of parameters for a Dirichlet prior that we place on the prior
  #   probabilities of each category. (In other words, these are "virtual counts" of the number of
  #   times we have seen each category previously.) Set the array to all 0's if you want to use
  #   maximum likelihood estimates. Defaults to random draws from the unit interval if nothing is
  #   set, which also breaks symmetry for EM training.
  # prior_token_count: parameter for a symmetric Dirichlet prior that we place on p(token | category).
  #   (In other words, this is a "virtual count" of the number of times we have seen each token
  #   previously.) Set to 0 if you want to use maximum likelihood estimates.
  def initialize(options = {})
    options = { :num_categories => 2, :prior_token_count => 0.0001 }.merge(options)

    @num_categories = options[:num_categories]
    @prior_token_count = options[:prior_token_count]
    @prior_category_counts = options[:prior_category_counts] || Array.new(@num_categories) { rand }
    @category_names = options[:category_names] || (0..@num_categories - 1).map(&:to_s)

    # `@token_counts[category][token]` is the (weighted) number of times we have seen `token` with
    # this category. Counts default to 0 *without* inserting the key, so that merely looking up an
    # unseen token during classification doesn't inflate the vocabulary size used for smoothing.
    @token_counts = Array.new(@num_categories) { Hash.new(0) }

    # `@total_token_counts[category]` is always equal to the sum of the counts in `@token_counts[category]`.
    @total_token_counts = Array.new(@num_categories, 0)

    # `@category_counts[category]` is the (weighted) number of training examples we have seen with this category.
    @category_counts = Array.new(@num_categories, 0)
  end

  # Given a training example and a category, update the parameters of the naive Bayes model. The
  # example may be weighted by the probability that it belongs to the category (this is what the
  # soft assignments in EM training use).
  #
  # Parameters
  # ----------
  # example: an array of tokens.
  # category_index: the index of the category this example belongs to.
  # probability: the probability that the example belongs to the category.
  def train(example, category_index, probability = 1)
    example.each do |token|
      @token_counts[category_index][token] += probability
      @total_token_counts[category_index] += probability
    end
    @category_counts[category_index] += probability
  end
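  # A quick supervised-training sketch (the tokens and category names below are invented for
  # illustration; any array of tokens works):
  #
  #   classifier = NaiveBayesClassifier.new(:category_names => %w(spam ham))
  #   classifier.train(%w(buy cheap pills now), 0)   # spam
  #   classifier.train(%w(lunch moved to noon), 1)   # ham
  #   classifier.classify(%w(cheap pills))           # => 0 (i.e., "spam")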
  # Performs naive Bayes training with the EM algorithm, using two categories.
  def self.train_em(max_epochs, training_examples)
    prev_classifier = NaiveBayesClassifier.new

    # Seed the model with random soft assignments of each example to the two categories. Without
    # this, a freshly initialized model assigns zero probability to every token, so the first
    # E-step would zero out all the posteriors and EM would never get started.
    training_examples.each do |example|
      p = rand
      prev_classifier.train(example, 0, p)
      prev_classifier.train(example, 1, 1 - p)
    end

    max_epochs.times do
      classifier = NaiveBayesClassifier.new

      # E-M training
      training_examples.each do |example|
        # E-step: for each training example, recompute its category probabilities under the previous model.
        posterior_category_probs = prev_classifier.get_posterior_category_probabilities(example)

        # M-step: retrain the model, weighting each example by its posterior probability of belonging to each category.
        posterior_category_probs.each_with_index do |p, category|
          classifier.train(example, category, p)
        end
      end

      prev_classifier = classifier
      # TODO: add a convergence check, so we can break out early if we want.
    end
    return prev_classifier
  end

  # Returns the *index* (not the name) of the category the tokens are classified under.
  def classify(tokens)
    max_prob, max_category = -1, -1
    if tokens.empty?
      # If the example is empty, find the category with the highest prior probability.
      (0..@num_categories - 1).each do |i|
        prior_prob = get_prior_category_probability(i)
        max_prob, max_category = prior_prob, i if prior_prob > max_prob
      end
    else
      # Otherwise, find the category with the highest posterior probability.
      get_posterior_category_probabilities(tokens).each_with_index do |prob, category|
        max_prob, max_category = prob, category if prob > max_prob
      end
    end
    return max_category
  end

  # Returns p(category | tokens), for each category, in an array.
  def get_posterior_category_probabilities(tokens)
    unnormalized_posterior_probs = (0..@num_categories - 1).map do |category|
      p = tokens.map { |token| get_token_probability(token, category) }.reduce(:*) # p(tokens | category)
      p * get_prior_category_probability(category) # p(tokens | category) * p(category)
    end
    normalization = unnormalized_posterior_probs.reduce(:+)
    normalization = 1 if normalization == 0
    return unnormalized_posterior_probs.map { |p| p / normalization }
  end

  # Returns p(token | category), smoothed by the `@prior_token_count` virtual count.
  def get_token_probability(token, category_index)
    denom = @total_token_counts[category_index] + @token_counts[category_index].size * @prior_token_count
    if denom == 0
      return 0
    else
      return (@token_counts[category_index][token] + @prior_token_count).to_f / denom
    end
  end

  # Returns p(category).
  def get_prior_category_probability(category_index)
    denom = @category_counts.reduce(:+) + @prior_category_counts.reduce(:+)
    if denom == 0
      return 0
    else
      return (@category_counts[category_index] + @prior_category_counts[category_index]).to_f / denom
    end
  end
end
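# A minimal end-to-end sketch of the EM trainer. The "documents" below are made up for
# illustration: two clusters with mostly disjoint vocabularies, which EM should separate without
# labels. Exact posteriors will vary run to run, since the model is randomly initialized.
if __FILE__ == $0
  examples = [
    %w(buy cheap pills now),
    %w(cheap pills online now),
    %w(lunch meeting moved to noon),
    %w(meeting notes from noon lunch)
  ]

  classifier = NaiveBayesClassifier.train_em(20, examples)

  examples.each do |example|
    probs = classifier.get_posterior_category_probabilities(example)
    puts "#{example.join(' ')} => #{probs.map { |p| p.round(3) }.inspect}"
  end
end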