Sha256: 9a770da9b57a753e875d89e65cfbb26f6c520b0f6bb5bad15018ab1ec9336775

Contents?: true

Size: 1.44 KB

Versions: 2

Compression:

Stored size: 1.44 KB

Contents

# Author::    Robert Dormer (mailto:rdormer@gmail.com)
# Copyright:: Copyright (c) 2016 Robert Dormer
# License::   MIT

module Yanbi

  class Fisher < Bayes
  
    # Classifies +text+ by handing it to +max_score+ together with a block
    # that rates each (category, document) pair using +fisher_score+.
    # NOTE(review): +max_score+ is not defined in this file; presumably it is
    # inherited from Bayes -- confirm there what it yields and returns.
    #
    # text:: the input forwarded unchanged to +max_score+
    #
    # Returns whatever +max_score+ returns.
    def classify(text)
      max_score(text) { |category, document| fisher_score(category, document) }
    end
  
    private
  
    # Computes the Fisher-method score of +document+ for +category+.
    #
    # Every distinct word of the document contributes its weighted
    # probability. The probabilities are combined as -2 * sum(ln p) and that
    # statistic is passed to +invchi2+ with two degrees of freedom per word.
    #
    # category:: category key forwarded to +weighted_prob+
    # document:: object whose +words+ returns a collection responding to +uniq+
    #
    # Returns the value produced by +invchi2+.
    def fisher_score(category, document)
      features = document.words.uniq

      # Sum the logarithms instead of taking the logarithm of the product:
      # multiplying many probabilities underflows to 0.0 on long documents,
      # which turned the statistic into Infinity. Starting from 0.0 also gives
      # an empty document a statistic of zero rather than raising on
      # Math.log(nil).
      log_sum = features.inject(0.0) do |acc, feature|
        acc + Math.log(weighted_prob(feature, category))
      end

      invchi2(-2 * log_sum, features.count * 2)
    end

    # Normalises the +word_prob+ of +word+ in +cat+ by the sum of its
    # +word_prob+ over every entry of @categories.
    #
    # cat::  category key
    # word:: the feature being rated
    #
    # Returns 0 when the summed probability is zero, otherwise the ratio
    # word_prob(cat, word) / summed probability.
    def category_prob(cat, word)
      in_category = word_prob(cat, word)

      overall = 0
      @categories.each do |other|
        overall += word_prob(other, word)
      end

      overall.zero? ? 0 : in_category / overall
    end

    # Returns the share of the values stored for +cat+ in @category_counts
    # that belongs to +word+: the value recorded for +word+ divided by the
    # sum of all values recorded for that category.
    # NOTE(review): the values are presumably per-word occurrence counts --
    # confirm against the code that populates @category_counts.
    #
    # cat::  key into @category_counts
    # word:: the feature to look up
    #
    # Always returns a Float. Returns 0.0 when +word+ is not recorded for the
    # category, or when the category has nothing recorded at all (previously
    # that case raised a TypeError because the total was nil).
    def word_prob(cat, word)
      counts = @category_counts[cat]
      total = counts.values.reduce(0, :+)
      return 0.0 if total.zero?

      # fetch with a default mirrors the original has_key? check.
      counts.fetch(word, 0).to_f / total
    end

    # Blends the +category_prob+ of +word+ with an assumed prior +ap+, so
    # that words with few recorded occurrences stay close to the prior.
    #
    # word::      the feature being rated
    # category::  category key forwarded to +category_prob+
    # basicprob:: optional precomputed probability; when nil it is obtained
    #             from +category_prob+. (It used to be overwritten
    #             unconditionally, so a supplied value was silently ignored.)
    # weight::    weight given to the assumed prior (default 1.0)
    # ap::        the assumed prior probability (default 0.5)
    #
    # Returns (weight * ap + totals * basicprob) / (weight + totals), where
    # totals is the sum over every @category_counts entry of the value stored
    # for +word+ (missing entries count as 0).
    def weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
      basicprob ||= category_prob(category, word)
      totals = @category_counts.inject(0) { |sum, entry| sum + entry.last[word].to_i }
      ((weight * ap) + (totals * basicprob)) / (weight + totals)
    end

    # Upper-tail probability of the chi-square distribution for an even
    # number of degrees of freedom.
    #
    # With m = chi / 2 and df = 2k the value is
    #   exp(-m) * (1 + m/1! + m**2/2! + ... + m**(k-1)/(k-1)!)
    # capped at 1.0.
    #
    # chi:: the chi-square statistic
    # df::  degrees of freedom; integer division by two sets the term count
    #
    # Returns a Float no greater than 1.0. Any StandardError raised while
    # computing (such as a failed comparison when the sum is not a number)
    # results in the best-effort fallback 1.0.
    def invchi2(chi, df)
      m = chi / 2.0
      term = Math.exp(-m)
      sum = term

      # The series has df/2 terms in total and the first one is already in
      # +sum+, so the range is exclusive. The previous inclusive range added
      # one term too many and inflated every result.
      (1...(df / 2)).each do |i|
        term *= (m / i)
        sum += term
      end

      [sum, 1.0].min
    rescue StandardError
      1.0
    end
  end

end

Version data entries

2 entries across 2 versions & 1 rubygems

Version Path
yanbi-ml-0.2.1 lib/bayes/fisher.rb
yanbi-ml-0.2.0 lib/bayes/fisher.rb