Sha256: 9a770da9b57a753e875d89e65cfbb26f6c520b0f6bb5bad15018ab1ec9336775
Contents?: true
Size: 1.44 KB
Versions: 2
Compression:
Stored size: 1.44 KB
Contents
# Author::    Robert Dormer (mailto:rdormer@gmail.com)
# Copyright:: Copyright (c) 2016 Robert Dormer
# License::   MIT

module Yanbi
  # Text classifier that scores each category with Fisher's method: the
  # per-word probabilities of a document are combined into a chi-square
  # statistic, which is then converted to a probability by +invchi2+.
  #
  # Relies on state and helpers supplied outside this file: +@categories+,
  # +@category_counts+ (indexed as category -> word -> count) and +max_score+
  # (presumably provided by the Bayes superclass; confirm there).
  class Fisher < Bayes
    # Classifies +text+ by delegating to +max_score+, supplying the Fisher
    # score of each (category, document) pair as the scoring block.
    def classify(text)
      max_score(text) do |cat, doc|
        fisher_score(cat, doc)
      end
    end

    private

    # Fisher score of +document+ for +category+.
    #
    # Computes -2 * sum(ln p_i) over the document's unique words and feeds it
    # to the inverse chi-square function with 2 * (number of words) degrees of
    # freedom. An empty document yields 1.0.
    def fisher_score(category, document)
      features = document.words.uniq

      # Sum the logarithms instead of taking the logarithm of the product:
      # multiplying many probabilities underflows to 0.0 (log(0) = -Infinity),
      # and a product over an empty list would be nil.
      log_sum = features.inject(0.0) do |acc, word|
        acc + Math.log(weighted_prob(word, category))
      end

      invchi2(-2 * log_sum, features.count * 2)
    end

    # Share of +word+'s per-category frequency that belongs to +cat+:
    # word_prob(cat) / sum of word_prob over every category.
    # Returns 0.0 when the word has zero frequency in every category.
    def category_prob(cat, word)
      wp = word_prob(cat, word)
      sum = @categories.inject(0) { |s, c| s + word_prob(c, word) }
      return 0.0 if sum.zero?

      wp / sum
    end

    # Relative frequency of +word+ among all word counts recorded for +cat+.
    # Returns 0.0 when the category has no recorded words, rather than
    # dividing by nil or zero.
    def word_prob(cat, word)
      counts = @category_counts[cat]
      all_word_count = counts.values.inject(0) { |sum, n| sum + n }
      return 0.0 if all_word_count.zero?

      # fetch does not trigger a Hash default, matching the has_key? check
      # this replaces.
      counts.fetch(word, 0).to_f / all_word_count
    end

    # Weighted average of an assumed probability +ap+ (counted +weight+ times)
    # and the observed probability of +word+ for +category+, where the
    # observation counts as many times as the word occurs across all
    # categories.
    #
    # +basicprob+ may be passed to override the observed probability; when it
    # is nil the value is computed with +category_prob+.
    def weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
      basicprob ||= category_prob(category, word)

      # Total occurrences of the word across every category; nil.to_i covers
      # categories that never saw the word.
      totals = @category_counts.inject(0) do |sum, entry|
        sum + entry.last[word].to_i
      end

      ((weight * ap) + (totals * basicprob)) / (weight + totals)
    end

    # Inverse chi-square (upper-tail probability) for an even number of
    # degrees of freedom +df+:
    #
    #   exp(-m) * sum(m**i / i!) for i in 0 .. df/2 - 1, where m = chi / 2
    #
    # The result is capped at 1.0.
    def invchi2(chi, df)
      m = chi / 2.0
      term = Math.exp(-m) # the i = 0 term
      sum = term

      # Exclusive range: the series stops at i = df/2 - 1. Including df/2
      # would evaluate the distribution for df + 2 degrees of freedom.
      (1...(df / 2)).each do |i|
        term *= (m / i)
        sum += term
      end

      # A non-finite chi can turn the series into NaN; keep the historical
      # fallback of 1.0 for that case instead of rescuing every error.
      return 1.0 if sum.nan?

      [sum, 1.0].min
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
yanbi-ml-0.2.1 | lib/bayes/fisher.rb |
yanbi-ml-0.2.0 | lib/bayes/fisher.rb |