lib/bayes/fisher.rb in yanbi-ml-0.2.1 vs lib/bayes/fisher.rb in yanbi-ml-0.2.2

- old
+ new

@@ -3,20 +3,14 @@ # License:: MIT module Yanbi class Fisher < Bayes - - def classify(text) - max_score(text) do |cat, doc| - fisher_score(cat, doc) - end - end - + private - def fisher_score(category, document) + def score(category, document) features = document.words.uniq probs = features.map {|x| weighted_prob(x, category)} pscores = probs.reduce(&:*) score = -2 * Math.log(pscores) invchi2(score, features.count * 2) @@ -28,12 +22,11 @@ return 0 if sum.zero? wp / sum end def word_prob(cat, word) - all_word_count = @category_counts[cat].values.reduce(&:+) count = @category_counts[cat].has_key?(word) ? @category_counts[cat][word].to_f : 0 - count / all_word_count + count / @category_sizes[cat] end def weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5) basicprob = category_prob(category, word) totals = @category_counts.inject(0) {|sum, cat| sum += cat.last[word].to_i}