lib/bayes/fisher.rb in yanbi-ml-0.2.1 vs lib/bayes/fisher.rb in yanbi-ml-0.2.2
- old
+ new
@@ -3,20 +3,14 @@
# License:: MIT
module Yanbi
class Fisher < Bayes
-
- def classify(text)
- max_score(text) do |cat, doc|
- fisher_score(cat, doc)
- end
- end
-
+
private
- def fisher_score(category, document)
+ def score(category, document)
features = document.words.uniq
probs = features.map {|x| weighted_prob(x, category)}
pscores = probs.reduce(&:*)
score = -2 * Math.log(pscores)
invchi2(score, features.count * 2)
@@ -28,12 +22,11 @@
return 0 if sum.zero?
wp / sum
end
# Returns the count recorded for `word` under `cat` in @category_counts,
# divided by a per-category total.
#
# The removed (-) lines derived that total on every call by summing all of the
# values in @category_counts[cat]; the added (+) line reads it from
# @category_sizes[cat] instead.
#
# `count` is a Float (via to_f) when the word has an entry for the category and
# the Integer literal 0 otherwise.
#
# NOTE(review): @category_sizes is populated outside this view - presumably a
# non-zero numeric total per category; confirm, because the divisor is not
# guarded against zero or nil here.
def word_prob(cat, word)
- all_word_count = @category_counts[cat].values.reduce(&:+)
count = @category_counts[cat].has_key?(word) ? @category_counts[cat][word].to_f : 0
- count / all_word_count
+ count / @category_sizes[cat]
end
def weighted_prob(word, category, basicprob=nil, weight=1.0, ap=0.5)
basicprob = category_prob(category, word)
totals = @category_counts.inject(0) {|sum, cat| sum += cat.last[word].to_i}