lib/ankusa/naive_bayes.rb in ankusa-0.1.0 vs lib/ankusa/naive_bayes.rb in ankusa-0.1.1
- old
+ new
@@ -37,11 +37,11 @@
result = Hash.new 0
TextHash.new(text).each { |word, count|
probs = get_word_probs(word, classnames)
classnames.each { |k|
- # log likelihood should be negative infinity if we've never seen the klass
- result[k] += probs[k] > 0 ? (Math.log(probs[k]) * count) : -INFTY
+ # Choose a really small probability if the word has never been seen before in class k
+ result[k] += Math.log(probs[k] > 0 ? (probs[k] * count) : Float::EPSILON)
}
}
# add the prior
doc_counts = doc_count_totals.select { |k,v| classnames.include? k }.map { |k,v| v }