Sha256: 77e78e99aec5d82cbd00407c0c60b1e434ce0cb66823f24676cc1e10686e0538
Contents?: true
Size: 750 Bytes
Versions: 12
Compression:
Stored size: 750 Bytes
Contents
module Ankusa
  # Classifier that scores each candidate class by a KL-divergence-style
  # distance between the word distribution of the input text and the
  # per-class word probabilities, then picks the class with the smallest
  # distance.
  #
  # NOTE(review): relies on +@classnames+ and +get_word_probs+, which are
  # presumably supplied by the included Classifier module -- confirm there.
  class KLDivergenceClassifier
    include Classifier

    # Return the class with the least distance from the word distribution
    # of the given text.
    #
    # @param text the text to classify (handed to TextHash.new)
    # @param classes optional array of classes to consider; when nil,
    #   +distances+ falls back to @classnames
    # @return the class with the smallest distance, or nil when no distance
    #   was computed (e.g. the text produced no words)
    def classify(text, classes=nil)
      # min_by finds the smallest entry without sorting every pair, and it
      # returns nil for an empty hash instead of blowing up later.
      closest = distances(text, classes).min_by { |_klass, distance| distance }
      closest && closest.first
    end

    # Compute the accumulated distance for every requested class.
    #
    # For each (word, count) pair in the text's TextHash, the term
    #   thprob * log(thprob / probs[class]) * count
    # is added to that class's running total, where
    # thprob = count / th.length.
    #
    # @param text the text to measure (handed to TextHash.new)
    # @param classnames optional array of classes to look at; defaults to
    #   @classnames when nil
    # @return [Hash] class => summed distance; missing keys read as 0, and
    #   the hash is empty when the text yields no words
    def distances(text, classnames=nil)
      classnames ||= @classnames
      distances = Hash.new(0)

      th = TextHash.new(text)
      # Loop-invariant denominator, computed once.
      # NOTE(review): this is whatever TextHash#length reports -- confirm
      # that it is the intended normaliser for the word distribution.
      total = th.length.to_f

      th.each do |word, count|
        thprob = count.to_f / total
        probs = get_word_probs(word, classnames)
        classnames.each do |k|
          distances[k] += (thprob * Math.log(thprob / probs[k]) * count)
        end
      end

      distances
    end
  end
end
Version data entries
12 entries across 12 versions & 1 rubygems