Class Ankusa::KLDivergenceClassifier
In: lib/ankusa/kl_divergence.rb
Parent: Object

Methods

Included Modules

Classifier

Public Instance methods

[Source]

# File lib/ankusa/kl_divergence.rb, line 6
    # Returns the class whose word distribution is the least distant from
    # the word distribution of +text+.
    #
    # text::    the text to classify; handed to #distances as-is.
    # classes:: optional array of class names to restrict the comparison to;
    #           handed to #distances as-is (nil lets #distances pick its
    #           own default).
    #
    # Returns the class name with the smallest distance, or nil when
    # #distances yields no entries (e.g. nothing to compare). When several
    # classes share the smallest distance, the first one enumerated wins.
    def classify(text, classes=nil)
      # min_by finds the smallest distance in one pass; sorting every
      # candidate just to take the first element is unnecessary work and
      # gives no guarantee about which of several tied classes comes first.
      closest = distances(text, classes).min_by { |_name, distance| distance }

      # closest is a [name, distance] pair, or nil for an empty table.
      closest && closest.first
    end

classnames is an array of the classes to look at; when it is nil, @classnames is used instead.

[Source]

# File lib/ankusa/kl_divergence.rb, line 14
    # Computes, for each class, an accumulated divergence score between the
    # word distribution of +text+ and that class's word probabilities.
    #
    # text::       the text to measure; wrapped in a TextHash and iterated
    #              as (word, count) pairs.
    # classnames:: array of class names to score; falls back to @classnames
    #              when nil.
    #
    # Returns a Hash (default value 0) mapping each class name to the sum,
    # over every word in the text, of
    #   thprob * log(thprob / class_prob) * count
    # where thprob is count divided by the TextHash's length. A text that
    # produces no words yields an empty Hash.
    def distances(text, classnames=nil)
      classnames ||= @classnames
      totals = Hash.new(0)

      word_counts = TextHash.new(text)
      # The denominator does not change while iterating, so read it once.
      denominator = word_counts.length.to_f

      word_counts.each do |word, count|
        thprob = count.to_f / denominator
        class_probs = get_word_probs(word, classnames)

        classnames.each do |name|
          totals[name] += (thprob * Math.log(thprob / class_probs[name]) * count)
        end
      end

      totals
    end

[Validate]