Sha256: 77e78e99aec5d82cbd00407c0c60b1e434ce0cb66823f24676cc1e10686e0538

Contents?: true

Size: 750 Bytes

Versions: 12

Compression:

Stored size: 750 Bytes

Contents

module Ankusa

  # Classifier that assigns a text to the class whose word probabilities
  # give the smallest accumulated divergence score for that text.
  #
  # Relies on names not defined in this file: the Classifier mixin,
  # TextHash, get_word_probs and @classnames (presumably all supplied by
  # the rest of the library -- confirm against Classifier).
  class KLDivergenceClassifier
    include Classifier

    # Returns the class with the least distance from the word
    # distribution of the given text.
    #
    # text    - the text to classify; passed on to #distances.
    # classes - optional array of class names to consider; when nil,
    #           #distances falls back to @classnames.
    #
    # Returns the winning class name, or nil when #distances produces no
    # entries (for example, when the text yields no words).
    def classify(text, classes=nil)
      # min_by finds the smallest distance in a single pass; a full sort
      # is not needed just to pick the minimum.
      closest = distances(text, classes).min_by { |_, distance| distance }
      closest && closest.first
    end

    # Computes a distance score per class for the given text.
    #
    # text       - the text to score; wrapped in TextHash.new.
    # classnames - optional array of class names to look at; defaults to
    #              @classnames when nil.
    #
    # Returns a Hash (default value 0) mapping each class name to its
    # accumulated score. Classes are only added as keys while iterating
    # over words, so the hash is empty when the TextHash has no entries.
    def distances(text, classnames=nil)
      classnames ||= @classnames
      distances = Hash.new 0

      th = TextHash.new(text)
      # Loop-invariant denominator, computed once instead of per word.
      th_size = th.length.to_f
      th.each { |word, count|
        # Probability of this word within the text, relative to th.length.
        thprob = count.to_f / th_size
        probs = get_word_probs(word, classnames)
        classnames.each { |k|
          # Divergence term for this word, weighted by its count.
          distances[k] += (thprob * Math.log(thprob / probs[k]) * count)
        }
      }

      distances
    end
  end

end

Version data entries

12 entries across 12 versions & 1 rubygem

Version Path
ankusa-0.1.1 lib/ankusa/kl_divergence.rb
ankusa-0.1.0 lib/ankusa/kl_divergence.rb
ankusa-0.0.16 lib/ankusa/kl_divergence.rb
ankusa-0.0.15 lib/ankusa/kl_divergence.rb
ankusa-0.0.14 lib/ankusa/kl_divergence.rb
ankusa-0.0.13 lib/ankusa/kl_divergence.rb
ankusa-0.0.12 lib/ankusa/kl_divergence.rb
ankusa-0.0.11 lib/ankusa/kl_divergence.rb
ankusa-0.0.10 lib/ankusa/kl_divergence.rb
ankusa-0.0.9 lib/ankusa/kl_divergence.rb
ankusa-0.0.8 lib/ankusa/kl_divergence.rb
ankusa-0.0.7 lib/ankusa/kl_divergence.rb