Sha256: 34c2d962836684a45575a5c5bd9cbb01e177e7ef7eed24afe3b8e68242ed65f2
Contents?: true
Size: 1.35 KB
Versions: 1
Compression:
Stored size: 1.35 KB
Contents
# Author::    Lucas Carlson  (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License::   LGPL

module Classifier
  # Naive Bayes style text classifier.
  #
  # Categories are fixed at construction time. Each category owns a hash of
  # word => accumulated count, filled in by #train (or the dynamic
  # <tt>train_<category></tt> methods) and read by #classifications.
  #
  # Every text passed in must respond to +word_hash+, which is defined outside
  # this file; this class only relies on it yielding (word, count) pairs and
  # responding to +empty?+.
  class Bayes
    # Pattern of the dynamic training methods handled by #method_missing.
    TRAIN_METHOD = /train_([\w]+)/

    # Creates a classifier with the given category names.
    #
    # Names are normalised exactly as #train and the dynamic
    # <tt>train_*</tt> methods normalise them (underscores become spaces, first
    # letter capitalised), so 'not_spam', 'Not spam' and :not_spam all denote
    # the same category.
    def initialize(*categories)
      @categories = {}
      categories.each { |category| @categories[category_key(category)] = {} }
      @total_words = 0
    end

    # Returns the name (a String) of the best scoring category for +text+, or
    # +nil+ when the classifier has no categories at all.
    def classify(text)
      best = classifications(text).max_by { |_category, score| score }
      best ? best.first : nil
    end

    # Returns a hash of category name (String) => score for +text+.
    #
    # The score is the sum, over the distinct words of +text+, of
    # <tt>log(word_count / total_count)</tt> for the category; words the
    # category has never seen count as 0.1. A text without words scores 0 in
    # every category. A category with no training data scores -Infinity for
    # any non-empty text, so it can never outrank a trained category.
    def classifications(text)
      words = text.word_hash # identical for every category, so compute once
      score = {}
      @categories.each do |category, category_words|
        score[category.to_s] = category_score(category_words, words)
      end
      score
    end

    # Dispatches <tt>train_<category>(text, ...)</tt> to #train, once per
    # argument. A method name that itself normalises to a known category is
    # accepted as well (historical behaviour, kept for compatibility).
    #
    # Raises StandardError for a <tt>train_*</tt> name whose category does not
    # exist, and NoMethodError (via +super+) for any other unknown method.
    def method_missing(name, *args)
      category = category_from_method(name)
      if @categories.key?(category)
        args.each { |text| train(category, text) }
      elsif name.to_s =~ TRAIN_METHOD
        raise StandardError, "No such category: #{category}"
      else
        super
      end
    end

    # Keeps +respond_to?+ in agreement with the names #method_missing accepts.
    def respond_to_missing?(name, include_private = false)
      @categories.key?(category_from_method(name)) || super
    end

    # Adds the word counts of +text+ to +category+.
    #
    # +category+ may be a String or Symbol and is normalised like the names
    # given to #initialize. Raises StandardError when the category is unknown.
    def train(category, text)
      category = category_key(category)
      category_words = @categories[category]
      raise StandardError, "No such category: #{category}" unless category_words

      text.word_hash.each do |word, count|
        category_words[word] = category_words.fetch(word, 0) + count
        @total_words += count
      end
    end

    private

    # Normalises a user supplied category name into the Symbol used as key.
    def category_key(name)
      name.to_s.tr('_', ' ').capitalize.intern
    end

    # Derives the category key addressed by a dynamic method name by dropping
    # the "train_" prefix when present.
    def category_from_method(name)
      category_key(name.to_s.gsub(TRAIN_METHOD, '\1'))
    end

    # Scores one category against the (word, count) pairs of a text.
    def category_score(category_words, words)
      total = category_words.values.inject(0) { |sum, count| sum + count }.to_f
      # Without training data the ratio below would divide by zero and yield
      # +Infinity; report "no evidence" instead.
      return -Float::INFINITY if total.zero? && !words.empty?

      words.inject(0) do |sum, (word, _count)|
        sum + Math.log(category_words.fetch(word, 0.1) / total)
      end
    end
  end
end
Version data entries
1 entries across 1 versions & 1 rubygems
Version | Path |
---|---|
classifier-1.1 | lib/classifier/bayes.rb |