Sha256: 3a9e9bee98f546bf8e9ae693a2612bba5e57294f2908d47f4d33ce250e347f86

Contents?: true

Size: 1.31 KB

Versions: 1

Compression:

Stored size: 1.31 KB

Contents

# Author::    Lucas Carlson  (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License::   LGPL

module Classifier

# A naive Bayes text classifier.
#
# Categories are fixed at construction time. Text is learned through
# dynamically dispatched <tt>train_<category></tt> calls (see #method_missing)
# and scored with #classifications / #classify.
#
# Every text handed to this class must respond to +word_hash+ and return a
# Hash mapping each word to its number of occurrences. That method is not
# defined in this file.
#
#   b = Classifier::Bayes.new 'Interesting', 'Uninteresting'
#   b.train_interesting 'some good words'
#   b.classify 'good words'
class Bayes
	# Occurrence count assumed for a word a category has never been trained on,
	# so that an unseen word lowers the score instead of producing log(0).
	UNSEEN_WORD_COUNT = 0.1

	# Shape of the dynamic training methods; the capture is the category name.
	TRAIN_METHOD = /\Atrain_(\w+)\z/

	# Creates a classifier for the given category names.
	#
	# Each name is normalised with <tt>capitalize.intern</tt>, so 'spam', 'Spam'
	# and 'SPAM' all refer to the category <tt>:Spam</tt>.
	def initialize(*categories)
		@categories = {}
		categories.each { |category| @categories[category.capitalize.intern] = {} }
		@total_words = 0
	end

	# Returns the name (a String) of the highest scoring category for +text+,
	# or +nil+ when the classifier was created without any category.
	def classify(text)
		best = classifications(text).max_by { |_category, score| score }
		best && best.first
	end

	# Returns a Hash mapping every category name (String) to its score for
	# +text+. Higher scores mean a better match.
	#
	# The score is the sum, over the distinct words of <tt>text.word_hash</tt>,
	# of <tt>log(count_in_category / total_words_in_category)</tt>. How often a
	# word occurs in +text+ itself is not taken into account. A text without
	# words scores 0 in every category.
	def classifications(text)
		words = text.word_hash.keys
		score = {}
		@categories.each do |category, category_words|
			total = category_words.values.inject(0) { |sum, count| sum + count }
			score[category.to_s] = words.inject(0) do |sum, word|
				sum + word_score(category_words, word, total)
			end
		end
		score
	end

	# Provides the dynamic training API: <tt>train_<category>(text, ...)</tt>
	# adds the words of every given text to that category and returns the
	# array of texts.
	#
	# Raises StandardError ("No such category: ...") when the name has the
	# <tt>train_</tt> form but the category does not exist. Any other unknown
	# method is passed to +super+ and raises NoMethodError, which is itself a
	# StandardError.
	def method_missing(name, *args)
		category = training_category(name)
		return super if category.nil?
		raise StandardError, "No such category: #{category}" unless @categories.has_key?(category)

		args.each { |text| add_words(category, text) }
	end

	private

	# Keeps +respond_to?+ consistent with #method_missing: only training
	# methods of existing categories are reported as available.
	def respond_to_missing?(name, include_private = false)
		category = training_category(name)
		(!category.nil? && @categories.has_key?(category)) || super
	end

	# Returns the normalised category Symbol encoded in a <tt>train_*</tt>
	# method name, or +nil+ when +name+ is not a training method.
	def training_category(name)
		match = TRAIN_METHOD.match(name.to_s)
		match && match[1].capitalize.intern
	end

	# Log-likelihood contribution of one word for one category.
	#
	# A category without training data has total == 0; dividing by it would
	# give log(Infinity) = +Infinity and let the untrained category win every
	# classification. It has no evidence at all, so it scores -Infinity.
	def word_score(category_words, word, total)
		return -Float::INFINITY if total.zero?

		Math.log(category_words.fetch(word, UNSEEN_WORD_COUNT) / total.to_f)
	end

	# Adds the word counts of +text+ to +category+ (an already normalised
	# Symbol key of @categories) and to the overall word total.
	def add_words(category, text)
		category_words = @categories[category]
		text.word_hash.each do |word, count|
			category_words[word] = category_words.fetch(word, 0) + count
			@total_words += count
		end
	end
end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
classifier-1.0 lib/classifier/bayes.rb