# Author:: Lucas Carlson (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License:: LGPL

module Classifier
  # Tokenizing helpers meant to be mixed into a string-like object: both
  # methods call +tr+ / +gsub+ on +self+, so the receiver must respond to
  # the String interface.
  module WordHash
    # Returns a copy of the receiver in which every character from the
    # punctuation list below is replaced by a single space. The receiver
    # itself is not modified. Characters not in the list (for example "-")
    # are left untouched.
    def without_punctuation
      # Single-quoted literal: \' is an apostrophe, and \\\\ is two
      # backslashes, which String#tr reads as one escaped, literal backslash.
      # (A lone backslash before "|" would only escape the pipe, leaving
      # backslashes in the text unreplaced.)
      tr(',?.!;:\'"@#$%^&*()_=+[]{}\\\\|<>/`~', " ")
    end

    # Builds a frequency table of the tokens in the receiver.
    #
    # Two kinds of tokens are counted:
    # * words   - whitespace-separated pieces of the text after
    #             +without_punctuation+ has been applied;
    # * symbols - whitespace-separated runs of what is left once every
    #             word character (\w) is blanked out.
    #
    # Each token is downcased, passed through +stem+ and interned; the
    # resulting Symbol is the key and the value is the occurrence count.
    # NOTE(review): +stem+ is not defined in this file - presumably supplied
    # by a stemmer library that extends String; confirm at the include site.
    #
    # Returns a plain Hash (default value +nil+) of Symbol => Integer.
    def word_hash
      counts = Hash.new

      words = without_punctuation.split
      # Replace word characters with a space instead of deleting them, so
      # symbols that are separated only by a word ("a.b!c") stay separate
      # tokens rather than being fused into one (".!").
      symbols = gsub(/\w/, " ").split

      (words + symbols).each do |token|
        key = token.downcase.stem.intern
        counts[key] = counts.fetch(key, 0) + 1
      end

      counts
    end
  end
end