Sha256: 49991b531be6a7bee9c78360d31a0ba75cbcdae9b6e45afe9d9efe0b19b23ecb

Contents?: true

Size: 510 Bytes

Versions: 1

Compression:

Stored size: 510 Bytes

Contents

# Author::    Lucas Carlson  (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License::   LGPL

module Classifier

# Word-counting helpers written against the String API: the methods call
# +tr+, +gsub+ and +split+ on the receiver, so this module is meant to be
# mixed into String (or an object that offers the same methods).
#
# +word_hash+ also calls +stem+ on each counted token. That method is not
# defined in this file and must be supplied elsewhere (presumably by a
# stemming library -- confirm against the code that loads this file).
module WordHash
  # Returns a copy of the receiver in which every character from the
  # punctuation list below is replaced by a single space. Characters that
  # are not in the list (for example "-") are left untouched.
  def without_punctuation
    tr(',?.!;:\'"@#$%^&*()_=+[]{}\|<>/`~', " ")
  end

  # Returns a Hash that maps each counted token (downcased, stemmed and
  # interned as a Symbol) to the number of times it occurs in the receiver.
  #
  # Two token lists are counted together:
  # * the whitespace-separated pieces of +without_punctuation+, and
  # * the whitespace-separated pieces left after deleting every character
  #   matched by /[\w+]/ from the receiver (i.e. runs of symbols).
  #
  # A token is skipped when it contains a character matched by /[\w+]/ and
  # is at most two characters long.
  #
  # The returned Hash has no default value, so looking up a token that never
  # occurred yields +nil+.
  def word_hash
    counts = {}
    words = without_punctuation.split
    # NOTE(review): inside a character class "+" is a literal plus sign, so
    # this deletes "+" together with the word characters. It may have been
    # meant as \w+; kept as is so existing counts do not change.
    symbols = gsub(/[\w+]/, "").split

    (words + symbols).each do |token|
      # Filter first so that discarded tokens are never stemmed or interned.
      next if token =~ /[\w+]/ && token.length <= 2

      key = token.downcase.stem.intern
      # fetch (rather than Hash.new(0)) keeps missing keys returning nil.
      counts[key] = counts.fetch(key, 0) + 1
    end

    counts
  end
end

end

Version data entries

1 entries across 1 versions & 1 rubygems

Version Path
classifier-1.1 lib/classifier/string_extensions/word_hash.rb