lib/classifier/string_extensions/word_hash.rb in classifier-1.0 vs lib/classifier/string_extensions/word_hash.rb in classifier-1.1
- old
+ new
@@ -10,15 +10,17 @@
end
def word_hash
d = Hash.new
corpus = without_punctuation
- (corpus.split + gsub(/[\w]/,"").split).each do |word|
+ (corpus.split + gsub(/[\w+]/,"").split).each do |word|
key = word.downcase.stem.intern
- d[key] ||= 0
- d[key] += 1
+ if !(word =~ /[\w+]/) || word.length > 2
+ d[key] ||= 0
+ d[key] += 1
+ end
end
return d
end
end
-end
\ No newline at end of file
+end