lib/classifier/extensions/word_hash.rb in classifier-1.4.2 vs lib/classifier/extensions/word_hash.rb in classifier-1.4.3

- old
+ new

@@ -1,9 +1,11 @@ # Author:: Lucas Carlson (mailto:lucas@rufy.com) # Copyright:: Copyright (c) 2005 Lucas Carlson # License:: LGPL +require 'set' + # These are extensions to the String class to provide convenience # methods for the Classifier package. class String # Removes common punctuation symbols, returning a new string. # E.g., @@ -43,88 +45,88 @@ d[word.intern] += 1 end d end - CORPUS_SKIP_WORDS = Set.new(%w[ - a - again - all - along - are - also - an - and - as - at - but - by - came - can - cant - couldnt - did - didn - didnt - do - doesnt - dont - ever - first - from - have - her - here - him - how - i - if - in - into - is - isnt - it - itll - just - last - least - like - most - my - new - no - not - now - of - on - or - should - sinc - so - some - th - than - this - that - the - their - then - those - to - told - too - true - try - until - url - us - were - when - whether - while - with - within - yes - you - youll - ]) + CORPUS_SKIP_WORDS = ::Set.new(%w[ + a + again + all + along + are + also + an + and + as + at + but + by + came + can + cant + couldnt + did + didn + didnt + do + doesnt + dont + ever + first + from + have + her + here + him + how + i + if + in + into + is + isnt + it + itll + just + last + least + like + most + my + new + no + not + now + of + on + or + should + sinc + so + some + th + than + this + that + the + their + then + those + to + told + too + true + try + until + url + us + were + when + whether + while + with + within + yes + you + youll + ]) end