lib/classifier/extensions/word_hash.rb in classifier-1.4.2 vs lib/classifier/extensions/word_hash.rb in classifier-1.4.3
- old
+ new
@@ -1,9 +1,11 @@
# Author:: Lucas Carlson (mailto:lucas@rufy.com)
# Copyright:: Copyright (c) 2005 Lucas Carlson
# License:: LGPL
+require 'set'
+
# These are extensions to the String class to provide convenience
# methods for the Classifier package.
class String
# Removes common punctuation symbols, returning a new string.
# E.g.,
@@ -43,88 +45,88 @@
d[word.intern] += 1
end
d
end
- CORPUS_SKIP_WORDS = Set.new(%w[
- a
- again
- all
- along
- are
- also
- an
- and
- as
- at
- but
- by
- came
- can
- cant
- couldnt
- did
- didn
- didnt
- do
- doesnt
- dont
- ever
- first
- from
- have
- her
- here
- him
- how
- i
- if
- in
- into
- is
- isnt
- it
- itll
- just
- last
- least
- like
- most
- my
- new
- no
- not
- now
- of
- on
- or
- should
- sinc
- so
- some
- th
- than
- this
- that
- the
- their
- then
- those
- to
- told
- too
- true
- try
- until
- url
- us
- were
- when
- whether
- while
- with
- within
- yes
- you
- youll
- ])
+ CORPUS_SKIP_WORDS = ::Set.new(%w[
+ a
+ again
+ all
+ along
+ are
+ also
+ an
+ and
+ as
+ at
+ but
+ by
+ came
+ can
+ cant
+ couldnt
+ did
+ didn
+ didnt
+ do
+ doesnt
+ dont
+ ever
+ first
+ from
+ have
+ her
+ here
+ him
+ how
+ i
+ if
+ in
+ into
+ is
+ isnt
+ it
+ itll
+ just
+ last
+ least
+ like
+ most
+ my
+ new
+ no
+ not
+ now
+ of
+ on
+ or
+ should
+ sinc
+ so
+ some
+ th
+ than
+ this
+ that
+ the
+ their
+ then
+ those
+ to
+ told
+ too
+ true
+ try
+ until
+ url
+ us
+ were
+ when
+ whether
+ while
+ with
+ within
+ yes
+ you
+ youll
+ ])
end