lib/lda-ruby/document/document.rb in lda-ruby-0.3.5 vs lib/lda-ruby/document/document.rb in lda-ruby-0.3.6
- old
+ new
@@ -1,5 +1,7 @@
+require 'yaml'
+
module Lda
class Document
attr_reader :corpus, :words, :counts, :length, :total, :tokens
def initialize(corpus)
@@ -27,10 +29,11 @@
# Returns the given tokens unchanged. tokenize passes its split words through
# this method before storing the result in @tokens.
# NOTE(review): presumably an override point for subclasses that want to
# filter or transform tokens -- confirm against the rest of the library.
def handle(tokens)
tokens
end
def tokenize(text)
- clean_text = text.gsub(/[^A-Za-z'\s]+/, ' ').gsub(/\s+/, ' ') # remove everything but letters and ' and leave only single spaces
+ clean_text = text.gsub(/[^A-Za-z'\s]+/, ' ').gsub(/\s+/, ' ').downcase # remove everything but letters and ' and leave only single spaces
@tokens = handle(clean_text.split(' '))
+ nil
end
end
-end
\ No newline at end of file
+end