lib/lda-ruby/document/document.rb in lda-ruby-0.3.5 vs lib/lda-ruby/document/document.rb in lda-ruby-0.3.6

- old
+ new

@@ -1,5 +1,7 @@
+require 'yaml'
+
 module Lda
   class Document
     attr_reader :corpus, :words, :counts, :length, :total, :tokens
 
     def initialize(corpus)
@@ -27,10 +29,11 @@
     def handle(tokens)
       tokens
     end
 
     def tokenize(text)
-      clean_text = text.gsub(/[^A-Za-z'\s]+/, ' ').gsub(/\s+/, ' ') # remove everything but letters and ' and leave only single spaces
+      clean_text = text.gsub(/[^A-Za-z'\s]+/, ' ').gsub(/\s+/, ' ').downcase # remove everything but letters and ' and leave only single spaces
       @tokens = handle(clean_text.split(' '))
+      nil
     end
   end
-end
\ No newline at end of file
+end