Sha256: 1b3fd1caf8c0c4a8013377cf3cc7312b813240308fb22ed1dcf12f9044aa5ae6
Contents?: true
Size: 756 Bytes
Versions: 3
Compression:
Stored size: 756 Bytes
Contents
module Lda
  # A document whose word counts are derived from raw text.
  #
  # The text is tokenized, the corpus stopwords are removed, and the remaining
  # tokens are folded into the +@words+ / +@counts+ arrays.
  # NOTE(review): +tokenize+, +recompute+, +@tokens+, +@words+ and +@counts+ are
  # not defined in this file; presumably they come from Document -- confirm.
  class TextDocument < Document
    # Path of the file the document was built from, or nil when the document
    # was constructed directly from a string.
    attr_reader :filename

    # @param corpus the corpus this document belongs to; must respond to
    #   +stopwords+ and +vocabulary+
    # @param text the raw text to tokenize
    def initialize(corpus, text)
      super(corpus)
      @filename = nil
      tokenize(text)
      # Delete in place so any other reference to @tokens sees the same array.
      @corpus.stopwords.each { |stopword| @tokens.delete(stopword) }
      build_from_tokens
    end

    # Text documents always carry text.
    def has_text?
      true
    end

    # Builds a document from the contents of a file and records the file's
    # path on the returned document.
    #
    # @param corpus the corpus the new document belongs to
    # @param filename path of the file to read
    # @return [TextDocument] the new document, with +filename+ set
    def self.build_from_file(corpus, filename)
      path = filename.dup.freeze
      text = File.read(path)
      document = new(corpus, text)
      # Inside a class method a bare `@filename = ...` would land on the class
      # object rather than the document, so set it on the instance explicitly.
      document.instance_variable_set(:@filename, path)
      document
    end

    protected

    # Counts the occurrences of each token and appends one entry per distinct
    # token to @words and @counts, then calls +recompute+.
    def build_from_tokens
      occurrences = Hash.new(0)
      @tokens.each { |token| occurrences[token] += 1 }

      occurrences.each_pair do |word, count|
        # presumably check_word returns a 1-based index, hence the - 1 -- confirm
        @words << @corpus.vocabulary.check_word(word) - 1
        @counts << count
      end

      recompute
    end
  end
end
Version data entries
3 entries across 3 versions & 1 rubygems
Version | Path |
---|---|
lda-ruby-0.3.9 | lib/lda-ruby/document/text_document.rb |
lda-ruby-0.3.8 | lib/lda-ruby/document/text_document.rb |
lda-ruby-0.3.7 | lib/lda-ruby/document/text_document.rb |