Sha256: e2f27d2098fe8249179f251e2ee1bf358a155e5c265e29ffb52cc84c0a266f22
Contents?: true
Size: 608 Bytes
Versions: 2
Compression:
Stored size: 608 Bytes
Contents
module Lda
  # A Document built from a raw text string.
  #
  # Construction hands the text to +tokenize+ and then collapses the
  # resulting @tokens into parallel @words / @counts collections.
  #
  # NOTE(review): +tokenize+, +recompute+, @tokens, @words, @counts and
  # @corpus are not defined in this class; presumably they come from
  # Document -- confirm against the superclass.
  class TextDocument < Document
    # Reads the entire file at +filename+ and builds a new document
    # from its contents.
    #
    # corpus   - passed through unchanged to the constructor.
    # filename - path of the file to read (opened in 'r' mode).
    #
    # Returns the newly constructed instance.
    def self.build_from_file(corpus, filename)
      contents = File.open(filename, 'r', &:read)
      new(corpus, contents)
    end

    # corpus - forwarded to the superclass initializer.
    # text   - raw text, passed to +tokenize+.
    def initialize(corpus, text)
      super(corpus)
      tokenize(text)
      build_from_tokens
    end

    # Always true for this class.
    def has_text?
      true
    end

    protected

    # Counts how often each distinct token occurs in @tokens, then, in
    # first-seen order, appends the result of
    # @corpus.vocabulary.check_word(token) to @words and the occurrence
    # count to @counts. Ends by calling +recompute+ and returns its result.
    #
    # NOTE(review): check_word presumably maps a word to its vocabulary
    # index -- verify in the vocabulary class.
    def build_from_tokens
      frequencies = @tokens.each_with_object(Hash.new(0)) do |token, tally|
        tally[token] += 1
      end

      frequencies.each do |word, count|
        @words << @corpus.vocabulary.check_word(word)
        @counts << count
      end

      recompute
    end
  end
end
Version data entries
2 entries across 2 versions & 1 rubygems
Version | Path |
---|---|
ealdent-lda-ruby-0.3.0 | lib/lda-ruby/document/text_document.rb |
ealdent-lda-ruby-0.3.1 | lib/lda-ruby/document/text_document.rb |