Sha256: 98181a383a1001f9a4b1b8e26f992c6aeecdc46a9908e4a66293cce0c1f1a129
Contents?: true
Size: 650 Bytes
Versions: 5
Compression:
Stored size: 650 Bytes
Contents
require 'set'

module Lda
  # An in-memory collection of Document objects that keeps running counts
  # of how many documents were added and how many distinct terms they
  # contain, and feeds every token of each added document to a Vocabulary.
  class Corpus
    # documents  - Array of the documents added so far, in insertion order.
    # num_docs   - Integer count of documents added via #add_document.
    # num_terms  - Integer count of distinct entries seen across all
    #              documents' +words+.
    # vocabulary - the Vocabulary instance created in #initialize.
    attr_reader :documents, :num_docs, :num_terms, :vocabulary

    # Builds an empty corpus with zeroed counters and a fresh Vocabulary.
    def initialize
      @documents = []
      @all_terms = Set.new
      @num_terms = @num_docs = 0
      @vocabulary = Vocabulary.new
    end

    # Appends +doc+ to the corpus and refreshes the counters and vocabulary.
    #
    # doc - a Document (or subclass instance); must respond to +words+ and
    #       +tokens+.
    #
    # Returns nil.
    # Raises RuntimeError when +doc+ is not a Document.
    def add_document(doc)
      raise 'Parameter +doc+ must be of type Document' unless doc.is_a?(Document)

      @documents << doc
      # Merge in place: `+=` would duplicate the whole accumulated set on
      # every call, making corpus construction quadratic in the term count.
      @all_terms.merge(doc.words)
      @num_docs += 1
      @num_terms = @all_terms.size
      update_vocabulary(doc)
      nil
    end

    protected

    # Passes each token of +doc+ to the vocabulary's +check_word+.
    def update_vocabulary(doc)
      doc.tokens.each { |token| @vocabulary.check_word(token) }
    end
  end
end
Version data entries
5 entries across 5 versions & 2 rubygems