Sha256: 296d2fd3d5390f17c91e057eaef47f4fb3f09fe0de050e8e213e14aba8bd79b5
Contents?: true
Size: 554 Bytes
Versions: 9
Compression:
Stored size: 554 Bytes
Contents
module Lda
  # A corpus whose documents are loaded from the files found directly inside
  # one directory.
  #
  # Construction is eager: every matching regular file is handed to
  # +TextDocument.build_from_file+ and the result is added to the corpus via
  # +add_document+ before +new+ returns.
  class DirectoryCorpus < Corpus
    attr_reader :path, :extension

    # Load documents from a directory.
    #
    # @param path [#dup] directory to scan; a frozen copy is stored, so later
    #   mutation of the caller's object does not affect this corpus
    # @param extension [#dup, nil] file extension WITHOUT the leading dot
    #   (e.g. "txt"); when nil, every entry matched by "*" is considered
    def initialize(path, extension = nil)
      super()
      @path = path.dup.freeze
      @extension = extension ? extension.dup.freeze : nil
      load_from_directory
    end

    protected

    # Globs the directory and adds one document per matching regular file.
    #
    # Matches are sorted so document order is the same on every platform and
    # Ruby version (Dir.glob only sorts by default from Ruby 3.0 onward).
    # Non-file matches such as subdirectories are skipped: the "*" pattern
    # picks them up, and they cannot be loaded as text documents
    # (NOTE(review): assumes build_from_file reads the path as a plain file).
    def load_from_directory
      pattern = File.join(@path, (@extension ? "*.#{@extension}" : "*"))

      Dir.glob(pattern).sort.each do |filename|
        next unless File.file?(filename)

        add_document(TextDocument.build_from_file(self, filename))
      end
    end
  end
end
Version data entries
9 entries across 9 versions & 2 rubygems