Sha256: f07f7cb2a49ab1ffc345f2035f5b996617e7c74f326f968872074dc01d58f7c8

Contents?: true

Size: 885 Bytes

Versions: 1

Compression:

Stored size: 885 Bytes

Contents

require 'yaml' # load_yaml below needs YAML; requiring here keeps this file loadable on its own

module Lda
  # Maps words to integer indices, assigned in order of first appearance
  # and starting at 1.
  #
  # The mapping lives in a Hash whose default block registers any unseen key.
  # The same Hash also stores a bookkeeping entry under the Symbol key
  # +:MAX_VALUE+ holding the highest index handed out so far; that entry is
  # not a word and is excluded by #num_words and #to_a (but it is visible
  # through #words).
  class Vocabulary
    # The underlying word => index Hash, including the +:MAX_VALUE+ entry
    # once at least one word has been registered.
    attr_reader :words

    # @param words [#each, nil] optional initial words, registered in
    #   iteration order
    def initialize(words = nil)
      @words = Hash.new do |hash, key|
        # Bump the running counter (absent means "no words yet"), then give
        # the new key that index. The counter is stored first, as before, so
        # the Hash's insertion order is unchanged.
        hash[:MAX_VALUE] = hash.fetch(:MAX_VALUE, 0) + 1
        hash[key] = hash[:MAX_VALUE]
      end

      words.each { |w| @words[w] } if words
    end

    # Returns the index of +word+, registering it with the next free index
    # if it has not been seen before (a lookup is never side-effect free for
    # unknown words).
    #
    # @param word [Object] the word to look up; a copy (+dup+) is used as
    #   the lookup key
    # @return [Integer] the word's index
    def check_word(word)
      @words[word.dup]
    end

    # Registers every line of a plain-text file as a word. The text is split
    # on runs of "\n" / "\r" characters.
    #
    # @param filename [String] path of the file to read
    # @return [Array<String>] the words read from the file
    def load_file(filename)
      File.read(filename).split(/[\n\r]+/).each { |word| check_word(word) }
    end

    # Registers every element of the collection stored in a YAML file.
    #
    # NOTE(review): YAML.load_file may instantiate arbitrary objects on
    # older Psych versions; only use this with trusted vocabulary files.
    #
    # @param filename [String] path of the YAML file
    # @return [Object] the collection loaded from the file
    def load_yaml(filename)
      YAML.load_file(filename).each { |word| check_word(word) }
    end

    # @return [Integer] number of registered words, not counting the
    #   +:MAX_VALUE+ bookkeeping entry
    def num_words
      @words.empty? ? 0 : @words.size - 1
    end

    # @return [Array] the registered words ordered by ascending index,
    #   without the +:MAX_VALUE+ bookkeeping entry
    def to_a
      @words.reject { |word, _idx| word == :MAX_VALUE }
            .sort_by { |_word, idx| idx }
            .map { |word, _idx| word }
    end
  end
end

Version data entries

1 entry across 1 version & 1 rubygem

Version Path
ealdent-lda-ruby-0.3.0 lib/lda-ruby/vocabulary.rb