Sha256: c34b7dda375f97208135cdbd330b4e6a27982b08a2c421845ae3f4af7b24a410

Contents?: true

Size: 1.01 KB

Versions: 8

Compression:

Stored size: 1.01 KB

Contents

module Lda
  # Two-way mapping between words and the integer indexes assigned to them.
  #
  # Indexes start at 1 and are handed out in order of first appearance.
  # The running counter lives inside the +words+ hash itself, under the
  # +:MAX_VALUE+ key, so once any word has been added +words+ holds one
  # entry more than the number of words.
  class Vocabulary
    # Key inside +words+ that stores the highest index assigned so far.
    COUNTER_KEY = :MAX_VALUE

    attr_reader :words, :indexes

    # Builds a vocabulary, optionally seeded with an enumerable of words.
    #
    # @param words [#each, nil] initial words; repeated words keep the index
    #   they received the first time they were seen
    def initialize(words = nil)
      # Looking up an unknown key bumps the counter and stores the new value
      # as that key's index, so a plain read is enough to register a word.
      @words = Hash.new do |hash, key|
        hash[COUNTER_KEY] = hash.fetch(COUNTER_KEY, 0) + 1
        hash[key] = hash[COUNTER_KEY]
      end
      @indexes = {}

      words.each { |w| @words[w] } if words

      @words.each_pair do |word, index|
        # Skip the counter entry explicitly instead of relying on a later
        # pair (the last word, which shares its value) overwriting it.
        next if word.equal?(COUNTER_KEY)

        @indexes[index] = word
      end
    end

    # Returns the index for +word+, assigning the next free index when the
    # word has not been seen before, and records the reverse mapping.
    #
    # Copies of +word+ are stored, so later mutation of the caller's object
    # does not affect the vocabulary.
    #
    # @param word [Object] the word to look up or register
    # @return [Integer] the index of +word+
    def check_word(word)
      index = @words[word.dup]
      @indexes[index] = word.dup
      index
    end

    # Registers every line of the given file as a word. Runs of CR/LF
    # characters are treated as a single separator.
    #
    # @param filename [String] path of the file to read
    # @return [Array<String>] the words read from the file
    def load_file(filename)
      File.read(filename).split(/[\n\r]+/).each { |word| check_word(word) }
    end

    # Registers every element of the collection stored in a YAML file.
    #
    # NOTE(review): the file is parsed with YAML.load_file; only use this on
    # trusted input, since older YAML versions can instantiate arbitrary
    # objects while loading.
    #
    # @param filename [String] path of the YAML file to read
    # @return [Object] the collection loaded from the file
    def load_yaml(filename)
      YAML.load_file(filename).each { |word| check_word(word) }
    end

    # Number of words in the vocabulary; the counter entry is not counted.
    #
    # @return [Integer]
    def num_words
      @words.empty? ? 0 : @words.size - 1
    end

    # Words ordered by their index, without the counter entry.
    #
    # @return [Array]
    def to_a
      entries = @words.reject { |word, _index| word.equal?(COUNTER_KEY) }
      entries.sort_by { |_word, index| index }.map { |word, _index| word }
    end
  end
end

Version data entries

8 entries across 8 versions & 2 rubygems

Version Path
lda-ruby-0.3.9 lib/lda-ruby/vocabulary.rb
ealdent-lda-ruby-0.3.1 lib/lda-ruby/vocabulary.rb
lda-ruby-0.3.8 lib/lda-ruby/vocabulary.rb
lda-ruby-0.3.7 lib/lda-ruby/vocabulary.rb
lda-ruby-0.3.6 lib/lda-ruby/vocabulary.rb
lda-ruby-0.3.5 lib/lda-ruby/vocabulary.rb
lda-ruby-0.3.4 lib/lda-ruby/vocabulary.rb
lda-ruby-0.3.1 lib/lda-ruby/vocabulary.rb