lib/lda.rb in ealdent-lda-ruby-0.2.2 vs lib/lda.rb in ealdent-lda-ruby-0.2.3

- old
+ new

@@ -28,14 +28,14 @@
     # an svmlight-style formatted line with the first element
     # being the number of words, or it can be a Document object.
     def add_document(doc)
       if doc.is_a?(Document)
         @documents << doc
-        @all_terms = @all_terms + doc.words
+        @all_terms += doc.words
       elsif doc.is_a?(String)
         d = Document.new(doc)
-        @all_terms = @all_terms + d.words
+        @all_terms += d.words
         @documents << d
       end
       @num_docs += 1
       @num_terms = @all_terms.size
       true
@@ -49,15 +49,29 @@
         end
       end
       true
     end
   end
+
+  class BaseDocument
+    def words
+      raise NotSupportedError
+    end
+
+    def length
+      raise NotSupportedError
+    end
+
+    def total
+      raise NotSupportedError
+    end
+  end
 
   #
   # A single document.
   #
-  class Document
+  class Document < BaseDocument
     attr_accessor :words, :counts
     attr_reader :length, :total
 
     # Create the Document using the svmlight-style text line:
     #
@@ -220,30 +234,24 @@
       unless @vocab
         puts "No vocabulary loaded."
         return nil
       end
 
-      # Load the model
-      beta = self.beta
-      unless beta
-        puts "Model has not been run."
-        return nil
-      end
-
       # find the highest scoring words per topic
       topics = Hash.new
-      indices = (0..(@vocab.size - 1)).to_a
-      topic_num = 0
-      beta.each do |topic|
-        topics[topic_num] = Array.new
-        indices.sort! {|x, y| -(topic[x] <=> topic[y])}
-        words_per_topic.times do |i|
-          topics[topic_num] << @vocab[indices[i]]
+      indices = (0...@vocab.size).to_a
+
+      begin
+        beta.each_with_index do |topic, topic_idx|
+          indices.sort! {|x, y| -(topic[x] <=> topic[y])}
+          topics[topic_idx] = indices.first(words_per_topic).map { |i| @vocab[i] }
         end
-        topic_num += 1
+      rescue NoMethodError
+        puts "Error: model has not been run."
+        topics = nil
       end
-
+
       topics
     end
 
 
     #
@@ -306,6 +314,6 @@
     end
   end
 end
 
 # load the c-side stuff
-require 'lda_ext'
\ No newline at end of file
+require 'lda_ext'