lib/lda.rb in ealdent-lda-ruby-0.2.2 vs lib/lda.rb in ealdent-lda-ruby-0.2.3
- old
+ new
@@ -28,14 +28,14 @@
# an svmlight-style formatted line with the first element
# being the number of words, or it can be a Document object.
def add_document(doc)
if doc.is_a?(Document)
@documents << doc
- @all_terms = @all_terms + doc.words
+ @all_terms += doc.words
elsif doc.is_a?(String)
d = Document.new(doc)
- @all_terms = @all_terms + d.words
+ @all_terms += d.words
@documents << d
end
@num_docs += 1
@num_terms = @all_terms.size
true
@@ -49,15 +49,29 @@
end
end
true
end
end
+
+ class BaseDocument # Placeholder base class: each of words/length/total below does nothing but raise. Document (declared as `Document < BaseDocument`) declares attr_accessor/attr_reader methods with these same names.
+ def words # Unconditionally raises; no value is ever returned from this base version.
+ raise NotSupportedError # NOTE(review): NotSupportedError is not a Ruby core exception and no definition is visible in these hunks; if it is not defined elsewhere, this line raises NameError (uninitialized constant) instead. Confirm.
+ end
+
+ def length # Unconditionally raises; same pattern as words.
+ raise NotSupportedError # NOTE(review): same undefined-constant concern as in words; confirm the constant exists.
+ end
+
+ def total # Unconditionally raises; same pattern as words.
+ raise NotSupportedError # NOTE(review): same undefined-constant concern as in words; confirm the constant exists.
+ end
+ end
#
# A single document.
#
- class Document
+ class Document < BaseDocument
attr_accessor :words, :counts
attr_reader :length, :total
# Create the Document using the svmlight-style text line:
#
@@ -220,30 +234,24 @@
unless @vocab
puts "No vocabulary loaded."
return nil
end
- # Load the model
- beta = self.beta
- unless beta
- puts "Model has not been run."
- return nil
- end
-
# find the highest scoring words per topic
topics = Hash.new
- indices = (0..(@vocab.size - 1)).to_a
- topic_num = 0
- beta.each do |topic|
- topics[topic_num] = Array.new
- indices.sort! {|x, y| -(topic[x] <=> topic[y])}
- words_per_topic.times do |i|
- topics[topic_num] << @vocab[indices[i]]
+ indices = (0...@vocab.size).to_a
+
+ begin
+ beta.each_with_index do |topic, topic_idx|
+ indices.sort! {|x, y| -(topic[x] <=> topic[y])}
+ topics[topic_idx] = indices.first(words_per_topic).map { |i| @vocab[i] }
end
- topic_num += 1
+ rescue NoMethodError
+ puts "Error: model has not been run."
+ topics = nil
end
-
+
topics
end
#
@@ -306,6 +314,6 @@
end
end
end
# load the c-side stuff
-require 'lda_ext'
\ No newline at end of file
+require 'lda_ext'