lib/bae/classifier.rb in bae-0.1.0 vs lib/bae/classifier.rb in bae-0.2.0
- old
+ new
@@ -11,10 +11,12 @@
@label_index_sequence = -1 # start at -1 so 0 is first value
@total_terms = 0.0
end
def finish_training!
+ @frequency_table_size = @frequency_table.keys.size
+
calculate_likelihoods!
calculate_priors!
end
def train(label, training_data)
@@ -66,11 +68,11 @@
def classify_from_string(document)
words = document.split.uniq
likelihoods = @likelihoods.dup
posterior = {}
- vocab_size = frequency_table.keys.size
+ vocab_size = @frequency_table_size
label_index.each do |label, index|
words.map do |word|
row = frequency_table[word]
@@ -128,10 +130,10 @@
private
def calculate_likelihoods!
@likelihoods = label_index.inject({}) do |accumulator, (label, index)|
initial_likelihood = 1.0
- vocab_size = frequency_table.keys.size
+ vocab_size = @frequency_table_size
frequency_table.each do |feature, row|
laplace_word_likelihood = (row[index] + 1.0).to_f / (label_instance_count[label] + vocab_size).to_f
initial_likelihood *= (1.0 - laplace_word_likelihood)
end