lib/bae/classifier.rb in bae-0.1.0 vs lib/bae/classifier.rb in bae-0.2.0

- old
+ new

@@ -11,10 +11,12 @@
       @label_index_sequence = -1 # start at -1 so 0 is first value
       @total_terms = 0.0
     end

     def finish_training!
+      @frequency_table_size = @frequency_table.keys.size
+
       calculate_likelihoods!
       calculate_priors!
     end

     def train(label, training_data)
@@ -66,11 +68,11 @@
     def classify_from_string(document)
       words = document.split.uniq
       likelihoods = @likelihoods.dup
       posterior = {}

-      vocab_size = frequency_table.keys.size
+      vocab_size = @frequency_table_size

       label_index.each do |label, index|
         words.map do |word|
           row = frequency_table[word]

@@ -128,10 +130,10 @@
     private

     def calculate_likelihoods!
       @likelihoods = label_index.inject({}) do |accumulator, (label, index)|
         initial_likelihood = 1.0
-        vocab_size = frequency_table.keys.size
+        vocab_size = @frequency_table_size
         frequency_table.each do |feature, row|
           laplace_word_likelihood = (row[index] + 1.0).to_f / (label_instance_count[label] + vocab_size).to_f
           initial_likelihood *= (1.0 - laplace_word_likelihood)
         end