lib/ai4r/classifiers/naive_bayes.rb in ai4r-1.12 vs lib/ai4r/classifiers/naive_bayes.rb in ai4r-1.13
- old
+ new
@@ -55,12 +55,12 @@
# b.eval(["Red", "SUV", "Domestic"])
#
class NaiveBayes < Classifier
# Passes a one-entry hash to parameters_info, pairing :m with its help text
# (parameters_info itself is defined outside this view).
# The two revisions differ only in the quoting of the string literals.
- parameters_info :m => "Default value is set to 0. It may be set to a value greater than " +
- "0 when the size of the dataset is relatively small"
+ parameters_info :m => 'Default value is set to 0. It may be set to a value greater than ' +
+ '0 when the size of the dataset is relatively small'
def initialize
@m = 0
@class_counts = []
@class_prob = [] # stores the probability of the classes
@@ -73,11 +73,11 @@
# Predicts the category of a single data entry.
# data is enumerated with each_with_index, so each value is matched to an
# attribute by its position.
# e.g.
# b.eval(["Red", "SUV", "Domestic"])
# => 'No'
# Returns whatever index_to_klass yields for the position of the largest
# score; on a tie Array#index picks the first such position.
def eval(data)
# Work on a copy: the helper below multiplies into the array it is given,
# and @class_prob must keep its stored values. Both revisions make a
# shallow copy; the newer one just spells it as dup.
- prob = @class_prob.map {|cp| cp}
+ prob = @class_prob.dup
prob = calculate_class_probabilities_for_entry(data, prob)
index_to_klass(prob.index(prob.max))
end
# Calculates the probabilities for the data entry Data.
@@ -88,31 +88,32 @@
# Probability is <= 1 and of type Float.
# e.g.
# b.get_probability_map(["Red", "SUV", "Domestic"])
# => {"Yes"=>0.4166666666666667, "No"=>0.5833333333333334}
def get_probability_map(data)
# Start from a copy of the stored class probabilities; the helper below
# multiplies into the array it is given.
- prob = @class_prob.map {|cp| cp}
+ prob = @class_prob.dup
prob = calculate_class_probabilities_for_entry(data, prob)
# Rescale so the scores sum to 1 (left unchanged when their sum is not > 0).
prob = normalize_class_probability prob
probability_map = {}
# Key each score by the label index_to_klass reports for its position.
prob.each_with_index { |p, i| probability_map[index_to_klass(i)] = p }
# The newer revision replaces the explicit `return` with an implicit one.
- return probability_map
+
+ probability_map
end
# Trains the classifier: counts the attribute values per class and derives
# the class probabilities and the conditional probabilities from the counts.
# Parameter data must pass the is_a? guard below (DataSet in the older
# revision, the fully qualified Ai4r::Data::DataSet in the newer one) and
# must contain at least one data item.
# Raises RuntimeError (string-only raise) when either check fails.
# Returns self.
def build(data)
- raise "Error instance must be passed" unless data.is_a?(DataSet)
- raise "Data should not be empty" if data.data_items.length == 0
+ raise 'Error instance must be passed' unless data.is_a?(Ai4r::Data::DataSet)
+ raise 'Data should not be empty' if data.data_items.length == 0
# initialize_pc sizes its tables from @domains and @klasses, which it does
# not set itself -- presumably the earlier steps fill them (their bodies
# are only partly visible here; confirm before reordering).
initialize_domain_data(data)
initialize_klass_index
initialize_pc
calculate_probabilities
- return self
+ self
end
private
def initialize_domain_data(data)
@@ -126,29 +127,31 @@
# Computes the (not yet normalised) score of every class for one data entry.
# For each class, the starting value in prob is multiplied by the conditional
# probability, given that class, of every attribute value in data.
# prob is modified in place and is also the return value; callers assign the
# result back to their own variable.
def calculate_class_probabilities_for_entry(data, prob)
# The older form returned prob implicitly, because each_with_index returns
# its receiver. Integer#upto returns the integer instead, which is why the
# newer revision ends the method with an explicit `prob`.
- prob.each_with_index do |prob_entry, prob_index|
+ 0.upto(prob.length - 1) do |prob_index|
data.each_with_index do |att, index|
# A value that value_index cannot map (nil) leaves the score untouched --
# presumably a value that was not present in the training data.
next if value_index(att, index).nil?
# @pcp is indexed as [attribute][value][class].
prob[prob_index] *= @pcp[index][value_index(att, index)][prob_index]
end
end
+
+ prob
end
# normalises the array of probabilities so the sum of the array equals 1
# Returns a new array with every entry divided by the total. When the total
# is not greater than 0 the input array itself is returned unchanged, which
# also avoids dividing by zero.
def normalize_class_probability(prob)
prob_sum = sum(prob)
prob_sum > 0 ?
- prob.map {|prob_entry| prob_entry / prob_sum } :
+ prob.map { |prob_entry| prob_entry / prob_sum } :
prob
end
# sums an array up; returns a number of type Float
# The 0.0 seed is what makes the result a Float, including 0.0 for an empty
# array. The two revisions differ only in spacing.
def sum(array)
- array.inject(0.0){|b, i| b+i}
+ array.inject(0.0) { |b, i| b + i }
end
# returns the name of the class when the index is found
def index_to_klass(index)
@klass_index.has_value?(index) ? @klass_index.key(index) : nil
@@ -158,11 +161,11 @@
def initialize_klass_index
@klasses.each_with_index do |dl, index|
@klass_index[dl] = index
end
- @data_labels.each_with_index do |dl, index|
+ 0.upto(@data_labels.length - 1) do |index|
@values[index] = {}
@domains[index].each_with_index do |d, d_index|
@values[index][d] = d_index
end
end
@@ -178,31 +181,31 @@
@values[dl_index][value]
end
# Builds the [values][classes] table for ONE attribute: one row per entry of
# @domains[index], each row a separate array of @klasses.length zeros.
# initialize_pc appends one such table per attribute, which yields the
# overall layout array[attributes][values][classes].
# The newer revision drops the unused dl parameter together with the unused
# block variable and local.
- def build_array(dl, index)
+ def build_array(index)
domains = Array.new(@domains[index].length)
# The block runs once per row, so no two rows share the same array object.
- domains.map do |p1|
- pl = Array.new @klasses.length, 0
+ domains.map do
+ Array.new @klasses.length, 0
end
end
# Initializes the two arrays that store, per attribute, the counts (@pcc) and
# the conditional probabilities (@pcp).
# One zero-filled table is appended to each array for every attribute index.
# Tables are appended with <<, so @pcc and @pcp must already be arrays --
# they are created outside this view (TODO confirm where).
def initialize_pc
# Each array gets its own build_array call, so the tables are not shared.
- @data_labels.each_with_index do |dl, index|
- @pcc << build_array(dl, index)
- @pcp << build_array(dl, index)
+ 0.upto(@data_labels.length - 1) do |index|
+ @pcc << build_array(index)
+ @pcp << build_array(index)
end
end
# calculates the occurrences of a class and the instances of a certain value of a
# certain attribute and the assigned class.
# In addition to that, it also calculates the conditional probabilities and values
def calculate_probabilities
# Reset every per-class counter to 0 before anything is counted.
- @klasses.each {|dl| @class_counts[klass_index(dl)] = 0}
+ @klasses.each { |dl| @class_counts[klass_index(dl)] = 0 }
# calculate_conditional_probabilities reads @pcc (filled by count_instances)
# as well as @class_counts and @class_prob, so it has to run last.
# calculate_class_probabilities is not visible here; presumably it fills
# @class_counts and @class_prob -- confirm.
calculate_class_probabilities
count_instances
calculate_conditional_probabilities
end
@@ -218,21 +221,21 @@
end
# counts the instances of a certain value of a certain attribute and the assigned class
# For every data item and every attribute position, the counter at
# @pcc[attribute][value][class] is incremented by one.
def count_instances
@data_items.each do |item|
# Only the position is needed, so the newer revision iterates over indices
# instead of yielding the unused label.
- @data_labels.each_with_index do |dl, dl_index|
+ 0.upto(@data_labels.length - 1) do |dl_index|
# value_index and klass_index turn the item's attribute value and its
# class (item.klass) into array positions.
@pcc[dl_index][value_index(item[dl_index], dl_index)][klass_index(item.klass)] += 1
end
end
end
# calculates the conditional probability and stores it in the @pcp-array
# For every attribute/value/class cell of @pcc the stored value is
#   (count + @m * @class_prob[class]) / (@class_counts[class] + @m)
# which reduces to count / class_count while @m is 0.
def calculate_conditional_probabilities
@pcc.each_with_index do |attributes, a_index|
attributes.each_with_index do |values, v_index|
# Note: the block variable klass holds the count taken from @pcc, not a
# class label; k_index is the class position.
values.each_with_index do |klass, k_index|
# The newer revision drops the trailing .to_f on the denominator. The
# numerator is already a Float (klass.to_f), so the division stays a
# Float division.
# NOTE(review): a zero denominator would give NaN or Infinity rather than
# raise -- confirm @class_counts[k_index] + @m can never be 0 here.
- @pcp[a_index][v_index][k_index] = (klass.to_f + @m * @class_prob[k_index]) / (@class_counts[k_index] + @m).to_f
+ @pcp[a_index][v_index][k_index] = (klass.to_f + @m * @class_prob[k_index]) / (@class_counts[k_index] + @m)
end
end
end
end