lib/rumale/naive_bayes/naive_bayes.rb in rumale-0.13.8 vs lib/rumale/naive_bayes/naive_bayes.rb in rumale-0.14.0
- old
+ new
@@ -14,33 +14,33 @@
# Predict class labels for samples.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels.
# @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample.
def predict(x)
# In this diff the "-" line is the 0.13.8 statement and the "+" line its 0.14.0 replacement:
# the new version rebinds x to the helper's return value instead of only calling a check.
# (Helper is defined outside this view; presumably it validates and converts the input -- confirm.)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
# The number of samples is the size of the first axis of x.
n_samples = x.shape.first
decision_values = decision_function(x)
# For every sample row, look up the entry of @classes at the position of the
# largest decision value, then pack the chosen labels into an Int32 array.
Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] })
end
# Predict log-probability for samples.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probabilities.
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample.
def predict_log_proba(x)
# "-" is the 0.13.8 statement, "+" the 0.14.0 replacement: x is now rebound to the
# helper's return value (helper not visible here; presumably a validated/converted array -- confirm).
- check_sample_array(x)
+ x = check_convert_sample_array(x)
# Only the first dimension (sample count) is needed; the trailing comma discards the rest.
n_samples, = x.shape
log_likelihoods = decision_function(x)
# Normalize each row in log space: subtract log(sum(exp(row))) from every entry.
# The row sums are reshaped to [n_samples, 1] so the subtraction applies per row.
# NOTE(review): exp is applied directly, without shifting by the row maximum first;
# very negative scores could underflow to 0 and yield log(0) -- confirm acceptable.
log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1)
end
# Predict probability for samples.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probabilities.
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample.
def predict_proba(x)
# "-" is the 0.13.8 statement, "+" the 0.14.0 replacement. Note that predict_log_proba
# (called below) runs the same check/convert helper on x again.
- check_sample_array(x)
+ x = check_convert_sample_array(x)
# Exponentiate the log-probabilities to obtain probabilities, then take absolute values.
# NOTE(review): the reason for .abs is not evident from this view -- confirm.
Numo::NMath.exp(predict_log_proba(x)).abs
end
end
# GaussianNB is a class that implements Gaussian Naive Bayes classifier.
@@ -76,12 +76,12 @@
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
# @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
# to be used for fitting the model.
# @return [GaussianNB] The learned classifier itself.
def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
check_sample_label_size(x, y)
n_samples, = x.shape
@classes = Numo::Int32[*y.to_a.uniq.sort]
@class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
@means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }]
@@ -92,11 +92,11 @@
# Calculate confidence scores for samples.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
n_classes = @classes.size
log_likelihoods = Array.new(n_classes) do |l|
Math.log(@class_priors[l]) - 0.5 * (
Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) +
((x - @means[l, true])**2 / @variances[l, true])).sum(1)
@@ -152,11 +152,11 @@
# Create a new classifier with Multinomial Naive Bayes.
#
# @param smoothing_param [Float] The Laplace smoothing parameter.
def initialize(smoothing_param: 1.0)
# "-" is the 0.13.8 statement, "+" the 0.14.0 replacement: the type check switches from
# check_params_float to check_params_numeric (helpers are defined outside this view;
# presumably the new one also accepts non-Float numerics such as Integer -- confirm).
- check_params_float(smoothing_param: smoothing_param)
+ check_params_numeric(smoothing_param: smoothing_param)
check_params_positive(smoothing_param: smoothing_param)
# Hyperparameters are stored in the @params hash under symbol keys.
@params = {}
@params[:smoothing_param] = smoothing_param
end
@@ -165,12 +165,12 @@
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
# @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
# to be used for fitting the model.
# @return [MultinomialNB] The learned classifier itself.
def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
check_sample_label_size(x, y)
n_samples, = x.shape
@classes = Numo::Int32[*y.to_a.uniq.sort]
@class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }]
count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }]
@@ -183,11 +183,11 @@
# Calculate confidence scores for samples.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
n_classes = @classes.size
bin_x = x.gt(0)
log_likelihoods = Array.new(n_classes) do |l|
Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1)
end
@@ -241,11 +241,11 @@
# Create a new classifier with Bernoulli Naive Bayes.
#
# @param smoothing_param [Float] The Laplace smoothing parameter.
# @param bin_threshold [Float] The threshold for binarizing of features.
def initialize(smoothing_param: 1.0, bin_threshold: 0.0)
# "-" is the 0.13.8 statement, "+" the 0.14.0 replacement: both parameters go through
# check_params_numeric instead of check_params_float (helpers are defined outside this view;
# presumably the new one also accepts non-Float numerics such as Integer -- confirm).
- check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
+ check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold)
# Only smoothing_param is passed to the positivity check; bin_threshold is not.
check_params_positive(smoothing_param: smoothing_param)
# Hyperparameters are stored in the @params hash under symbol keys.
@params = {}
@params[:smoothing_param] = smoothing_param
@params[:bin_threshold] = bin_threshold
end
@@ -255,12 +255,12 @@
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model.
# @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels)
# to be used for fitting the model.
# @return [BernoulliNB] The learned classifier itself.
def fit(x, y)
- check_sample_array(x)
- check_label_array(y)
+ x = check_convert_sample_array(x)
+ y = check_convert_label_array(y)
check_sample_label_size(x, y)
n_samples, = x.shape
bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
@classes = Numo::Int32[*y.to_a.uniq.sort]
n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }]
@@ -276,10 +276,10 @@
# Calculate confidence scores for samples.
#
# @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores.
# @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class.
def decision_function(x)
- check_sample_array(x)
+ x = check_convert_sample_array(x)
n_classes = @classes.size
bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])]
not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])]
log_likelihoods = Array.new(n_classes) do |l|
Math.log(@class_priors[l]) + (