lib/rumale/naive_bayes/naive_bayes.rb in rumale-0.13.8 vs lib/rumale/naive_bayes/naive_bayes.rb in rumale-0.14.0

- old
+ new

@@ -14,33 +14,33 @@ # Predict class labels for samples. # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the labels. # @return [Numo::Int32] (shape: [n_samples]) Predicted class label per sample. def predict(x) - check_sample_array(x) + x = check_convert_sample_array(x) n_samples = x.shape.first decision_values = decision_function(x) Numo::Int32.asarray(Array.new(n_samples) { |n| @classes[decision_values[n, true].max_index] }) end # Predict log-probability for samples. # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the log-probailities. # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted log-probability of each class per sample. def predict_log_proba(x) - check_sample_array(x) + x = check_convert_sample_array(x) n_samples, = x.shape log_likelihoods = decision_function(x) log_likelihoods - Numo::NMath.log(Numo::NMath.exp(log_likelihoods).sum(1)).reshape(n_samples, 1) end # Predict probability for samples. # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to predict the probailities. # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Predicted probability of each class per sample. def predict_proba(x) - check_sample_array(x) + x = check_convert_sample_array(x) Numo::NMath.exp(predict_log_proba(x)).abs end end # GaussianNB is a class that implements Gaussian Naive Bayes classifier. @@ -76,12 +76,12 @@ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model. # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels) # to be used for fitting the model. # @return [GaussianNB] The learned classifier itself. def fit(x, y) - check_sample_array(x) - check_label_array(y) + x = check_convert_sample_array(x) + y = check_convert_label_array(y) check_sample_label_size(x, y) n_samples, = x.shape @classes = Numo::Int32[*y.to_a.uniq.sort] @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }] @means = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].mean(0) }] @@ -92,11 +92,11 @@ # Calculate confidence scores for samples. # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores. # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class. def decision_function(x) - check_sample_array(x) + x = check_convert_sample_array(x) n_classes = @classes.size log_likelihoods = Array.new(n_classes) do |l| Math.log(@class_priors[l]) - 0.5 * ( Numo::NMath.log(2.0 * Math::PI * @variances[l, true]) + ((x - @means[l, true])**2 / @variances[l, true])).sum(1) @@ -152,11 +152,11 @@ # Create a new classifier with Multinomial Naive Bayes. # # @param smoothing_param [Float] The Laplace smoothing parameter. def initialize(smoothing_param: 1.0) - check_params_float(smoothing_param: smoothing_param) + check_params_numeric(smoothing_param: smoothing_param) check_params_positive(smoothing_param: smoothing_param) @params = {} @params[:smoothing_param] = smoothing_param end @@ -165,12 +165,12 @@ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model. # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels) # to be used for fitting the model. # @return [MultinomialNB] The learned classifier itself. def fit(x, y) - check_sample_array(x) - check_label_array(y) + x = check_convert_sample_array(x) + y = check_convert_label_array(y) check_sample_label_size(x, y) n_samples, = x.shape @classes = Numo::Int32[*y.to_a.uniq.sort] @class_priors = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count / n_samples.to_f }] count_features = Numo::DFloat[*@classes.to_a.map { |l| x[y.eq(l).where, true].sum(0) }] @@ -183,11 +183,11 @@ # Calculate confidence scores for samples. # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores. # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class. def decision_function(x) - check_sample_array(x) + x = check_convert_sample_array(x) n_classes = @classes.size bin_x = x.gt(0) log_likelihoods = Array.new(n_classes) do |l| Math.log(@class_priors[l]) + (Numo::DFloat[*bin_x] * Numo::NMath.log(@feature_probs[l, true])).sum(1) end @@ -241,11 +241,11 @@ # Create a new classifier with Bernoulli Naive Bayes. # # @param smoothing_param [Float] The Laplace smoothing parameter. # @param bin_threshold [Float] The threshold for binarizing of features. def initialize(smoothing_param: 1.0, bin_threshold: 0.0) - check_params_float(smoothing_param: smoothing_param, bin_threshold: bin_threshold) + check_params_numeric(smoothing_param: smoothing_param, bin_threshold: bin_threshold) check_params_positive(smoothing_param: smoothing_param) @params = {} @params[:smoothing_param] = smoothing_param @params[:bin_threshold] = bin_threshold end @@ -255,12 +255,12 @@ # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The training data to be used for fitting the model. # @param y [Numo::Int32] (shape: [n_samples]) The categorical variables (e.g. labels) # to be used for fitting the model. # @return [BernoulliNB] The learned classifier itself. def fit(x, y) - check_sample_array(x) - check_label_array(y) + x = check_convert_sample_array(x) + y = check_convert_label_array(y) check_sample_label_size(x, y) n_samples, = x.shape bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])] @classes = Numo::Int32[*y.to_a.uniq.sort] n_samples_each_class = Numo::DFloat[*@classes.to_a.map { |l| y.eq(l).count.to_f }] @@ -276,10 +276,10 @@ # Calculate confidence scores for samples. # # @param x [Numo::DFloat] (shape: [n_samples, n_features]) The samples to compute the scores. # @return [Numo::DFloat] (shape: [n_samples, n_classes]) Confidence scores per sample for each class. def decision_function(x) - check_sample_array(x) + x = check_convert_sample_array(x) n_classes = @classes.size bin_x = Numo::DFloat[*x.gt(@params[:bin_threshold])] not_bin_x = Numo::DFloat[*x.le(@params[:bin_threshold])] log_likelihoods = Array.new(n_classes) do |l| Math.log(@class_priors[l]) + (