lib/svmkit/linear_model/lasso.rb in svmkit-0.3.3 vs lib/svmkit/linear_model/lasso.rb in svmkit-0.4.0

- old
+ new

@@ -1,27 +1,26 @@
 # frozen_string_literal: true
 
 require 'svmkit/validation'
 require 'svmkit/base/base_estimator'
 require 'svmkit/base/regressor'
+require 'svmkit/optimizer/nadam'
 
 module SVMKit
   module LinearModel
    # Lasso is a class that implements Lasso Regression
    # with stochastic gradient descent (SGD) optimization.
    #
    # @example
    #   estimator =
-    #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 5000, batch_size: 50, random_seed: 1)
+    #     SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000, batch_size: 20, random_seed: 1)
    #   estimator.fit(training_samples, training_values)
    #   results = estimator.predict(testing_samples)
    #
    # *Reference*
    # - S. Shalev-Shwartz and Y. Singer, "Pegasos: Primal Estimated sub-GrAdient SOlver for SVM," Proc. ICML'07, pp. 807--814, 2007.
    # - L. Bottou, "Large-Scale Machine Learning with Stochastic Gradient Descent," Proc. COMPSTAT'10, pp. 177--186, 2010.
-    # - I. Sutskever, J. Martens, G. Dahl, and G. Hinton, "On the importance of initialization and momentum in deep learning," Proc. ICML'13, pp. 1139--1147, 2013.
-    # - G. Hinton, N. Srivastava, and K. Swersky, "Lecture 6e rmsprop," Neural Networks for Machine Learning, 2012.
    class Lasso
      include Base::BaseEstimator
      include Base::Regressor
      include Validation

@@ -39,34 +38,27 @@
      # Create a new Lasso regressor.
      #
      # @param reg_param [Float] The regularization parameter.
      # @param fit_bias [Boolean] The flag indicating whether to fit the bias term.
-      # @param learning_rate [Float] The learning rate for optimization.
-      # @param decay [Float] The discounting factor for RMS prop optimization.
-      # @param momentum [Float] The momentum for optimization.
      # @param max_iter [Integer] The maximum number of iterations.
      # @param batch_size [Integer] The size of the mini batches.
+      # @param optimizer [Optimizer] The optimizer used to calculate adaptive learning rates.
+      #   Nadam is selected automatically in the current version.
      # @param random_seed [Integer] The seed value used to initialize the random generator.
-      def initialize(reg_param: 1.0, fit_bias: false, learning_rate: 0.01, decay: 0.9, momentum: 0.9,
-                     max_iter: 1000, batch_size: 10, random_seed: nil)
-        check_params_float(reg_param: reg_param,
-                           learning_rate: learning_rate, decay: decay, momentum: momentum)
+      def initialize(reg_param: 1.0, fit_bias: false, max_iter: 1000, batch_size: 10, optimizer: nil, random_seed: nil)
+        check_params_float(reg_param: reg_param)
        check_params_integer(max_iter: max_iter, batch_size: batch_size)
        check_params_boolean(fit_bias: fit_bias)
        check_params_type_or_nil(Integer, random_seed: random_seed)
-        check_params_positive(reg_param: reg_param,
-                              learning_rate: learning_rate, decay: decay, momentum: momentum,
-                              max_iter: max_iter, batch_size: batch_size)
+        check_params_positive(reg_param: reg_param, max_iter: max_iter, batch_size: batch_size)
        @params = {}
        @params[:reg_param] = reg_param
        @params[:fit_bias] = fit_bias
-        @params[:learning_rate] = learning_rate
-        @params[:decay] = decay
-        @params[:momentum] = momentum
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
+        @params[:optimizer] = optimizer
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @weight_vec = nil
        @bias_term = nil
        @rng = Random.new(@params[:random_seed])
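Note on the constructor change above: the three hand-tuned update hyperparameters (learning_rate, decay, momentum) are dropped in favor of a single injected optimizer object. As the new doc comment states, Nadam is selected automatically in this version; in the fit code below, Optimizer::Nadam is instantiated directly, so the stored optimizer parameter appears to be held for forward compatibility rather than consulted. A minimal usage sketch against the 0.4.0 signature shown above (the sample and value variables are placeholders):

  require 'svmkit'

  # learning_rate, decay, and momentum are no longer accepted;
  # Nadam's adaptive per-dimension step sizes take their place.
  estimator = SVMKit::LinearModel::Lasso.new(reg_param: 0.1, max_iter: 1000,
                                             batch_size: 20, random_seed: 1)
  estimator.fit(training_samples, training_values)
  results = estimator.predict(testing_samples)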
@@ -136,15 +128,13 @@
        # Initialize some variables.
        n_samples, n_features = samples.shape
        rand_ids = [*0...n_samples].shuffle(random: @rng)
        weight_vec = Numo::DFloat.zeros(n_features)
        left_weight_vec = Numo::DFloat.zeros(n_features)
-        left_weight_sqrsum = Numo::DFloat.zeros(n_features)
-        left_weight_update = Numo::DFloat.zeros(n_features)
        right_weight_vec = Numo::DFloat.zeros(n_features)
-        right_weight_sqrsum = Numo::DFloat.zeros(n_features)
-        right_weight_update = Numo::DFloat.zeros(n_features)
+        left_optimizer = Optimizer::Nadam.new
+        right_optimizer = Optimizer::Nadam.new
        # Start optimization.
        @params[:max_iter].times do |_t|
          # Random sampling.
          subset_ids = rand_ids.shift(@params[:batch_size])
          rand_ids.concat(subset_ids)

@@ -152,16 +142,12 @@
          values = y[subset_ids]
          # Calculate gradients for loss function.
          loss_grad = loss_gradient(data, values, weight_vec)
          next if loss_grad.ne(0.0).count.zero?
          # Update weight.
-          left_weight_vec, left_weight_sqrsum, left_weight_update =
-            update_weight(left_weight_vec, left_weight_sqrsum, left_weight_update,
-                          left_weight_gradient(loss_grad, data))
-          right_weight_vec, right_weight_sqrsum, right_weight_update =
-            update_weight(right_weight_vec, right_weight_sqrsum, right_weight_update,
-                          right_weight_gradient(loss_grad, data))
+          left_weight_vec = round_weight(left_optimizer.call(left_weight_vec, left_weight_gradient(loss_grad, data)))
+          right_weight_vec = round_weight(right_optimizer.call(right_weight_vec, right_weight_gradient(loss_grad, data)))
          weight_vec = left_weight_vec - right_weight_vec
        end
        split_weight_vec_bias(weight_vec)
      end

@@ -175,15 +161,11 @@
      def right_weight_gradient(loss_grad, data)
        ((@params[:reg_param] - loss_grad).expand_dims(1) * data).mean(0)
      end
 
-      def update_weight(weight, sqrsum, update, gr)
-        new_sqrsum = @params[:decay] * sqrsum + (1.0 - @params[:decay]) * gr**2
-        new_update = (@params[:learning_rate] / ((new_sqrsum + 1.0e-8)**0.5)) * gr
-        new_weight = weight - (new_update + @params[:momentum] * update)
-        new_weight = 0.5 * (new_weight + new_weight.abs)
-        [new_weight, new_sqrsum, new_update]
+      def round_weight(weight)
+        0.5 * (weight + weight.abs)
      end
 
      def expand_feature(x)
        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
      end
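Why the weight vector is maintained as two halves: the L1 problem is optimized in its non-negative decomposition w = w_plus - w_minus with w_plus, w_minus >= 0, which turns the non-differentiable penalty reg_param * |w| into the smooth linear term reg_param * (w_plus + w_minus); this is where the reg_param terms in left_weight_gradient and right_weight_gradient come from. The new round_weight helper is then just the projection of each half back onto the non-negative orthant after its Nadam step, since 0.5 * (w + |w|) equals max(w, 0) elementwise. A standalone sketch of that identity (illustrative only, not part of the gem):

  require 'numo/narray'

  w = Numo::DFloat[-0.3, 0.0, 1.2]

  # round_weight's formula: negative entries collapse to zero,
  # non-negative entries pass through unchanged, i.e. max(w, 0).
  projected = 0.5 * (w + w.abs)
  p projected  # prints a Numo::DFloat with values [0, 0, 1.2]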