# frozen_string_literal: true

require 'rumale/base/base_estimator'
require 'rumale/optimizer/nadam'
require 'rumale/utils'

module Rumale
  # This module consists of the classes that implement polynomial models.
  module PolynomialModel
    # BaseFactorizationMachine is an abstract class for implementation of Factorization Machine-based estimators.
    # This class is used internally.
    class BaseFactorizationMachine
      include Base::BaseEstimator

      # Initialize a Factorization Machine-based estimator.
      #
      # @param n_factors [Integer] The maximum number of latent factors.
      # @param loss [String] The loss function ('hinge', 'logistic', or nil).
      # @param reg_param_linear [Float] The regularization parameter for the linear model.
      # @param reg_param_factor [Float] The regularization parameter for the factor matrix.
      # @param max_iter [Integer] The maximum number of epochs that indicates
      #   how many times the whole data is given to the training process.
      # @param batch_size [Integer] The size of the mini batches.
      # @param tol [Float] The tolerance of loss for terminating optimization.
      # @param optimizer [Optimizer] The optimizer to calculate adaptive learning rate.
      #   If nil is given, Nadam is used.
      # @param n_jobs [Integer] The number of jobs for running the fit and predict methods in parallel.
      #   If nil is given, the methods do not execute in parallel.
      #   If zero or less is given, it becomes equal to the number of processors.
      #   This parameter is ignored if the Parallel gem is not loaded.
      # @param verbose [Boolean] The flag indicating whether to output loss during iteration.
      # @param random_seed [Integer] The seed value used to initialize the random generator.
      def initialize(n_factors: 2, loss: nil, reg_param_linear: 1.0, reg_param_factor: 1.0,
                     max_iter: 200, batch_size: 50, tol: 1e-4,
                     optimizer: nil, n_jobs: nil, verbose: false, random_seed: nil)
        @params = {}
        @params[:n_factors] = n_factors
        @params[:loss] = loss unless loss.nil?
        @params[:reg_param_linear] = reg_param_linear
        @params[:reg_param_factor] = reg_param_factor
        @params[:max_iter] = max_iter
        @params[:batch_size] = batch_size
        @params[:tol] = tol
        @params[:optimizer] = optimizer
        @params[:optimizer] ||= Optimizer::Nadam.new
        @params[:n_jobs] = n_jobs
        @params[:verbose] = verbose
        @params[:random_seed] = random_seed
        @params[:random_seed] ||= srand
        @factor_mat = nil
        @weight_vec = nil
        @bias_term = nil
        @rng = Random.new(@params[:random_seed])
      end

      private

      def partial_fit(x, y)
        # Initialize some variables.
        class_name = self.class.to_s.split('::').last if @params[:verbose]
        n_samples, n_features = x.shape
        sub_rng = @rng.dup
        weight_vec = Numo::DFloat.zeros(n_features + 1)
        factor_mat = Rumale::Utils.rand_normal([@params[:n_factors], n_features], sub_rng)
        weight_optimizer = @params[:optimizer].dup
        factor_optimizers = Array.new(@params[:n_factors]) { @params[:optimizer].dup }
        # Start optimization.
        @params[:max_iter].times do |t|
          sample_ids = [*0...n_samples]
          sample_ids.shuffle!(random: sub_rng)
          until (subset_ids = sample_ids.shift(@params[:batch_size])).empty?
            # Sampling.
            sub_x = x[subset_ids, true]
            sub_y = y[subset_ids]
            ex_sub_x = expand_feature(sub_x)
            # Calculate gradients for loss function.
            loss_grad = loss_gradient(sub_x, ex_sub_x, sub_y, factor_mat, weight_vec)
            next if loss_grad.ne(0.0).count.zero?
            # Update each parameter.
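            # Note: weight_vec holds the bias term as its final element (the ones column appended
            # by expand_feature), so a single optimizer call updates the linear weights and the bias.
            # Each row of the factor matrix keeps its own optimizer so that adaptive moment
            # estimates are tracked per factor.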
            weight_vec = weight_optimizer.call(weight_vec, weight_gradient(loss_grad, ex_sub_x, weight_vec))
            @params[:n_factors].times do |n|
              factor_mat[n, true] = factor_optimizers[n].call(factor_mat[n, true],
                                                              factor_gradient(loss_grad, sub_x, factor_mat[n, true]))
            end
          end
          loss = loss_func(x, expand_feature(x), y, factor_mat, weight_vec)
          puts "[#{class_name}] Loss after #{t + 1} epochs: #{loss}" if @params[:verbose]
          break if loss < @params[:tol]
        end
        [factor_mat, *split_weight_vec_bias(weight_vec)]
      end

      def loss_func(_x, _expanded_x, _y, _factor, _weight)
        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
      end

      def loss_gradient(_x, _expanded_x, _y, _factor, _weight)
        raise NotImplementedError, "#{__method__} has to be implemented in #{self.class}."
      end

      # Mean gradient of the loss w.r.t. the bias-augmented weight vector, plus L2 regularization.
      def weight_gradient(loss_grad, data, weight)
        (loss_grad.expand_dims(1) * data).mean(0) + @params[:reg_param_linear] * weight
      end

      # Mean gradient of the loss w.r.t. one row of the factor matrix, plus L2 regularization.
      def factor_gradient(loss_grad, data, factor)
        (loss_grad.expand_dims(1) * (data * data.dot(factor).expand_dims(1) - factor * (data**2))).mean(0) +
          @params[:reg_param_factor] * factor
      end

      # Append a constant column of ones so the bias term can be learned with the weights.
      def expand_feature(x)
        Numo::NArray.hstack([x, Numo::DFloat.ones([x.shape[0], 1])])
      end

      # Split the bias-augmented weight vector back into linear weights and the bias term.
      def split_weight_vec_bias(weight_vec)
        weights = weight_vec[0...-1].dup
        bias = weight_vec[-1]
        [weights, bias]
      end
    end
  end
end
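
# A minimal, hypothetical sketch (not part of Rumale) of how a concrete estimator could fill in
# the abstract loss hooks above, assuming a squared loss; the class and method names below
# (SquaredLossFM, fm_score) are illustrative only. Rumale's real subclasses
# (FactorizationMachineRegressor, FactorizationMachineClassifier) are defined in separate files.
#
#   class SquaredLossFM < Rumale::PolynomialModel::BaseFactorizationMachine
#     private
#
#     # Factorization Machine score: linear term (incl. bias) plus pairwise interaction term.
#     def fm_score(x, expanded_x, factor, weight)
#       expanded_x.dot(weight) +
#         0.5 * ((factor.dot(x.transpose)**2).sum(0) - (factor**2).dot((x**2).transpose).sum(0))
#     end
#
#     def loss_func(x, expanded_x, y, factor, weight)
#       ((fm_score(x, expanded_x, factor, weight) - y)**2).mean
#     end
#
#     # Per-sample gradient of the squared loss w.r.t. the score; weight_gradient and
#     # factor_gradient in the base class turn this into parameter gradients.
#     def loss_gradient(x, expanded_x, y, factor, weight)
#       2.0 * (fm_score(x, expanded_x, factor, weight) - y)
#     end
#   end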