# frozen_string_literal: true

module Mopti
  # ScaledConjugateGradient is a class that implements multivariate optimization with the scaled conjugate gradient method.
  #
  # @example
  #   # Seek the minimum value of the expression a*u**2 + b*u*v + c*v**2 + d*u + e*v + f for
  #   # given values of the parameters and an initial guess (u, v) = (0, 0).
  #   # https://docs.scipy.org/doc/scipy/reference/generated/scipy.optimize.fmin_cg.html#scipy.optimize.fmin_cg
  #   require 'numo/narray'
  #   require 'mopti'
  #
  #   args = [2, 3, 7, 8, 9, 10]
  #
  #   f = proc do |x, a, b, c, d, e, f|
  #     u = x[0]
  #     v = x[1]
  #     a * u**2 + b * u * v + c * v**2 + d * u + e * v + f
  #   end
  #
  #   g = proc do |x, a, b, c, d, e, f|
  #     u = x[0]
  #     v = x[1]
  #     gu = 2 * a * u + b * v + d
  #     gv = b * u + 2 * c * v + e
  #     Numo::DFloat[gu, gv]
  #   end
  #
  #   x0 = Numo::DFloat.zeros(2)
  #
  #   optimizer = Mopti::ScaledConjugateGradient.new(fnc: f, jcb: g, x_init: x0, args: args)
  #   result = optimizer.map { |params| params }.last
  #
  #   pp result
  #
  #   # {:x=>Numo::DFloat#shape=[2]
  #   #  [-1.80847, -0.25533],
  #   #  :n_fev=>10,
  #   #  :n_jev=>18,
  #   #  :n_iter=>9,
  #   #  :fnc=>1.6170212789006122,
  #   #  :jcb=>1.8698188678645777e-07}
  #
  # *Reference*
  # 1. M. F. Moller, "A Scaled Conjugate Gradient Algorithm for Fast Supervised Learning," Neural Networks, Vol. 6, pp. 525--533, 1993.
  class ScaledConjugateGradient
    include Enumerable

    # Create a new optimizer with the scaled conjugate gradient method.
    #
    # @param fnc [Method/Proc] Method for calculating the objective function to be minimized.
    # @param jcb [Method/Proc] Method for calculating the gradient vector.
    # @param x_init [Numo::NArray] Initial point.
    # @param args [Array/Hash] Arguments passed to 'fnc' and 'jcb'.
    # @param max_iter [Integer] Maximum number of iterations.
    # @param xtol [Float] Tolerance for termination on the amount of change of the vector.
    # @param ftol [Float] Tolerance for termination on the change of the objective function value.
    # @param jtol [Float] Tolerance for termination on the squared norm of the gradient.
    def initialize(fnc:, jcb:, x_init:, args: nil, max_iter: 200, xtol: 1e-6, ftol: 1e-8, jtol: 1e-7)
      @fnc = fnc
      @jcb = jcb
      @x_init = x_init
      @args = args
      @max_iter = max_iter
      @ftol = ftol
      @jtol = jtol
      @xtol = xtol
    end

    # Iteration for optimization.
    #
    # @overload each(&block) -> Object
    # @yield [Hash] { x:, n_fev:, n_jev:, n_iter:, fnc:, jcb: }
    #   - x [Numo::NArray] Updated vector by optimization.
    #   - n_fev [Integer] Number of calls of the objective function.
    #   - n_jev [Integer] Number of calls of the gradient function.
    #   - n_iter [Integer] Number of iterations.
    #   - fnc [Float] Value of the objective function.
    #   - jcb [Float] Squared norm of the gradient vector.
    # @return [Enumerator] If a block is not given, this method returns an Enumerator.
    def each
      return to_enum(__method__) unless block_given?
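
      # The loop below follows the SCG algorithm of Moller (1993), cited above:
      # instead of performing a line search, each iteration estimates curvature
      # along the search direction d from a finite difference of gradients and
      # regulates the step size through the scaling parameter beta.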
      x = @x_init
      f_prev = func(x)
      n_fev = 1
      f_curr = f_prev
      j_next = jacb(x)
      n_jev = 1

      j_curr = j_next.dot(j_next)
      j_prev = j_next.dup
      d = -j_next
      success = true
      n_successes = 0
      beta = 1.0

      n_iter = 0

      while n_iter < @max_iter
        if success
          mu = d.dot(j_next)
          # Reset to the steepest descent direction if d is not a descent direction.
          if mu >= 0.0
            d = -j_next
            mu = d.dot(j_next)
          end
          kappa = d.dot(d)
          break if kappa < 1e-16

          # Estimate the curvature along d with a finite difference of gradients.
          sigma = SIGMA_INIT / Math.sqrt(kappa)
          x_plus = x + sigma * d
          j_plus = jacb(x_plus)
          n_jev += 1
          theta = d.dot(j_plus - j_next) / sigma
        end

        # Increase the effective curvature with beta to keep delta positive,
        # then obtain the step size alpha.
        delta = theta + beta * kappa
        if delta <= 0
          delta = beta * kappa
          beta -= theta / kappa
        end
        alpha = -mu / delta

        x_next = x + alpha * d
        f_next = func(x_next)
        n_fev += 1

        # The comparison parameter measures how well the quadratic
        # approximation predicted the actual reduction.
        delta = 2 * (f_next - f_prev) / (alpha * mu)
        if delta >= 0
          success = true
          n_successes += 1
          x = x_next
          f_curr = f_next
        else
          success = false
          f_curr = f_prev
        end

        n_iter += 1

        yield({ x: x, n_fev: n_fev, n_jev: n_jev, n_iter: n_iter, fnc: f_curr, jcb: j_curr })

        if success
          break if (f_next - f_prev).abs < @ftol
          break if (alpha * d).abs.max < @xtol

          f_prev = f_next

          j_prev = j_next
          j_next = jacb(x)
          n_jev += 1

          j_curr = j_next.dot(j_next)
          break if j_curr <= @jtol
        end

        # Adjust the scaling parameter according to the comparison parameter.
        beta = [beta * 4, BETA_MAX].min if delta < 0.25
        beta = [beta / 4, BETA_MIN].max if delta > 0.75

        # Restart with the steepest descent direction after x.size successes;
        # otherwise, update the conjugate search direction.
        if n_successes == x.size
          d = -j_next
          beta = 1.0
          n_successes = 0
        elsif success
          gamma = (j_prev - j_next).dot(j_next) / mu
          d = -j_next + gamma * d
        end
      end
    end

    SIGMA_INIT = 1e-4
    BETA_MIN = 1e-15
    BETA_MAX = 1e+15

    private_constant :SIGMA_INIT, :BETA_MIN, :BETA_MAX

    private

    def func(x)
      if @args.is_a?(Hash)
        @fnc.call(x, **@args)
      elsif @args.is_a?(Array)
        @fnc.call(x, *@args)
      elsif @args.nil?
        @fnc.call(x)
      else
        @fnc.call(x, @args)
      end
    end

    def jacb(x)
      if @args.is_a?(Hash)
        @jcb.call(x, **@args)
      elsif @args.is_a?(Array)
        @jcb.call(x, *@args)
      elsif @args.nil?
        @jcb.call(x)
      else
        @jcb.call(x, @args)
      end
    end
  end
end
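
# A minimal usage sketch for the Hash form of the 'args' parameter, which is
# forwarded to the objective and gradient procs as keyword arguments. The
# parameter names a and b below are illustrative, not part of the library:
#
#   require 'numo/narray'
#   require 'mopti'
#
#   # Minimize (u - a)**2 + (v - b)**2, whose gradient is [2(u - a), 2(v - b)].
#   f = proc { |x, a:, b:| (x[0] - a)**2 + (x[1] - b)**2 }
#   g = proc { |x, a:, b:| Numo::DFloat[2 * (x[0] - a), 2 * (x[1] - b)] }
#
#   optimizer = Mopti::ScaledConjugateGradient.new(
#     fnc: f, jcb: g, x_init: Numo::DFloat.zeros(2), args: { a: 1.0, b: -2.0 }
#   )
#   optimizer.each { |params| puts params[:fnc] } # objective value per iteration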