lib/dnn/core/optimizers.rb in ruby-dnn-0.13.1 vs lib/dnn/core/optimizers.rb in ruby-dnn-0.13.2

- old
+ new

@@ -4,12 +4,21 @@
     # Super class of all optimizer classes.
     class Optimizer
       attr_reader :status
       attr_accessor :clip_norm

+      def self.from_hash(hash)
+        return nil unless hash
+        optimizer_class = DNN.const_get(hash[:class])
+        optimizer = optimizer_class.allocate
+        raise DNN_Error.new("#{optimizer.class} is not an instance of #{self} class.") unless optimizer.is_a?(self)
+        optimizer.load_hash(hash)
+        optimizer
+      end
+
       def self.load(dumped)
-        opt = Utils.hash_to_obj(dumped[:hash])
+        opt = from_hash(dumped[:hash])
         dumped[:status].each do |key, state|
           state = state.clone
           opt.status[key] = state
           opt.instance_variable_set("@#{key}", state)
         end
@@ -54,21 +63,21 @@
         rate = @clip_norm / (norm + 1e-7)
         params.each do |param|
           param.grad *= rate
         end
       end
+
+      def load_hash(hash)
+        initialize(clip_norm: hash[:clip_norm])
+      end
     end


     class SGD < Optimizer
       attr_accessor :lr
       attr_accessor :momentum

-      def self.from_hash(hash)
-        self.new(hash[:lr], momentum: hash[:momentum], clip_norm: hash[:clip_norm])
-      end
-
       # @param [Float] lr Learning rate.
       # @param [Float] momentum Momentum coefficient.
       def initialize(lr = 0.01, momentum: 0, clip_norm: nil)
         super(clip_norm: clip_norm)
         @lr = lr
@@ -90,10 +99,14 @@
             @v[param.name] = amount
           end
           param.data -= amount
         end
       end
+
+      def load_hash(hash)
+        initialize(hash[:lr], momentum: hash[:momentum], clip_norm: hash[:clip_norm])
+      end
     end


     class Nesterov < SGD
       def initialize(lr = 0.01, momentum: 0.9, clip_norm: nil)
@@ -113,14 +126,10 @@

     class AdaGrad < Optimizer
       attr_accessor :lr
       attr_accessor :eps

-      def self.from_hash(hash)
-        self.new(hash[:lr], eps: hash[:eps], clip_norm: hash[:clip_norm])
-      end
-
       # @param [Float] lr Learning rate.
       # @param [Float] eps Value to avoid division by zero.
       def initialize(lr = 0.01, eps: 1e-7, clip_norm: nil)
         super(clip_norm: clip_norm)
         @lr = lr
@@ -138,22 +147,22 @@
       end

       def to_hash
         super(lr: @lr, eps: @eps)
       end
+
+      def load_hash(hash)
+        initialize(hash[:lr], eps: hash[:eps], clip_norm: hash[:clip_norm])
+      end
     end


     class RMSProp < Optimizer
       attr_accessor :lr
       attr_accessor :alpha
       attr_accessor :eps

-      def self.from_hash(hash)
-        self.new(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
-      end
-
       # @param [Float] lr Learning rate.
       # @param [Float] alpha Moving average index of past slopes.
       # @param [Float] eps Value to avoid division by zero.
       def initialize(lr = 0.001, alpha: 0.9, eps: 1e-7, clip_norm: nil)
         super(clip_norm: clip_norm)
@@ -173,21 +182,21 @@
           @g[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
           @g[param.name] = @alpha * @g[param.name] + (1 - @alpha) * param.grad ** 2
           param.data -= (@lr / Xumo::NMath.sqrt(@g[param.name] + @eps)) * param.grad
         end
       end
+
+      def load_hash(hash)
+        initialize(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
+      end
     end


     class AdaDelta < Optimizer
       attr_accessor :rho
       attr_accessor :eps

-      def self.from_hash(hash)
-        self.new(rho: hash[:rho], eps: hash[:eps], clip_norm: hash[:clip_norm])
-      end
-
       # @param [Float] rho Moving average index of past slopes.
       # @param [Float] eps Value to avoid division by zero.
       def initialize(rho: 0.95, eps: 1e-6, clip_norm: nil)
         super(clip_norm: clip_norm)
         @rho = rho
@@ -209,22 +218,22 @@
           v = (Xumo::NMath.sqrt(@s[param.name] + @eps) / Xumo::NMath.sqrt(@h[param.name] + @eps)) * param.grad
           @s[param.name] = @rho * @s[param.name] + (1 - @rho) * v ** 2
           param.data -= v
         end
       end
+
+      def load_hash(hash)
+        initialize(rho: hash[:rho], eps: hash[:eps], clip_norm: hash[:clip_norm])
+      end
     end


     class RMSPropGraves < Optimizer
       attr_accessor :lr
       attr_accessor :alpha
       attr_accessor :eps

-      def self.from_hash(hash)
-        self.new(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
-      end
-
       # @param [Float] lr Learning rate.
       # @param [Float] alpha Moving average index of past slopes.
       # @param [Float] eps Value to avoid division by zero.
       def initialize(lr = 0.0001, alpha: 0.95, eps: 0.0001, clip_norm: nil)
         super(clip_norm: clip_norm)
@@ -247,25 +256,24 @@
           @m[param.name] = @alpha * @m[param.name] + (1 - @alpha) * param.grad
           @v[param.name] = @alpha * @v[param.name] + (1 - @alpha) * param.grad ** 2
           param.data -= (@lr / Xumo::NMath.sqrt(@v[param.name] - @m[param.name] ** 2 + @eps)) * param.grad
         end
       end
+
+      def load_hash(hash)
+        initialize(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
+      end
     end


     class Adam < Optimizer
       attr_accessor :alpha
       attr_accessor :beta1
       attr_accessor :beta2
       attr_accessor :eps
       attr_reader :amsgrad

-      def self.from_hash(hash)
-        self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
-                 eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
-      end
-
       # @param [Float] alpha Value used to calculate learning rate.
       # @param [Float] beta1 Moving average index of beta1.
       # @param [Float] beta2 Moving average index of beta2.
       # @param [Float] eps Value to avoid division by zero.
       # @param [Boolean] amsgrad Setting the true enable amsgrad.
@@ -305,22 +313,22 @@
           else
             param.data -= lr * @m[param.name] / Xumo::NMath.sqrt(@v[param.name] + @eps)
           end
         end
       end
+
+      def load_hash(hash)
+        initialize(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
+                   eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
+      end
     end


     class AdaBound < Adam
       attr_accessor :final_lr
       attr_accessor :gamma

-      def self.from_hash(hash)
-        self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
-                 final_lr: hash[:final_lr], gamma: hash[:gamma], eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
-      end
-
       # @param [Float] final_lr Final learning rate.
       # @param [Float] gamma Lower and upper range value.
       def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, final_lr: 0.1, gamma: 0.001, eps: 1e-7, amsgrad: false, clip_norm: nil)
         super(alpha: alpha, beta1: beta1, beta2: beta2, eps: eps, amsgrad: amsgrad, clip_norm: clip_norm)
         @final_lr = final_lr
@@ -357,9 +365,14 @@

       private def clip_lr(lr, lower_bound, upper_bound)
         lr[lr < lower_bound] = lower_bound
         lr[lr > upper_bound] = upper_bound
         lr
+      end
+
+      def load_hash(hash)
+        initialize(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
+                   final_lr: hash[:final_lr], gamma: hash[:gamma], eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
       end
     end
   end
 end
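
In short, 0.13.2 drops the per-class `self.from_hash` constructors and adds a single `Optimizer.from_hash` entry point that allocates the concrete subclass named in the hash and delegates to a new instance-level `load_hash`, which `Optimizer.load` now uses when restoring a dumped optimizer. Below is a minimal usage sketch, not part of the diff; it assumes `Optimizer#to_hash` (only partially visible above, for AdaGrad) returns a hash containing a `:class` entry that `DNN.const_get` can resolve, along with the stored hyperparameters.

require "dnn"

# Build an optimizer and serialize its hyperparameters to a plain hash.
opt = DNN::Optimizers::SGD.new(0.01, momentum: 0.9)
hash = opt.to_hash # assumed to contain :class, :lr, :momentum, :clip_norm

# Restore it through the generic entry point added in 0.13.2:
# from_hash allocates the subclass named by hash[:class] and calls its
# instance-level load_hash, which re-runs initialize with the stored values.
restored = DNN::Optimizers::Optimizer.from_hash(hash)
restored.class    # => DNN::Optimizers::SGD
restored.momentum # => 0.9

# Calling from_hash on a specific subclass also acts as a type check:
# DNN::Optimizers::Adam.from_hash(hash) would raise DNN_Error here, because
# the allocated object is an SGD, not an Adam.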