lib/dnn/core/optimizers.rb in ruby-dnn-0.13.1 vs lib/dnn/core/optimizers.rb in ruby-dnn-0.13.2
- old
+ new
@@ -4,12 +4,21 @@
# Super class of all optimizer classes.
class Optimizer
attr_reader :status
attr_accessor :clip_norm
+ def self.from_hash(hash)
+ return nil unless hash
+ optimizer_class = DNN.const_get(hash[:class])
+ optimizer = optimizer_class.allocate
+ raise DNN_Error.new("#{optimizer.class} is not an instance of #{self} class.") unless optimizer.is_a?(self)
+ optimizer.load_hash(hash)
+ optimizer
+ end
+
def self.load(dumped)
- opt = Utils.hash_to_obj(dumped[:hash])
+ opt = from_hash(dumped[:hash])
dumped[:status].each do |key, state|
state = state.clone
opt.status[key] = state
opt.instance_variable_set("@#{key}", state)
end
@@ -54,21 +63,21 @@
rate = @clip_norm / (norm + 1e-7)
params.each do |param|
param.grad *= rate
end
end
+
+ def load_hash(hash)
+ initialize(clip_norm: hash[:clip_norm])
+ end
end
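
The new deserialization path is split in two: the base-class `from_hash` allocates the optimizer named in `hash[:class]` without running `initialize`, verifies the type, and then delegates to the instance's `load_hash`, which each subclass overrides to re-run its own `initialize` with the stored values. A minimal round-trip sketch, assuming the usual `DNN::Optimizers` namespace and that `to_hash` records the class name under `:class` in the form the `DNN.const_get` lookup expects:

  # Hedged sketch, not part of the diff.
  sgd  = DNN::Optimizers::SGD.new(0.05, momentum: 0.9, clip_norm: 1.0)
  hash = sgd.to_hash                                  # assumed to record lr, momentum, clip_norm and the class name
  opt  = DNN::Optimizers::Optimizer.from_hash(hash)   # allocate -> type check -> load_hash
  opt.class     # => DNN::Optimizers::SGD
  opt.momentum  # => 0.9

Because `from_hash` is inherited, `DNN::Optimizers::SGD.from_hash(hash)` works as well; the `is_a?(self)` guard raises a `DNN_Error` if the hash names an incompatible class.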
class SGD < Optimizer
attr_accessor :lr
attr_accessor :momentum
- def self.from_hash(hash)
- self.new(hash[:lr], momentum: hash[:momentum], clip_norm: hash[:clip_norm])
- end
-
# @param [Float] lr Learning rate.
# @param [Float] momentum Momentum coefficient.
def initialize(lr = 0.01, momentum: 0, clip_norm: nil)
super(clip_norm: clip_norm)
@lr = lr
@@ -90,10 +99,14 @@
@v[param.name] = amount
end
param.data -= amount
end
end
+
+ def load_hash(hash)
+ initialize(hash[:lr], momentum: hash[:momentum], clip_norm: hash[:clip_norm])
+ end
end
class Nesterov < SGD
def initialize(lr = 0.01, momentum: 0.9, clip_norm: nil)
@@ -113,14 +126,10 @@
class AdaGrad < Optimizer
attr_accessor :lr
attr_accessor :eps
- def self.from_hash(hash)
- self.new(hash[:lr], eps: hash[:eps], clip_norm: hash[:clip_norm])
- end
-
# @param [Float] lr Learning rate.
# @param [Float] eps Value to avoid division by zero.
def initialize(lr = 0.01, eps: 1e-7, clip_norm: nil)
super(clip_norm: clip_norm)
@lr = lr
@@ -138,22 +147,22 @@
end
def to_hash
super(lr: @lr, eps: @eps)
end
+
+ def load_hash(hash)
+ initialize(hash[:lr], eps: hash[:eps], clip_norm: hash[:clip_norm])
+ end
end
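
The `to_hash`/`load_hash` symmetry visible in AdaGrad above is the pattern a user-defined optimizer would now follow instead of providing its own class-level `from_hash`. A hypothetical subclass, with the extra `decay` attribute and all names invented for illustration (the update rule is omitted; the point is the serialization pair):

  module DNN
    module Optimizers
      # Hypothetical subclass; not part of ruby-dnn.
      class DecaySGD < SGD
        def initialize(lr = 0.01, momentum: 0, decay: 0.0, clip_norm: nil)
          super(lr, momentum: momentum, clip_norm: clip_norm)
          @decay = decay
        end

        def to_hash
          super.merge(decay: @decay)   # assumes the parent to_hash returns a plain Hash
        end

        def load_hash(hash)
          initialize(hash[:lr], momentum: hash[:momentum], decay: hash[:decay], clip_norm: hash[:clip_norm])
        end
      end
    end
  end

Defining the class under `DNN::Optimizers` keeps it reachable by the `DNN.const_get` lookup in the base `from_hash`, assuming `to_hash` records the class name relative to the `DNN` module.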
class RMSProp < Optimizer
attr_accessor :lr
attr_accessor :alpha
attr_accessor :eps
- def self.from_hash(hash)
- self.new(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
- end
-
# @param [Float] lr Learning rate.
# @param [Float] alpha Moving average index of past slopes.
# @param [Float] eps Value to avoid division by zero.
def initialize(lr = 0.001, alpha: 0.9, eps: 1e-7, clip_norm: nil)
super(clip_norm: clip_norm)
@@ -173,21 +182,21 @@
@g[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@g[param.name] = @alpha * @g[param.name] + (1 - @alpha) * param.grad ** 2
param.data -= (@lr / Xumo::NMath.sqrt(@g[param.name] + @eps)) * param.grad
end
end
+
+ def load_hash(hash)
+ initialize(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
+ end
end
class AdaDelta < Optimizer
attr_accessor :rho
attr_accessor :eps
- def self.from_hash(hash)
- self.new(rho: hash[:rho], eps: hash[:eps], clip_norm: hash[:clip_norm])
- end
-
# @param [Float] rho Moving average index of past slopes.
# @param [Float] eps Value to avoid division by zero.
def initialize(rho: 0.95, eps: 1e-6, clip_norm: nil)
super(clip_norm: clip_norm)
@rho = rho
@@ -209,22 +218,22 @@
v = (Xumo::NMath.sqrt(@s[param.name] + @eps) / Xumo::NMath.sqrt(@h[param.name] + @eps)) * param.grad
@s[param.name] = @rho * @s[param.name] + (1 - @rho) * v ** 2
param.data -= v
end
end
+
+ def load_hash(hash)
+ initialize(rho: hash[:rho], eps: hash[:eps], clip_norm: hash[:clip_norm])
+ end
end
class RMSPropGraves < Optimizer
attr_accessor :lr
attr_accessor :alpha
attr_accessor :eps
- def self.from_hash(hash)
- self.new(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
- end
-
# @param [Float] lr Learning rate.
# @param [Float] alpha Moving average index of past slopes.
# @param [Float] eps Value to avoid division by zero.
def initialize(lr = 0.0001, alpha: 0.95, eps: 0.0001, clip_norm: nil)
super(clip_norm: clip_norm)
@@ -247,25 +256,24 @@
@m[param.name] = @alpha * @m[param.name] + (1 - @alpha) * param.grad
@v[param.name] = @alpha * @v[param.name] + (1 - @alpha) * param.grad ** 2
param.data -= (@lr / Xumo::NMath.sqrt(@v[param.name] - @m[param.name] ** 2 + @eps)) * param.grad
end
end
+
+ def load_hash(hash)
+ initialize(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
+ end
end
class Adam < Optimizer
attr_accessor :alpha
attr_accessor :beta1
attr_accessor :beta2
attr_accessor :eps
attr_reader :amsgrad
- def self.from_hash(hash)
- self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
- eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
- end
-
# @param [Float] alpha Value used to calculate learning rate.
# @param [Float] beta1 Moving average index of beta1.
# @param [Float] beta2 Moving average index of beta2.
# @param [Float] eps Value to avoid division by zero.
  # @param [Boolean] amsgrad Setting this to true enables amsgrad.
@@ -305,22 +313,22 @@
else
param.data -= lr * @m[param.name] / Xumo::NMath.sqrt(@v[param.name] + @eps)
end
end
end
+
+ def load_hash(hash)
+ initialize(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
+ eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
+ end
end
class AdaBound < Adam
attr_accessor :final_lr
attr_accessor :gamma
- def self.from_hash(hash)
- self.new(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
- final_lr: hash[:final_lr], gamma: hash[:gamma], eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
- end
-
# @param [Float] final_lr Final learning rate.
# @param [Float] gamma Lower and upper range value.
def initialize(alpha: 0.001, beta1: 0.9, beta2: 0.999, final_lr: 0.1, gamma: 0.001, eps: 1e-7, amsgrad: false, clip_norm: nil)
super(alpha: alpha, beta1: beta1, beta2: beta2, eps: eps, amsgrad: amsgrad, clip_norm: clip_norm)
@final_lr = final_lr
@@ -357,9 +365,14 @@
private def clip_lr(lr, lower_bound, upper_bound)
lr[lr < lower_bound] = lower_bound
lr[lr > upper_bound] = upper_bound
lr
+ end
+
+ def load_hash(hash)
+ initialize(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
+ final_lr: hash[:final_lr], gamma: hash[:gamma], eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
end
end
end
end
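
For completeness, `Optimizer.load` consumes a structure of the form `{ hash: ..., status: ... }`: the `:hash` entry is rebuilt through the new `from_hash`, and each `:status` entry is cloned back into both `status` and the matching instance variable. A hedged usage sketch; the dump side is not shown in this diff, so the structure is assembled by hand here and the `v:` status key for SGD's velocity table is an assumption:

  sgd = DNN::Optimizers::SGD.new(0.01, momentum: 0.9)
  dumped = { hash: sgd.to_hash, status: { v: {} } }   # assumed status layout for SGD
  opt = DNN::Optimizers::Optimizer.load(dumped)
  opt.momentum    # => 0.9
  opt.status[:v]  # => {} (cloned, then also assigned to @v)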