lib/dnn/core/optimizers.rb in ruby-dnn-0.13.4 vs lib/dnn/core/optimizers.rb in ruby-dnn-0.14.0
- old
+ new
@@ -8,17 +8,18 @@
def self.from_hash(hash)
return nil unless hash
optimizer_class = DNN.const_get(hash[:class])
optimizer = optimizer_class.allocate
- raise DNN_Error.new("#{optimizer.class} is not an instance of #{self} class.") unless optimizer.is_a?(self)
+ raise DNN_Error, "#{optimizer.class} is not an instance of #{self} class." unless optimizer.is_a?(self)
optimizer.load_hash(hash)
optimizer
end
def self.load(dumped)
opt = from_hash(dumped[:hash])
+ return opt unless dumped[:status]
dumped[:status].each do |key, state|
state = state.clone
opt.status[key] = state
opt.instance_variable_set("@#{key}", state)
end
@@ -32,36 +33,38 @@
# Update layers that have params.
def update(layers)
target_params = layers.select { |layer| layer.is_a?(Layers::HasParamLayer) && layer.trainable }
.map { |layer| layer.get_params.values }.flatten.compact
- .select { |param| param.grad }
+ .select(&:grad)
clip_grads(target_params) if @clip_norm
update_params(target_params)
target_params.each do |param|
param.grad = Xumo::SFloat[0]
end
end
- def dump
- { hash: to_hash, status: @status }
+ def dump(require_status = true)
+ status = require_status ? @status : nil
+ { hash: to_hash, status: status }
end
def to_hash(merge_hash = nil)
hash = { class: self.class.name, clip_norm: @clip_norm }
hash.merge!(merge_hash) if merge_hash
hash
end
# Update params.
private def update_params(params)
- raise NotImplementedError.new("Class '#{self.class.name}' has not implemented the method 'update_params'")
+ raise NotImplementedError, "Class '#{self.class.name}' has not implemented the method 'update_params'"
end
private def clip_grads(params)
- norm = Math.sqrt(params.reduce(0) { |total, param| total + (param.grad ** 2).sum })
+ norm = Math.sqrt(params.reduce(0) { |total, param| total + (param.grad**2).sum })
return if norm <= @clip_norm
+
rate = @clip_norm / (norm + 1e-7)
params.each do |param|
param.grad *= rate
end
end
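For reference, a minimal usage sketch of the new optional-status dump together with the early return added to self.load above. This assumes require "dnn" and the DNN::Optimizers namespace (an assumption, not shown in this file); illustration only:

    require "dnn"

    opt = DNN::Optimizers::SGD.new(0.01, momentum: 0.9)
    full  = opt.dump         # { hash: {...}, status: {...} }  state included
    light = opt.dump(false)  # { hash: {...}, status: nil }    hash only

    # With status nil, the new guard in self.load skips state restoration
    # and simply rebuilds the optimizer from its hash.
    restored = DNN::Optimizers::SGD.load(light)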
@@ -69,11 +72,10 @@
def load_hash(hash)
initialize(clip_norm: hash[:clip_norm])
end
end
-
class SGD < Optimizer
attr_accessor :lr
attr_accessor :momentum
# @param [Float] lr Learning rate.
@@ -105,27 +107,25 @@
def load_hash(hash)
initialize(hash[:lr], momentum: hash[:momentum], clip_norm: hash[:clip_norm])
end
end
-
class Nesterov < SGD
def initialize(lr = 0.01, momentum: 0.9, clip_norm: nil)
super(lr, momentum: momentum, clip_norm: clip_norm)
end
private def update_params(params)
params.each do |param|
@v[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
amount = param.grad * @lr
@v[param.name] = @v[param.name] * @momentum - amount
- param.data = (param.data + @momentum ** 2 * @v[param.name]) - (1 + @momentum) * amount
+ param.data = (param.data + @momentum**2 * @v[param.name]) - (1 + @momentum) * amount
end
end
end
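The update above is a look-ahead reformulation of Nesterov momentum rather than the textbook two-step form. A scalar sketch of one step in plain Ruby (standalone, values chosen for illustration):

    momentum, lr = 0.9, 0.01
    v, w, grad = 0.0, 1.0, 0.5

    amount = grad * lr                                    # same quantity as in update_params
    v = v * momentum - amount                             # velocity update
    w = (w + momentum**2 * v) - (1 + momentum) * amount   # momentum step plus look-ahead correction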
-
class AdaGrad < Optimizer
attr_accessor :lr
attr_accessor :eps
# @param [Float] lr Learning rate.
@@ -139,11 +139,11 @@
end
private def update_params(params)
params.each do |param|
@g[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
- @g[param.name] += param.grad ** 2
+ @g[param.name] += param.grad**2
param.data -= (@lr / Xumo::NMath.sqrt(@g[param.name] + @eps)) * param.grad
end
end
def to_hash
@@ -153,11 +153,10 @@
def load_hash(hash)
initialize(hash[:lr], eps: hash[:eps], clip_norm: hash[:clip_norm])
end
end
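Because the accumulator @g only ever grows, AdaGrad's effective step size per parameter shrinks monotonically. A scalar sketch of two steps in plain Ruby (standalone, values chosen for illustration):

    lr, eps = 0.01, 1e-7
    g, w = 0.0, 1.0

    [0.5, 0.5].each do |grad|
      g += grad**2                            # squared gradients accumulate
      w -= (lr / Math.sqrt(g + eps)) * grad   # so the effective lr can only shrink
    end
    # first step scales by lr / sqrt(0.25) = 2 * lr, second by lr / sqrt(0.5), roughly 1.41 * lr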
-
class RMSProp < Optimizer
attr_accessor :lr
attr_accessor :alpha
attr_accessor :eps
@@ -178,21 +177,20 @@
end
private def update_params(params)
params.each do |param|
@g[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
- @g[param.name] = @alpha * @g[param.name] + (1 - @alpha) * param.grad ** 2
+ @g[param.name] = @alpha * @g[param.name] + (1 - @alpha) * param.grad**2
param.data -= (@lr / Xumo::NMath.sqrt(@g[param.name] + @eps)) * param.grad
end
end
def load_hash(hash)
initialize(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
end
end
-
class AdaDelta < Optimizer
attr_accessor :rho
attr_accessor :eps
# @param [Float] rho Moving average index of past slopes.
@@ -212,23 +210,22 @@
private def update_params(params)
params.each do |param|
@h[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@s[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
- @h[param.name] = @rho * @h[param.name] + (1 - @rho) * param.grad ** 2
+ @h[param.name] = @rho * @h[param.name] + (1 - @rho) * param.grad**2
v = (Xumo::NMath.sqrt(@s[param.name] + @eps) / Xumo::NMath.sqrt(@h[param.name] + @eps)) * param.grad
- @s[param.name] = @rho * @s[param.name] + (1 - @rho) * v ** 2
+ @s[param.name] = @rho * @s[param.name] + (1 - @rho) * v**2
param.data -= v
end
end
def load_hash(hash)
initialize(rho: hash[:rho], eps: hash[:eps], clip_norm: hash[:clip_norm])
end
end
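AdaDelta has no learning-rate argument: the running average of past updates (@s) plays that role, so each parameter's step size is set by the ratio of its own update history to its gradient history. A scalar sketch of one step in plain Ruby (standalone, values chosen for illustration):

    rho, eps = 0.95, 1e-6
    h, s = 0.0, 0.0
    w, grad = 1.0, 0.5

    h = rho * h + (1 - rho) * grad**2                      # running average of squared gradients
    v = (Math.sqrt(s + eps) / Math.sqrt(h + eps)) * grad   # step scaled by RMS(updates) / RMS(gradients)
    s = rho * s + (1 - rho) * v**2                         # running average of squared updates
    w -= v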
-
class RMSPropGraves < Optimizer
attr_accessor :lr
attr_accessor :alpha
attr_accessor :eps
@@ -252,21 +249,20 @@
private def update_params(params)
params.each do |param|
@m[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@v[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@m[param.name] = @alpha * @m[param.name] + (1 - @alpha) * param.grad
- @v[param.name] = @alpha * @v[param.name] + (1 - @alpha) * param.grad ** 2
- param.data -= (@lr / Xumo::NMath.sqrt(@v[param.name] - @m[param.name] ** 2 + @eps)) * param.grad
+ @v[param.name] = @alpha * @v[param.name] + (1 - @alpha) * param.grad**2
+ param.data -= (@lr / Xumo::NMath.sqrt(@v[param.name] - @m[param.name]**2 + @eps)) * param.grad
end
end
def load_hash(hash)
initialize(hash[:lr], alpha: hash[:alpha], eps: hash[:eps], clip_norm: hash[:clip_norm])
end
end
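The v - m**2 term in the denominator above is a running estimate of the gradient's variance (second moment minus squared mean), which is what distinguishes Graves' variant from plain RMSProp. A scalar sketch in plain Ruby (standalone, values chosen for illustration):

    lr, alpha, eps = 0.0001, 0.95, 0.0001
    m, v = 0.0, 0.0
    w, grad = 1.0, 0.5

    m = alpha * m + (1 - alpha) * grad              # running mean of gradients
    v = alpha * v + (1 - alpha) * grad**2           # running mean of squared gradients
    w -= (lr / Math.sqrt(v - m**2 + eps)) * grad    # v - m**2 approximates Var[grad]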
-
class Adam < Optimizer
attr_accessor :alpha
attr_accessor :beta1
attr_accessor :beta2
attr_accessor :eps
@@ -298,16 +294,16 @@
}
end
private def update_params(params)
@t += 1
- lr = @alpha * Math.sqrt(1 - @beta2 ** @t) / (1 - @beta1 ** @t)
+ lr = @alpha * Math.sqrt(1 - @beta2**@t) / (1 - @beta1**@t)
params.each do |param|
@m[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@v[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@m[param.name] += (1 - @beta1) * (param.grad - @m[param.name])
- @v[param.name] += (1 - @beta2) * (param.grad ** 2 - @v[param.name])
+ @v[param.name] += (1 - @beta2) * (param.grad**2 - @v[param.name])
if @amsgrad
@s[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@s[param.name] = Xumo::SFloat.maximum(@s[param.name], @v[param.name])
param.data -= lr * @m[param.name] / Xumo::NMath.sqrt(@s[param.name] + @eps)
else
@@ -320,11 +316,10 @@
initialize(alpha: hash[:alpha], beta1: hash[:beta1], beta2: hash[:beta2],
eps: hash[:eps], amsgrad: hash[:amsgrad], clip_norm: hash[:clip_norm])
end
end
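The lr computed at the top of update_params folds Adam's bias correction into the step size; with the usual beta1 = 0.9 and beta2 = 0.999 the very first step lands close to alpha no matter how large the gradient is. A scalar check in plain Ruby (standalone, eps ignored, values chosen for illustration):

    alpha, beta1, beta2 = 0.001, 0.9, 0.999
    t = 1
    m, v = 0.0, 0.0
    grad = 42.0                                     # any scale

    lr = alpha * Math.sqrt(1 - beta2**t) / (1 - beta1**t)
    m += (1 - beta1) * (grad - m)                   # m = 0.1 * grad
    v += (1 - beta2) * (grad**2 - v)                # v = 0.001 * grad**2
    step = lr * m / Math.sqrt(v)                    # magnitude is exactly alpha, whatever grad is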
-
class AdaBound < Adam
attr_accessor :final_lr
attr_accessor :gamma
# @param [Float] final_lr Final learning rate.
@@ -342,18 +337,18 @@
}
end
private def update_params(params)
@t += 1
- lr = @alpha * Math.sqrt(1 - @beta2 ** @t) / (1 - @beta1 ** @t)
+ lr = @alpha * Math.sqrt(1 - @beta2**@t) / (1 - @beta1**@t)
final_lr = @final_lr * lr / @alpha
lower_bound = final_lr * (1 - 1 / (@gamma * @t + 1))
upper_bound = final_lr * (1 + 1 / (@gamma * @t))
params.each do |param|
@m[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@v[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@m[param.name] += (1 - @beta1) * (param.grad - @m[param.name])
- @v[param.name] += (1 - @beta2) * (param.grad ** 2 - @v[param.name])
+ @v[param.name] += (1 - @beta2) * (param.grad**2 - @v[param.name])
if @amsgrad
@s[param.name] ||= Xumo::SFloat.zeros(*param.data.shape)
@s[param.name] = Xumo::SFloat.maximum(@s[param.name], @v[param.name])
param.data -= clip_lr(lr / (Xumo::NMath.sqrt(@s[param.name]) + @eps), lower_bound, upper_bound) * @m[param.name]
else