lib/dnn/core/optimizers.rb in ruby-dnn-0.8.6 vs lib/dnn/core/optimizers.rb in ruby-dnn-0.8.7
- old (ruby-dnn-0.8.6)
+ new (ruby-dnn-0.8.7)
@@ -35,11 +35,11 @@
  @momentum = momentum
  @v = {}
end

def update(params)
- params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
+ params.select { |key, param| param.grad }.each_value do |param|
    amount = param.grad * @learning_rate
    if @momentum > 0
      @v[param] ||= 0
      amount += @momentum * @v[param]
      @v[param] = amount
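
The change repeated in every hunk is the same: 0.8.6 kept only params that were instances of LearningParam, while 0.8.7 keeps any param whose grad is truthy. In Ruby only nil and false are falsy, so a param is now skipped exactly when it carries no gradient, whatever its class. As a minimal sketch of the SGD momentum step itself, on plain Floats rather than ruby-dnn's param objects (names here are illustrative, not the gem's API):

Param = Struct.new(:data, :grad)

def sgd_step(param, v, learning_rate: 0.01, momentum: 0.9)
  amount = param.grad * learning_rate
  if momentum > 0
    amount += momentum * v  # reuse a fraction of the previous update
    v = amount              # remember this update for the next step
  end
  param.data -= amount      # write-back; presumably follows just after the hunk
  v
end

param = Param.new(1.0, 0.5)
v = 0
3.times { v = sgd_step(param, v) }
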
@@ -62,11 +62,11 @@
def initialize(learning_rate = 0.01, momentum: 0.9)
  super(learning_rate, momentum: momentum)
end

def update(params)
- params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
+ params.select { |key, param| param.grad }.each_value do |param|
    @v[param] ||= 0
    amount = param.grad * @learning_rate
    @v[param] = @v[param] * @momentum - amount
    param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount
  end
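
This is the look-ahead formulation of Nesterov momentum: the update is rearranged so the gradient at the current point can be used directly instead of evaluating it at a shifted point. A sketch on plain Floats (illustrative names, not the gem's API):

def nesterov_step(data, grad, v, learning_rate: 0.01, momentum: 0.9)
  amount = grad * learning_rate
  v = v * momentum - amount
  # theta <- theta + momentum^2 * v - (1 + momentum) * lr * grad
  data = (data + momentum**2 * v) - (1 + momentum) * amount
  [data, v]
end
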
@@ -83,11 +83,11 @@
def self.load_hash(hash)
  self.new(hash[:learning_rate])
end

def update(params)
- params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
+ params.select { |key, param| param.grad }.each_value do |param|
    @g[param] ||= 0
    @g[param] += param.grad**2
    param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad
  end
end
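
AdaGrad keeps a per-param lifetime sum of squared gradients in @g, which only grows, so the effective step size learning_rate / sqrt(g + 1e-7) decays monotonically. A sketch on plain Floats (Math.sqrt stands in for Xumo::NMath.sqrt):

def adagrad_step(data, grad, g, learning_rate: 0.01, eps: 1e-7)
  g += grad**2  # lifetime sum of squared gradients; never shrinks
  data -= (learning_rate / Math.sqrt(g + eps)) * grad
  [data, g]
end
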
@@ -106,11 +106,11 @@
  @alpha = alpha
  @g = {}
end

def update(params)
- params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
+ params.select { |key, param| param.grad }.each_value do |param|
    @g[param] ||= 0
    @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2
    param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad
  end
end
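
RMSProp replaces AdaGrad's lifetime sum with an exponential moving average, so old gradients leak out at rate alpha and the step size can recover after large gradients. A sketch on plain Floats (the defaults shown are illustrative):

def rmsprop_step(data, grad, g, learning_rate: 0.01, alpha: 0.9, eps: 1e-7)
  g = alpha * g + (1 - alpha) * grad**2  # leaky average of squared gradients
  data -= (learning_rate / Math.sqrt(g + eps)) * grad
  [data, g]
end
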
@@ -134,11 +134,11 @@
  @h = {}
  @s = {}
end

def update(params)
- params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
+ params.select { |key, param| param.grad }.each_value do |param|
    @h[param] ||= Xumo::SFloat.zeros(*param.data.shape)
    @s[param] ||= Xumo::SFloat.zeros(*param.data.shape)
    @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2
    v = (Xumo::NMath.sqrt(@s[param] + 1e-6) / Xumo::NMath.sqrt(@h[param] + 1e-6)) * param.grad
    @s[param] = @rho * @s[param] + (1 - @rho) * v**2
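
AdaDelta keeps two moving averages, @h of squared gradients and @s of squared updates, and the ratio sqrt(s) / sqrt(h) replaces a hand-tuned learning rate. A sketch on plain Floats, assuming the step ends with data -= v (the write-back is cut off by the hunk above):

def adadelta_step(data, grad, h, s, rho: 0.95, eps: 1e-6)
  h = rho * h + (1 - rho) * grad**2  # running average of squared gradients
  v = (Math.sqrt(s + eps) / Math.sqrt(h + eps)) * grad
  s = rho * s + (1 - rho) * v**2     # running average of squared updates
  data -= v                          # assumed write-back, per standard AdaDelta
  [data, h, s]
end
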
@@ -170,10 +170,10 @@
end

def update(params)
  @iter += 1
  lr = @learning_rate * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
- params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param|
+ params.select { |key, param| param.grad }.each_value do |param|
    @m[param] ||= 0
    @v[param] ||= 0
    @m[param] += (1 - @beta1) * (param.grad - @m[param])
    @v[param] += (1 - @beta2) * (param.grad**2 - @v[param])
    param.data -= lr * @m[param] / Xumo::NMath.sqrt(@v[param] + 1e-7)
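
Adam folds both bias corrections into the scalar lr rather than correcting @m and @v directly; the incremental form m += (1 - beta1) * (grad - m) is algebraically the same as m = beta1 * m + (1 - beta1) * grad. A sketch on plain Floats (illustrative names and defaults):

def adam_step(data, grad, m, v, iter, learning_rate: 0.001,
              beta1: 0.9, beta2: 0.999, eps: 1e-7)
  iter += 1
  # fold the bias corrections for m and v into the step size
  lr = learning_rate * Math.sqrt(1 - beta2**iter) / (1 - beta1**iter)
  m += (1 - beta1) * (grad - m)      # moving average of gradients
  v += (1 - beta2) * (grad**2 - v)   # moving average of squared gradients
  data -= lr * m / Math.sqrt(v + eps)
  [data, m, v, iter]
end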