lib/dnn/core/optimizers.rb in ruby-dnn-0.8.6 vs lib/dnn/core/optimizers.rb in ruby-dnn-0.8.7

- old
+ new

@@ -35,11 +35,11 @@ @momentum = momentum @v = {} end def update(params) - params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param| + params.select { |key, param| param.grad }.each_value do |param| amount = param.grad * @learning_rate if @momentum > 0 @v[param] ||= 0 amount += @momentum * @v[param] @v[param] = amount @@ -62,11 +62,11 @@ def initialize(learning_rate = 0.01, momentum: 0.9) super(learning_rate, momentum: momentum) end def update(params) - params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param| + params.select { |key, param| param.grad }.each_value do |param| @v[param] ||= 0 amount = param.grad * @learning_rate @v[param] = @v[param] * @momentum - amount param.data = (param.data + @momentum**2 * @v[param]) - (1 + @momentum) * amount end @@ -83,11 +83,11 @@ def self.load_hash(hash) self.new(hash[:learning_rate]) end def update(params) - params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param| + params.select { |key, param| param.grad }.each_value do |param| @g[param] ||= 0 @g[param] += param.grad**2 param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad end end @@ -106,11 +106,11 @@ @alpha = alpha @g = {} end def update(params) - params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param| + params.select { |key, param| param.grad }.each_value do |param| @g[param] ||= 0 @g[param] = @alpha * @g[param] + (1 - @alpha) * param.grad**2 param.data -= (@learning_rate / Xumo::NMath.sqrt(@g[param] + 1e-7)) * param.grad end end @@ -134,11 +134,11 @@ @h = {} @s = {} end def update(params) - params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param| + params.select { |key, param| param.grad }.each_value do |param| @h[param] ||= Xumo::SFloat.zeros(*param.data.shape) @s[param] ||= Xumo::SFloat.zeros(*param.data.shape) @h[param] = @rho * @h[param] + (1 - @rho) * param.grad**2 v = (Xumo::NMath.sqrt(@s[param] + 1e-6) / Xumo::NMath.sqrt(@h[param] + 1e-6)) * param.grad @s[param] = @rho * @s[param] + (1 - @rho) * v**2 @@ -170,10 +170,10 @@ end def update(params) @iter += 1 lr = @learning_rate * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter) - params.select { |key, param| param.is_a?(LearningParam) }.each_value do |param| + params.select { |key, param| param.grad }.each_value do |param| @m[param] ||= 0 @v[param] ||= 0 @m[param] += (1 - @beta1) * (param.grad - @m[param]) @v[param] += (1 - @beta2) * (param.grad**2 - @v[param]) param.data -= lr * @m[param] / Xumo::NMath.sqrt(@v[param] + 1e-7)