lib/dnn/core/optimizers.rb in ruby-dnn-0.6.7 vs lib/dnn/core/optimizers.rb in ruby-dnn-0.6.8

- old
+ new

@@ -93,51 +93,84 @@
     end
   end
 
   class RMSProp < Optimizer
-    attr_accessor :muse
+    attr_accessor :alpha
 
     def self.load_hash(hash)
-      self.new(hash[:learning_rate], hash[:muse])
+      self.new(hash[:learning_rate], alpha: hash[:alpha])
     end
 
-    def initialize(learning_rate = 0.001, muse = 0.9)
+    def initialize(learning_rate = 0.001, alpha: 0.9)
       super(learning_rate)
-      @muse = muse
+      @alpha = alpha
       @g = {}
     end
 
     def update(layer)
       @g[layer] ||= {}
       layer.params.each_key do |key|
         @g[layer][key] ||= 0
-        @g[layer][key] = @muse * @g[layer][key] + (1 - @muse) * layer.grads[key]**2
+        @g[layer][key] = @alpha * @g[layer][key] + (1 - @alpha) * layer.grads[key]**2
         layer.params[key] -= (@learning_rate / Xumo::NMath.sqrt(@g[layer][key] + 1e-7)) * layer.grads[key]
       end
     end
 
     def to_hash
-      super({muse: @muse})
+      super({alpha: @alpha})
     end
   end
 
+  class AdaDelta < Optimizer
+    attr_accessor :rho
+
+    def self.load_hash(hash)
+      self.new(rho: hash[:rho])
+    end
+
+    def initialize(rho: 0.95)
+      super(nil)
+      @rho = rho
+      @h = {}
+      @s = {}
+    end
+
+    def update(layer)
+      @h[layer] ||= {}
+      @s[layer] ||= {}
+      layer.params.each_key do |key|
+        @h[layer][key] ||= Xumo::SFloat.zeros(*layer.params[key].shape)
+        @s[layer][key] ||= Xumo::SFloat.zeros(*layer.params[key].shape)
+        @h[layer][key] = @rho * @h[layer][key] + (1 - @rho) * layer.grads[key]**2
+        v = (Xumo::NMath.sqrt(@s[layer][key] + 1e-6) / Xumo::NMath.sqrt(@h[layer][key] + 1e-6)) * layer.grads[key]
+        @s[layer][key] = @rho * @s[layer][key] + (1 - @rho) * v**2
+        layer.params[key] -= v
+      end
+    end
+
+    def to_hash
+      super({rho: @rho})
+    end
+  end
+
+
   class Adam < Optimizer
     attr_accessor :beta1
     attr_accessor :beta2
+
+    def self.load_hash(hash)
+      self.new(hash[:learning_rate], beta1: hash[:beta1], beta2: hash[:beta2])
+    end
 
-    def initialize(learning_rate = 0.001, beta1 = 0.9, beta2 = 0.999)
+    def initialize(learning_rate = 0.001, beta1: 0.9, beta2: 0.999)
       super(learning_rate)
       @beta1 = beta1
       @beta2 = beta2
       @iter = 0
       @m = {}
       @v = {}
-    end
-
-    def self.load_hash(hash)
-      self.new(hash[:learning_rate], hash[:beta1], hash[:beta2])
     end
 
     def update(layer)
       @iter += 1
       @m[layer] ||= {}
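
Summary of the changes above: RMSProp's decay coefficient is renamed from the positional parameter `muse` to the keyword argument `alpha:`, Adam's `beta1`/`beta2` become keyword arguments, and a new AdaDelta optimizer is added. A minimal usage sketch of the 0.6.8 constructor signatures follows; the `require` line and the `DNN::Optimizers` namespace are assumptions for illustration and are not part of this diff, so adjust them to match how the gem exposes these classes.

# Constructor signatures as introduced in 0.6.8 (namespace assumed).
require "dnn"

# RMSProp: decay coefficient is now the keyword argument `alpha:`.
rmsprop  = DNN::Optimizers::RMSProp.new(0.001, alpha: 0.9)

# AdaDelta is new in 0.6.8: it passes nil as the learning rate to the base
# Optimizer and is controlled only by the decay factor `rho:`.
adadelta = DNN::Optimizers::AdaDelta.new(rho: 0.95)

# Adam: beta1 and beta2 are now keyword arguments as well.
adam     = DNN::Optimizers::Adam.new(0.001, beta1: 0.9, beta2: 0.999)

Note that the matching `load_hash` class methods were updated in the same way, so hashes produced by `to_hash` under 0.6.7 (with `:muse` or positional betas) will not round-trip unchanged into 0.6.8.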
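To make the new AdaDelta step easier to follow outside the layer/optimizer plumbing, here is a standalone sketch of one update on a bare array. It uses Numo directly instead of the gem's Xumo alias, and the gradient and weight values are made up for illustration.

# One AdaDelta step on a plain Numo array (values are hypothetical).
require "numo/narray"

rho  = 0.95
eps  = 1e-6
grad = Numo::SFloat[0.1, -0.2, 0.3]   # hypothetical gradient
w    = Numo::SFloat[1.0, 1.0, 1.0]    # hypothetical weights
h    = Numo::SFloat.zeros(3)          # running average of squared gradients
s    = Numo::SFloat.zeros(3)          # running average of squared updates

h  = rho * h + (1 - rho) * grad**2                                  # accumulate grad**2
v  = (Numo::NMath.sqrt(s + eps) / Numo::NMath.sqrt(h + eps)) * grad # scaled step
s  = rho * s + (1 - rho) * v**2                                     # accumulate step**2
w -= v                                                              # apply the update

The ratio of the two running averages is what lets AdaDelta run without an explicit learning rate, which is why the class calls `super(nil)` in the diff above.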