lib/dnn/core/optimizers.rb in ruby-dnn-0.1.5 vs lib/dnn/core/optimizers.rb in ruby-dnn-0.1.6

- : line only in the old version (ruby-dnn 0.1.5)
+ : line only in the new version (ruby-dnn 0.1.6)

@@ -9,10 +9,14 @@
         @learning_rate = learning_rate
       end
 
       #Update layer has params.
       def update(layer) end
+
+      def to_hash
+        {name: self.class.name, learning_rate: @learning_rate}
+      end
     end
 
 
     class SGD < Optimizer
       attr_accessor :momentum
@@ -20,10 +24,14 @@
       def initialize(learning_rate = 0.01, momentum: 0)
         super(learning_rate)
         @momentum = momentum
         @amounts = {}
       end
+
+      def self.load_hash(hash)
+        self.new(hash[:learning_rate], hash[:momentum])
+      end
 
       def update(layer)
         amount = if @amounts[layer]
           @amounts[layer]
         else
@@ -37,18 +45,30 @@
             @amounts[layer] = amount
           end
           layer.params[key] -= amount[key]
         end
       end
+
+      def to_hash
+        {
+          name: self.class.name,
+          learning_rate: @learning_rate,
+          momentum: @momentum,
+        }
+      end
     end
 
 
     class AdaGrad
       def initialize(learning_rate = 0.01)
         super(learning_rate)
         @g = {}
       end
+
+      def self.load_hash(hash)
+        @learning_rate = hash[:learning_rate]
+      end
 
       def update(layer)
         @g[layer] ||= {}
         layer.params.each_key do |key|
           @g[layer][key] ||= 0
@@ -65,19 +85,31 @@
       def initialize(learning_rate = 0.001, muse = 0.9)
         super(learning_rate)
         @muse = muse
         @g = {}
       end
+
+      def self.load_hash(hash)
+        self.new(hash[:learning_rate], hash[:muse])
+      end
 
       def update(layer)
         @g[layer] ||= {}
         layer.params.each_key do |key|
           @g[layer][key] ||= 0
           @g[layer][key] = @muse * @g[layer][key] + (1 - @muse) * layer.grads[key]**2
           layer.params[key] -= (@learning_rate / NMath.sqrt(@g[layer][key] + 1e-7)) * layer.grads[key]
         end
       end
+
+      def to_hash
+        {
+          name: self.class.name,
+          learning_rate: @learning_rate,
+          muse: @muse,
+        }
+      end
     end
 
 
     class Adam < Optimizer
       include Numo
@@ -92,10 +124,14 @@
         @iter = 0
         @m = {}
         @v = {}
       end
 
+      def self.load_hash(hash)
+        self.new(hash[:learning_rate], hash[:beta1], hash[:beta2])
+      end
+
       def update(layer)
         @iter += 1
         @m[layer] ||= {}
         @v[layer] ||= {}
         lr = @learning_rate * Math.sqrt(1 - @beta2**@iter) / (1 - @beta1**@iter)
@@ -104,9 +140,18 @@
           @v[layer][key] ||= 0
           @m[layer][key] += (1 - @beta1) * (layer.grads[key] - @m[layer][key])
           @v[layer][key] += (1 - @beta2) * (layer.grads[key]**2 - @v[layer][key])
           layer.params[key] -= lr * @m[layer][key] / NMath.sqrt(@v[layer][key] + 1e-7)
         end
+      end
+
+      def to_hash
+        {
+          name: self.class.name,
+          learning_rate: @learning_rate,
+          beta1: @beta1,
+          beta2: @beta2,
+        }
       end
     end
   end
 end
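
Usage note: the new to_hash / load_hash pair lets an optimizer's hyperparameters be captured as a plain Hash and an equivalent optimizer be rebuilt from it later, for example when saving and restoring a model's training setup. The following is a minimal sketch of that round trip using RMSProp; the JSON step and the restore_optimizer helper are illustrative assumptions rather than part of ruby-dnn, and the top-level require path is assumed to be "dnn".

require "json"
require "dnn"  # assumed require path for the ruby-dnn gem

# Capture the optimizer's hyperparameters as a Hash (to_hash, added in 0.1.6)
# and dump them to JSON for storage.
opt   = DNN::Optimizers::RMSProp.new(0.0005, 0.95)
saved = JSON.generate(opt.to_hash)
# => {"name":"DNN::Optimizers::RMSProp","learning_rate":0.0005,"muse":0.95}

# Hypothetical helper: resolve the optimizer class from the stored name and let
# its load_hash (also added in 0.1.6) rebuild an equivalent instance.
def restore_optimizer(json)
  hash = JSON.parse(json, symbolize_names: true)
  Object.const_get(hash[:name]).load_hash(hash)
end

opt2 = restore_optimizer(saved)
opt2.class          # => DNN::Optimizers::RMSProp
opt2.learning_rate  # => 0.0005

As written in this diff, the round trip is cleanest for RMSProp: AdaGrad gains a load_hash but no to_hash (and its load_hash assigns an instance variable instead of returning a new optimizer), and SGD.load_hash passes hash[:momentum] positionally even though SGD#initialize declares momentum: as a keyword argument.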