lib/dnn/core/layers.rb in ruby-dnn-0.7.3 vs lib/dnn/core/layers.rb in ruby-dnn-0.8.0
- line removed (present in ruby-dnn-0.7.3 only)
+ line added (present in ruby-dnn-0.8.0 only)
@@ -47,16 +47,14 @@
# This class is a superclass of all classes with learning parameters.
class HasParamLayer < Layer
attr_accessor :trainable # Setting false prevents learning of parameters.
attr_reader :params # The parameters of the layer.
- attr_reader :grads # Differential value of parameter of layer.
def initialize
- super
+ super()
@params = {}
- @grads = {}
@trainable = true
end
def build(model)
@model = model
@@ -66,16 +64,17 @@
end
end
# Update the parameters.
def update
- @model.optimizer.update(self) if @trainable
+ @model.optimizer.update(@params) if @trainable
end
private
# Initialize of the parameters.
+ # Classes that inherit from this class must implement this method.
def init_params() end
end
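The central change in this hunk: the per-layer @grads hash is gone. Each trainable parameter is now wrapped in a parameter object that carries its own data and gradient, and HasParamLayer#update hands the optimizer the @params hash instead of the whole layer. The sketch below shows the minimal shape such a container would need for the calls in this diff (LearningParam.new(self), #data, #grad, and the later init_param(@weight)) to work; it is inferred from usage, not copied from the gem, whose real LearningParam class lives elsewhere and may carry more state.

module DNN
  # Minimal sketch of the parameter container assumed by this diff.
  class LearningParam
    attr_accessor :data  # parameter values (an Xumo::SFloat array)
    attr_accessor :grad  # gradient written by the layer's backward pass
    attr_reader :layer   # the layer that owns this parameter

    def initialize(layer)
      @layer = layer
    end
  end
end

With gradients attached to the parameters themselves, @model.optimizer.update(@params) only needs the hash of parameter objects; the optimizer no longer has to know anything about the layer that owns them.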
class InputLayer < Layer
@@ -118,36 +117,38 @@
super()
@weight_initializer = (weight_initializer || RandomNormal.new)
@bias_initializer = (bias_initializer || Zeros.new)
@l1_lambda = l1_lambda
@l2_lambda = l2_lambda
+ @params[:weight] = @weight = LearningParam.new(self)
+ @params[:bias] = @bias = LearningParam.new(self)
end
def lasso
if @l1_lambda > 0
- @l1_lambda * @params[:weight].abs.sum
+ @l1_lambda * @weight.data.abs.sum
else
0
end
end
def ridge
if @l2_lambda > 0
- 0.5 * @l2_lambda * (@params[:weight]**2).sum
+ 0.5 * @l2_lambda * (@weight.data**2).sum
else
0
end
end
def dlasso
- dlasso = Xumo::SFloat.ones(*@params[:weight].shape)
- dlasso[@params[:weight] < 0] = -1
+ dlasso = Xumo::SFloat.ones(*@weight.data.shape)
+ dlasso[@weight.data < 0] = -1
@l1_lambda * dlasso
end
def dridge
- @l2_lambda * @params[:weight]
+ @l2_lambda * @weight.data
end
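The four regularization methods above compute the standard L1/L2 penalty terms and their gradients on the weight matrix: lasso is l1 * sum(|w|), ridge is 0.5 * l2 * sum(w**2), dlasso is l1 * sign(w), and dridge is l2 * w; only the storage changes from @params[:weight] to @weight.data. The snippet below reproduces the same arithmetic with plain Numo arrays (the Xumo namespace used here normally resolves to Numo); the weight values and lambdas are made up for illustration.

require "numo/narray"

w  = Numo::SFloat[[0.5, -0.25], [-1.0, 2.0]]  # hypothetical weight matrix
l1 = 0.01
l2 = 0.01

lasso = l1 * w.abs.sum                # same arithmetic as Connection#lasso
ridge = 0.5 * l2 * (w**2).sum         # same arithmetic as Connection#ridge

dlasso = Numo::SFloat.ones(*w.shape)  # sign(w): +1 where w >= 0 ...
dlasso[w < 0] = -1                    # ... and -1 where w < 0
dlasso *= l1                          # same arithmetic as Connection#dlasso

dridge = l2 * w                       # same arithmetic as Connection#dridge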
def to_hash(merge_hash)
super({weight_initializer: @weight_initializer.to_hash,
bias_initializer: @bias_initializer.to_hash,
@@ -156,12 +157,12 @@
end
private
def init_params
- @weight_initializer.init_param(self, :weight)
- @bias_initializer.init_param(self, :bias)
+ @weight_initializer.init_param(@weight)
+ @bias_initializer.init_param(@bias)
end
end
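The initializer interface changes with this refactor as well: in 0.7.3 an initializer received the layer plus a parameter key (init_param(self, :weight)), in 0.8.0 it receives the LearningParam itself. A plausible minimal implementation of the new contract, inferred only from the call sites in this diff and not taken from the gem's actual initializer classes, might look like this.

require "numo/narray"

# Hypothetical, standalone sketch of the new initializer contract;
# not ruby-dnn's real Zeros initializer.
class ZerosSketch
  def init_param(param)
    # param is a LearningParam whose data has already been allocated
    # by the layer's init_params (see Dense#init_params below).
    param.data = param.data.fill(0)
  end
end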
class Dense < Connection
@@ -185,22 +186,22 @@
@num_nodes = num_nodes
end
def forward(x)
@x = x
- @x.dot(@params[:weight]) + @params[:bias]
+ @x.dot(@weight.data) + @bias.data
end
def backward(dout)
- @grads[:weight] = @x.transpose.dot(dout)
+ @weight.grad = @x.transpose.dot(dout)
if @l1_lambda > 0
- @grads[:weight] += dlasso
+ @weight.grad += dlasso
elsif @l2_lambda > 0
- @grads[:weight] += dridge
+ @weight.grad += dridge
end
- @grads[:bias] = dout.sum(0)
- dout.dot(@params[:weight].transpose)
+ @bias.grad = dout.sum(0)
+ dout.dot(@weight.data.transpose)
end
def shape
[@num_nodes]
end
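The forward/backward math of Dense is unchanged by this diff; only where the results live moves, from @params[:weight]/@grads[:weight] to @weight.data/@weight.grad. The affine pass is y = x.dot(W) + b, with gradients dW = x^T.dot(dout), db = dout summed over the batch axis, and dx = dout.dot(W^T). A quick shape check with plain Numo arrays and hypothetical sizes (batch of 2, 3 inputs, 4 nodes):

require "numo/narray"

x    = Numo::SFloat.new(2, 3).rand       # batch of 2, 3 input features
w    = Numo::SFloat.new(3, 4).rand_norm  # weight, 3 inputs -> 4 nodes
b    = Numo::SFloat.zeros(4)             # bias, one per node

y    = x.dot(w) + b                      # forward: (2,3)x(3,4) + (4,) => (2,4)
dout = Numo::SFloat.ones(2, 4)           # pretend upstream gradient

dw = x.transpose.dot(dout)               # stored as @weight.grad => (3,4)
db = dout.sum(0)                         # stored as @bias.grad   => (4,)
dx = dout.dot(w.transpose)               # returned to the previous layer => (2,3)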
@@ -211,12 +212,12 @@
private
def init_params
num_prev_nodes = prev_layer.shape[0]
- @params[:weight] = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
- @params[:bias] = Xumo::SFloat.new(@num_nodes)
+ @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
+ @bias.data = Xumo::SFloat.new(@num_nodes)
super()
end
end
@@ -315,68 +316,66 @@
class BatchNormalization < HasParamLayer
attr_reader :momentum
def self.load_hash(hash)
- running_mean = Xumo::SFloat.cast(hash[:running_mean])
- running_var = Xumo::SFloat.cast(hash[:running_var])
- self.new(momentum: hash[:momentum], running_mean: running_mean, running_var: running_var)
+ self.new(momentum: hash[:momentum])
end
- def initialize(momentum: 0.9, running_mean: nil, running_var: nil)
+ def initialize(momentum: 0.9)
super()
@momentum = momentum
- @running_mean = running_mean
- @running_var = running_var
+ @params[:gamma] = @gamma = LearningParam.new(self)
+ @params[:beta] = @beta = LearningParam.new(self)
+ @params[:running_mean] = nil
+ @params[:running_var] = nil
end
def build(model)
super
- @running_mean ||= Xumo::SFloat.zeros(*shape)
- @running_var ||= Xumo::SFloat.zeros(*shape)
+ @params[:running_mean] ||= Xumo::SFloat.zeros(*shape)
+ @params[:running_var] ||= Xumo::SFloat.zeros(*shape)
end
def forward(x)
if @model.training?
mean = x.mean(0)
@xc = x - mean
var = (@xc**2).mean(0)
@std = Xumo::NMath.sqrt(var + 1e-7)
xn = @xc / @std
@xn = xn
- @running_mean = @momentum * @running_mean + (1 - @momentum) * mean
- @running_var = @momentum * @running_var + (1 - @momentum) * var
+ @params[:running_mean] = @momentum * @params[:running_mean] + (1 - @momentum) * mean
+ @params[:running_var] = @momentum * @params[:running_var] + (1 - @momentum) * var
else
- xc = x - @running_mean
- xn = xc / Xumo::NMath.sqrt(@running_var + 1e-7)
+ xc = x - @params[:running_mean]
+ xn = xc / Xumo::NMath.sqrt(@params[:running_var] + 1e-7)
end
- @params[:gamma] * xn + @params[:beta]
+ @gamma.data * xn + @beta.data
end
def backward(dout)
batch_size = dout.shape[0]
- @grads[:beta] = dout.sum(0)
- @grads[:gamma] = (@xn * dout).sum(0)
- dxn = @params[:gamma] * dout
+ @beta.grad = dout.sum(0)
+ @gamma.grad = (@xn * dout).sum(0)
+ dxn = @gamma.data * dout
dxc = dxn / @std
dstd = -((dxn * @xc) / (@std**2)).sum(0)
dvar = 0.5 * dstd / @std
dxc += (2.0 / batch_size) * @xc * dvar
dmean = dxc.sum(0)
dxc - dmean / batch_size
end
def to_hash
- super({momentum: @momentum,
- running_mean: @running_mean.to_a,
- running_var: @running_var.to_a})
+ super({momentum: @momentum})
end
private
def init_params
- @params[:gamma] = Xumo::SFloat.ones(*shape)
- @params[:beta] = Xumo::SFloat.zeros(*shape)
+ @gamma.data = Xumo::SFloat.ones(*shape)
+ @beta.data = Xumo::SFloat.zeros(*shape)
end
end
end
end
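Two things change in BatchNormalization: gamma and beta become LearningParam objects like every other trainable parameter, and the running statistics move into @params, so they travel with the rest of the layer state instead of being round-tripped through to_hash (whose running_mean/running_var fields disappear above). The arithmetic itself is the usual batch-norm bookkeeping: during training the batch mean and variance normalize x and also update the running statistics as an exponential moving average with the given momentum; at inference the stored running statistics are used instead. Written out with plain Numo arrays and made-up values:

require "numo/narray"

momentum     = 0.9
x            = Numo::SFloat[[1.0, 2.0], [3.0, 4.0]]  # hypothetical batch
running_mean = Numo::SFloat.zeros(2)
running_var  = Numo::SFloat.zeros(2)
gamma        = Numo::SFloat.ones(2)   # init_params sets gamma to ones
beta         = Numo::SFloat.zeros(2)  # ... and beta to zeros

# Training path: normalize with the batch statistics and update the EMA.
mean = x.mean(0)
xc   = x - mean
var  = (xc**2).mean(0)
std  = Numo::NMath.sqrt(var + 1e-7)
xn   = xc / std
running_mean = momentum * running_mean + (1 - momentum) * mean
running_var  = momentum * running_var  + (1 - momentum) * var
train_out = gamma * xn + beta

# Inference path: reuse the stored running statistics.
xc = x - running_mean
xn = xc / Numo::NMath.sqrt(running_var + 1e-7)
infer_out = gamma * xn + beta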