lib/dnn/core/normalizations.rb in ruby-dnn-0.10.1 vs lib/dnn/core/normalizations.rb in ruby-dnn-0.10.2
- old
+ new
@@ -1,72 +1,72 @@
-module DNN
-  module Layers
-
-    class BatchNormalization < HasParamLayer
-      # @return [Integer] The axis along which to normalize.
-      attr_reader :axis
-      # @return [Float] Momentum coefficient for the exponential moving averages of mean and variance.
-      attr_accessor :momentum
-      # @return [Float] Small value added to the variance to avoid division by zero.
-      attr_accessor :eps
-
-      def self.from_hash(hash)
-        self.new(axis: hash[:axis], momentum: hash[:momentum], eps: hash[:eps])
-      end
-
-      # @param [Integer] axis The axis along which to normalize.
-      # @param [Float] momentum Momentum coefficient for the exponential moving averages of mean and variance.
-      # @param [Float] eps Small value added to the variance to avoid division by zero.
-      def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
-        super()
-        @axis = axis
-        @momentum = momentum
-        @eps = eps
-      end
-
-      def build(input_shape)
-        super
-        @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape), 0)
-        @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape), 0)
-        @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
-        @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
-      end
-
-      def forward(x)
-        if learning_phase
-          mean = x.mean(axis: @axis, keepdims: true)
-          @xc = x - mean
-          var = (@xc**2).mean(axis: @axis, keepdims: true)
-          @std = NMath.sqrt(var + @eps)
-          xn = @xc / @std
-          @xn = xn
-          @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
-          @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
-        else
-          xc = x - @running_mean.data
-          xn = xc / NMath.sqrt(@running_var.data + @eps)
-        end
-        @gamma.data * xn + @beta.data
-      end
-
-      def backward(dy)
-        batch_size = dy.shape[@axis]
-        if @trainable
-          @beta.grad = dy.sum(axis: @axis, keepdims: true)
-          @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true)
-        end
-        dxn = @gamma.data * dy
-        dxc = dxn / @std
-        dstd = -((dxn * @xc) / (@std**2)).sum(axis: @axis, keepdims: true)
-        dvar = 0.5 * dstd / @std
-        dxc += (2.0 / batch_size) * @xc * dvar
-        dmean = dxc.sum(axis: @axis, keepdims: true)
-        dxc - dmean / batch_size
-      end
-
-      def to_hash
-        super({axis: @axis, momentum: @momentum, eps: @eps})
-      end
-    end
-
-  end
-end
+module DNN
+  module Layers
+
+    class BatchNormalization < HasParamLayer
+      # @return [Integer] The axis along which to normalize.
+      attr_reader :axis
+      # @return [Float] Momentum coefficient for the exponential moving averages of mean and variance.
+      attr_accessor :momentum
+      # @return [Float] Small value added to the variance to avoid division by zero.
+      attr_accessor :eps
+
+      def self.from_hash(hash)
+        self.new(axis: hash[:axis], momentum: hash[:momentum], eps: hash[:eps])
+      end
+
+      # @param [Integer] axis The axis along which to normalize.
+      # @param [Float] momentum Momentum coefficient for the exponential moving averages of mean and variance.
+      # @param [Float] eps Small value added to the variance to avoid division by zero.
+      def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
+        super()
+        @axis = axis
+        @momentum = momentum
+        @eps = eps
+      end
+
+      def build(input_shape)
+        super
+        @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape), 0)
+        @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape), 0)
+        @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
+        @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
+      end
+
+      def forward(x)
+        if learning_phase
+          mean = x.mean(axis: @axis, keepdims: true)
+          @xc = x - mean
+          var = (@xc**2).mean(axis: @axis, keepdims: true)
+          @std = NMath.sqrt(var + @eps)
+          xn = @xc / @std
+          @xn = xn
+          @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
+          @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
+        else
+          xc = x - @running_mean.data
+          xn = xc / NMath.sqrt(@running_var.data + @eps)
+        end
+        @gamma.data * xn + @beta.data
+      end
+
+      def backward(dy)
+        batch_size = dy.shape[@axis]
+        if @trainable
+          @beta.grad = dy.sum(axis: @axis, keepdims: true)
+          @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true)
+        end
+        dxn = @gamma.data * dy
+        dxc = dxn / @std
+        dstd = -((dxn * @xc) / (@std**2)).sum(axis: @axis, keepdims: true)
+        dvar = 0.5 * dstd / @std
+        dxc += (2.0 / batch_size) * @xc * dvar
+        dmean = dxc.sum(axis: @axis, keepdims: true)
+        dxc - dmean / batch_size
+      end
+
+      def to_hash
+        super({axis: @axis, momentum: @momentum, eps: @eps})
+      end
+    end
+
+  end
+end
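For reference, below is a minimal standalone sketch of the per-feature normalization that forward computes on its training branch, written directly against Numo (the numerical backend that ruby-dnn refers to as Xumo). The sample data and eps value are illustrative only, and the gamma/beta affine step is omitted.

require "numo/narray"

x = Numo::SFloat[[1, 2, 3], [2, 3, 4], [3, 4, 5], [4, 5, 6]]  # 4 samples x 3 features
eps = 1e-7

mean = x.mean(axis: 0, keepdims: true)        # per-feature mean over the batch axis
xc   = x - mean                               # centered input
var  = (xc**2).mean(axis: 0, keepdims: true)  # per-feature variance
std  = Numo::NMath.sqrt(var + eps)
xn   = xc / std                               # normalized input

var_check = (xn**2).mean(axis: 0)
p xn.mean(axis: 0)   # approximately 0 for every feature
p var_check          # approximately 1 for every feature

The layer then scales and shifts xn with the learned gamma and beta parameters, and at inference time it replaces the per-batch mean and variance with the running averages accumulated during training.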