module DNN
  module Layers
    # Batch normalization layer.
    #
    # While learning, the input is normalized with the mean and variance taken
    # along +axis+, and exponential moving averages of those statistics are
    # maintained. Outside the learning phase the moving averages are used
    # instead. The normalized value is then scaled by gamma and shifted by beta.
    #
    # NOTE(review): several expressions in the reviewed copy of this file had
    # lost their leading tokens (constructor calls and parameter accessors).
    # They are restored below from the surrounding code and are individually
    # marked with NOTE(review); confirm each against the Param / Link / Xumo
    # definitions used by the rest of the project.
    class BatchNormalization < HasParamLayer
      attr_reader :gamma
      attr_reader :beta
      attr_reader :running_mean
      attr_reader :running_var
      attr_reader :axis
      attr_accessor :momentum
      attr_accessor :eps

      # Builds a layer from a hash holding the keys written by #to_hash.
      #
      # @param [Hash] hash Hash with :axis, :momentum and (optionally) :eps.
      # @return [BatchNormalization] A new, not yet built, layer.
      def self.from_hash(hash)
        # :eps is serialized by #to_hash, so it has to be restored here as well;
        # otherwise a customized eps silently reverts to the default on load.
        # Hashes without :eps fall back to the same default as #initialize.
        new(axis: hash[:axis], momentum: hash[:momentum], eps: hash[:eps] || 1e-7)
      end

      # @param [Integer] axis The axis to normalize along.
      # @param [Float] momentum Momentum of the exponential moving average of mean and variance.
      # @param [Float] eps Value added to the variance to avoid division by zero.
      def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
        super()
        @axis = axis
        @momentum = momentum
        @eps = eps
      end

      # Applies the layer to one step of the forward graph.
      #
      # @param [Array] input Triple of [x, prev_link, learning_phase].
      # @return [Array] Triple of [y, link, learning_phase].
      def call(input)
        x, prev_link, learning_phase = *input
        # The layer is built lazily from the first input, ignoring the leading dimension.
        build(x.shape[1..-1]) unless built?
        y = forward(x, learning_phase)
        # NOTE(review): constructor name restored; assumes Link.new(prev_link, layer).
        link = Link.new(prev_link, self)
        [y, link, learning_phase]
      end

      # Allocates the scale/shift parameters and the running statistics.
      #
      # @param [Array] input_shape Shape of the input without the leading dimension.
      def build(input_shape)
        super
        # NOTE(review): Param.new / Xumo::SFloat initializers restored. gamma (scale)
        # and running_var presumably start at one, beta (shift) and running_mean at
        # zero; the trailing 0 on gamma/beta is presumably the initial gradient.
        @gamma = Param.new(Xumo::SFloat.ones(*output_shape), 0)
        @beta = Param.new(Xumo::SFloat.zeros(*output_shape), 0)
        @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
        @running_var = Param.new(Xumo::SFloat.ones(*output_shape))
      end

      # Normalizes +x+ and applies the scale and shift.
      #
      # @param [Object] x Input array.
      # @param [Boolean] learning_phase Whether batch statistics (true) or the
      #   running statistics (false) are used.
      # @return [Object] The normalized, scaled and shifted array.
      def forward(x, learning_phase)
        if learning_phase
          mean = x.mean(axis: @axis, keepdims: true)
          @xc = x - mean
          var = (@xc ** 2).mean(axis: @axis, keepdims: true)
          @std = Xumo::NMath.sqrt(var + @eps)
          xn = @xc / @std
          # @xc, @std and @xn are kept for #backward.
          @xn = xn
          # NOTE(review): assignment targets and `.data` accessors restored.
          @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
          @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
        else
          # NOTE(review): operands restored; uses the running statistics.
          xc = x - @running_mean.data
          xn = xc / Xumo::NMath.sqrt(@running_var.data + @eps)
        end
        # NOTE(review): operands restored; scale by gamma, shift by beta.
        @gamma.data * xn + @beta.data
      end

      # Propagates the gradient through the normalization performed in the
      # learning-phase branch of #forward.
      #
      # @param [Object] dy Gradient with respect to the layer output.
      # @return [Object] Gradient with respect to the layer input.
      def backward(dy)
        batch_size = dy.shape[@axis]
        if @trainable
          @beta.grad = dy.sum(axis: @axis, keepdims: true)
          @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true)
        end
        # NOTE(review): left operand restored; gradient w.r.t. the normalized input.
        dxn = @gamma.data * dy
        dxc = dxn / @std
        dstd = -((dxn * @xc) / (@std ** 2)).sum(axis: @axis, keepdims: true)
        dvar = 0.5 * dstd / @std
        dxc += (2.0 / batch_size) * @xc * dvar
        dmean = dxc.sum(axis: @axis, keepdims: true)
        dxc - dmean / batch_size
      end

      # @return [Hash] Result of the superclass #to_hash given this layer's
      #   axis, momentum and eps.
      def to_hash
        super(axis: @axis, momentum: @momentum, eps: @eps)
      end

      # @return [Hash] The layer's parameters keyed by name.
      def get_params
        { gamma: @gamma, beta: @beta, running_mean: @running_mean, running_var: @running_var }
      end
    end
  end
end