module DNN
  module Layers
    # Superclass of all layer classes.
    class Layer
      include Xumo

      def initialize
        @built = false
      end

      # Build the layer.
      def build(model)
        @built = true
        @model = model
      end

      # Has the layer already been built?
      def built?
        @built
      end

      # Forward propagation.
      def forward() end

      # Backward propagation.
      def backward() end

      # Get the output shape of the layer.
      def shape
        prev_layer.shape
      end

      # Convert the layer to a hash.
      def to_hash(hash)
        {name: self.class.name}.merge(hash)
      end

      # Get the previous layer.
      def prev_layer
        @model.layers[@model.layers.index(self) - 1]
      end
    end


    # Superclass of all layer classes that have learnable parameters.
    class HasParamLayer < Layer
      attr_reader :params # The parameters of the layer.
      attr_reader :grads  # The gradients of the layer's parameters.

      def initialize
        super
        @params = {}
        @grads = {}
      end

      def build(model)
        super
        init_params
      end

      # Update the parameters.
      def update
        @model.optimizer.update(self)
      end

      private

      # Initialize the parameters.
      def init_params() end
    end


    # Input layer. Passes its input through unchanged.
    class InputLayer < Layer
      attr_reader :shape

      def self.load_hash(hash)
        self.new(hash[:shape])
      end

      def initialize(dim_or_shape)
        super()
        @shape = dim_or_shape.is_a?(Array) ? dim_or_shape : [dim_or_shape]
      end

      def forward(x)
        x
      end

      def backward(dout)
        dout
      end

      def to_hash
        super({shape: @shape})
      end
    end


    # Fully connected layer.
    class Dense < HasParamLayer
      include Initializers

      attr_reader :num_nodes
      attr_reader :weight_decay

      def self.load_hash(hash)
        self.new(hash[:num_nodes],
                 weight_initializer: Util.load_hash(hash[:weight_initializer]),
                 bias_initializer: Util.load_hash(hash[:bias_initializer]),
                 weight_decay: hash[:weight_decay])
      end

      def initialize(num_nodes,
                     weight_initializer: nil,
                     bias_initializer: nil,
                     weight_decay: 0)
        super()
        @num_nodes = num_nodes
        @weight_initializer = (weight_initializer || RandomNormal.new)
        @bias_initializer = (bias_initializer || Zeros.new)
        @weight_decay = weight_decay
      end

      def forward(x)
        @x = x
        @x.dot(@params[:weight]) + @params[:bias]
      end

      def backward(dout)
        @grads[:weight] = @x.transpose.dot(dout)
        if @weight_decay > 0
          dridge = @weight_decay * @params[:weight]
          @grads[:weight] += dridge
        end
        @grads[:bias] = dout.sum(0)
        dout.dot(@params[:weight].transpose)
      end

      def shape
        [@num_nodes]
      end

      def to_hash
        super({num_nodes: @num_nodes,
               weight_initializer: @weight_initializer.to_hash,
               bias_initializer: @bias_initializer.to_hash,
               weight_decay: @weight_decay})
      end

      private

      def init_params
        num_prev_nodes = prev_layer.shape[0]
        @params[:weight] = SFloat.new(num_prev_nodes, @num_nodes)
        @params[:bias] = SFloat.new(@num_nodes)
        @weight_initializer.init_param(self, :weight)
        @bias_initializer.init_param(self, :bias)
      end
    end


    # Flattens the input to (batch_size, num_features).
    class Flatten < Layer
      def forward(x)
        @shape = x.shape
        x.reshape(x.shape[0], x.shape[1..-1].reduce(:*))
      end

      def backward(dout)
        dout.reshape(*@shape)
      end

      def shape
        [prev_layer.shape.reduce(:*)]
      end
    end


    # Reshapes the input to the given shape.
    class Reshape < Layer
      attr_reader :shape

      def initialize(shape)
        super()
        @shape = shape
        @x_shape = nil
      end

      def self.load_hash(hash)
        self.new(hash[:shape])
      end

      def forward(x)
        @x_shape = x.shape
        x.reshape(*@shape)
      end

      def backward(dout)
        dout.reshape(*@x_shape)
      end

      def to_hash
        super({shape: @shape})
      end
    end


    # Superclass of output layers. Provides the ridge (L2 weight decay) penalty term.
    class OutputLayer < Layer
      private

      def ridge
        0.5 * @model.layers.select { |layer| layer.respond_to?(:weight_decay) }
                           .reduce(0) { |sum, layer| sum + layer.weight_decay * (layer.params[:weight]**2).sum }
      end
    end


    # Randomly zeroes elements of the input during training.
    class Dropout < Layer
      attr_reader :dropout_ratio

      def initialize(dropout_ratio)
        super()
        @dropout_ratio = dropout_ratio
        @mask = nil
      end

      def self.load_hash(hash)
        self.new(hash[:dropout_ratio])
      end
      def forward(x)
        if @model.training?
          @mask = SFloat.ones(*x.shape).rand < @dropout_ratio
          x[@mask] = 0
        else
          x *= (1 - @dropout_ratio)
        end
        x
      end

      def backward(dout)
        dout[@mask] = 0 if @model.training?
        dout
      end

      def to_hash
        super({dropout_ratio: @dropout_ratio})
      end
    end


    # Normalizes the input over the batch dimension, tracking running
    # statistics for use at inference time.
    class BatchNormalization < HasParamLayer
      attr_reader :momentum

      def initialize(momentum: 0.9, running_mean: nil, running_var: nil)
        super()
        @momentum = momentum
        @running_mean = running_mean
        @running_var = running_var
      end

      def self.load_hash(hash)
        running_mean = SFloat.cast(hash[:running_mean])
        running_var = SFloat.cast(hash[:running_var])
        self.new(momentum: hash[:momentum],
                 running_mean: running_mean,
                 running_var: running_var)
      end

      def build(model)
        super
        @running_mean ||= SFloat.zeros(*shape)
        @running_var ||= SFloat.zeros(*shape)
      end

      def forward(x)
        if @model.training?
          mean = x.mean(0)
          @xc = x - mean
          var = (@xc**2).mean(0)
          @std = NMath.sqrt(var + 1e-7)
          xn = @xc / @std
          @xn = xn
          @running_mean = @momentum * @running_mean + (1 - @momentum) * mean
          @running_var = @momentum * @running_var + (1 - @momentum) * var
        else
          xc = x - @running_mean
          xn = xc / NMath.sqrt(@running_var + 1e-7)
        end
        @params[:gamma] * xn + @params[:beta]
      end

      def backward(dout)
        batch_size = dout.shape[0]
        @grads[:beta] = dout.sum(0)
        @grads[:gamma] = (@xn * dout).sum(0)
        dxn = @params[:gamma] * dout
        dxc = dxn / @std
        dstd = -((dxn * @xc) / (@std**2)).sum(0)
        dvar = 0.5 * dstd / @std
        dxc += (2.0 / batch_size) * @xc * dvar
        dmean = dxc.sum(0)
        dxc - dmean / batch_size
      end

      def to_hash
        super({momentum: @momentum,
               running_mean: @running_mean.to_a,
               running_var: @running_var.to_a})
      end

      private

      def init_params
        @params[:gamma] = SFloat.ones(*shape)
        @params[:beta] = SFloat.zeros(*shape)
      end
    end
  end
end
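
# Usage sketch (illustrative only). These layer classes expect to be driven by
# a model object that responds to #layers, #optimizer, and #training?, and they
# depend on the Xumo, Initializers, and Util modules defined elsewhere in this
# library. The DNN::Model, #<<, #compile, and DNN::Optimizers::SGD names below
# are assumptions about that surrounding code, not part of this file:
#
#   model = DNN::Model.new
#   model << DNN::Layers::InputLayer.new(784)
#   model << DNN::Layers::Dense.new(256)
#   model << DNN::Layers::BatchNormalization.new
#   model << DNN::Layers::Dropout.new(0.5)
#   model << DNN::Layers::Dense.new(10)
#   model.compile(DNN::Optimizers::SGD.new)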