lib/dnn/core/layers.rb in ruby-dnn-0.9.4 vs lib/dnn/core/layers.rb in ruby-dnn-0.10.0

- old (removed, present only in ruby-dnn 0.9.4)
+ new (added in ruby-dnn 0.10.0)

Short usage notes on the main API changes follow the diff.

@@ -1,37 +1,46 @@
 module DNN
   module Layers

     # Super class of all optimizer classes.
     class Layer
+      # @return [Bool] learning_phase Return the true if learning.
+      attr_accessor :learning_phase
+      # @return [Array] Return the shape of the input data.
       attr_reader :input_shape

       def initialize
         @built = false
       end

       # Build the layer.
+      # @param [Array] input_shape Setting the shape of the input data.
       def build(input_shape)
         @input_shape = input_shape
+        @learning_phase = true
         @built = true
       end

       # Does the layer have already been built?
+      # @return [Bool] If layer have already been built then return true.
       def built?
         @built
       end

       # Forward propagation.
       def forward(x)
         raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'forward'")
       end

       # Backward propagation.
-      def backward(dout)
-        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'update'")
+      def backward(dy)
+        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'backward'")
       end

+      # Please reimplement this method as needed.
+      # The default implementation return input_shape.
+      # @return [Array] Return the shape of the output data.
       def output_shape
         @input_shape
       end

       # Layer to a hash.
@@ -53,35 +62,15 @@
       def initialize
         super()
         @params = {}
         @trainable = true
       end
-
-      def build(input_shape)
-        @input_shape = input_shape
-        unless @built
-          @built = true
-          init_params
-        end
-      end
-
-      # Update the parameters.
-      def update(optimizer)
-        optimizer.update(@params) if @trainable
-      end
-
-      private
-
-      # Initialize of the parameters.
-      def init_params
-        raise NotImplementedError.new("Class '#{self.class.name}' has implement method 'init_params'")
-      end
     end

     class InputLayer < Layer
-      def self.load_hash(hash)
+      def self.from_hash(hash)
        self.new(hash[:input_shape])
       end

       def initialize(input_dim_or_shape)
         super()
@@ -95,60 +84,58 @@
       def forward(x)
         x
       end

-      def backward(dout)
-        dout
+      def backward(dy)
+        dy
       end

       def to_hash
         super({input_shape: @input_shape})
       end
     end

     # It is a superclass of all connection layers.
     class Connection < HasParamLayer
-      # @return [DNN::Initializers] weight initializer.
+      # @return [DNN::Initializers::Initializer] Weight initializer.
       attr_reader :weight_initializer
-      # @return [DNN::Initializers] bias initializer.
+      # @return [DNN::Initializers::Initializer] Bias initializer.
       attr_reader :bias_initializer
-      # @return [Float] L1 regularization.
-      attr_reader :l1_lambda
-      # @return [Float] L2 regularization.
-      attr_reader :l2_lambda
+      # @return [DNN::Regularizers::Regularizer] Weight regularization.
+      attr_reader :weight_regularizer
+      # @return [DNN::Regularizers::Regularizer] Bias regularization.
+      attr_reader :bias_regularizer

-      # @param [DNN::Initializers] weight_initializer weight initializer.
-      # @param [DNN::Initializers] bias_initializer bias initializer.
-      # @param [Float] l1_lambda L1 regularization
-      # @param [Float] l2_lambda L2 regularization
+      # @param [DNN::Initializers::Initializer] weight_initializer Weight initializer.
+      # @param [DNN::Initializers::Initializer] bias_initializer Bias initializer.
+      # @param [DNN::Regularizers::Regularizer] weight_regularizer Weight regularization.
+      # @param [DNN::Regularizers::Regularizer] bias_regularizer Bias regularization.
       # @param [Bool] use_bias whether to use bias.
       def initialize(weight_initializer: Initializers::RandomNormal.new,
                      bias_initializer: Initializers::Zeros.new,
-                     l1_lambda: 0,
-                     l2_lambda: 0,
+                     weight_regularizer: nil,
+                     bias_regularizer: nil,
                      use_bias: true)
         super()
         @weight_initializer = weight_initializer
         @bias_initializer = bias_initializer
-        @l1_lambda = l1_lambda
-        @l2_lambda = l2_lambda
-        @params[:weight] = @weight = Param.new
-        # For compatibility on or before with v0.9.3, setting use_bias to nil use bias.
-        # Therefore, setting use_bias to nil is deprecated.
-        if use_bias || use_bias == nil
-          @params[:bias] = @bias = Param.new
+        @weight_regularizer = weight_regularizer
+        @bias_regularizer = bias_regularizer
+        @params[:weight] = @weight = Param.new(nil, 0)
+        if use_bias
+          @params[:bias] = @bias = Param.new(nil, 0)
         else
-          @params[:bias] = @bias = nil
+          @bias = nil
         end
       end

       def regularizers
         regularizers = []
-        regularizers << Lasso.new(@l1_lambda, @weight) if @l1_lambda > 0
-        regularizers << Ridge.new(@l2_lambda, @weight) if @l2_lambda > 0
+        regularizers << @weight_regularizer if @weight_regularizer
+        regularizers << @bias_regularizer if @bias_regularizer
         regularizers
       end

       # @return [Bool] Return whether to use bias.
       def use_bias
@@ -156,100 +143,102 @@
       end

       def to_hash(merge_hash)
         super({weight_initializer: @weight_initializer.to_hash,
                bias_initializer: @bias_initializer.to_hash,
-               l1_lambda: @l1_lambda,
-               l2_lambda: @l2_lambda}.merge(merge_hash))
+               weight_regularizer: @weight_regularizer&.to_hash,
+               bias_regularizer: @bias_regularizer&.to_hash,
+               use_bias: use_bias}.merge(merge_hash))
       end

-      private
-
-      def init_params
+      private def init_weight_and_bias
         @weight_initializer.init_param(self, @weight)
-        @bias_initializer.init_param(self, @bias) if @bias
+        @weight_regularizer.param = @weight if @weight_regularizer
+        if @bias
+          @bias_initializer.init_param(self, @bias)
+          @bias_regularizer.param = @bias if @bias_regularizer
+        end
       end
     end

     # Full connnection layer.
     class Dense < Connection
       # @return [Integer] number of nodes.
       attr_reader :num_nodes

-      def self.load_hash(hash)
+      def self.from_hash(hash)
         self.new(hash[:num_nodes],
-                 weight_initializer: Utils.load_hash(hash[:weight_initializer]),
-                 bias_initializer: Utils.load_hash(hash[:bias_initializer]),
-                 l1_lambda: hash[:l1_lambda],
-                 l2_lambda: hash[:l2_lambda],
+                 weight_initializer: Utils.from_hash(hash[:weight_initializer]),
+                 bias_initializer: Utils.from_hash(hash[:bias_initializer]),
+                 weight_regularizer: Utils.from_hash(hash[:weight_regularizer]),
+                 bias_regularizer: Utils.from_hash(hash[:bias_regularizer]),
                  use_bias: hash[:use_bias])
       end

       # @param [Integer] num_nodes number of nodes.
       def initialize(num_nodes,
                      weight_initializer: Initializers::RandomNormal.new,
                      bias_initializer: Initializers::Zeros.new,
-                     l1_lambda: 0,
-                     l2_lambda: 0,
+                     weight_regularizer: nil,
+                     bias_regularizer: nil,
                      use_bias: true)
         super(weight_initializer: weight_initializer, bias_initializer: bias_initializer,
-              l1_lambda: l1_lambda, l2_lambda: l2_lambda, use_bias: use_bias)
+              weight_regularizer: weight_regularizer, bias_regularizer: bias_regularizer, use_bias: use_bias)
         @num_nodes = num_nodes
       end
-
+
+      def build(input_shape)
+        super
+        num_prev_nodes = input_shape[0]
+        @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
+        @bias.data = Xumo::SFloat.new(@num_nodes) if @bias
+        init_weight_and_bias
+      end
+
       def forward(x)
         @x = x
-        out = x.dot(@weight.data)
-        out += @bias.data if @bias
-        out
+        y = x.dot(@weight.data)
+        y += @bias.data if @bias
+        y
       end

-      def backward(dout)
-        @weight.grad = @x.transpose.dot(dout)
-        @bias.grad = dout.sum(0) if @bias
-        dout.dot(@weight.data.transpose)
+      def backward(dy)
+        if @trainable
+          @weight.grad += @x.transpose.dot(dy)
+          @bias.grad += dy.sum(0) if @bias
+        end
+        dy.dot(@weight.data.transpose)
       end

       def output_shape
         [@num_nodes]
       end

       def to_hash
         super({num_nodes: @num_nodes})
       end
-
-      private
-
-      # TODO
-      # Change writing super() other than the first.
-      def init_params
-        num_prev_nodes = @input_shape[0]
-        @weight.data = Xumo::SFloat.new(num_prev_nodes, @num_nodes)
-        @bias.data = Xumo::SFloat.new(@num_nodes) if @bias
-        super()
-      end
     end

     class Flatten < Layer
       def forward(x)
         x.reshape(x.shape[0], *output_shape)
       end

-      def backward(dout)
-        dout.reshape(dout.shape[0], *@input_shape)
+      def backward(dy)
+        dy.reshape(dy.shape[0], *@input_shape)
       end

       def output_shape
         [@input_shape.reduce(:*)]
       end
     end

     class Reshape < Layer
-      def self.load_hash(hash)
+      def self.from_hash(hash)
        self.new(hash[:output_shape])
       end

       def initialize(output_shape)
         super()
@@ -258,12 +247,12 @@

       def forward(x)
         x.reshape(x.shape[0], *@output_shape)
       end

-      def backward(dout)
-        dout.reshape(dout.shape[0], *@input_shape)
+      def backward(dy)
+        dy.reshape(dy.shape[0], *@input_shape)
       end

       def output_shape
         @output_shape
       end
@@ -274,15 +263,15 @@
     end

     class Dropout < Layer
       # @return [Float] dropout ratio.
-      attr_reader :dropout_ratio
+      attr_accessor :dropout_ratio
       # @return [Float] Use 'weight scaling inference rule'.
       attr_reader :use_scale

-      def self.load_hash(hash)
+      def self.from_hash(hash)
         self.new(hash[:dropout_ratio], seed: hash[:seed], use_scale: hash[:use_scale])
       end

       def initialize(dropout_ratio = 0.5, seed: rand(1 << 31), use_scale: true)
         super()
@@ -290,87 +279,29 @@
         @seed = seed
         @use_scale = use_scale
         @mask = nil
       end

-      def forward(x, learning_phase)
+      def forward(x)
         if learning_phase
           Xumo::SFloat.srand(@seed)
           @mask = Xumo::SFloat.ones(*x.shape).rand < @dropout_ratio
           x[@mask] = 0
         else
           x *= (1 - @dropout_ratio) if @use_scale
         end
         x
       end

-      def backward(dout)
-        dout[@mask] = 0
-        dout
+      def backward(dy)
+        dy[@mask] = 0
+        dy
       end

       def to_hash
         super({dropout_ratio: @dropout_ratio, seed: @seed, use_scale: @use_scale})
       end
     end
-
-    class BatchNormalization < HasParamLayer
-      # @return [Float] Exponential moving average of mean and variance.
-      attr_reader :momentum
-
-      def self.load_hash(hash)
-        self.new(momentum: hash[:momentum])
-      end
-
-      # @param [Float] momentum Exponential moving average of mean and variance.
-      def initialize(momentum: 0.9)
-        super()
-        @momentum = momentum
-      end
-
-      def forward(x, learning_phase)
-        if learning_phase
-          mean = x.mean(0)
-          @xc = x - mean
-          var = (@xc**2).mean(0)
-          @std = NMath.sqrt(var + 1e-7)
-          xn = @xc / @std
-          @xn = xn
-          @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
-          @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
-        else
-          xc = x - @running_mean.data
-          xn = xc / NMath.sqrt(@running_var.data + 1e-7)
-        end
-        @gamma.data * xn + @beta.data
-      end
-
-      def backward(dout)
-        batch_size = dout.shape[0]
-        @beta.grad = dout.sum(0)
-        @gamma.grad = (@xn * dout).sum(0)
-        dxn = @gamma.data * dout
-        dxc = dxn / @std
-        dstd = -((dxn * @xc) / (@std**2)).sum(0)
-        dvar = 0.5 * dstd / @std
-        dxc += (2.0 / batch_size) * @xc * dvar
-        dmean = dxc.sum(0)
-        dxc - dmean / batch_size
-      end
-
-      def to_hash
-        super({momentum: @momentum})
-      end
-
-      private
-
-      def init_params
-        @params[:gamma] = @gamma = Param.new(Xumo::SFloat.ones(*output_shape))
-        @params[:beta] = @beta = Param.new(Xumo::SFloat.zeros(*output_shape))
-        @params[:running_mean] = @running_mean = Param.new(Xumo::SFloat.zeros(*output_shape))
-        @params[:running_var] = @running_var = Param.new(Xumo::SFloat.zeros(*output_shape))
-      end
-    end
   end
 end
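
Usage notes (hedged sketches against the interfaces shown in the diff above).

First, the backward argument was renamed from dout to dy across every layer, and Layer#backward's NotImplementedError message was corrected. A user-defined layer written against 0.10.0 therefore looks like the sketch below; the pass-through math is illustrative only and not part of the gem.

require "dnn"

# Minimal custom layer against the 0.10.0 Layer interface shown above.
class PassThrough < DNN::Layers::Layer
  def forward(x)
    x              # forward still receives the batch unchanged
  end

  def backward(dy) # 0.9.4 named this argument dout
    dy
  end
  # output_shape defaults to input_shape, as documented above.
end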
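
Second, Connection and Dense no longer accept l1_lambda:/l2_lambda: floats; they take regularizer objects via weight_regularizer:/bias_regularizer:, and load_hash has been renamed to from_hash for serialization. A before/after construction sketch; the concrete regularizer class names below are an assumption, since only the DNN::Regularizers::Regularizer type appears in this diff.

# ruby-dnn 0.9.4: penalty strengths were plain floats.
old_dense = DNN::Layers::Dense.new(128, l1_lambda: 0.01, l2_lambda: 0.01)

# ruby-dnn 0.10.0: pass regularizer objects instead.
# NOTE: Regularizers::L1 / Regularizers::L2 are assumed names, not shown in this diff.
new_dense = DNN::Layers::Dense.new(
  128,
  weight_regularizer: DNN::Regularizers::L1.new(0.01),
  bias_regularizer: DNN::Regularizers::L2.new(0.01)
)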
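
Third, Dense now allocates its parameters in build (replacing the removed init_params hook) and accumulates gradients with += under the @trainable flag, so a layer built on [n]-shaped input holds an n x num_nodes weight and a num_nodes-length bias. A sketch that drives a single layer by hand, outside a model, purely to show the shapes; Xumo appears in the diff as the gem's array backend, and this sketch assumes the default Numo backend and uses Numo::SFloat directly.

require "dnn"
require "numo/narray"

dense = DNN::Layers::Dense.new(3)
dense.build([5])                  # weight becomes 5x3, bias becomes length 3

x = Numo::SFloat.new(2, 5).rand   # batch of 2 samples, 5 features each
y = dense.forward(x)              # result has shape [2, 3]
p dense.output_shape              # => [3]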
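
Finally, forward no longer receives a learning_phase argument (Dropout and the BatchNormalization class removed from this file both took one in 0.9.4); the flag is now a learning_phase accessor on every layer, set to true by Layer#build. A sketch of toggling it by hand on a single Dropout layer; in normal use the model is expected to manage this flag, and that model-level API is not part of this diff.

require "dnn"
require "numo/narray"

dropout = DNN::Layers::Dropout.new(0.5, seed: 42)
dropout.build([10])                                  # build sets learning_phase = true

train_y = dropout.forward(Numo::SFloat.ones(4, 10))  # roughly half the units zeroed

dropout.learning_phase = false                       # switch to inference
test_y = dropout.forward(Numo::SFloat.ones(4, 10))   # scaled by (1 - dropout_ratio)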