lib/dnn/core/model.rb in ruby-dnn-0.9.4 vs lib/dnn/core/model.rb in ruby-dnn-0.10.0

- old
+ new

@@ -20,18 +20,18 @@
     # Load json model.
     # @param [String] json_str json string to load model.
     # @return [DNN::Model]
     def self.load_json(json_str)
       hash = JSON.parse(json_str, symbolize_names: true)
-      model = self.load_hash(hash)
-      model.compile(Utils.load_hash(hash[:optimizer]), Utils.load_hash(hash[:loss]))
+      model = self.from_hash(hash)
+      model.compile(Utils.from_hash(hash[:optimizer]), Utils.from_hash(hash[:loss]))
       model
     end
 
-    def self.load_hash(hash)
+    def self.from_hash(hash)
       model = self.new
-      model.layers = hash[:layers].map { |hash_layer| Utils.load_hash(hash_layer) }
+      model.layers = hash[:layers].map { |hash_layer| Utils.from_hash(hash_layer) }
       model
     end
 
     def initialize
       @layers = []
@@ -95,67 +95,65 @@
     # Add layer to the model.
     # @param [DNN::Layers::Layer] layer Layer to add to the model.
     # @return [DNN::Model] return self.
     def <<(layer)
-      # Due to a bug in saving nested models, temporarily prohibit model nesting.
-      # if !layer.is_a?(Layers::Layer) && !layer.is_a?(Model)
-      #   raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
-      # end
-      unless layer.is_a?(Layers::Layer)
-        raise TypeError.new("layer:#{layer.class.name} is not an instance of the DNN::Layers::Layer class.")
+      if !layer.is_a?(Layers::Layer) && !layer.is_a?(Model)
+        raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
       end
       @layers << layer
       self
     end
 
-    # Set optimizer and loss to model and build all layers.
+    # Set optimizer and loss_func to model and build all layers.
     # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
-    # @param [DNN::Losses::Loss] loss Lptimizer to use for learning.
-    def compile(optimizer, loss)
+    # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+    def compile(optimizer, loss_func)
       raise DNN_Error.new("The model is already compiled.") if compiled?
       unless optimizer.is_a?(Optimizers::Optimizer)
         raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
       end
-      unless loss.is_a?(Losses::Loss)
-        raise TypeError.new("loss:#{loss.class} is not an instance of DNN::Losses::Loss class.")
+      unless loss_func.is_a?(Losses::Loss)
+        raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
       end
       @compiled = true
       layers_check
       @optimizer = optimizer
-      @loss = loss
+      @loss_func = loss_func
       build
       layers_shape_check
     end
 
-    # Set optimizer and loss to model and recompile. But does not build layers.
+    # Set optimizer and loss_func to model and recompile. But does not build layers.
     # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
-    # @param [DNN::Losses::Loss] loss Lptimizer to use for learning.
-    def recompile(optimizer, loss)
+    # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+    def recompile(optimizer, loss_func)
       unless optimizer.is_a?(Optimizers::Optimizer)
         raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
       end
-      unless loss.is_a?(Losses::Loss)
-        raise TypeError.new("loss:#{loss.class} is not an instance of DNN::Losses::Loss class.")
+      unless loss_func.is_a?(Losses::Loss)
+        raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
      end
       @compiled = true
       layers_check
       @optimizer = optimizer
-      @loss = loss
+      @loss_func = loss_func
       layers_shape_check
     end
 
     def build(super_model = nil)
       @super_model = super_model
       shape = if super_model
-        super_model.output_shape
+        super_model.get_prev_layer(self).output_shape
       else
         @layers.first.build
       end
-      @layers[1..-1].each do |layer|
+      layers = super_model ? @layers : @layers[1..-1]
+      layers.each do |layer|
         if layer.is_a?(Model)
           layer.build(self)
+          layer.recompile(@optimizer, @loss_func)
         else
           layer.build(shape)
         end
         shape = layer.output_shape
       end
@@ -172,17 +170,17 @@
     end
 
     # @return [DNN::Optimizers::Optimizer] optimizer Return the optimizer to use for learning.
     def optimizer
       raise DNN_Error.new("The model is not compiled.") unless compiled?
-      @optimizer ? @optimizer : @super_model.optimizer
+      @optimizer
     end
 
     # @return [DNN::Losses::Loss] loss Return the loss to use for learning.
-    def loss
+    def loss_func
       raise DNN_Error.new("The model is not compiled.") unless compiled?
-      @loss ? @loss : @super_model.loss
+      @loss_func
     end
 
     # @return [Bool] Returns whether the model is learning.
     def compiled?
       @compiled
@@ -193,29 +191,35 @@
     # @param [Numo::SFloat] x Input training data.
     # @param [Numo::SFloat] y Output training data.
     # @param [Integer] epochs Number of training.
     # @param [Integer] batch_size Batch size used for one training.
     # @param [Array or NilClass] test If you to test the model for every 1 epoch,
-    #   specify [x_test, y_test]. Don't test to the model, specify nil.
+    #                                 specify [x_test, y_test]. Don't test to the model, specify nil.
     # @param [Bool] verbose Set true to display the log. If false is set, the log is not displayed.
-    # @param [Proc] batch_proc Set proc to process per batch.
-    # @yield [epoch] Process performed before one training.
+    # @param [Lambda] before_epoch_cbk Process performed before one training.
+    # @param [Lambda] after_epoch_cbk Process performed after one training.
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
     def train(x, y, epochs,
               batch_size: 1,
               test: nil,
               verbose: true,
-              batch_proc: nil,
-              &epoch_proc)
+              before_epoch_cbk: nil,
+              after_epoch_cbk: nil,
+              before_batch_cbk: nil,
+              after_batch_cbk: nil)
       raise DNN_Error.new("The model is not compiled.") unless compiled?
       check_xy_type(x, y)
       dataset = Dataset.new(x, y)
       num_train_datas = x.shape[0]
       (1..epochs).each do |epoch|
+        before_epoch_cbk.call(epoch) if before_epoch_cbk
         puts "【 epoch #{epoch}/#{epochs} 】" if verbose
         (num_train_datas.to_f / batch_size).ceil.times do |index|
-          x_batch, y_batch = dataset.get_batch(batch_size)
-          loss_value = train_on_batch(x_batch, y_batch, &batch_proc)
+          x_batch, y_batch = dataset.next_batch(batch_size)
+          loss_value = train_on_batch(x_batch, y_batch,
+                                      before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
          if loss_value.is_a?(Numo::SFloat)
             loss_value = loss_value.mean
           elsif loss_value.nan?
             puts "\nloss is nan" if verbose
             return
@@ -234,67 +238,75 @@
           end
           log << " #{num_trained_datas}/#{num_train_datas} loss: #{sprintf('%.8f', loss_value)}"
           print log if verbose
         end
         if verbose && test
-          acc = accurate(test[0], test[1], batch_size, &batch_proc)
-          print " accurate: #{acc}"
+          acc, test_loss = accurate(test[0], test[1], batch_size,
+                                    before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
+          print " accurate: #{acc}, test loss: #{sprintf('%.8f', test_loss)}"
         end
         puts "" if verbose
-        epoch_proc.call(epoch) if epoch_proc
+        after_epoch_cbk.call(epoch) if after_epoch_cbk
       end
     end
 
     # Training once.
     # Compile the model before use this method.
     # @param [Numo::SFloat] x Input training data.
     # @param [Numo::SFloat] y Output training data.
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
     # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
-    # @yield [x, y] batch_proc Set proc to process per batch.
-    def train_on_batch(x, y, &batch_proc)
+    def train_on_batch(x, y, before_batch_cbk: nil, after_batch_cbk: nil)
       raise DNN_Error.new("The model is not compiled.") unless compiled?
       check_xy_type(x, y)
       input_data_shape_check(x, y)
-      x, y = batch_proc.call(x, y) if batch_proc
-      out = forward(x, true)
-      loss_value = @loss.forward(out, y, get_all_layers)
-      dout = @loss.backward(y)
-      backward(dout)
-      @loss.regularizes_backward(get_all_layers)
+      x, y = before_batch_cbk.call(x, y, true) if before_batch_cbk
+      x = forward(x, true)
+      loss_value = @loss_func.forward(x, y, get_all_layers)
+      dy = @loss_func.backward(y, get_all_layers)
+      backward(dy)
       update
+      after_batch_cbk.call(loss_value, true) if after_batch_cbk
      loss_value
     end
 
     # Evaluate model and get accurate of test data.
     # @param [Numo::SFloat] x Input test data.
     # @param [Numo::SFloat] y Output test data.
-    # @yield [x, y] batch_proc Set proc to process per batch.
-    def accurate(x, y, batch_size = 100, &batch_proc)
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
+    # @return [Array] Returns the test data accurate and mean loss in the form [accurate, mean_loss].
+    def accurate(x, y, batch_size = 100, before_batch_cbk: nil, after_batch_cbk: nil)
       check_xy_type(x, y)
       input_data_shape_check(x, y)
       batch_size = batch_size >= x.shape[0] ? x.shape[0] : batch_size
+      dataset = Dataset.new(x, y, false)
       correct = 0
+      sum_loss = 0
       (x.shape[0].to_f / batch_size).ceil.times do |i|
-        x_batch = Xumo::SFloat.zeros(batch_size, *x.shape[1..-1])
-        y_batch = Xumo::SFloat.zeros(batch_size, *y.shape[1..-1])
+        x_batch, y_batch = dataset.next_batch(batch_size)
+        x_batch, y_batch = before_batch_cbk.call(x_batch, y_batch, true) if before_batch_cbk
+        x_batch = forward(x_batch, false)
+        sigmoid = Sigmoid.new
         batch_size.times do |j|
-          k = i * batch_size + j
-          break if k >= x.shape[0]
-          x_batch[j, false] = x[k, false]
-          y_batch[j, false] = y[k, false]
-        end
-        x_batch, y_batch = batch_proc.call(x_batch, y_batch) if batch_proc
-        out = forward(x_batch, false)
-        batch_size.times do |j|
           if @layers.last.output_shape == [1]
-            correct += 1 if out[j, 0].round == y_batch[j, 0].round
+            if @loss_func.is_a?(SigmoidCrossEntropy)
+              correct += 1 if sigmoid.forward(x_batch[j, 0]).round == y_batch[j, 0].round
+            else
+              correct += 1 if x_batch[j, 0].round == y_batch[j, 0].round
+            end
           else
-            correct += 1 if out[j, true].max_index == y_batch[j, true].max_index
+            correct += 1 if x_batch[j, true].max_index == y_batch[j, true].max_index
           end
         end
+        loss_value = @loss_func.forward(x_batch, y_batch, get_all_layers)
+        after_batch_cbk.call(loss_value, false) if after_batch_cbk
+        sum_loss += loss_value.is_a?(Numo::SFloat) ? loss_value.mean : loss_value
       end
-      correct.to_f / x.shape[0]
+      mean_loss = sum_loss / batch_size
+      [correct.to_f / x.shape[0], mean_loss]
     end
 
     # Predict data.
     # @param [Numo::SFloat] x Input data.
     def predict(x)
@@ -308,10 +320,21 @@
     def predict1(x)
       check_xy_type(x)
       predict(x.reshape(1, *x.shape))[0, false]
     end
 
+    # Get loss value.
+    # @param [Numo::SFloat] x Input data.
+    # @param [Numo::SFloat] y Output data.
+    # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
+    def loss(x, y)
+      check_xy_type(x, y)
+      input_data_shape_check(x, y)
+      x = forward(x, false)
+      @loss_func.forward(x, y, get_all_layers)
+    end
+
     # @return [DNN::Model] Copy this model.
     def copy
       Marshal.load(Marshal.dump(self))
     end
 
@@ -332,39 +355,39 @@
       @layers.map { |layer|
         layer.is_a?(Model) ? layer.get_all_layers : layer
       }.flatten
     end
 
-    # TODO
-    # It is not good to write the Layer class name directly in the Model class. I will fix it later.
     def forward(x, learning_phase)
       @layers.each do |layer|
-        x = if layer.is_a?(Layers::Dropout) || layer.is_a?(Layers::BatchNormalization) || layer.is_a?(Model)
+        x = if layer.is_a?(Model)
          layer.forward(x, learning_phase)
         else
+          layer.learning_phase = learning_phase
          layer.forward(x)
         end
       end
       x
     end
 
-    def backward(dout)
+    def backward(dy)
       @layers.reverse.each do |layer|
-        dout = layer.backward(dout)
+        dy = layer.backward(dy)
       end
-      dout
+      dy
    end
 
     def update
       return unless @trainable
-      @layers.each do |layer|
-        if layer.is_a?(Layers::HasParamLayer)
-          layer.update(@optimizer)
-        elsif layer.is_a?(Model)
-          layer.update
+      all_trainable_layers = @layers.map { |layer|
+        if layer.is_a?(Model)
+          layer.trainable ? layer.get_all_layers : nil
+        else
+          layer
        end
-      end
+      }.flatten.compact.uniq
+      @optimizer.update(all_trainable_layers)
    end
 
     def get_prev_layer(layer)
       layer_index = @layers.index(layer)
       prev_layer = if layer_index == 0
@@ -383,16 +406,16 @@
       end
     end
 
     def to_hash
       hash_layers = @layers.map { |layer| layer.to_hash }
-      {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @loss.to_hash}
+      {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @loss_func.to_hash}
     end
 
     private
 
     def layers_check
-      unless @layers.first.is_a?(Layers::InputLayer)
+      if !@layers.first.is_a?(Layers::InputLayer) && !@super_model
        raise TypeError.new("The first layer is not an InputLayer.")
       end
     end
 
     def input_data_shape_check(x, y = nil)
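
For context, here is a minimal usage sketch of the 0.10.0 API as changed above. Only compile(optimizer, loss_func), the *_cbk keyword arguments of train, the [accurate, mean_loss] return value of accurate, and the new loss(x, y) method come from this diff; the layer stack, optimizer choice, and the training arrays are hypothetical placeholders, not part of the release.

    require "dnn"

    include DNN::Layers
    include DNN::Activations
    include DNN::Optimizers
    include DNN::Losses

    # Placeholder data: assumed to be Numo::SFloat arrays prepared elsewhere.
    # x_train, y_train, x_test, y_test = ...

    model = DNN::Model.new
    model << InputLayer.new(784)
    model << Dense.new(64)
    model << ReLU.new
    model << Dense.new(10)

    # 0.10.0: the second compile argument is now named loss_func.
    model.compile(SGD.new, SoftmaxCrossEntropy.new)

    # 0.10.0: batch_proc / &epoch_proc are replaced by explicit callback lambdas.
    model.train(x_train, y_train, 10,
                batch_size: 128,
                test: [x_test, y_test],
                before_epoch_cbk: ->(epoch) { puts "starting epoch #{epoch}" },
                after_batch_cbk: ->(loss, train) { puts "batch loss: #{loss}" })

    # 0.10.0: accurate returns [accurate, mean_loss] instead of a single Float.
    acc, mean_loss = model.accurate(x_test, y_test)

    # 0.10.0: loss(x, y) computes the loss without updating the parameters.
    test_loss = model.loss(x_test, y_test)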