lib/dnn/core/model.rb in ruby-dnn-0.9.4 vs lib/dnn/core/model.rb in ruby-dnn-0.10.0

- old
+ new

@@ -20,18 +20,18 @@
     # Load json model.
     # @param [String] json_str json string to load model.
     # @return [DNN::Model]
     def self.load_json(json_str)
       hash = JSON.parse(json_str, symbolize_names: true)
-      model = self.load_hash(hash)
-      model.compile(Utils.load_hash(hash[:optimizer]), Utils.load_hash(hash[:loss]))
+      model = self.from_hash(hash)
+      model.compile(Utils.from_hash(hash[:optimizer]), Utils.from_hash(hash[:loss]))
       model
     end
 
-    def self.load_hash(hash)
+    def self.from_hash(hash)
       model = self.new
-      model.layers = hash[:layers].map { |hash_layer| Utils.load_hash(hash_layer) }
+      model.layers = hash[:layers].map { |hash_layer| Utils.from_hash(hash_layer) }
       model
     end
 
     def initialize
       @layers = []
@@ -95,67 +95,65 @@
     # Add layer to the model.
     # @param [DNN::Layers::Layer] layer Layer to add to the model.
     # @return [DNN::Model] return self.
     def <<(layer)
-      # Due to a bug in saving nested models, temporarily prohibit model nesting.
-      # if !layer.is_a?(Layers::Layer) && !layer.is_a?(Model)
-      #   raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
-      # end
-      unless layer.is_a?(Layers::Layer)
-        raise TypeError.new("layer:#{layer.class.name} is not an instance of the DNN::Layers::Layer class.")
+      if !layer.is_a?(Layers::Layer) && !layer.is_a?(Model)
+        raise TypeError.new("layer is not an instance of the DNN::Layers::Layer class or DNN::Model class.")
       end
       @layers << layer
       self
     end
 
-    # Set optimizer and loss to model and build all layers.
+    # Set optimizer and loss_func to model and build all layers.
     # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
-    # @param [DNN::Losses::Loss] loss Lptimizer to use for learning.
-    def compile(optimizer, loss)
+    # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+    def compile(optimizer, loss_func)
       raise DNN_Error.new("The model is already compiled.") if compiled?
       unless optimizer.is_a?(Optimizers::Optimizer)
         raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
       end
-      unless loss.is_a?(Losses::Loss)
-        raise TypeError.new("loss:#{loss.class} is not an instance of DNN::Losses::Loss class.")
+      unless loss_func.is_a?(Losses::Loss)
+        raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
       end
       @compiled = true
       layers_check
       @optimizer = optimizer
-      @loss = loss
+      @loss_func = loss_func
       build
       layers_shape_check
     end
 
-    # Set optimizer and loss to model and recompile. But does not build layers.
+    # Set optimizer and loss_func to model and recompile. But does not build layers.
     # @param [DNN::Optimizers::Optimizer] optimizer Optimizer to use for learning.
-    # @param [DNN::Losses::Loss] loss Lptimizer to use for learning.
-    def recompile(optimizer, loss)
+    # @param [DNN::Losses::Loss] loss_func Loss function to use for learning.
+    def recompile(optimizer, loss_func)
       unless optimizer.is_a?(Optimizers::Optimizer)
         raise TypeError.new("optimizer:#{optimizer.class} is not an instance of DNN::Optimizers::Optimizer class.")
       end
-      unless loss.is_a?(Losses::Loss)
-        raise TypeError.new("loss:#{loss.class} is not an instance of DNN::Losses::Loss class.")
+      unless loss_func.is_a?(Losses::Loss)
+        raise TypeError.new("loss_func:#{loss_func.class} is not an instance of DNN::Losses::Loss class.")
      end
       @compiled = true
       layers_check
       @optimizer = optimizer
-      @loss = loss
+      @loss_func = loss_func
       layers_shape_check
     end
 
     def build(super_model = nil)
       @super_model = super_model
       shape = if super_model
-        super_model.output_shape
+        super_model.get_prev_layer(self).output_shape
       else
         @layers.first.build
       end
-      @layers[1..-1].each do |layer|
+      layers = super_model ? @layers : @layers[1..-1]
+      layers.each do |layer|
         if layer.is_a?(Model)
           layer.build(self)
+          layer.recompile(@optimizer, @loss_func)
         else
           layer.build(shape)
         end
         shape = layer.output_shape
       end
@@ -172,17 +170,17 @@
     end
 
     # @return [DNN::Optimizers::Optimizer] optimizer Return the optimizer to use for learning.
     def optimizer
       raise DNN_Error.new("The model is not compiled.") unless compiled?
-      @optimizer ? @optimizer : @super_model.optimizer
+      @optimizer
     end
 
     # @return [DNN::Losses::Loss] loss Return the loss to use for learning.
-    def loss
+    def loss_func
       raise DNN_Error.new("The model is not compiled.") unless compiled?
-      @loss ? @loss : @super_model.loss
+      @loss_func
     end
 
     # @return [Bool] Returns whether the model is learning.
     def compiled?
       @compiled
@@ -193,29 +191,35 @@
     # @param [Numo::SFloat] x Input training data.
     # @param [Numo::SFloat] y Output training data.
     # @param [Integer] epochs Number of training.
     # @param [Integer] batch_size Batch size used for one training.
     # @param [Array or NilClass] test If you to test the model for every 1 epoch,
-    #   specify [x_test, y_test]. Don't test to the model, specify nil.
+    #                                 specify [x_test, y_test]. Don't test to the model, specify nil.
     # @param [Bool] verbose Set true to display the log. If false is set, the log is not displayed.
-    # @param [Proc] batch_proc Set proc to process per batch.
-    # @yield [epoch] Process performed before one training.
+    # @param [Lambda] before_epoch_cbk Process performed before one training.
+    # @param [Lambda] after_epoch_cbk Process performed after one training.
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
     def train(x, y, epochs,
               batch_size: 1,
               test: nil,
               verbose: true,
-              batch_proc: nil,
-              &epoch_proc)
+              before_epoch_cbk: nil,
+              after_epoch_cbk: nil,
+              before_batch_cbk: nil,
+              after_batch_cbk: nil)
       raise DNN_Error.new("The model is not compiled.") unless compiled?
       check_xy_type(x, y)
       dataset = Dataset.new(x, y)
       num_train_datas = x.shape[0]
       (1..epochs).each do |epoch|
+        before_epoch_cbk.call(epoch) if before_epoch_cbk
         puts "【 epoch #{epoch}/#{epochs} 】" if verbose
         (num_train_datas.to_f / batch_size).ceil.times do |index|
-          x_batch, y_batch = dataset.get_batch(batch_size)
-          loss_value = train_on_batch(x_batch, y_batch, &batch_proc)
+          x_batch, y_batch = dataset.next_batch(batch_size)
+          loss_value = train_on_batch(x_batch, y_batch,
+                                      before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
          if loss_value.is_a?(Numo::SFloat)
             loss_value = loss_value.mean
           elsif loss_value.nan?
             puts "\nloss is nan" if verbose
             return
@@ -234,67 +238,75 @@
           end
           log << " #{num_trained_datas}/#{num_train_datas} loss: #{sprintf('%.8f', loss_value)}"
           print log if verbose
         end
         if verbose && test
-          acc = accurate(test[0], test[1], batch_size, &batch_proc)
-          print " accurate: #{acc}"
+          acc, test_loss = accurate(test[0], test[1], batch_size,
+                                    before_batch_cbk: before_batch_cbk, after_batch_cbk: after_batch_cbk)
+          print " accurate: #{acc}, test loss: #{sprintf('%.8f', test_loss)}"
         end
         puts "" if verbose
-        epoch_proc.call(epoch) if epoch_proc
+        after_epoch_cbk.call(epoch) if after_epoch_cbk
       end
     end
 
     # Training once.
     # Compile the model before use this method.
     # @param [Numo::SFloat] x Input training data.
     # @param [Numo::SFloat] y Output training data.
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
     # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
-    # @yield [x, y] batch_proc Set proc to process per batch.
-    def train_on_batch(x, y, &batch_proc)
+    def train_on_batch(x, y, before_batch_cbk: nil, after_batch_cbk: nil)
       raise DNN_Error.new("The model is not compiled.") unless compiled?
       check_xy_type(x, y)
       input_data_shape_check(x, y)
-      x, y = batch_proc.call(x, y) if batch_proc
-      out = forward(x, true)
-      loss_value = @loss.forward(out, y, get_all_layers)
-      dout = @loss.backward(y)
-      backward(dout)
-      @loss.regularizes_backward(get_all_layers)
+      x, y = before_batch_cbk.call(x, y, true) if before_batch_cbk
+      x = forward(x, true)
+      loss_value = @loss_func.forward(x, y, get_all_layers)
+      dy = @loss_func.backward(y, get_all_layers)
+      backward(dy)
       update
+      after_batch_cbk.call(loss_value, true) if after_batch_cbk
      loss_value
     end
 
     # Evaluate model and get accurate of test data.
     # @param [Numo::SFloat] x Input test data.
     # @param [Numo::SFloat] y Output test data.
-    # @yield [x, y] batch_proc Set proc to process per batch.
-    def accurate(x, y, batch_size = 100, &batch_proc)
+    # @param [Lambda] before_batch_cbk Set the proc to be performed before batch processing.
+    # @param [Lambda] after_batch_cbk Set the proc to be performed after batch processing.
+    # @return [Array] Returns the test data accurate and mean loss in the form [accurate, mean_loss].
+    def accurate(x, y, batch_size = 100, before_batch_cbk: nil, after_batch_cbk: nil)
       check_xy_type(x, y)
       input_data_shape_check(x, y)
       batch_size = batch_size >= x.shape[0] ? x.shape[0] : batch_size
+      dataset = Dataset.new(x, y, false)
       correct = 0
+      sum_loss = 0
       (x.shape[0].to_f / batch_size).ceil.times do |i|
-        x_batch = Xumo::SFloat.zeros(batch_size, *x.shape[1..-1])
-        y_batch = Xumo::SFloat.zeros(batch_size, *y.shape[1..-1])
+        x_batch, y_batch = dataset.next_batch(batch_size)
+        x_batch, y_batch = before_batch_cbk.call(x_batch, y_batch, true) if before_batch_cbk
+        x_batch = forward(x_batch, false)
+        sigmoid = Sigmoid.new
         batch_size.times do |j|
-          k = i * batch_size + j
-          break if k >= x.shape[0]
-          x_batch[j, false] = x[k, false]
-          y_batch[j, false] = y[k, false]
-        end
-        x_batch, y_batch = batch_proc.call(x_batch, y_batch) if batch_proc
-        out = forward(x_batch, false)
-        batch_size.times do |j|
           if @layers.last.output_shape == [1]
-            correct += 1 if out[j, 0].round == y_batch[j, 0].round
+            if @loss_func.is_a?(SigmoidCrossEntropy)
+              correct += 1 if sigmoid.forward(x_batch[j, 0]).round == y_batch[j, 0].round
+            else
+              correct += 1 if x_batch[j, 0].round == y_batch[j, 0].round
+            end
           else
-            correct += 1 if out[j, true].max_index == y_batch[j, true].max_index
+            correct += 1 if x_batch[j, true].max_index == y_batch[j, true].max_index
           end
         end
+        loss_value = @loss_func.forward(x_batch, y_batch, get_all_layers)
+        after_batch_cbk.call(loss_value, false) if after_batch_cbk
+        sum_loss += loss_value.is_a?(Numo::SFloat) ? loss_value.mean : loss_value
       end
-      correct.to_f / x.shape[0]
+      mean_loss = sum_loss / batch_size
+      [correct.to_f / x.shape[0], mean_loss]
     end
 
     # Predict data.
     # @param [Numo::SFloat] x Input data.
     def predict(x)
@@ -308,10 +320,21 @@
     def predict1(x)
       check_xy_type(x)
       predict(x.reshape(1, *x.shape))[0, false]
     end
 
+    # Get loss value.
+    # @param [Numo::SFloat] x Input data.
+    # @param [Numo::SFloat] y Output data.
+    # @return [Float | Numo::SFloat] Return loss value in the form of Float or Numo::SFloat.
+    def loss(x, y)
+      check_xy_type(x, y)
+      input_data_shape_check(x, y)
+      x = forward(x, false)
+      @loss_func.forward(x, y, get_all_layers)
+    end
+
     # @return [DNN::Model] Copy this model.
     def copy
       Marshal.load(Marshal.dump(self))
     end
 
@@ -332,39 +355,39 @@
       @layers.map { |layer|
         layer.is_a?(Model) ? layer.get_all_layers : layer
       }.flatten
     end
 
-    # TODO
-    # It is not good to write the Layer class name directly in the Model class. I will fix it later.
     def forward(x, learning_phase)
       @layers.each do |layer|
-        x = if layer.is_a?(Layers::Dropout) || layer.is_a?(Layers::BatchNormalization) || layer.is_a?(Model)
+        x = if layer.is_a?(Model)
          layer.forward(x, learning_phase)
         else
+          layer.learning_phase = learning_phase
          layer.forward(x)
         end
       end
       x
     end
 
-    def backward(dout)
+    def backward(dy)
       @layers.reverse.each do |layer|
-        dout = layer.backward(dout)
+        dy = layer.backward(dy)
       end
-      dout
+      dy
    end
 
     def update
       return unless @trainable
-      @layers.each do |layer|
-        if layer.is_a?(Layers::HasParamLayer)
-          layer.update(@optimizer)
-        elsif layer.is_a?(Model)
-          layer.update
+      all_trainable_layers = @layers.map { |layer|
+        if layer.is_a?(Model)
+          layer.trainable ? layer.get_all_layers : nil
+        else
+          layer
        end
-      end
+      }.flatten.compact.uniq
+      @optimizer.update(all_trainable_layers)
    end
 
     def get_prev_layer(layer)
       layer_index = @layers.index(layer)
       prev_layer = if layer_index == 0
@@ -383,16 +406,16 @@
       end
     end
 
     def to_hash
       hash_layers = @layers.map { |layer| layer.to_hash }
-      {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @loss.to_hash}
+      {class: Model.name, layers: hash_layers, optimizer: @optimizer.to_hash, loss: @loss_func.to_hash}
     end
 
     private
 
     def layers_check
-      unless @layers.first.is_a?(Layers::InputLayer)
+      if !@layers.first.is_a?(Layers::InputLayer) && !@super_model
        raise TypeError.new("The first layer is not an InputLayer.")
       end
     end
 
     def input_data_shape_check(x, y = nil)
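
For context, here is a minimal usage sketch of the 0.10.0 API as changed above. Only compile(optimizer, loss_func), the *_cbk keyword arguments of train, the [accurate, mean_loss] return value of accurate, and the new loss(x, y) method come from this diff; the layer stack, optimizer choice, and the training arrays are hypothetical placeholders, not part of the release.

    require "dnn"

    include DNN::Layers
    include DNN::Activations
    include DNN::Optimizers
    include DNN::Losses

    # Placeholder data: assumed to be Numo::SFloat arrays prepared elsewhere.
    # x_train, y_train, x_test, y_test = ...

    model = DNN::Model.new
    model << InputLayer.new(784)
    model << Dense.new(64)
    model << ReLU.new
    model << Dense.new(10)

    # 0.10.0: the second compile argument is now named loss_func.
    model.compile(SGD.new, SoftmaxCrossEntropy.new)

    # 0.10.0: batch_proc / &epoch_proc are replaced by explicit callback lambdas.
    model.train(x_train, y_train, 10,
                batch_size: 128,
                test: [x_test, y_test],
                before_epoch_cbk: ->(epoch) { puts "starting epoch #{epoch}" },
                after_batch_cbk: ->(loss, train) { puts "batch loss: #{loss}" })

    # 0.10.0: accurate returns [accurate, mean_loss] instead of a single Float.
    acc, mean_loss = model.accurate(x_test, y_test)

    # 0.10.0: loss(x, y) computes the loss without updating the parameters.
    test_loss = model.loss(x_test, y_test)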