require "numo/narray" require "json" class NN VERSION = "2.0" include Numo attr_accessor :weights attr_accessor :biases attr_accessor :gammas attr_accessor :betas attr_accessor :learning_rate attr_accessor :batch_size attr_accessor :activation attr_accessor :momentum attr_accessor :weight_decay attr_accessor :dropout_ratio attr_reader :training def initialize(num_nodes, learning_rate: 0.01, batch_size: 1, activation: %i(relu identity), momentum: 0, weight_decay: 0, use_dropout: false, dropout_ratio: 0.5, use_batch_norm: false) SFloat.srand(rand(2 ** 64)) @num_nodes = num_nodes @learning_rate = learning_rate @batch_size = batch_size @activation = activation @momentum = momentum @weight_decay = weight_decay @use_dropout = use_dropout @dropout_ratio = dropout_ratio @use_batch_norm = use_batch_norm init_weight_and_bias init_gamma_and_beta if @use_batch_norm @training = true init_layers end def self.load(file_name) json = JSON.parse(File.read(file_name)) nn = self.new(json["num_nodes"], learning_rate: json["learning_rate"], batch_size: json["batch_size"], activation: json["activation"].map(&:to_sym), momentum: json["momentum"], weight_decay: json["weight_decay"], use_dropout: json["use_dropout"], dropout_ratio: json["dropout_ratio"], use_batch_norm: json["use_batch_norm"], ) nn.weights = json["weights"].map{|weight| SFloat.cast(weight)} nn.biases = json["biases"].map{|bias| SFloat.cast(bias)} if json["use_batch_norm"] nn.gammas = json["gammas"].map{|gamma| SFloat.cast(gamma)} nn.betas = json["betas"].map{|beta| SFloat.cast(beta)} end nn end def train(x_train, y_train, epochs, func = nil, &block) num_train_data = x_train.is_a?(SFloat) ? x_train.shape[0] : x_train.length (1..epochs).each do |epoch| loss = nil (num_train_data.to_f / @batch_size).ceil.times do loss = learn(x_train, y_train, &func) if loss.nan? puts "loss is nan" return end end puts "epoch #{epoch}/#{epochs} loss: #{loss}" block.call(epoch) if block end end def test(x_test, y_test, tolerance = 0.5, &block) acc = accurate(x_test, y_test, tolerance, &block) puts "accurate: #{acc}" acc end def accurate(x_test, y_test, tolerance = 0.5, &block) correct = 0 num_test_data = x_test.is_a?(SFloat) ? 
    num_test_data = x_test.is_a?(SFloat) ? x_test.shape[0] : x_test.length
    (num_test_data.to_f / @batch_size).ceil.times do |i|
      x = SFloat.zeros(@batch_size, @num_nodes.first)
      y = SFloat.zeros(@batch_size, @num_nodes.last)
      @batch_size.times do |j|
        k = i * @batch_size + j
        break if k >= num_test_data
        if x_test.is_a?(SFloat)
          x[j, true] = x_test[k, true]
          y[j, true] = y_test[k, true]
        else
          x[j, true] = SFloat.cast(x_test[k])
          y[j, true] = SFloat.cast(y_test[k])
        end
      end
      x, y = block.call(x, y) if block
      out = forward(x, false)
      @batch_size.times do |j|
        vout = out[j, true]
        vy = y[j, true]
        case @activation[1]
        when :identity
          correct += 1 unless (NMath.sqrt((vout - vy) ** 2) < tolerance).to_a.include?(0)
        when :softmax
          correct += 1 if vout.max_index == vy.max_index
        end
      end
    end
    correct.to_f / num_test_data
  end

  # Trains on one randomly sampled mini-batch and returns its loss.
  def learn(x_train, y_train, &block)
    x = SFloat.zeros(@batch_size, @num_nodes.first)
    y = SFloat.zeros(@batch_size, @num_nodes.last)
    @batch_size.times do |i|
      if x_train.is_a?(SFloat)
        r = rand(x_train.shape[0])
        x[i, true] = x_train[r, true]
        y[i, true] = y_train[r, true]
      else
        r = rand(x_train.length)
        x[i, true] = SFloat.cast(x_train[r])
        y[i, true] = SFloat.cast(y_train[r])
      end
    end
    x, y = block.call(x, y) if block
    forward(x)
    backward(y)
    update_weight_and_bias
    update_gamma_and_beta if @use_batch_norm
    @layers[-1].loss(y)
  end

  # Forward pass with training disabled (no dropout masking, no parameter updates).
  def run(x)
    if x.is_a?(Array)
      forward(SFloat.cast(x), false).to_a
    else
      forward(x, false)
    end
  end

  # Serializes hyperparameters and parameters to a JSON file.
  def save(file_name)
    json = {
      "version" => VERSION,
      "num_nodes" => @num_nodes,
      "learning_rate" => @learning_rate,
      "batch_size" => @batch_size,
      "activation" => @activation,
      "momentum" => @momentum,
      "weight_decay" => @weight_decay,
      "use_dropout" => @use_dropout,
      "dropout_ratio" => @dropout_ratio,
      "use_batch_norm" => @use_batch_norm,
      "weights" => @weights.map(&:to_a),
      "biases" => @biases.map(&:to_a),
    }
    if @use_batch_norm
      json_batch_norm = {
        "gammas" => @gammas,
        "betas" => @betas
      }
      json.merge!(json_batch_norm)
    end
    File.write(file_name, JSON.dump(json))
  end

  private

  # Gaussian initialization scaled by 1 / sqrt(fan_in); the extra sqrt(2)
  # for ReLU corresponds to He initialization.
  def init_weight_and_bias
    @weights = Array.new(@num_nodes.length - 1)
    @biases = Array.new(@num_nodes.length - 1)
    @weight_amounts = Array.new(@num_nodes.length - 1, 0)
    @bias_amounts = Array.new(@num_nodes.length - 1, 0)
    @num_nodes[0...-1].each_index do |i|
      weight = SFloat.new(@num_nodes[i], @num_nodes[i + 1]).rand_norm
      bias = SFloat.new(@num_nodes[i + 1]).rand_norm
      if @activation[0] == :relu
        @weights[i] = weight / Math.sqrt(@num_nodes[i]) * Math.sqrt(2)
        @biases[i] = bias / Math.sqrt(@num_nodes[i]) * Math.sqrt(2)
      else
        @weights[i] = weight / Math.sqrt(@num_nodes[i])
        @biases[i] = bias / Math.sqrt(@num_nodes[i])
      end
    end
  end

  def init_gamma_and_beta
    @gammas = Array.new(@num_nodes.length - 2, 1)
    @betas = Array.new(@num_nodes.length - 2, 0)
    @gamma_amounts = Array.new(@num_nodes.length - 2, 0)
    @beta_amounts = Array.new(@num_nodes.length - 2, 0)
  end

  # Builds the layer pipeline: Affine (+ BatchNorm) + activation (+ Dropout)
  # for each hidden layer, then the output Affine and output activation.
  def init_layers
    @layers = []
    @num_nodes[0...-2].each_index do |i|
      @layers << Affine.new(self, i)
      @layers << BatchNorm.new(self, i) if @use_batch_norm
      @layers << case @activation[0]
                 when :sigmoid
                   Sigmoid.new
                 when :relu
                   ReLU.new
                 end
      @layers << Dropout.new(self) if @use_dropout
    end
    @layers << Affine.new(self, -1)
    @layers << case @activation[1]
               when :identity
                 Identity.new(self)
               when :softmax
                 Softmax.new(self)
               end
  end

  def forward(x, training = true)
    @training = training
    @layers.each do |layer|
      x = layer.forward(x)
    end
    x
  end

  def backward(y)
    dout = @layers[-1].backward(y)
    @layers[0...-1].reverse.each do |layer|
      dout = layer.backward(dout)
    end
  end

  # SGD update (with optional momentum) using batch-averaged gradients.
  def update_weight_and_bias
    @layers.select { |layer| layer.is_a?(Affine) }.each.with_index do |layer, i|
      weight_amount = layer.d_weight.mean(0) * @learning_rate
      bias_amount = layer.d_bias.mean * @learning_rate
      if @momentum > 0
        weight_amount += @momentum * @weight_amounts[i]
        @weight_amounts[i] = weight_amount
        bias_amount += @momentum * @bias_amounts[i]
        @bias_amounts[i] = bias_amount
      end
      @weights[i] -= weight_amount
      @biases[i] -= bias_amount
    end
  end

  def update_gamma_and_beta
    @layers.select { |layer| layer.is_a?(BatchNorm) }.each.with_index do |layer, i|
      gamma_amount = layer.d_gamma.mean * @learning_rate
      beta_amount = layer.d_beta.mean * @learning_rate
      if @momentum > 0
        gamma_amount += @momentum * @gamma_amounts[i]
        @gamma_amounts[i] = gamma_amount
        beta_amount += @momentum * @beta_amounts[i]
        @beta_amounts[i] = beta_amount
      end
      @gammas[i] -= gamma_amount
      @betas[i] -= beta_amount
    end
  end
end


# Fully connected layer: out = x.dot(W) + b.
class NN::Affine
  include Numo

  attr_reader :d_weight
  attr_reader :d_bias

  def initialize(nn, index)
    @nn = nn
    @index = index
    @d_weight = nil
    @d_bias = nil
  end

  def forward(x)
    @x = x
    @x.dot(@nn.weights[@index]) + @nn.biases[@index]
  end

  # Stores per-sample weight and bias gradients and returns the gradient
  # with respect to the layer input.
  def backward(dout)
    x = @x.reshape(*@x.shape, 1)
    @d_weight = x.dot(dout.reshape(dout.shape[0], 1, dout.shape[1]))
    if @nn.weight_decay > 0
      dridge = @nn.weight_decay * @nn.weights[@index]
      @d_weight += dridge
    end
    @d_bias = dout
    dout.dot(@nn.weights[@index].transpose)
  end
end


class NN::Sigmoid
  def forward(x)
    @out = 1.0 / (1 + Numo::NMath.exp(-x))
  end

  def backward(dout)
    dout * (1.0 - @out) * @out
  end
end


class NN::ReLU
  def forward(x)
    @x = x.clone
    x[x < 0] = 0
    x
  end

  def backward(dout)
    @x[@x > 0] = 1.0
    @x[@x <= 0] = 0.0
    dout * @x
  end
end


# Identity output layer with mean squared error loss (plus L2 penalty).
class NN::Identity
  include Numo

  def initialize(nn)
    @nn = nn
  end

  def forward(x)
    @out = x
  end

  def backward(y)
    @out - y
  end

  def loss(y)
    ridge = 0.5 * @nn.weight_decay * @nn.weights.reduce(0) { |sum, weight| sum + (weight ** 2).sum }
    0.5 * ((@out - y) ** 2).sum / @nn.batch_size + ridge
  end
end


# Softmax output layer with cross-entropy loss (plus L2 penalty).
class NN::Softmax
  include Numo

  def initialize(nn)
    @nn = nn
  end

  def forward(x)
    @out = NMath.exp(x) / NMath.exp(x).sum(1).reshape(x.shape[0], 1)
  end

  def backward(y)
    @out - y
  end

  def loss(y)
    ridge = 0.5 * @nn.weight_decay * @nn.weights.reduce(0) { |sum, weight| sum + (weight ** 2).sum }
    -(y * NMath.log(@out + 1e-7)).sum / @nn.batch_size + ridge
  end
end


# Dropout: randomly zeroes units during training and scales activations
# by the keep probability (1 - dropout_ratio) at inference.
class NN::Dropout
  include Numo

  def initialize(nn)
    @nn = nn
    @mask = nil
  end

  def forward(x)
    if @nn.training
      @mask = SFloat.ones(*x.shape).rand < @nn.dropout_ratio
      x[@mask] = 0
    else
      x *= (1 - @nn.dropout_ratio)
    end
    x
  end

  def backward(dout)
    dout[@mask] = 0 if @nn.training
    dout
  end
end


# Batch normalization using the statistics of the current batch.
class NN::BatchNorm
  include Numo

  attr_reader :d_gamma
  attr_reader :d_beta

  def initialize(nn, index)
    @nn = nn
    @index = index
  end

  def forward(x)
    @x = x
    @mean = x.mean(0)
    @xc = x - @mean
    @var = (@xc ** 2).mean(0)
    @std = NMath.sqrt(@var + 1e-7)
    @xn = @xc / @std
    out = @nn.gammas[@index] * @xn + @nn.betas[@index]
    out.reshape(*@x.shape)
  end

  def backward(dout)
    @d_beta = dout.sum(0)
    @d_gamma = (@xn * dout).sum(0)
    dxn = @nn.gammas[@index] * dout
    dxc = dxn / @std
    dstd = -((dxn * @xc) / (@std ** 2)).sum(0)
    dvar = 0.5 * dstd / @std
    dxc += (2.0 / @nn.batch_size) * @xc * dvar
    dmean = dxc.sum(0)
    dx = dxc - dmean / @nn.batch_size
    dx.reshape(*@x.shape)
  end
end
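

# --- Usage sketch -----------------------------------------------------------
# A minimal, illustrative example of driving this class; it is not part of
# the library itself. The file name "nn.rb", the XOR data, and the chosen
# hyperparameters below are assumptions for demonstration only.
#
#   require_relative "nn"
#
#   x_train = [[0, 0], [0, 1], [1, 0], [1, 1]]
#   y_train = [[0], [1], [1], [0]]
#
#   nn = NN.new([2, 4, 1],
#               learning_rate: 0.1,
#               batch_size: 4,
#               activation: %i(relu identity))
#
#   nn.train(x_train, y_train, 1000)   # prints the loss once per epoch
#   nn.test(x_train, y_train, 0.5)     # fraction of outputs within 0.5 of the target
#   p nn.run([[0, 1]])                 # inference on a batch of one sample
#
#   nn.save("xor.json")                # persist hyperparameters and weights as JSON
#   nn2 = NN.load("xor.json")          # restore an equivalent network
# -----------------------------------------------------------------------------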