lib/dnn/core/layers.rb in ruby-dnn-0.4.4 vs lib/dnn/core/layers.rb in ruby-dnn-0.5.0

- old
+ new

@@ -156,270 +156,9 @@ @weight_initializer.init_param(self, :weight) @bias_initializer.init_param(self, :bias) end end - - #This module is used for convolution. - module Conv2DModule - private - - def im2col(img, out_h, out_w, fil_h, fil_w, strides) - bsize = img.shape[0] - ch = img.shape[3] - col = SFloat.zeros(bsize, ch, fil_h, fil_w, out_h, out_w) - img = img.transpose(0, 3, 1, 2) - (0...fil_h).each do |i| - i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a - (0...fil_w).each do |j| - j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a - col[true, true, i, j, true, true] = img[true, true, i_range, j_range] - end - end - col.transpose(0, 4, 5, 2, 3, 1).reshape(bsize * out_h * out_w, fil_h * fil_w * ch) - end - - def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides) - bsize, img_h, img_w, ch = img_shape - col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch).transpose(0, 5, 3, 4, 1, 2) - img = SFloat.zeros(bsize, ch, img_h, img_w) - (0...fil_h).each do |i| - i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a - (0...fil_w).each do |j| - j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a - img[true, true, i_range, j_range] += col[true, true, i, j, true, true] - end - end - img.transpose(0, 2, 3, 1) - end - - def padding(img, pad) - bsize, img_h, img_w, ch = img.shape - img2 = SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch) - i_begin = pad[0] / 2 - i_end = i_begin + img_h - j_begin = pad[1] / 2 - j_end = j_begin + img_w - img2[true, i_begin...i_end, j_begin...j_end, true] = img - img2 - end - - def back_padding(img, pad) - i_begin = pad[0] / 2 - i_end = img.shape[1] - (pad[0] / 2.0).round - j_begin = pad[1] / 2 - j_end = img.shape[2] - (pad[1] / 2.0).round - img[true, i_begin...i_end, j_begin...j_end, true] - end - - def out_size(prev_h, prev_w, fil_h, fil_w, strides) - out_h = (prev_h - fil_h) / strides[0] + 1 - out_w = (prev_w - fil_w) / strides[1] + 1 - [out_h, out_w] - end - end - - - class Conv2D < HasParamLayer - include Initializers - include Conv2DModule - - attr_reader :num_filters - attr_reader :filter_size - attr_reader :strides - attr_reader :weight_decay - - def initialize(num_filters, filter_size, - weight_initializer: nil, - bias_initializer: nil, - strides: 1, - padding: false, - weight_decay: 0) - super() - @num_filters = num_filters - @filter_size = filter_size.is_a?(Integer) ? [filter_size, filter_size] : filter_size - @weight_initializer = (weight_initializer || RandomNormal.new) - @bias_initializer = (bias_initializer || Zeros.new) - @strides = strides.is_a?(Integer) ? [strides, strides] : strides - @padding = padding - @weight_decay = weight_decay - end - - def self.load_hash(hash) - Conv2D.new(hash[:num_filters], hash[:filter_size], - weight_initializer: Util.load_hash(hash[:weight_initializer]), - bias_initializer: Util.load_hash(hash[:bias_initializer]), - strides: hash[:strides], - padding: hash[:padding], - weight_decay: hash[:weight_decay]) - end - - def build(model) - super - prev_h, prev_w = prev_layer.shape[0..1] - @out_size = out_size(prev_h, prev_w, *@filter_size, @strides) - out_w, out_h = @out_size - if @padding - @pad = [prev_h - out_h, prev_w - out_w] - @out_size = [prev_h, prev_w] - end - end - - def forward(x) - x = padding(x, @pad) if @padding - @x_shape = x.shape - @col = im2col(x, *@out_size, *@filter_size, @strides) - out = @col.dot(@params[:weight]) - out.reshape(x.shape[0], *@out_size, out.shape[3]) - end - - def backward(dout) - dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3]) - @grads[:weight] = @col.transpose.dot(dout) - if @weight_decay > 0 - dridge = @weight_decay * @params[:weight] - @grads[:weight] += dridge - end - @grads[:bias] = dout.sum(0) - dcol = dout.dot(@params[:weight].transpose) - dx = col2im(dcol, @x_shape, *@out_size, *@filter_size, @strides) - @padding ? back_padding(dx, @pad) : dx - end - - def shape - [*@out_size, @num_filters] - end - - def to_hash - super({num_filters: @num_filters, - filter_size: @filter_size, - weight_initializer: @weight_initializer.to_hash, - bias_initializer: @bias_initializer.to_hash, - strides: @strides, - padding: @padding, - weight_decay: @weight_decay}) - end - - private - - def init_params - num_prev_filter = prev_layer.shape[2] - @params[:weight] = SFloat.new(num_prev_filter * @filter_size.reduce(:*), @num_filters) - @params[:bias] = SFloat.new(@num_filters) - @weight_initializer.init_param(self, :weight) - @bias_initializer.init_param(self, :bias) - end - end - - - class MaxPool2D < Layer - include Conv2DModule - - attr_reader :pool_size - attr_reader :strides - - def self.load_hash(hash) - MaxPool2D.new(hash[:pool_size], strides: hash[:strides], padding: hash[:padding]) - end - - def initialize(pool_size, strides: nil, padding: false) - super() - @pool_size = pool_size.is_a?(Integer) ? [pool_size, pool_size] : pool_size - @strides = if strides - strides.is_a?(Integer) ? [strides, strides] : strides - else - @pool_size.clone - end - @padding = padding - end - - def build(model) - super - prev_w, prev_h = prev_layer.shape[0..1] - @num_channel = prev_layer.shape[2] - @out_size = out_size(prev_h, prev_w, *@pool_size, @strides) - out_w, out_h = @out_size - if @padding - @pad = [prev_h - out_h, prev_w - out_w] - @out_size = [prev_h, prev_w] - end - end - - def forward(x) - x = padding(x, @pad) if @padding - @x_shape = x.shape - col = im2col(x, *@out_size, *@pool_size, @strides) - col = col.reshape(x.shape[0] * @out_size.reduce(:*) * x.shape[3], @pool_size.reduce(:*)) - @max_index = col.max_index(1) - col.max(1).reshape(x.shape[0], *@out_size, x.shape[3]) - end - - def backward(dout) - dmax = SFloat.zeros(dout.size * @pool_size.reduce(:*)) - dmax[@max_index] = dout.flatten - dcol = dmax.reshape(dout.shape[0..2].reduce(:*), dout.shape[3] * @pool_size.reduce(:*)) - dx = col2im(dcol, @x_shape, *@out_size, *@pool_size, @strides) - @padding ? back_padding(dx, @pad) : dx - end - - def shape - [*@out_size, @num_channel] - end - - def to_hash - super({pool_width: @pool_width, - pool_height: @pool_height, - strides: @strides, - padding: @padding}) - end - end - - - class UnPool2D < Layer - attr_reader :unpool_size - - def initialize(unpool_size) - super() - @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size - end - - def self.load_hash(hash) - UnPool2D.new(hash[:unpool_size]) - end - - def build(model) - super - prev_h, prev_w = prev_layer.shape[0..1] - unpool_h, unpool_w = @unpool_size - out_h = prev_h * unpool_h - out_w = prev_w * unpool_w - @out_size = [out_h, out_w] - @num_channel = prev_layer.shape[2] - end - - def forward(x) - @x_shape = x.shape - unpool_h, unpool_w = @unpool_size - x2 = SFloat.zeros(x.shape[0], x.shape[1], unpool_h, x.shape[2], unpool_w, @num_channel) - x2[true, true, 0, true, 0, true] = x - x2.reshape(x.shape[0], *@out_size, x.shape[3]) - end - - def backward(dout) - unpool_h, unpool_w = @unpool_size - dout = dout.reshape(dout.shape[0], @x_shape[0], unpool_h, @x_shape[1], unpool_w, @num_channel) - dout[true, true, 0, true, 0, true].clone - end - - def shape - [@out_width, @out_height, @num_channel] - end - - def to_hash - super({unpool_size: @unpool_size}) - end - end - class Flatten < Layer def forward(x) @shape = x.shape x.reshape(x.shape[0], x.shape[1..-1].reduce(:*))