lib/dnn/core/layers.rb in ruby-dnn-0.3.2 vs lib/dnn/core/layers.rb in ruby-dnn-0.4.0

- old
+ new

@@ -159,73 +159,73 @@ @bias_initializer.init_param(self, :bias) end end - #private module - module Convert + #This module is used for convolution. + module Conv2DModule private - def im2col(img, out_w, out_h, fil_w, fil_h, strides) + def im2col(img, out_h, out_w, fil_h, fil_w, strides) bsize = img.shape[0] ch = img.shape[3] - col = SFloat.zeros(bsize, ch, fil_w, fil_h, out_w, out_h) + col = SFloat.zeros(bsize, ch, fil_h, fil_w, out_h, out_w) img = img.transpose(0, 3, 1, 2) (0...fil_h).each do |i| - i_range = (i...(i + strides[1] * out_h)).step(strides[1]).to_a + i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a (0...fil_w).each do |j| - j_range = (j...(j + strides[0] * out_w)).step(strides[0]).to_a - col[true, true, j, i, true, true] = img[true, true, j_range, i_range] + j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a + col[true, true, i, j, true, true] = img[true, true, i_range, j_range] end end - col.transpose(0, 4, 5, 2, 3, 1).reshape(bsize * out_w * out_h, fil_w * fil_h * ch) + col.transpose(0, 4, 5, 2, 3, 1).reshape(bsize * out_h * out_w, fil_h * fil_w * ch) end - def col2im(col, img_shape, out_w, out_h, fil_w, fil_h, strides) - bsize, img_w, img_h, ch = img_shape - col = col.reshape(bsize, out_w, out_h, fil_w, fil_h, ch).transpose(0, 5, 3, 4, 1, 2) - img = SFloat.zeros(bsize, ch, img_w, img_h) + def col2im(col, img_shape, out_h, out_w, fil_h, fil_w, strides) + bsize, img_h, img_w, ch = img_shape + col = col.reshape(bsize, out_h, out_w, fil_h, fil_w, ch).transpose(0, 5, 3, 4, 1, 2) + img = SFloat.zeros(bsize, ch, img_h, img_w) (0...fil_h).each do |i| - i_range = (i...(i + strides[1] * out_h)).step(strides[1]).to_a + i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a (0...fil_w).each do |j| - j_range = (j...(j + strides[0] * out_w)).step(strides[0]).to_a - img[true, true, j_range, i_range] += col[true, true, j, i, true, true] + j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a + img[true, true, i_range, j_range] += col[true, true, i, j, true, true] end end img.transpose(0, 2, 3, 1) end def padding(img, pad) - bsize, img_w, img_h, ch = img.shape - img2 = SFloat.zeros(bsize, img_w + pad[0], img_h + pad[1], ch) - i_begin = pad[1] / 2 + bsize, img_h, img_w, ch = img.shape + img2 = SFloat.zeros(bsize, img_h + pad[0], img_w + pad[1], ch) + i_begin = pad[0] / 2 i_end = i_begin + img_h - j_begin = pad[0] / 2 + j_begin = pad[1] / 2 j_end = j_begin + img_w - img2[true, j_begin...j_end, i_begin...i_end, true] = img + img2[true, i_begin...i_end, j_begin...j_end, true] = img img2 end def back_padding(img, pad) - i_begin = pad[1] / 2 - i_end = img.shape[2] - (pad[1] / 2.0).round - j_begin = pad[0] / 2 - j_end = img.shape[1] - (pad[0] / 2.0).round - img[true, j_begin...j_end, i_begin...i_end, true] + i_begin = pad[0] / 2 + i_end = img.shape[1] - (pad[0] / 2.0).round + j_begin = pad[1] / 2 + j_end = img.shape[2] - (pad[1] / 2.0).round + img[true, i_begin...i_end, j_begin...j_end, true] end - def out_size(prev_w, prev_h, fil_w, fil_h, strides) - out_w = (prev_w - fil_w) / strides[0] + 1 - out_h = (prev_h - fil_h) / strides[1] + 1 - [out_w, out_h] + def out_size(prev_h, prev_w, fil_h, fil_w, strides) + out_h = (prev_h - fil_h) / strides[0] + 1 + out_w = (prev_w - fil_w) / strides[1] + 1 + [out_h, out_w] end end class Conv2D < HasParamLayer include Initializers - include Convert + include Conv2DModule attr_reader :num_filters attr_reader :filter_size attr_reader :strides attr_reader :weight_decay @@ -255,16 +255,16 @@ weight_decay: hash[:weight_decay]) end def build(model) super - prev_w, prev_h = prev_layer.shape[0..1] - @out_size = out_size(prev_w, prev_h, *@filter_size, @strides) + prev_h, prev_w = prev_layer.shape[0..1] + @out_size = out_size(prev_h, prev_w, *@filter_size, @strides) out_w, out_h = @out_size if @padding - @pad = [prev_w - out_w, prev_h - out_h] - @out_size = [prev_w, prev_h] + @pad = [prev_h - out_h, prev_w - out_w] + @out_size = [prev_h, prev_w] end end def forward(x) x = padding(x, @pad) if @padding @@ -315,11 +315,11 @@ end end class MaxPool2D < Layer - include Convert + include Conv2DModule attr_reader :pool_size attr_reader :strides def self.load_hash(hash) @@ -339,15 +339,15 @@ def build(model) super prev_w, prev_h = prev_layer.shape[0..1] @num_channel = prev_layer.shape[2] - @out_size = out_size(prev_w, prev_h, *@pool_size, @strides) + @out_size = out_size(prev_h, prev_w, *@pool_size, @strides) out_w, out_h = @out_size if @padding - @pad = [prev_w - out_w, prev_h - out_h] - @out_size = [prev_w, prev_h] + @pad = [prev_h - out_h, prev_w - out_w] + @out_size = [prev_h, prev_w] end end def forward(x) x = padding(x, @pad) if @padding @@ -381,12 +381,10 @@ end end class UnPool2D < Layer - include Convert - attr_reader :unpool_size def initialize(unpool_size) super() @unpool_size = unpool_size.is_a?(Integer) ? [unpool_size, unpool_size] : unpool_size @@ -396,28 +394,28 @@ UnPool2D.new(hash[:unpool_size]) end def build(model) super - prev_w, prev_h = prev_layer.shape[0..1] - unpool_w, unpool_h = @unpool_size - out_w = prev_w * unpool_w + prev_h, prev_w = prev_layer.shape[0..1] + unpool_h, unpool_w = @unpool_size out_h = prev_h * unpool_h - @out_size = [out_w, out_h] + out_w = prev_w * unpool_w + @out_size = [out_h, out_w] @num_channel = prev_layer.shape[2] end def forward(x) @x_shape = x.shape - unpool_w, unpool_h = @unpool_size - x2 = SFloat.zeros(x.shape[0], x.shape[1], unpool_w, x.shape[2], unpool_h, @num_channel) + unpool_h, unpool_w = @unpool_size + x2 = SFloat.zeros(x.shape[0], x.shape[1], unpool_h, x.shape[2], unpool_w, @num_channel) x2[true, true, 0, true, 0, true] = x x2.reshape(x.shape[0], *@out_size, x.shape[3]) end def backward(dout) - unpool_w, unpool_h = @unpool_size - dout = dout.reshape(dout.shape[0], @x_shape[0], unpool_w, @x_shape[1], unpool_h, @num_channel) + unpool_h, unpool_w = @unpool_size + dout = dout.reshape(dout.shape[0], @x_shape[0], unpool_h, @x_shape[1], unpool_w, @num_channel) dout[true, true, 0, true, 0, true].clone end def shape [@out_width, @out_height, @num_channel]