lib/dnn/core/layers.rb in ruby-dnn-0.1.8 vs lib/dnn/core/layers.rb in ruby-dnn-0.2.0
- old
+ new
@@ -3,15 +3,24 @@
#Super class of all optimizer classes.
class Layer
include Numo
+ def initialize
+ @builded = false
+ end
+
#Initialize layer when model is compiled.
- def init(model)
+ def build(model)
+ @builded = true
@model = model
end
+ def builded?
+ @builded
+ end
+
#Forward propagation.
def forward() end
#Backward propagation.
def backward() end
@@ -40,11 +49,11 @@
def initialize
@params = {}
@grads = {}
end
- def init(model)
+ def build(model)
super
init_params
end
#Update the parameters.
@@ -151,123 +160,138 @@
#private module
module Convert
private
- def im2col(img, out_h, out_w, fh, fw, strides)
- bs, fn = img.shape[0..1]
- col = SFloat.zeros(bs, fn, fh, fw, out_h, out_w)
- (0...fh).each do |i|
- i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
- (0...fw).each do |j|
- j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
- col[true, true, i, j, true, true] = img[true, true, i_range, j_range]
+ def im2col(img, out_w, out_h, fil_w, fil_h, strides)
+ bsize = img.shape[0]
+ ch = img.shape[3]
+ col = SFloat.zeros(bsize, ch, fil_w, fil_h, out_w, out_h)
+ img = img.transpose(0, 3, 1, 2)
+ (0...fil_h).each do |i|
+ i_range = (i...(i + strides[1] * out_h)).step(strides[1]).to_a
+ (0...fil_w).each do |j|
+ j_range = (j...(j + strides[0] * out_w)).step(strides[0]).to_a
+ col[true, true, j, i, true, true] = img[true, true, j_range, i_range]
end
end
- col.transpose(0, 4, 5, 1, 2, 3).reshape(bs * out_h * out_w, fn * fh * fw)
+ col.transpose(0, 4, 5, 2, 3, 1).reshape(bsize * out_w * out_h, fil_w * fil_h * ch)
end
-
- def col2im(col, img_shape, out_h, out_w, fh, fw, strides)
- bs, fn, ih, iw = img_shape
- col = col.reshape(bs, out_h, out_w, fn, fh, fw).transpose(0, 3, 4, 5, 1, 2)
- img = SFloat.zeros(bs, fn, ih, iw)
- (0...fh).each do |i|
- i_range = (i...(i + strides[0] * out_h)).step(strides[0]).to_a
- (0...fw).each do |j|
- j_range = (j...(j + strides[1] * out_w)).step(strides[1]).to_a
- img[true, true, i_range, j_range] += col[true, true, i, j, true, true]
+
+ def col2im(col, img_shape, out_w, out_h, fil_w, fil_h, strides)
+ bsize, img_w, img_h, ch = img_shape
+ col = col.reshape(bsize, out_w, out_h, fil_w, fil_h, ch).transpose(0, 5, 3, 4, 1, 2)
+ img = SFloat.zeros(bsize, ch, img_w, img_h)
+ (0...fil_h).each do |i|
+ i_range = (i...(i + strides[1] * out_h)).step(strides[1]).to_a
+ (0...fil_w).each do |j|
+ j_range = (j...(j + strides[0] * out_w)).step(strides[0]).to_a
+ img[true, true, j_range, i_range] += col[true, true, j, i, true, true]
end
end
- img
+ img.transpose(0, 2, 3, 1)
end
def padding(img, pad)
- bs, c, ih, iw = img.shape
- ih2 = ih + pad * 2
- iw2 = iw + pad * 2
- img2 = SFloat.zeros(bs, c, ih2, iw2)
- img2[true, true, pad...(ih + pad), pad...(iw + pad)] = img
+ bsize, img_w, img_h, ch = img.shape
+ img2 = SFloat.zeros(bsize, img_w + pad[0], img_h + pad[1], ch)
+ i_begin = pad[1] / 2
+ i_end = i_begin + img_h
+ j_begin = pad[0] / 2
+ j_end = j_begin + img_w
+ img2[true, j_begin...j_end, i_begin...i_end, true] = img
img2
end
def back_padding(img, pad)
- i_end = img.shape[2] - pad
- j_end = img.shape[3] - pad
- img[true, true, pad...i_end, pad...j_end]
+ i_begin = pad[1] / 2
+ i_end = img.shape[2] - (pad[1] / 2.0).round
+ j_begin = pad[0] / 2
+ j_end = img.shape[1] - (pad[0] / 2.0).round
+ img[true, j_begin...j_end, i_begin...i_end, true]
end
+
+ def out_size(prev_w, prev_h, fil_w, fil_h, strides)
+ out_w = (prev_w - fil_w) / strides[1] + 1
+ out_h = (prev_h - fil_h) / strides[0] + 1
+ [out_w, out_h]
+ end
end
class Conv2D < HasParamLayer
include Initializers
include Convert
- def initialize(num_filters, filter_height, filter_width,
+ def initialize(num_filters, filter_width, filter_height,
weight_initializer: nil,
bias_initializer: nil,
strides: [1, 1],
- padding: 0,
+ padding: false,
weight_decay: 0)
super()
@num_filters = num_filters
- @filter_height = filter_height
@filter_width = filter_width
+ @filter_height = filter_height
@weight_initializer = (weight_initializer || RandomNormal.new)
@bias_initializer = (bias_initializer || Zeros.new)
@strides = strides
@padding = padding
@weight_decay = weight_decay
end
def self.load_hash(hash)
- Conv2D.new(hash[:num_filters], hash[:filter_height], hash[:filter_width],
+ Conv2D.new(hash[:num_filters], hash[:filter_width], hash[:filter_height],
weight_initializer: Util.load_hash(hash[:weight_initializer]),
bias_initializer: Util.load_hash(hash[:bias_initializer]),
strides: hash[:strides],
padding: hash[:padding],
weight_decay: hash[:weight_decay])
end
-
- def init(model)
+
+ def build(model)
super
- prev_height, prev_width = prev_layer.shape[1], prev_layer.shape[2]
- @out_height = (prev_height + @padding * 2 - @filter_height) / @strides[0] + 1
- @out_width = (prev_width + @padding * 2 - @filter_width) / @strides[1] + 1
+ prev_width, prev_height = prev_layer.shape[0..1]
+ @out_width, @out_height = out_size(prev_width, prev_height, @filter_width, @filter_height, @strides)
+ if @padding
+ @pad = [prev_width - @out_width, prev_height - @out_height]
+ @out_width = prev_width
+ @out_height = prev_height
+ end
end
-
+
def forward(x)
- x = padding(x, 2) if @padding > 0
+ x = padding(x, @pad) if @padding
@x_shape = x.shape
- @col = im2col(x, @out_height, @out_width, @filter_height, @filter_width, @strides)
+ @col = im2col(x, @out_width, @out_height, @filter_width, @filter_height, @strides)
out = @col.dot(@params[:weight])
- out.reshape(@model.batch_size, @out_height, @out_width, out.shape[3]).transpose(0, 3, 1, 2)
+ out.reshape(x.shape[0], @out_width, @out_height, out.shape[3])
end
-
+
def backward(dout)
- dout = dout.transpose(0, 2, 3, 1)
dout = dout.reshape(dout.shape[0..2].reduce(:*), dout.shape[3])
@grads[:weight] = @col.transpose.dot(dout)
if @weight_decay > 0
dridge = @weight_decay * @params[:weight]
@grads[:weight] += dridge
end
@grads[:bias] = dout.sum(0)
dcol = dout.dot(@params[:weight].transpose)
- dx = col2im(dcol, @x_shape, @out_height, @out_width, @filter_height, @filter_width, @strides)
- @padding ? back_padding(dx, @padding) : dx
+ dx = col2im(dcol, @x_shape, @out_width, @out_height, @filter_width, @filter_height, @strides)
+ @padding ? back_padding(dx, @pad) : dx
end
-
+
def shape
- [@num_filters, @out_height, @out_width]
+ [@out_width, @out_height, @num_filters]
end
def to_hash
{
name: self.class.name,
num_filters: @num_filters,
- filter_height: @filter_height,
filter_width: @filter_width,
+ filter_height: @filter_height,
weight_initializer: @weight_initializer.to_hash,
bias_initializer: @bias_initializer.to_hash,
strides: @strides,
padding: @padding,
weight_decay: @weight_decay,
@@ -275,64 +299,67 @@
end
private
def init_params
- num_prev_filter = prev_layer.shape[0]
- @params[:weight] = SFloat.new(num_prev_filter * @filter_height * @filter_height, @num_filters)
+ num_prev_filter = prev_layer.shape[2]
+ @params[:weight] = SFloat.new(num_prev_filter * @filter_width * @filter_height, @num_filters)
@params[:bias] = SFloat.new(@num_filters)
@weight_initializer.init_param(self, :weight)
@bias_initializer.init_param(self, :bias)
end
end
class MaxPool2D < Layer
include Convert
- def initialize(pool_height, pool_width, strides: nil, padding: 0)
- @pool_height = pool_height
+ def initialize(pool_width, pool_height, strides: nil, padding: false)
@pool_width = pool_width
- @strides = strides ? strides : [@pool_height, @pool_width]
+ @pool_height = pool_height
+ @strides = strides ? strides : [@pool_width, @pool_height]
@padding = padding
- end
-
- def init(model)
+ end
+
+ def build(model)
super
- prev_height, prev_width = prev_layer.shape[1], prev_layer.shape[2]
- @num_channel = prev_layer.shape[0]
- @out_height = (prev_height + @padding * 2 - @pool_height) / @strides[0] + 1
- @out_width = (prev_width + @padding * 2 - @pool_width) / @strides[1] + 1
+ prev_width, prev_height = prev_layer.shape[0..1]
+ @num_channel = prev_layer.shape[2]
+ @out_width, @out_height = out_size(prev_width, prev_height, @pool_width, @pool_height, @strides)
+ if @padding
+ @pad = [prev_width - @out_width, prev_height - @out_height]
+ @out_width = prev_width
+ @out_height = prev_height
+ end
end
-
+
def forward(x)
- x = padding(x, 2) if @padding > 0
+ x = padding(x, @pad) if @padding
@x_shape = x.shape
- col = im2col(x, @out_height, @out_width, @pool_height, @pool_width, @strides)
- col = col.reshape(x.shape[0] * @out_height * @out_width * x.shape[1], @pool_height * @pool_width)
+ col = im2col(x, @out_width, @out_height, @pool_width, @pool_height, @strides)
+ col = col.reshape(x.shape[0] * @out_width * @out_height * x.shape[3], @pool_width * @pool_height)
@max_index = col.max_index(1)
- col.max(1).reshape(x.shape[0], @out_height, @out_width, x.shape[1]).transpose(0, 3, 1, 2)
+ col.max(1).reshape(x.shape[0], @out_width, @out_height, x.shape[3])#.transpose(0, 3, 1, 2)
end
-
+
def backward(dout)
- dout = dout.transpose(0, 2, 3, 1)
- pool_size = @pool_height * @pool_width
+ pool_size = @pool_width * @pool_height
dmax = SFloat.zeros(dout.size * pool_size)
dmax[@max_index] = dout.flatten
dcol = dmax.reshape(dout.shape[0..2].reduce(:*), dout.shape[3] * pool_size)
- dx = col2im(dcol, @x_shape, @out_height, @out_width, @pool_height, @pool_width, @strides)
- @padding ? back_padding(dx, @padding) : dx
+ dx = col2im(dcol, @x_shape, @out_width, @out_height, @pool_width, @pool_height, @strides)
+ @padding ? back_padding(dx, @pad) : dx
end
-
+
def shape
- [@num_channel, @out_height, @out_width]
+ [@out_width, @out_height, @num_channel]
end
def to_hash
{
name: self.class.name,
- pool_height: @pool_height,
pool_width: @pool_width,
+ pool_height: @pool_height,
strides: @strides,
padding: @padding,
}
end
end