Sha256: edcc4c92e4fe10f3be8716149c5db19e166685c4201dea15ba554cba0720456c
Contents?: true
Size: 1.5 KB
Versions: 1
Compression:
Stored size: 1.5 KB
Contents
% ctype = dtype_to_c_type(dtype)
/*
 * conv2d -- direct 2-D convolution, one work-item per (batch, output row,
 * output column). Each work-item loops over every output channel and writes
 * one value per channel.
 *
 * Flat buffer indexing used below:
 *   images : [batch][row][col][in_channel], row/col extents = height/width
 *   filter : [filter_row][filter_col][in_channel][out_channel]
 *   output : [batch][row][col][out_channel], with row/col extents of
 *            height / stride[0] and width / stride[1] (integer division)
 *
 * Filter taps that would read past the bottom or right edge of the image are
 * skipped, so they contribute nothing to the sum.
 *
 * height, width : spatial extents of one input image.
 * images        : input buffer (read only).
 * filter        : filter weights (read only).
 * output        : result buffer (written).
 *
 * NOTE(review): the global ids are not range-checked here; this presumably
 * relies on the host launching exactly batch x out_rows x out_cols
 * work-items -- confirm against the caller.
 */
__kernel void conv2d(const int height, const int width, __global const <%= ctype %> *images, __global const <%= ctype %> *filter, __global <%= ctype %> *output) {
    // Values baked into the source when the template is rendered.
    const int stride_h = <%= stride[0] %>;
    const int stride_w = <%= stride[1] %>;
    const int in_ch = <%= ch %>;
    const int n_out = <%= out_ch %>;
    const int filter_h = <%= fh %>;
    const int filter_w = <%= fw %>;
    const int filter_row_step = <%= fw * ch * out_ch %>;
    const int filter_col_step = <%= ch * out_ch %>;

    // Position of this work-item in the launch grid.
    const int n = get_global_id(0);
    const int out_row = get_global_id(1);
    const int out_col = get_global_id(2);

    // Top-left corner of the input window covered by this output element.
    const int top = out_row * stride_h;
    const int left = out_col * stride_w;

    // Offsets into the input buffer.
    const int row_pitch = width * in_ch;
    const int image_base = n * height * width * in_ch;

    // Offset of this element's first output channel in the output buffer.
    const int out_rows = height / stride_h;
    const int out_cols = width / stride_w;
    const int out_base = n * out_rows * out_cols * n_out + out_row * out_cols * n_out + out_col * n_out;

    for (int oc = 0; oc < n_out; ++oc) {
        <%= ctype %> acc = 0;

        for (int c = 0; c < in_ch; ++c) {
            for (int fy = 0; fy < filter_h; ++fy) {
                const int in_row = top + fy;
                if (in_row >= height) {
                    continue; // window row lies below the image
                }

                const int pixel_row = image_base + in_row * row_pitch;
                const int weight_row = fy * filter_row_step;

                for (int fx = 0; fx < filter_w; ++fx) {
                    const int in_col = left + fx;
                    if (in_col >= width) {
                        continue; // window column lies right of the image
                    }

                    acc += images[pixel_row + in_col * in_ch + c] * filter[weight_row + fx * filter_col_step + c * n_out + oc];
                }
            }
        }

        output[out_base + oc] = acc;
    }
}
Version data entries
1 entry across 1 version & 1 rubygem
Version | Path |
---|---|
tensor_stream-opencl-0.2.3 | lib/tensor_stream/opencl/kernels/conv2d.cl |