#ifdef CUDNN_FOUND <% cudnn_dtype = case type_name when 'sfloat' 'CUDNN_DATA_FLOAT' when 'dfloat' 'CUDNN_DATA_DOUBLE' else # CUDNN_DATA_HALF raise 'not supported' end %> // cover_all=true is not supported with CUDNN // x.pooling_forward(mode, kernel_size, stride: 1, pad: 0, y: nil) //CUDNN_POOLING_MAX //CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING //CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING //CUDNN_POOLING_MAX_DETERMINISTIC static VALUE <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self) { cudnnDataType_t cudnn_dtype = <%= cudnn_dtype %>; cudnnStatus_t status = 0; cudnnHandle_t handle = 0; dtype alpha = 1; dtype beta = 0; VALUE x=self, mode, kernel_size, stride, pad, y; VALUE kw_hash = Qnil; ID kw_table[4] = {rb_intern("stride"), rb_intern("pad"), rb_intern("y")}; VALUE opts[4] = {Qundef, Qundef, Qundef}; size_t ndim; cumo_narray_t *nx; size_t *x_shape; VALUE x_cont; cudnnTensorDescriptor_t x_desc = 0; cudnnTensorDescriptor_t y_desc = 0; cudnnPoolingDescriptor_t pool_desc = 0; char *x_cont_ptr, *y_ptr; cudnnPoolingMode_t int_mode; int int_kernel_size[CUMO_NA_MAX_DIMENSION]; int int_stride[CUMO_NA_MAX_DIMENSION]; int int_pad[CUMO_NA_MAX_DIMENSION]; rb_scan_args(argc, argv, "2:", &mode, &kernel_size, &kw_hash); rb_get_kwargs(kw_hash, kw_table, 0, 3, opts); stride = cumo_cuda_cudnn_option_value(opts[0], Qnil); pad = cumo_cuda_cudnn_option_value(opts[1], Qnil); y = cumo_cuda_cudnn_option_value(opts[2], Qnil); CumoGetNArray(x, nx); CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(x, cT); if (nx->ndim - 2 < 2) { rb_raise(cumo_na_eShapeError, "CUDNN pooling requires number of spatial " "dimensions to be greater than or equal to 2, but %d", nx->ndim - 2); } ndim = nx->ndim - 2; // Number of spatial dimensions // required parameter int_mode = (cudnnPoolingMode_t)NUM2INT(mode); cumo_cuda_cudnn_get_int_ary(int_kernel_size, kernel_size, ndim, 0); // default to kernel_size if (stride == Qnil) { memcpy(int_stride, int_kernel_size, sizeof(int) * ndim); } else { cumo_cuda_cudnn_get_int_ary(int_stride, stride, ndim, 0); } // default to 0 cumo_cuda_cudnn_get_int_ary(int_pad, pad, ndim, 0); x_shape = nx->shape; if (y != Qnil) { CUMO_CUDA_CUDNN_CHECK_NARRAY_TYPE(y, cT); } else { size_t *y_shape = ALLOCA_N(size_t, ndim + 2); // out_shape = (batch_size, num_channels, out_1, out_2, ..., out_N) y_shape[0] = x_shape[0]; y_shape[1] = x_shape[1]; for (size_t i = 0; i < ndim; ++i) { y_shape[i + 2] = cumo_cuda_cudnn_GetConvOutDim( x_shape[i + 2], int_kernel_size[i], int_stride[i], int_pad[i]); } y = cumo_na_new(cT, ndim + 2, y_shape); } x_cont = cumo_na_as_contiguous_array(x); x_cont_ptr = cumo_na_get_offset_pointer_for_read(x_cont); y_ptr = cumo_na_get_offset_pointer_for_write(y); status = cumo_cuda_cudnn_CreateTensorDescriptor(&x_desc, x_cont, cudnn_dtype); if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR; status = cumo_cuda_cudnn_CreateTensorDescriptor(&y_desc, y, cudnn_dtype); if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR; status = cumo_cuda_cudnn_CreatePoolingDescriptor(&pool_desc, int_mode, ndim, int_kernel_size, int_stride, int_pad); if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR; handle = cumo_cuda_cudnn_handle(); status = cudnnPoolingForward( handle, pool_desc, (void*)&alpha, x_desc, (void*)x_cont_ptr, (void*)&beta, y_desc, (void*)y_ptr); if (status != CUDNN_STATUS_SUCCESS) goto POOLING_ERROR; POOLING_ERROR: if (x_desc) cudnnDestroyTensorDescriptor(x_desc); if (y_desc) cudnnDestroyTensorDescriptor(y_desc); if (pool_desc) cudnnDestroyPoolingDescriptor(pool_desc); cumo_cuda_cudnn_check_status(status); return y; } #else // CUDNN_FOUND VALUE cumo_cuda_eCUDNNError; static VALUE <%=c_func(-1)%>(int argc, VALUE argv[], VALUE self) { rb_raise(cumo_cuda_eCUDNNError, "cuDNN is not available"); } #endif // CUDNN_FOUND