/* STARTDEF void bones_prekernel__0(, ); ENDDEF */ // Start of the kernel __global__ void bones_kernel__0(, ) { const int bones_global_id = blockIdx.x*blockDim.x + threadIdx.x; int bones_local_id = threadIdx.x; if (bones_global_id < ) { // Calculate the local and global ID(s) based on the thread id int bones_local_id_0 = bones_local_id; // Load the input data into local memory __shared__ bones_local_memory_[512+]; bones_local_id_0 = bones_local_id_0-(); bones_local_memory_[bones_local_id_0] = [bones_global_id_0]; // Load the left border into local memory if (threadIdx.x < -()) { bones_local_memory_[bones_local_id_0+] = [bones_global_id_0+]; } // Load the right border into local memory if ((threadIdx.x >= 512-) || (bones_global_id_0 >= -)) { bones_local_memory_[bones_local_id_0+] = [bones_global_id_0+]; } // Synchronize all the threads in a threadblock __syncthreads(); // Perform the main computation } } // Function to start the kernel extern "C" void bones_prekernel__0(, ) { dim3 bones_threads(512); dim3 bones_grid(DIV_CEIL(,512)); bones_kernel__0<<< bones_grid, bones_threads >>>(, ); }