% source_ctype = dtype_to_c_type(source_dt)
% target_ctype = dtype_to_c_type(target_dt)

/*
 * cast: element-wise copy of A into C, converting each element from the
 * source element type to the target element type via assignment.
 *
 * Both buffers are indexed as row * N + col, i.e. as row-major matrices
 * with N columns. One work-item handles one element:
 *   dimension 0 of the NDRange selects the row    (valid range 0..M-1)
 *   dimension 1 of the NDRange selects the column (valid range 0..N-1)
 *
 * M  number of rows
 * N  number of columns
 * A  input buffer (read-only)
 * C  output buffer
 */
__kernel void cast(const int M, const int N,
                   __global const <%= source_ctype %> *A,
                   __global <%= target_ctype %> *C) {
    // Position of the element handled by this work-item.
    const int globalRow = get_global_id(0); // Row ID of C (0..M)
    const int globalCol = get_global_id(1); // Col ID of C (0..N)

    // Work-items outside the M x N extent must not touch memory; this can
    // happen when the host launches a global work size larger than the
    // matrix (e.g. rounded up to a multiple of the work-group size).
    if (globalRow >= M || globalCol >= N) {
        return;
    }

    // Flat offset shared by the source and destination buffers.
    const int index = globalRow * N + globalCol;

    // The assignment performs the implicit source -> target type conversion.
    C[index] = A[index];
}