Sha256: a441c31bf15d65ca88a71ad6706d6a939e31cff7b6eb52ff2e05151dad416057

Contents?: true

Size: 1.01 KB

Versions: 16

Compression:

Stored size: 1.01 KB

Contents

// First naive implementation
% c_dtype = dtype_to_c_type(dtype)
/*
 * Naive general matrix multiply: each work-item computes one element of C
 * as the dot product over K of a row of op(A) and a column of op(B).
 *
 * M, N         - C is indexed as an M x N row-major buffer (C[row*N + col]).
 * K            - length of the shared inner dimension (loop trip count).
 * A_transpose  - zero:     element (row, k) of A is read from A[row*K + k].
 *                non-zero: element (row, k) of A is read from A[k*M + row].
 * B_transpose  - zero:     element (k, col) of B is read from B[k*N + col].
 *                non-zero: element (k, col) of B is read from B[col*K + k].
 * A, B         - input buffers, only read.
 * C            - output buffer, only written.
 *
 * Global id 0 selects the row of C and global id 1 selects the column.
 */
__kernel void gemm_<%= dtype %>(const int M, const int N, const int K,
                      const int A_transpose,
                      const int B_transpose,
                      const __global <%= c_dtype %>* A,
                      const __global <%= c_dtype %>* B,
                      __global <%= c_dtype %>* C) {

    // Position of the output element handled by this work-item
    const int globalRow = get_global_id(0); // Row ID of C (0..M)
    const int globalCol = get_global_id(1); // Col ID of C (0..N)

    // Work-items outside the M x N output (e.g. when the global size was
    // rounded up) must not touch memory: they would index past the buffers.
    if (globalRow >= M || globalCol >= N) {
        return;
    }

    // The transpose flags are constant for the whole launch, so resolve the
    // layout once here instead of re-testing the flags on every iteration.
    // Each operand is then walked as base + k*stride.
    const int a_base   = A_transpose ? globalRow   : globalRow*K;
    const int a_stride = A_transpose ? M           : 1;
    const int b_base   = B_transpose ? globalCol*K : globalCol;
    const int b_stride = B_transpose ? 1           : N;

    // Accumulate the dot product over the inner dimension. The initialiser
    // is a plain 0 so it is exact for whichever element type is substituted.
    <%= c_dtype %> acc = 0;
    for (int k=0; k<K; k++) {
        acc += A[a_base + k*a_stride] * B[b_base + k*b_stride];
    }

    // Store the result
    C[globalRow*N + globalCol] = acc;
}

Version data entries

16 entries across 16 versions & 2 rubygems

Version Path
tensor_stream-opencl-0.2.2 lib/tensor_stream/opencl/kernels/gemm.cl
tensor_stream-opencl-0.2.1 lib/tensor_stream/opencl/kernels/gemm.cl
tensor_stream-opencl-0.2.0 lib/tensor_stream/opencl/kernels/gemm.cl
tensor_stream-opencl-0.1.3 lib/tensor_stream/opencl/kernels/gemm.cl
tensor_stream-opencl-0.1.2 lib/tensor_stream/opencl/kernels/gemm.cl
tensor_stream-opencl-0.1.1 lib/tensor_stream/opencl/kernels/gemm.cl
tensor_stream-opencl-0.1.0 lib/tensor_stream/opencl/kernels/gemm.cl
tensor_stream-0.8.1 lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
tensor_stream-0.8.0 lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
tensor_stream-0.7.0 lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
tensor_stream-0.6.1 lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
tensor_stream-0.6.0 lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
tensor_stream-0.5.1 lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
tensor_stream-0.5.0 lib/tensor_stream/evaluator/opencl/kernels/gemm.cl
tensor_stream-0.4.1 lib/tensor_stream/evaluator/kernels/gemm.cl
tensor_stream-0.4.0 lib/tensor_stream/evaluator/kernels/gemm.cl