Sha256: 3650ef2c78c85db2fd43a651e8ffba012b7429ad3924a3f1ee8f6d51f713c534

Contents?: true

Size: 1.88 KB

Versions: 10

Compression:

Stored size: 1.88 KB

Contents

 // Element-wise minimum of two buffers that are indexed identically.
 // Each work-item reads A and B at the flat offset (row * N + col) and writes
 // the smaller of the two values to the same offset in C.
 // M and switch_op are part of the signature but are not read by this kernel.
% c_dtype = dtype_to_c_type(dtype)
 __kernel void min_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global const <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
    // Work-item coordinates: dimension 0 is used as the row, dimension 1 as the column.
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    // A, B and C all share one flat offset with a row stride of N.
    const int offset = row * N + col;
    const <%= c_dtype %> lhs = A[offset];
    const <%= c_dtype %> rhs = B[offset];

    C[offset] = min(lhs, rhs);
}

 // Minimum of every element of A against a single scalar.
 // Only B[0] is read; each work-item writes min(A[offset], B[0]) to C at the
 // flat offset (row * N + col).
 // M and switch_op are part of the signature but are not read by this kernel.
 __kernel void min_c_<%= dtype %>_<%= dtype %>(const int M, const int N, const int switch_op, __global const <%= c_dtype %> *A, __global const <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
    // Work-item coordinates: dimension 0 is used as the row, dimension 1 as the column.
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    // A and C share one flat offset with a row stride of N.
    const int offset = row * N + col;
    const <%= c_dtype %> element = A[offset];
    const <%= c_dtype %> scalar = B[0];

    C[offset] = min(element, scalar);
}

 // Element-wise minimum with B broadcast over A.
 // A and C are addressed with a row stride of N; B is addressed as an
 // M2 x N2 buffer with a row stride of N2. A row/column index that reaches or
 // exceeds B's extent wraps around via modulo, so B repeats across the grid.
 // M and switch_op are part of the signature but are not read by this kernel.
 __kernel void min_b_<%= dtype %>_<%= dtype %>(const int M, const int N, const int M2, const int N2, const int switch_op, __global const <%= c_dtype %> *A, __global const <%= c_dtype %> *B, __global <%= c_dtype %> *C) {
    // Work-item coordinates: dimension 0 is used as the row, dimension 1 as the column.
    const int row = get_global_id(0);
    const int col = get_global_id(1);

    // Wrap the coordinates into B's extent only when they fall outside it.
    const int b_row = (row >= M2) ? (row % M2) : row;
    const int b_col = (col >= N2) ? (col % N2) : col;

    // Flat offsets: A and C use stride N, B uses its own stride N2.
    const int offset = row * N + col;
    const int b_offset = b_row * N2 + b_col;

    const <%= c_dtype %> lhs = A[offset];
    const <%= c_dtype %> rhs = B[b_offset];

    C[offset] = min(lhs, rhs);
}

Version data entries

10 entries across 10 versions and 1 rubygem

Version Path
tensor_stream-opencl-0.3.2 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.3.1 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.3.0 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.2.10 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.2.9 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.2.8 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.2.6 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.2.5 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.2.4 lib/tensor_stream/opencl/kernels/min.cl
tensor_stream-opencl-0.2.3 lib/tensor_stream/opencl/kernels/min.cl