Sha256: d0afb2fd6a14e348db2c6c9add970babd9282fea01cd018b77faeadd36540af4

Contents?: true

Size: 960 Bytes

Versions: 20

Compression:

Stored size: 960 Bytes

Contents

// Straightforward (unoptimized) row-wise kernel.
// Each work-item owns one row of N consecutive elements and, for every
// element i of that row, writes
//     C[i] = (exp(A[i] - row_max) / exp_sum) * G[i] - L[i]
// where row_max is the largest A value in the row and exp_sum is the sum of
// exp(A[j] - row_max) over the row.
//
//   N - number of elements per row
//   A - input values, read row by row
//   L - values subtracted from the scaled result (presumably labels -- confirm with caller)
//   G - per-element multiplier (presumably the incoming gradient -- confirm with caller)
//   C - output buffer, same row layout as A
% c_dtype = dtype_to_c_type(dtype)
__kernel void softmax_cross_grad_<%= dtype %>(const int N,
                      const __global <%= c_dtype %>* A,
                      const __global <%= c_dtype %>* L,
                      const __global <%= c_dtype %>* G,
                      __global <%= c_dtype %>* C) {

    // The dimension-0 global id selects which row this work-item processes.
    const int row = get_global_id(0);
    // Flat index of the first element of that row.
    const int base = row * N;

    <%= c_dtype %> row_max = <%= min_value_for(dtype) %>;
    <%= c_dtype %> exp_sum = 0.0f;

    // Pass 1: find the largest value in the row.
    for (int col = 0; col < N; col++) {
      if (A[base + col] > row_max) {
        row_max = A[base + col];
      }
    }

    // Pass 2: accumulate the exponentials of the max-shifted values.
    // Shifting by row_max keeps every exp() argument at or below zero.
    for (int col = 0; col < N; col++) {
      exp_sum += exp(A[base + col] - row_max);
    }

    // Pass 3: normalize, scale by G, subtract L, and store into C.
    for (int col = 0; col < N; col++) {
      const int idx = base + col;
      C[idx] = ((exp(A[idx] - row_max)/exp_sum)  *  G[idx] - L[idx]);
    }
}

Version data entries

20 entries across 20 versions & 2 rubygems

Version Path
tensor_stream-opencl-0.3.2 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.3.1 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.3.0 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.10 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.9 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.8 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.6 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.5 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.4 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.3 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.2 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.1 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.2.0 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.1.3 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.1.2 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.1.1 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-opencl-0.1.0 lib/tensor_stream/opencl/kernels/softmax_cross_grad.cl
tensor_stream-0.8.1 lib/tensor_stream/evaluator/opencl/kernels/softmax_cross_grad.cl
tensor_stream-0.8.0 lib/tensor_stream/evaluator/opencl/kernels/softmax_cross_grad.cl
tensor_stream-0.7.0 lib/tensor_stream/evaluator/opencl/kernels/softmax_cross_grad.cl