Sha256: f3b548575ee3e8c5998068606ba4d1971c17ad05b2d20c8e06648f2c5ea4a04d

Contents?: true

Size: 787 Bytes

Versions: 10

Compression:

Stored size: 787 Bytes

Contents

% # ERB template for an OpenCL kernel. dtype, index, w and n are read from the
% # rendering context (not defined in this file); dtype_to_c_type maps dtype to
% # the C element type used for the buffers and the accumulator.
% c_dtype = dtype_to_c_type(dtype)
// Each work-item adds up <%= n %> consecutive elements of A, starting at
// (global id + <%= index %>) * <%= w %>, and stores that sum divided by <%= n %> in C.
__kernel void mean_<%= dtype %>(__global const <%= c_dtype %> *A, __global <%= c_dtype %> *C) {
    // One output element per work-item along dimension 0.
    const int gid = get_global_id(0);
    int pos = (gid + <%= index %>) * <%= w %>;
    <%= c_dtype %> total = 0;
% if n > 4
    // Groups of four elements per loop iteration.
    for (int chunk = 0; chunk < <%= n / 4 %>; chunk++) {
        total += <%= (0...4).map { |k| "A[pos + #{k}]" }.join(' + ') %>;
        pos += 4;
    }
%   # Elements left over after the 4-wide groups (emits nothing when n is a multiple of 4).
%   (n % 4).times do |k|
    total += A[pos + <%= k %>];
%   end
% else
%   # Four or fewer elements: emit one addition per element, no loop.
%   n.times do |k|
    total += A[pos + <%= k %>];
%   end
% end
    C[gid] = total / <%= n %>;
}

Version data entries

10 entries across 10 versions & 1 rubygem

Version Path
tensor_stream-opencl-0.3.2 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.3.1 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.3.0 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.2.10 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.2.9 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.2.8 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.2.6 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.2.5 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.2.4 lib/tensor_stream/opencl/kernels/mean.cl
tensor_stream-opencl-0.2.3 lib/tensor_stream/opencl/kernels/mean.cl