ext/cumo/narray/gen/tmpl_bit/bit_reduce.c in cumo-0.1.0 vs ext/cumo/narray/gen/tmpl_bit/bit_reduce.c in cumo-0.1.1
- old
+ new
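
Both versions of this template read and write single bits through the LOAD_BIT / STORE_BIT macros (CUMO_LOAD_BIT / CUMO_STORE_BIT as of 0.1.1), applied to arrays of BIT_DIGIT (CUMO_BIT_DIGIT) words; the bit position is built from the offsets p1/p2 plus either the index arrays *idx1/*idx2 or the strides s1/s2. As orientation for reading the hunks below, here is a minimal, self-contained sketch of that bit-addressing convention as it exists in numo-narray, from which cumo is derived. The uint32_t typedef, CUMO_NB, the macro bodies, and main() are illustrative stand-ins for this sketch, not cumo's actual definitions.

    /* Sketch only: assumes cumo keeps numo-narray's packing of bits into digit words. */
    #include <stdio.h>
    #include <stdint.h>

    typedef uint32_t CUMO_BIT_DIGIT;              /* stand-in for cumo's real typedef */
    #define CUMO_NB (sizeof(CUMO_BIT_DIGIT) * 8)  /* bits per digit word */

    /* Read the bit at absolute bit position pos of digit array adr into val. */
    #define CUMO_LOAD_BIT(adr, pos, val)                          \
        do {                                                      \
            size_t dig_ = (pos) / CUMO_NB;                        \
            int    bit_ = (int)((pos) % CUMO_NB);                 \
            (val) = ((adr)[dig_] >> bit_) & 1u;                   \
        } while (0)

    /* Write val (0 or 1) to the bit at absolute bit position pos of adr. */
    #define CUMO_STORE_BIT(adr, pos, val)                         \
        do {                                                      \
            size_t dig_ = (pos) / CUMO_NB;                        \
            int    bit_ = (int)((pos) % CUMO_NB);                 \
            if (val) (adr)[dig_] |=  ((CUMO_BIT_DIGIT)1 << bit_); \
            else     (adr)[dig_] &= ~((CUMO_BIT_DIGIT)1 << bit_); \
        } while (0)

    int main(void)
    {
        CUMO_BIT_DIGIT a[2] = {0, 0};     /* 64 bits of storage */
        CUMO_BIT_DIGIT x = 0;

        CUMO_STORE_BIT(a, (size_t)37, 1); /* word 1, bit 5 */
        CUMO_LOAD_BIT(a, (size_t)37, x);
        printf("bit 37 = %u\n", (unsigned)x);  /* prints: bit 37 = 1 */
        return 0;
    }

With that convention in mind, the iterator below is a short-circuiting reduction: it only stores into the output bit when the input bit differs from <%=init_bit%>, which is how the generated any?/all?-style predicates can return early.
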
@@ -1,93 +1,93 @@
static void
-<%=c_iter%>(na_loop_t *const lp)
+<%=c_iter%>(cumo_na_loop_t *const lp)
{
size_t i;
- BIT_DIGIT *a1, *a2;
+ CUMO_BIT_DIGIT *a1, *a2;
size_t p1, p2;
ssize_t s1, s2;
size_t *idx1, *idx2;
- BIT_DIGIT x=0, y=0;
+ CUMO_BIT_DIGIT x=0, y=0;
// TODO(sonots): CUDA kernelize
- SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
+ CUMO_SHOW_SYNCHRONIZE_FIXME_WARNING_ONCE("<%=name%>", "<%=type_name%>");
cumo_cuda_runtime_check_status(cudaDeviceSynchronize());
- INIT_COUNTER(lp, i);
- INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
- INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
+ CUMO_INIT_COUNTER(lp, i);
+ CUMO_INIT_PTR_BIT_IDX(lp, 0, a1, p1, s1, idx1);
+ CUMO_INIT_PTR_BIT_IDX(lp, 1, a2, p2, s2, idx2);
if (idx2) {
if (idx1) {
for (; i--;) {
- LOAD_BIT(a2, p2+*idx2, y);
+ CUMO_LOAD_BIT(a2, p2+*idx2, y);
if (y == <%=init_bit%>) {
- LOAD_BIT(a1, p1+*idx1, x);
+ CUMO_LOAD_BIT(a1, p1+*idx1, x);
if (x != <%=init_bit%>) {
- STORE_BIT(a2, p2+*idx2, x);
+ CUMO_STORE_BIT(a2, p2+*idx2, x);
}
}
idx1++;
idx2++;
}
} else {
for (; i--;) {
- LOAD_BIT(a2, p2+*idx2, y);
+ CUMO_LOAD_BIT(a2, p2+*idx2, y);
if (y == <%=init_bit%>) {
- LOAD_BIT(a1, p1, x);
+ CUMO_LOAD_BIT(a1, p1, x);
if (x != <%=init_bit%>) {
- STORE_BIT(a2, p2+*idx2, x);
+ CUMO_STORE_BIT(a2, p2+*idx2, x);
}
}
p1 += s1;
idx2++;
}
}
} else if (s2) {
if (idx1) {
for (; i--;) {
- LOAD_BIT(a2, p2, y);
+ CUMO_LOAD_BIT(a2, p2, y);
if (y == <%=init_bit%>) {
- LOAD_BIT(a1, p1+*idx1, x);
+ CUMO_LOAD_BIT(a1, p1+*idx1, x);
if (x != <%=init_bit%>) {
- STORE_BIT(a2, p2, x);
+ CUMO_STORE_BIT(a2, p2, x);
}
}
idx1++;
p2 += s2;
}
} else {
for (; i--;) {
- LOAD_BIT(a2, p2, y);
+ CUMO_LOAD_BIT(a2, p2, y);
if (y == <%=init_bit%>) {
- LOAD_BIT(a1, p1, x);
+ CUMO_LOAD_BIT(a1, p1, x);
if (x != <%=init_bit%>) {
- STORE_BIT(a2, p2, x);
+ CUMO_STORE_BIT(a2, p2, x);
}
}
p1 += s1;
p2 += s2;
}
}
} else {
- LOAD_BIT(a2, p2, x);
+ CUMO_LOAD_BIT(a2, p2, x);
if (x != <%=init_bit%>) {
return;
}
if (idx1) {
for (; i--;) {
- LOAD_BIT(a1, p1+*idx1, y);
+ CUMO_LOAD_BIT(a1, p1+*idx1, y);
if (y != <%=init_bit%>) {
- STORE_BIT(a2, p2, y);
+ CUMO_STORE_BIT(a2, p2, y);
return;
}
idx1++;
}
} else {
for (; i--;) {
- LOAD_BIT(a1, p1, y);
+ CUMO_LOAD_BIT(a1, p1, y);
if (y != <%=init_bit%>) {
- STORE_BIT(a2, p2, y);
+ CUMO_STORE_BIT(a2, p2, y);
return;
}
p1 += s1;
}
}
@@ -109,15 +109,15 @@
*/
static VALUE
<%=c_func(-1)%>(int argc, VALUE *argv, VALUE self)
{
VALUE v, reduce;
- ndfunc_arg_in_t ain[3] = {{cT,0},{sym_reduce,0},{sym_init,0}};
- ndfunc_arg_out_t aout[1] = {{cumo_cBit,0}};
- ndfunc_t ndf = {<%=c_iter%>, FULL_LOOP_NIP, 3,1, ain,aout};
+ cumo_ndfunc_arg_in_t ain[3] = {{cT,0},{cumo_sym_reduce,0},{cumo_sym_init,0}};
+ cumo_ndfunc_arg_out_t aout[1] = {{cumo_cBit,0}};
+ cumo_ndfunc_t ndf = {<%=c_iter%>, CUMO_FULL_LOOP_NIP, 3,1, ain,aout};
- reduce = na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
- v = na_ndloop(&ndf, 3, self, reduce, INT2FIX(<%=init_bit%>));
+ reduce = cumo_na_reduce_dimension(argc, argv, 1, &self, &ndf, 0);
+ v = cumo_na_ndloop(&ndf, 3, self, reduce, INT2FIX(<%=init_bit%>));
if (argc > 0) {
return v;
}
v = <%=find_tmpl("extract").c_func%>(v);
switch (v) {