ext/numo/narray/ndloop.c in numo-narray-0.9.0.7 vs ext/numo/narray/ndloop.c in numo-narray-0.9.0.8
- old
+ new
@@ -41,11 +41,11 @@
typedef struct NA_MD_LOOP {
int narg;
int nin;
int ndim; // n of total dimention
unsigned int copy_flag; // set i-th bit if i-th arg is cast
- size_t *n_ptr; // memory for n
+ void *ptr; // memory for n
na_loop_iter_t *iter_ptr; // memory for iter
size_t *n; // n of elements for each dim
na_loop_t user; // loop in user function
na_loop_xargs_t *xargs; // extra data for each arg
int writeback; // write back result to i-th arg
@@ -335,10 +335,13 @@
{
int i,j;
int narg;
int max_nd;
+ char *buf;
+ size_t n1, n2, n3, n4, n5;
+
long args_len;
na_loop_iter_t *iter;
int trans_dim;
@@ -362,36 +365,41 @@
lp->user.err_type = Qfalse;
lp->loop_opt = Qnil;
lp->writeback = -1;
lp->init_aidx = -1;
- lp->n = NULL;
- lp->n_ptr = NULL;
- lp->xargs = NULL;
- lp->user.args = NULL;
+ lp->ptr = NULL;
lp->user.n = NULL;
- lp->iter_ptr = NULL;
- lp->trans_map = NULL;
ndloop_find_max_dimension(lp, nf, args);
narg = lp->nin + nf->nout;
max_nd = lp->ndim + lp->user.ndim;
- lp->n = lp->n_ptr = ALLOC_N(size_t, max_nd+1);
- lp->xargs = ALLOC_N(na_loop_xargs_t, narg);
- lp->user.args = ALLOC_N(na_loop_args_t, narg);
- iter = ALLOC_N(na_loop_iter_t, narg*(max_nd+1));
- lp->iter_ptr = iter;
+ n1 = sizeof(size_t)*(max_nd+1);
+ n2 = sizeof(na_loop_xargs_t)*narg;
+ n2 = ((n2-1)/8+1)*8;
+ n3 = sizeof(na_loop_args_t)*narg;
+ n3 = ((n3-1)/8+1)*8;
+ n4 = sizeof(na_loop_iter_t)*narg*(max_nd+1);
+ n4 = ((n4-1)/8+1)*8;
+ n5 = sizeof(int)*(max_nd+1);
+ lp->ptr = buf = (char*)xmalloc(n1+n2+n3+n4+n5);
+ lp->n = (size_t*)buf; buf+=n1;
+ lp->xargs = (na_loop_xargs_t*)buf; buf+=n2;
+ lp->user.args = (na_loop_args_t*)buf; buf+=n3;
+ lp->iter_ptr = iter = (na_loop_iter_t*)buf; buf+=n4;
+ lp->trans_map = (int*)buf;
+
for (j=0; j<narg; j++) {
LARG(lp,j).value = Qnil;
LARG(lp,j).iter = NULL;
LARG(lp,j).shape = NULL;
LARG(lp,j).ndim = 0;
lp->xargs[j].iter = &(iter[(max_nd+1)*j]);
lp->xargs[j].bufcp = NULL;
- lp->xargs[j].flag = (j<nf->nin) ? NDL_READ : NDL_WRITE;
+ lp->xargs[j].flag = (j<lp->nin) ? NDL_READ : NDL_WRITE;
lp->xargs[j].free_user_iter = 0;
}
for (i=0; i<=max_nd; i++) {
lp->n[i] = 1;
@@ -404,11 +412,10 @@
// transpose reduce-dimensions to last dimensions
// array loop
// [*,+,*,+,*] => [*,*,*,+,+]
// trans_map=[0,3,1,4,2] <= [0,1,2,3,4]
- lp->trans_map = ALLOC_N(int, max_nd+1);
if (NDF_TEST(nf,NDF_FLAT_REDUCE) && RTEST(lp->reduce)) {
trans_dim = 0;
for (i=0; i<max_nd; i++) {
if (na_test_reduce(lp->reduce, i)) {
lp->trans_map[i] = -1;
@@ -448,11 +455,10 @@
v = LARG(lp,j).value;
if (IsNArray(v)) {
na_release_lock(v);
}
}
- //xfree(lp);
for (j=0; j<lp->narg; j++) {
//printf("lp->xargs[%d].bufcp=%lx\n",j,(size_t)(lp->xargs[j].bufcp));
if (lp->xargs[j].bufcp) {
xfree(lp->xargs[j].bufcp->buf_iter);
xfree(lp->xargs[j].bufcp->buf_ptr);
@@ -461,15 +467,10 @@
if (lp->xargs[j].free_user_iter) {
xfree(LARG(lp,j).iter);
}
}
}
- if (lp->trans_map) xfree(lp->trans_map);
- xfree(lp->xargs);
- xfree(lp->iter_ptr);
- xfree(lp->user.args);
- xfree(lp->n_ptr);
- //rb_gc_force_recycle(vlp);
+ xfree(lp->ptr);
return Qnil;
}
/*