vendor/scs/linsys/gpu/indirect/private.c in scs-0.4.0 vs vendor/scs/linsys/gpu/indirect/private.c in scs-0.4.1

- old
+ new

@@ -19,24 +19,14 @@
   nrm = ABS(nrm);
 #endif
   return nrm;
 }
 
-const char *SCS(get_lin_sys_method)() {
+const char *scs_get_lin_sys_method() {
   return "sparse-indirect GPU";
 }
 
-/*
-char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
-  char *str = (char *)scs_malloc(sizeof(char) * 128);
-  sprintf(str, "lin-sys: avg cg its: %2.2f\n",
-          (scs_float)p->tot_cg_its / (info->iter + 1));
-  p->tot_cg_its = 0;
-  return str;
-}
-*/
-
 /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
 /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
 static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
   scs_int i, k;
   const ScsMatrix *A = p->A;
@@ -74,11 +64,11 @@
   scs_printf("finished getting pre-conditioner\n");
 #endif
 }
 
 /* no need to update anything in this case */
-void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
+void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
   scs_int i;
 
   /* R_x to gpu */
   cudaMemcpy(p->r_x_gpu, diag_r, p->n * sizeof(scs_float),
              cudaMemcpyHostToDevice);
@@ -91,11 +81,11 @@
 
   /* set preconditioner M on gpu */
   set_preconditioner(p, diag_r);
 }
 
-void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
+void scs_free_lin_sys_work(ScsLinSysWork *p) {
   if (p) {
     scs_free(p->M);
     scs_free(p->inv_r_y);
     cudaFree(p->p);
     cudaFree(p->r);
@@ -180,17 +170,17 @@
   (p->Ag, p->dn_vec_m, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size,
    &p->buffer);
 }
 
 /* P comes in upper triangular, expand to full
- * First compute triplet version of full matrix, then compress to csc
+ * First compute triplet version of full matrix, then compress to CSC
  * */
-static csc *fill_p_matrix(const ScsMatrix *P) {
+static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
   scs_int i, j, k, kk;
   scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
-  csc *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
-  csc *P_full;
+  ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
+  ScsMatrix *P_full;
   kk = 0;
   for (j = 0; j < P->n; j++) { /* cols */
     for (k = P->p[j]; k < P->p[j + 1]; k++) {
       i = P->i[k]; /* row */
       if (i > j) { /* only upper triangular needed */
@@ -207,20 +197,19 @@
       P_tmp->p[kk] = i;
       P_tmp->x[kk] = P->x[k];
       kk++;
     }
   }
-  P_tmp->nz = kk; /* set number of nonzeros */
-  P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
+  P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
   SCS(cs_spfree)(P_tmp);
   return P_full;
 }
 
-ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
-                                      const scs_float *diag_r) {
+ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
+                                     const scs_float *diag_r) {
   cudaError_t err;
-  csc *P_full;
+  ScsMatrix *P_full;
   ScsLinSysWork *p = SCS_NULL;
   ScsGpuMatrix *Ag = SCS_NULL;
   ScsGpuMatrix *Pg = SCS_NULL;
   int device_count;
 
@@ -322,11 +311,11 @@
   cusparseCreateDnVec(&p->dn_vec_n, Ag->n, p->tmp_m, SCS_CUDA_FLOAT);
   cusparseCreateDnVec(&p->dn_vec_n_p, Ag->n, p->tmp_m, SCS_CUDA_FLOAT);
   cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
 
   /* Form preconditioner and copy R_x, 1/R_y to gpu */
-  SCS(update_lin_sys_diag_r)(p, diag_r);
+  scs_update_lin_sys_diag_r(p, diag_r);
 
 #if GPU_TRANSPOSE_MAT > 0
   p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
   p->Agt->n = A->m;
   p->Agt->m = A->n;
@@ -365,11 +354,11 @@
 
   err = cudaGetLastError();
   if (err != cudaSuccess) {
     printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
            cudaGetErrorString(err));
-    SCS(free_lin_sys_work)(p);
+    scs_free_lin_sys_work(p);
     return SCS_NULL;
   }
   return p;
 }
 
@@ -464,11 +453,11 @@
  *
  * x = (R_x + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry)
  * y = R_y^{-1} (Ax - ry)
  *
  */
-scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
-                           scs_float tol) {
+scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
+                          scs_float tol) {
   scs_int cg_its, max_iters;
   scs_float neg_onef = -1.0;
   /* these are on GPU */
   scs_float *bg = p->bg;
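
The net effect of the 0.4.1 changes shown above: the linear-system entry points are spelled with a plain scs_ prefix instead of going through the SCS() name-mangling macro, the internal csc triplet type is replaced by ScsMatrix, and SCS(cs_compress) now takes the triplet nonzero count (kk) as an explicit argument rather than reading it from P_tmp->nz. A minimal sketch of the naming change follows, assuming the usual token-pasting definition of SCS(); the macro definition itself is not part of this diff.

    /* Sketch only: shows how the 0.4.0 macro-mangled name and the 0.4.1
     * plain name refer to the same symbol, under the assumption that
     * SCS(x) pastes the scs_ prefix onto x. */
    #define SCS(x) scs_##x

    /* 0.4.0 style: expands to scs_get_lin_sys_method */
    const char *SCS(get_lin_sys_method)(void);

    /* 0.4.1 style: the same symbol, written out directly */
    const char *scs_get_lin_sys_method(void);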