vendor/scs/linsys/gpu/indirect/private.c: scs-0.4.0 vs scs-0.4.1
- old
+ new
@@ -19,24 +19,14 @@
nrm = ABS(nrm);
#endif
return nrm;
}
-const char *SCS(get_lin_sys_method)() {
+const char *scs_get_lin_sys_method() {
return "sparse-indirect GPU";
}
-/*
-char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
- char *str = (char *)scs_malloc(sizeof(char) * 128);
- sprintf(str, "lin-sys: avg cg its: %2.2f\n",
- (scs_float)p->tot_cg_its / (info->iter + 1));
- p->tot_cg_its = 0;
- return str;
-}
-*/
-
/* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
/* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
scs_int i, k;
const ScsMatrix *A = p->A;
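The comment above spells out the preconditioner: M = inv(diag(R_x + P + A' R_y^{-1} A)). As a rough host-side sketch of that diagonal (illustrative only; the routine in this file builds it for the GPU, and both the name host_precond_diag and the assumption that diag_r stacks R_x (length n) ahead of R_y (length m) are not taken from SCS itself):

/* Sketch: host-side diagonal of R_x + P + A' R_y^{-1} A, inverted entrywise.
 * Assumes A (m x n) and P (n x n, upper triangular incl. diagonal) are CSC
 * with the usual p/i/x arrays, and that diag_r = [R_x; R_y]. Not the GPU
 * routine from this file; names are illustrative. */
static void host_precond_diag(const ScsMatrix *A, const ScsMatrix *P,
                              const scs_float *diag_r, scs_float *M) {
  scs_int i, k;
  for (i = 0; i < A->n; i++) {
    scs_float d = diag_r[i]; /* R_x_i */
    if (P) { /* add P_ii if a quadratic term is present */
      for (k = P->p[i]; k < P->p[i + 1]; k++) {
        if (P->i[k] == i) {
          d += P->x[k];
        }
      }
    }
    /* (A' R_y^{-1} A)_ii = sum_j A_ji^2 / R_y_j, over column i of A */
    for (k = A->p[i]; k < A->p[i + 1]; k++) {
      d += A->x[k] * A->x[k] / diag_r[A->n + A->i[k]]; /* assumed R_y layout */
    }
    M[i] = 1. / d;
  }
}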
@@ -74,11 +64,11 @@
scs_printf("finished getting pre-conditioner\n");
#endif
}
/* no need to update anything in this case */
-void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
+void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
scs_int i;
/* R_x to gpu */
cudaMemcpy(p->r_x_gpu, diag_r, p->n * sizeof(scs_float),
cudaMemcpyHostToDevice);
@@ -91,11 +81,11 @@
/* set preconditioner M on gpu */
set_preconditioner(p, diag_r);
}
-void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
+void scs_free_lin_sys_work(ScsLinSysWork *p) {
if (p) {
scs_free(p->M);
scs_free(p->inv_r_y);
cudaFree(p->p);
cudaFree(p->r);
@@ -180,17 +170,17 @@
(p->Ag, p->dn_vec_m, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size,
&p->buffer);
}
/* P comes in upper triangular, expand to full
- * First compute triplet version of full matrix, then compress to csc
+ * First compute triplet version of full matrix, then compress to CSC
* */
-static csc *fill_p_matrix(const ScsMatrix *P) {
+static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
scs_int i, j, k, kk;
scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
- csc *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
- csc *P_full;
+ ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
+ ScsMatrix *P_full;
kk = 0;
for (j = 0; j < P->n; j++) { /* cols */
for (k = P->p[j]; k < P->p[j + 1]; k++) {
i = P->i[k]; /* row */
if (i > j) { /* only upper triangular needed */
@@ -207,20 +197,19 @@
P_tmp->p[kk] = i;
P_tmp->x[kk] = P->x[k];
kk++;
}
}
- P_tmp->nz = kk; /* set number of nonzeros */
- P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
+ P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
SCS(cs_spfree)(P_tmp);
return P_full;
}
-ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
- const scs_float *diag_r) {
+ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
+ const scs_float *diag_r) {
cudaError_t err;
- csc *P_full;
+ ScsMatrix *P_full;
ScsLinSysWork *p = SCS_NULL;
ScsGpuMatrix *Ag = SCS_NULL;
ScsGpuMatrix *Pg = SCS_NULL;
int device_count;
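The comment on fill_p_matrix describes the two-step construction: build the full matrix in triplet form, then compress to CSC, with the nonzero count kk now passed to SCS(cs_compress) directly. A generic sketch of what such a triplet-to-CSC compression does (hypothetical helper triplet_to_csc with explicit Ti/Tj/Tx arrays, not the actual SCS(cs_compress) implementation):

/* Sketch: compress nz triplets (Ti[k], Tj[k], Tx[k]) of an m x n matrix
 * into CSC. Row indices within a column come out in triplet order (not
 * sorted). Names are illustrative only. */
static ScsMatrix *triplet_to_csc(scs_int m, scs_int n, scs_int nz,
                                 const scs_int *Ti, const scs_int *Tj,
                                 const scs_float *Tx) {
  scs_int k, j, p;
  ScsMatrix *C = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
  scs_int *count = (scs_int *)scs_calloc(n, sizeof(scs_int));
  C->m = m;
  C->n = n;
  C->p = (scs_int *)scs_calloc(n + 1, sizeof(scs_int));
  C->i = (scs_int *)scs_malloc(nz * sizeof(scs_int));
  C->x = (scs_float *)scs_malloc(nz * sizeof(scs_float));
  for (k = 0; k < nz; k++) { /* count entries per column */
    count[Tj[k]]++;
  }
  for (j = 0; j < n; j++) { /* column pointers = cumulative sum */
    C->p[j + 1] = C->p[j] + count[j];
    count[j] = C->p[j]; /* reuse as next free slot in each column */
  }
  for (k = 0; k < nz; k++) { /* scatter triplets into place */
    p = count[Tj[k]]++;
    C->i[p] = Ti[k];
    C->x[p] = Tx[k];
  }
  scs_free(count);
  return C;
}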
@@ -322,11 +311,11 @@
cusparseCreateDnVec(&p->dn_vec_n, Ag->n, p->tmp_m, SCS_CUDA_FLOAT);
cusparseCreateDnVec(&p->dn_vec_n_p, Ag->n, p->tmp_m, SCS_CUDA_FLOAT);
cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
/* Form preconditioner and copy R_x, 1/R_y to gpu */
- SCS(update_lin_sys_diag_r)(p, diag_r);
+ scs_update_lin_sys_diag_r(p, diag_r);
#if GPU_TRANSPOSE_MAT > 0
p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
p->Agt->n = A->m;
p->Agt->m = A->n;
@@ -365,11 +354,11 @@
err = cudaGetLastError();
if (err != cudaSuccess) {
printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
cudaGetErrorString(err));
- SCS(free_lin_sys_work)(p);
+ scs_free_lin_sys_work(p);
return SCS_NULL;
}
return p;
}
@@ -464,11 +453,11 @@
*
* x = (R_x + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry)
* y = R_y^{-1} (Ax - ry)
*
*/
-scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
- scs_float tol) {
+scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
+ scs_float tol) {
scs_int cg_its, max_iters;
scs_float neg_onef = -1.0;
/* these are on GPU */
scs_float *bg = p->bg;
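The comment above scs_solve_lin_sys gives the two-step solve: x = (R_x + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry), then y = R_y^{-1} (Ax - ry). A minimal host-side sketch of the second step, assuming A is CSC and R_y occupies the trailing m entries of diag_r (in this file the equivalent is done on the GPU; the name recover_y is illustrative, not part of SCS):

/* Sketch: y = R_y^{-1} (A x - ry), with A (m x n) in CSC form and R_y
 * assumed to be the trailing m entries of diag_r. Illustrative only. */
static void recover_y(const ScsMatrix *A, const scs_float *diag_r,
                      const scs_float *x, const scs_float *ry, scs_float *y) {
  scs_int j, k;
  for (j = 0; j < A->m; j++) { /* start from -ry */
    y[j] = -ry[j];
  }
  for (j = 0; j < A->n; j++) { /* accumulate A x column by column */
    for (k = A->p[j]; k < A->p[j + 1]; k++) {
      y[A->i[k]] += A->x[k] * x[j];
    }
  }
  for (j = 0; j < A->m; j++) { /* scale by 1 / R_y_j */
    y[j] /= diag_r[A->n + j];
  }
}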