vendor/scs/linsys/gpu/indirect/private.c in scs-0.4.0 vs vendor/scs/linsys/gpu/indirect/private.c in scs-0.4.1

- old
+ new

@@ -19,24 +19,14 @@
   nrm = ABS(nrm);
 #endif
   return nrm;
 }
 
-const char *SCS(get_lin_sys_method)() {
+const char *scs_get_lin_sys_method() {
   return "sparse-indirect GPU";
 }
 
-/*
-char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
-  char *str = (char *)scs_malloc(sizeof(char) * 128);
-  sprintf(str, "lin-sys: avg cg its: %2.2f\n",
-          (scs_float)p->tot_cg_its / (info->iter + 1));
-  p->tot_cg_its = 0;
-  return str;
-}
-*/
-
 /* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
 /* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
 static void set_preconditioner(ScsLinSysWork *p, const scs_float *diag_r) {
   scs_int i, k;
   const ScsMatrix *A = p->A;
@@ -74,11 +64,11 @@
   scs_printf("finished getting pre-conditioner\n");
 #endif
 }
 
 /* no need to update anything in this case */
-void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
+void scs_update_lin_sys_diag_r(ScsLinSysWork *p, const scs_float *diag_r) {
   scs_int i;
 
   /* R_x to gpu */
   cudaMemcpy(p->r_x_gpu, diag_r, p->n * sizeof(scs_float),
              cudaMemcpyHostToDevice);
@@ -91,11 +81,11 @@
 
   /* set preconditioner M on gpu */
   set_preconditioner(p, diag_r);
 }
 
-void SCS(free_lin_sys_work)(ScsLinSysWork *p) {
+void scs_free_lin_sys_work(ScsLinSysWork *p) {
   if (p) {
     scs_free(p->M);
     scs_free(p->inv_r_y);
     cudaFree(p->p);
     cudaFree(p->r);
@@ -180,17 +170,17 @@
   (p->Ag, p->dn_vec_m, p->dn_vec_n_p, p->cusparse_handle, &p->buffer_size,
    &p->buffer);
 }
 
 /* P comes in upper triangular, expand to full
- * First compute triplet version of full matrix, then compress to csc
+ * First compute triplet version of full matrix, then compress to CSC
  * */
-static csc *fill_p_matrix(const ScsMatrix *P) {
+static ScsMatrix *fill_p_matrix(const ScsMatrix *P) {
   scs_int i, j, k, kk;
   scs_int Pnzmax = 2 * P->p[P->n]; /* upper bound */
-  csc *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
-  csc *P_full;
+  ScsMatrix *P_tmp = SCS(cs_spalloc)(P->n, P->n, Pnzmax, 1, 1);
+  ScsMatrix *P_full;
   kk = 0;
   for (j = 0; j < P->n; j++) { /* cols */
     for (k = P->p[j]; k < P->p[j + 1]; k++) {
       i = P->i[k]; /* row */
       if (i > j) { /* only upper triangular needed */
@@ -207,20 +197,19 @@
       P_tmp->p[kk] = i;
       P_tmp->x[kk] = P->x[k];
       kk++;
     }
   }
-  P_tmp->nz = kk; /* set number of nonzeros */
-  P_full = SCS(cs_compress)(P_tmp, SCS_NULL);
+  P_full = SCS(cs_compress)(P_tmp, kk, SCS_NULL);
   SCS(cs_spfree)(P_tmp);
   return P_full;
 }
 
-ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
-                                      const scs_float *diag_r) {
+ScsLinSysWork *scs_init_lin_sys_work(const ScsMatrix *A, const ScsMatrix *P,
+                                     const scs_float *diag_r) {
   cudaError_t err;
-  csc *P_full;
+  ScsMatrix *P_full;
   ScsLinSysWork *p = SCS_NULL;
   ScsGpuMatrix *Ag = SCS_NULL;
   ScsGpuMatrix *Pg = SCS_NULL;
   int device_count;
 
@@ -322,11 +311,11 @@
   cusparseCreateDnVec(&p->dn_vec_n, Ag->n, p->tmp_m, SCS_CUDA_FLOAT);
   cusparseCreateDnVec(&p->dn_vec_n_p, Ag->n, p->tmp_m, SCS_CUDA_FLOAT);
   cusparseCreateDnVec(&p->dn_vec_m, Ag->m, p->tmp_m, SCS_CUDA_FLOAT);
 
   /* Form preconditioner and copy R_x, 1/R_y to gpu */
-  SCS(update_lin_sys_diag_r)(p, diag_r);
+  scs_update_lin_sys_diag_r(p, diag_r);
 
 #if GPU_TRANSPOSE_MAT > 0
   p->Agt = (ScsGpuMatrix *)scs_malloc(sizeof(ScsGpuMatrix));
   p->Agt->n = A->m;
   p->Agt->m = A->n;
@@ -365,11 +354,11 @@
 
   err = cudaGetLastError();
   if (err != cudaSuccess) {
     printf("%s:%d:%s\nERROR_CUDA (*): %s\n", __FILE__, __LINE__, __func__,
            cudaGetErrorString(err));
-    SCS(free_lin_sys_work)(p);
+    scs_free_lin_sys_work(p);
     return SCS_NULL;
   }
   return p;
 }
 
@@ -464,11 +453,11 @@
  *
  * x = (R_x + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry)
  * y = R_y^{-1} (Ax - ry)
  *
  */
-scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
-                           scs_float tol) {
+scs_int scs_solve_lin_sys(ScsLinSysWork *p, scs_float *b, const scs_float *s,
+                          scs_float tol) {
   scs_int cg_its, max_iters;
   scs_float neg_onef = -1.0;
   /* these are on GPU */
   scs_float *bg = p->bg;
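
The net effect of the 0.4.1 changes shown above: the linear-system entry points are spelled with a plain scs_ prefix instead of going through the SCS() name-mangling macro, the internal csc triplet type is replaced by ScsMatrix, and SCS(cs_compress) now takes the triplet nonzero count (kk) as an explicit argument rather than reading it from P_tmp->nz. A minimal sketch of the naming change follows, assuming the usual token-pasting definition of SCS(); the macro definition itself is not part of this diff.

    /* Sketch only: shows how the 0.4.0 macro-mangled name and the 0.4.1
     * plain name refer to the same symbol, under the assumption that
     * SCS(x) pastes the scs_ prefix onto x. */
    #define SCS(x) scs_##x

    /* 0.4.0 style: expands to scs_get_lin_sys_method */
    const char *SCS(get_lin_sys_method)(void);

    /* 0.4.1 style: the same symbol, written out directly */
    const char *scs_get_lin_sys_method(void);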