vendor/scs/linsys/cpu/indirect/private.c in scs-0.2.3 vs vendor/scs/linsys/cpu/indirect/private.c in scs-0.3.0
- old
+ new
@@ -1,45 +1,54 @@
#include "private.h"
+#include "linsys.h"
+#include "util.h"
+#include <limits.h>
-#define CG_BEST_TOL 1e-9
-#define CG_MIN_TOL 1e-1
-
-char *SCS(get_lin_sys_method)(const ScsMatrix *A, const ScsSettings *stgs) {
- char *str = (char *)scs_malloc(sizeof(char) * 128);
- sprintf(str, "sparse-indirect, nnz in A = %li, CG tol ~ 1/iter^(%2.2f)",
- (long)A->p[A->n], stgs->cg_rate);
- return str;
+const char *SCS(get_lin_sys_method)() {
+ return "sparse-indirect";
}
+/*
char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) {
char *str = (char *)scs_malloc(sizeof(char) * 128);
- sprintf(str,
- "\tLin-sys: avg # CG iterations: %2.2f, avg solve time: %1.2es\n",
- (scs_float)p->tot_cg_its / (info->iter + 1),
- p->total_solve_time / (info->iter + 1) / 1e3);
+ sprintf(str, "lin-sys: avg cg its: %2.2f\n",
+ (scs_float)p->tot_cg_its / (info->iter + 1));
p->tot_cg_its = 0;
- p->total_solve_time = 0;
return str;
}
+*/
-/* M = inv ( diag ( RHO_X * I + A'A ) ) */
-static void get_preconditioner(const ScsMatrix *A, const ScsSettings *stgs,
- ScsLinSysWork *p) {
- scs_int i;
+/* set M = inv ( diag ( rho_x * I + P + A' R_y^{-1} A ) ) */
+static void set_preconditioner(ScsLinSysWork *p) {
+ scs_int i, k;
scs_float *M = p->M;
+ const ScsMatrix *A = p->A;
+ const ScsMatrix *P = p->P;
-#if EXTRA_VERBOSE > 0
+#if VERBOSITY > 0
scs_printf("getting pre-conditioner\n");
#endif
- for (i = 0; i < A->n; ++i) {
- M[i] = 1 / (stgs->rho_x +
- SCS(norm_sq)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i]));
- /* M[i] = 1; */
+ for (i = 0; i < A->n; ++i) { /* cols */
+ M[i] = p->rho_x;
+ /* diag(A' R_y^{-1} A) */
+ for (k = A->p[i]; k < A->p[i + 1]; ++k) {
+ /* A->i[k] is row of entry k with value A->x[k] */
+ M[i] += A->x[k] * A->x[k] / p->rho_y_vec[A->i[k]];
+ }
+ if (P) {
+ for (k = P->p[i]; k < P->p[i + 1]; k++) {
+ /* diagonal element only */
+ if (P->i[k] == i) { /* row == col */
+ M[i] += P->x[k];
+ break;
+ }
+ }
+ }
+ M[i] = 1. / M[i];
}
-
-#if EXTRA_VERBOSE > 0
+#if VERBOSITY > 0
scs_printf("finished getting pre-conditioner\n");
#endif
}
static void transpose(const ScsMatrix *A, ScsLinSysWork *p) {
@@ -52,19 +61,20 @@
scs_int *Ap = A->p;
scs_int *Ai = A->i;
scs_float *Ax = A->x;
scs_int i, j, q, *z, c1, c2;
-#if EXTRA_VERBOSE > 0
+#if VERBOSITY > 0
SCS(timer) transpose_timer;
scs_printf("transposing A\n");
SCS(tic)(&transpose_timer);
#endif
z = (scs_int *)scs_calloc(m, sizeof(scs_int));
- for (i = 0; i < Ap[n]; i++) z[Ai[i]]++; /* row counts */
- SCS(cumsum)(Cp, z, m); /* row pointers */
+ for (i = 0; i < Ap[n]; i++)
+ z[Ai[i]]++; /* row counts */
+ SCS(cumsum)(Cp, z, m); /* row pointers */
for (j = 0; j < n; j++) {
c1 = Ap[j];
c2 = Ap[j + 1];
for (i = c1; i < c2; i++) {
@@ -74,11 +84,11 @@
z[Ai[i]]++;
}
}
scs_free(z);
-#if EXTRA_VERBOSE > 0
+#if VERBOSITY > 0
scs_printf("finished transposing A, time: %1.2es\n",
SCS(tocq)(&transpose_timer) / 1e3);
#endif
}
@@ -98,44 +108,63 @@
scs_free(p->M);
scs_free(p);
}
}
-/*y = (RHO_X * I + A'A)x */
-static void mat_vec(const ScsMatrix *A, const ScsSettings *s, ScsLinSysWork *p,
- const scs_float *x, scs_float *y) {
- scs_float *tmp = p->tmp;
- memset(tmp, 0, A->m * sizeof(scs_float));
- SCS(accum_by_a)(A, p, x, tmp);
- memset(y, 0, A->n * sizeof(scs_float));
- SCS(accum_by_atrans)(A, p, tmp, y);
- SCS(add_scaled_array)(y, x, A->n, s->rho_x);
+/* vec -> R_y^{-1} vec */
+static void scale_by_diag_r(scs_float *vec, ScsLinSysWork *p) {
+ scs_int i;
+ for (i = 0; i < p->m; ++i) {
+ vec[i] /= p->rho_y_vec[i];
+ }
}
-void SCS(accum_by_atrans)(const ScsMatrix *A, ScsLinSysWork *p,
- const scs_float *x, scs_float *y) {
- SCS(_accum_by_atrans)(A->n, A->x, A->i, A->p, x, y);
+/* we use a different accum_by_a here for speed */
+static void accum_by_a(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
+ SCS(accum_by_atrans)(p->At, x, y);
}
-void SCS(accum_by_a)(const ScsMatrix *A, ScsLinSysWork *p, const scs_float *x,
- scs_float *y) {
- SCS(_accum_by_atrans)(p->At->n, p->At->x, p->At->i, p->At->p, x, y);
+/* y = (rho_x * I + P + A' R_y^{-1} A) x */
+static void mat_vec(const ScsMatrix *A, const ScsMatrix *P, ScsLinSysWork *p,
+ const scs_float *x, scs_float *y) {
+ scs_float *z = p->tmp;
+ memset(z, 0, A->m * sizeof(scs_float)); /* z = 0 */
+ memset(y, 0, A->n * sizeof(scs_float)); /* y = 0 */
+ if (P) {
+ SCS(accum_by_p)(P, x, y); /* y = Px */
+ }
+ accum_by_a(p, x, z); /* z = Ax */
+ scale_by_diag_r(z, p); /* z = R_y^{-1} A x */
+ SCS(accum_by_atrans)(A, z, y); /* y += A'z, y = Px + A' R_y^{-1} Ax */
+ /* y = rho_x * x + Px + A' R_y^{-1} A x */
+ SCS(add_scaled_array)(y, x, A->n, p->rho_x);
}
-static void apply_pre_conditioner(scs_float *M, scs_float *z, scs_float *r,
- scs_int n, scs_float *ipzr) {
+static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n,
+ ScsLinSysWork *pr) {
scs_int i;
- *ipzr = 0;
+ scs_float *M = pr->M;
for (i = 0; i < n; ++i) {
z[i] = r[i] * M[i];
- *ipzr += z[i] * r[i];
}
}
-ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A,
- const ScsSettings *stgs) {
+/* no need to update anything in this case */
+void SCS(update_lin_sys_rho_y_vec)(ScsLinSysWork *p, scs_float *rho_y_vec) {
+ p->rho_y_vec = rho_y_vec; /* this isn't needed but do it to be safe */
+ set_preconditioner(p);
+}
+
+ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
+ scs_float *rho_y_vec, scs_float rho_x) {
ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
+ p->A = A;
+ p->P = P;
+ p->m = A->m;
+ p->n = A->n;
+ p->rho_x = rho_x;
+
p->p = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
p->r = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
p->Gp = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
p->tmp = (scs_float *)scs_malloc((A->m) * sizeof(scs_float));
@@ -147,110 +176,148 @@
p->At->p = (scs_int *)scs_malloc((A->m + 1) * sizeof(scs_int));
p->At->x = (scs_float *)scs_malloc((A->p[A->n]) * sizeof(scs_float));
transpose(A, p);
/* preconditioner memory */
- p->z = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
- p->M = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
- get_preconditioner(A, stgs, p);
+ p->rho_y_vec = rho_y_vec;
+ p->z = (scs_float *)scs_calloc(A->n, sizeof(scs_float));
+ p->M = (scs_float *)scs_calloc(A->n, sizeof(scs_float));
+ set_preconditioner(p);
- p->total_solve_time = 0;
p->tot_cg_its = 0;
if (!p->p || !p->r || !p->Gp || !p->tmp || !p->At || !p->At->i || !p->At->p ||
!p->At->x) {
SCS(free_lin_sys_work)(p);
return SCS_NULL;
}
return p;
}
-/* solves (I+A'A)x = b, s warm start, solution stored in b */
-static scs_int pcg(const ScsMatrix *A, const ScsSettings *stgs,
- ScsLinSysWork *pr, const scs_float *s, scs_float *b,
+/* solves (rho_x * I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in
+ * b */
+static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b,
scs_int max_its, scs_float tol) {
- scs_int i, n = A->n;
- scs_float ipzr, ipzr_old, alpha;
+ scs_int i, n = pr->n;
+ scs_float ztr, ztr_prev, alpha;
scs_float *p = pr->p; /* cg direction */
scs_float *Gp = pr->Gp; /* updated CG direction */
scs_float *r = pr->r; /* cg residual */
scs_float *z = pr->z; /* for preconditioning */
- scs_float *M = pr->M; /* inverse diagonal preconditioner */
- if (s == SCS_NULL) {
+ const ScsMatrix *A = pr->A;
+ const ScsMatrix *P = pr->P;
+
+ if (!s) {
+ /* take s = 0 */
+ /* r = b */
memcpy(r, b, n * sizeof(scs_float));
+ /* b = 0 */
memset(b, 0, n * sizeof(scs_float));
} else {
- mat_vec(A, stgs, pr, s, r);
- SCS(add_scaled_array)(r, b, n, -1);
- SCS(scale_array)(r, -1, n);
+ /* r = Mat * s */
+ mat_vec(A, P, pr, s, r);
+ /* r = Mat * s - b */
+ SCS(add_scaled_array)(r, b, n, -1.);
+ /* r = b - Mat * s */
+ SCS(scale_array)(r, -1., n);
+ /* b = s */
memcpy(b, s, n * sizeof(scs_float));
}
/* check to see if we need to run CG at all */
- if (SCS(norm)(r, n) < MIN(tol, 1e-18)) {
+ if (CG_NORM(r, n) < MAX(tol, 1e-12)) {
return 0;
}
- apply_pre_conditioner(M, z, r, n, &ipzr);
+ /* z = M r (M is inverse preconditioner) */
+ apply_pre_conditioner(z, r, n, pr);
+ /* ztr = z'r */
+ ztr = SCS(dot)(z, r, n);
+ /* p = z */
memcpy(p, z, n * sizeof(scs_float));
for (i = 0; i < max_its; ++i) {
- mat_vec(A, stgs, pr, p, Gp);
- alpha = ipzr / SCS(dot)(p, Gp, n);
+ /* Gp = Mat * p */
+ mat_vec(A, P, pr, p, Gp);
+ /* alpha = z'r / p'G p */
+ alpha = ztr / SCS(dot)(p, Gp, n);
+ /* b += alpha * p */
SCS(add_scaled_array)(b, p, n, alpha);
+ /* r -= alpha * G p */
SCS(add_scaled_array)(r, Gp, n, -alpha);
- if (SCS(norm)(r, n) < tol) {
-#if EXTRA_VERBOSE > 0
- scs_printf("tol: %.4e, resid: %.4e, iters: %li\n", tol, SCS(norm)(r, n),
- (long)i + 1);
+#if VERBOSITY > 3
+ scs_printf("tol: %.4e, resid: %.4e, iters: %li\n", tol, CG_NORM(r, n),
+ (long)i + 1);
#endif
+ if (CG_NORM(r, n) < tol) {
return i + 1;
}
- ipzr_old = ipzr;
- apply_pre_conditioner(M, z, r, n, &ipzr);
-
- SCS(scale_array)(p, ipzr / ipzr_old, n);
- SCS(add_scaled_array)(p, z, n, 1);
+ /* z = M r (M is inverse preconditioner) */
+ apply_pre_conditioner(z, r, n, pr);
+ ztr_prev = ztr;
+ /* ztr = z'r */
+ ztr = SCS(dot)(z, r, n);
+ /* p = beta * p, where beta = ztr / ztr_prev */
+ SCS(scale_array)(p, ztr / ztr_prev, n);
+ /* p = z + beta * p */
+ SCS(add_scaled_array)(p, z, n, 1.);
}
return i;
}
-scs_int SCS(solve_lin_sys)(const ScsMatrix *A, const ScsSettings *stgs,
- ScsLinSysWork *p, scs_float *b, const scs_float *s,
- scs_int iter) {
- scs_int cg_its;
- SCS(timer) linsys_timer;
- scs_float cg_tol =
- SCS(norm)(b, A->n) *
- (iter < 0 ? CG_BEST_TOL
- : CG_MIN_TOL / POWF((scs_float)iter + 1, stgs->cg_rate));
+/* solves Mx = b, for x but stores result in b */
+/* s contains warm-start (if available) */
+/*
+ * [x] = [rho_x I + P A' ]^{-1} [rx]
+ * [y] [ A -R_y ] [ry]
+ *
+ * R_y = diag(rho_y_vec)
+ *
+ * becomes:
+ *
+ * x = (rho_x I + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry)
+ * y = R_y^{-1} (Ax - ry)
+ *
+ */
+scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
+ scs_float tol) {
+ scs_int cg_its, max_iters;
- SCS(tic)(&linsys_timer);
- /* solves Mx = b, for x but stores result in b */
- /* s contains warm-start (if available) */
- SCS(accum_by_atrans)(A, p, &(b[A->n]), b);
- /* solves (I+A'A)x = b, s warm start, solution stored in b */
- cg_its = pcg(A, stgs, p, s, b, A->n, MAX(cg_tol, CG_BEST_TOL));
- SCS(scale_array)(&(b[A->n]), -1, A->m);
- SCS(accum_by_a)(A, p, b, &(b[A->n]));
+ if (tol <= 0.) {
+ scs_printf("Warning: tol = %4f <= 0, likely compiled without setting "
+ "INDIRECT flag.\n",
+ tol);
+ }
- if (iter >= 0) {
- p->tot_cg_its += cg_its;
+ if (CG_NORM(b, p->n + p->m) <= 1e-12) {
+ memset(b, 0, (p->n + p->m) * sizeof(scs_float));
+ return 0;
}
- p->total_solve_time += SCS(tocq)(&linsys_timer);
-#if EXTRA_VERBOSE > 0
- scs_printf("linsys solve time: %1.2es\n", SCS(tocq)(&linsys_timer) / 1e3);
+ /* use p->tmp here, and in mat_vec, can do both since they don't overlap */
+ /* b = [rx; ry] */
+ /* tmp = ry */
+ memcpy(p->tmp, &(b[p->n]), p->m * sizeof(scs_float));
+ /* tmp = R_y^{-1} * ry */
+ scale_by_diag_r(p->tmp, p);
+ /* b[:n] = rx + A' R_y^{-1} ry */
+ SCS(accum_by_atrans)(p->A, p->tmp, b);
+ /* set max_iters to 10 * n (though in theory n is enough for any tol) */
+ max_iters = 10 * p->n;
+ /* solves (rho_x I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in
+ * b */
+ cg_its = pcg(p, s, b, max_iters, tol); /* b[:n] = x */
+
+ /* b[n:] = -ry */
+ SCS(scale_array)(&(b[p->n]), -1., p->m);
+ /* b[n:] = Ax - ry */
+ accum_by_a(p, b, &(b[p->n]));
+ /* b[n:] = R_y^{-1} (Ax - ry) = y */
+ scale_by_diag_r(&(b[p->n]), p);
+ p->tot_cg_its += cg_its;
+#if VERBOSITY > 1
+ scs_printf("tol %.3e\n", tol);
+ scs_printf("cg_its %i\n", (int)cg_its);
#endif
return 0;
-}
-
-void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k,
- ScsScaling *scal) {
- SCS(_normalize_a)(A, stgs, k, scal);
-}
-
-void SCS(un_normalize_a)(ScsMatrix *A, const ScsSettings *stgs,
- const ScsScaling *scal) {
- SCS(_un_normalize_a)(A, stgs, scal);
}