vendor/scs/linsys/cpu/indirect/private.c in scs-0.3.1 vs vendor/scs/linsys/cpu/indirect/private.c in scs-0.3.2
- old
+ new
@@ -15,37 +15,42 @@
p->tot_cg_its = 0;
return str;
}
*/
-/* set M = inv ( diag ( rho_x * I + P + A' R_y^{-1} A ) ) */
+/* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */
+/* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */
static void set_preconditioner(ScsLinSysWork *p) {
scs_int i, k;
scs_float *M = p->M;
const ScsMatrix *A = p->A;
const ScsMatrix *P = p->P;
#if VERBOSITY > 0
scs_printf("getting pre-conditioner\n");
#endif
+ /* M_ii = (R_x)_i + P_ii + a_i' (R_y)^-1 a_i */
for (i = 0; i < A->n; ++i) { /* cols */
- M[i] = p->rho_x;
- /* diag(A' R_y^{-1} A) */
+ /* M_ii = (R_x)_i */
+ M[i] = p->diag_r[i];
+ /* M_ii += a_i' (R_y)^-1 a_i */
for (k = A->p[i]; k < A->p[i + 1]; ++k) {
/* A->i[k] is row of entry k with value A->x[k] */
- M[i] += A->x[k] * A->x[k] / p->rho_y_vec[A->i[k]];
+ M[i] += A->x[k] * A->x[k] / p->diag_r[A->n + A->i[k]];
}
if (P) {
for (k = P->p[i]; k < P->p[i + 1]; k++) {
/* diagonal element only */
if (P->i[k] == i) { /* row == col */
+ /* M_ii += P_ii */
M[i] += P->x[k];
break;
}
}
}
+ /* finally invert for pre-conditioner */
M[i] = 1. / M[i];
}
#if VERBOSITY > 0
scs_printf("finished getting pre-conditioner\n");
#endif
@@ -109,36 +114,44 @@
scs_free(p);
}
}
/* vec -> R_y^{-1} vec */
-static void scale_by_diag_r(scs_float *vec, ScsLinSysWork *p) {
+static void scale_by_r_y_inv(scs_float *vec, ScsLinSysWork *p) {
scs_int i;
for (i = 0; i < p->m; ++i) {
- vec[i] /= p->rho_y_vec[i];
+ vec[i] /= p->diag_r[p->n + i];
}
}
+/* y += R_x * x */
+static void accum_by_r_x(scs_float *y, const scs_float *x, ScsLinSysWork *p) {
+ scs_int i;
+ for (i = 0; i < p->n; ++i) {
+ y[i] += p->diag_r[i] * x[i];
+ }
+}
+
/* we use a different accum_by_a here for speed */
static void accum_by_a(ScsLinSysWork *p, const scs_float *x, scs_float *y) {
SCS(accum_by_atrans)(p->At, x, y);
}
-/* y = (rho_x * I + P + A' R_y^{-1} A) x */
+/* y = (R_x + P + A' R_y^{-1} A) x */
static void mat_vec(const ScsMatrix *A, const ScsMatrix *P, ScsLinSysWork *p,
const scs_float *x, scs_float *y) {
scs_float *z = p->tmp;
memset(z, 0, A->m * sizeof(scs_float)); /* z = 0 */
memset(y, 0, A->n * sizeof(scs_float)); /* y = 0 */
if (P) {
SCS(accum_by_p)(P, x, y); /* y = Px */
}
accum_by_a(p, x, z); /* z = Ax */
- scale_by_diag_r(z, p); /* z = R_y^{-1} A x */
+ scale_by_r_y_inv(z, p); /* z = R_y^{-1} A x */
SCS(accum_by_atrans)(A, z, y); /* y += A'z, y = Px + A' R_y^{-1} Ax */
- /* y = rho_x * x + Px + A' R_y^{-1} A x */
- SCS(add_scaled_array)(y, x, A->n, p->rho_x);
+ /* y = R_x * x + Px + A' R_y^{-1} A * x */
+ accum_by_r_x(y, x, p);
}
static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n,
ScsLinSysWork *pr) {
scs_int i;
@@ -147,40 +160,39 @@
z[i] = r[i] * M[i];
}
}
/* no need to update anything in this case */
-void SCS(update_lin_sys_rho_y_vec)(ScsLinSysWork *p, scs_float *rho_y_vec) {
- p->rho_y_vec = rho_y_vec; /* this isn't needed but do it to be safe */
+void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) {
+ p->diag_r = diag_r; /* this isn't needed but do it to be safe */
set_preconditioner(p);
}
ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P,
- scs_float *rho_y_vec, scs_float rho_x) {
+ const scs_float *diag_r) {
ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork));
p->A = A;
p->P = P;
p->m = A->m;
p->n = A->n;
- p->rho_x = rho_x;
- p->p = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
- p->r = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
- p->Gp = (scs_float *)scs_malloc((A->n) * sizeof(scs_float));
- p->tmp = (scs_float *)scs_malloc((A->m) * sizeof(scs_float));
+ p->p = (scs_float *)scs_calloc((A->n), sizeof(scs_float));
+ p->r = (scs_float *)scs_calloc((A->n), sizeof(scs_float));
+ p->Gp = (scs_float *)scs_calloc((A->n), sizeof(scs_float));
+ p->tmp = (scs_float *)scs_calloc((A->m), sizeof(scs_float));
/* memory for A transpose */
- p->At = (ScsMatrix *)scs_malloc(sizeof(ScsMatrix));
+ p->At = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix));
p->At->m = A->n;
p->At->n = A->m;
- p->At->i = (scs_int *)scs_malloc((A->p[A->n]) * sizeof(scs_int));
- p->At->p = (scs_int *)scs_malloc((A->m + 1) * sizeof(scs_int));
- p->At->x = (scs_float *)scs_malloc((A->p[A->n]) * sizeof(scs_float));
+ p->At->i = (scs_int *)scs_calloc((A->p[A->n]), sizeof(scs_int));
+ p->At->p = (scs_int *)scs_calloc((A->m + 1), sizeof(scs_int));
+ p->At->x = (scs_float *)scs_calloc((A->p[A->n]), sizeof(scs_float));
transpose(A, p);
/* preconditioner memory */
- p->rho_y_vec = rho_y_vec;
+ p->diag_r = diag_r;
p->z = (scs_float *)scs_calloc(A->n, sizeof(scs_float));
p->M = (scs_float *)scs_calloc(A->n, sizeof(scs_float));
set_preconditioner(p);
p->tot_cg_its = 0;
@@ -190,12 +202,11 @@
return SCS_NULL;
}
return p;
}
-/* solves (rho_x * I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in
- * b */
+/* solves (R_x * I + P + A' R_y^{-1} A)x = b, s warm start, solution in b */
static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b,
scs_int max_its, scs_float tol) {
scs_int i, n = pr->n;
scs_float ztr, ztr_prev, alpha;
scs_float *p = pr->p; /* cg direction */
@@ -266,18 +277,16 @@
}
/* solves Mx = b, for x but stores result in b */
/* s contains warm-start (if available) */
/*
- * [x] = [rho_x I + P A' ]^{-1} [rx]
- * [y] [ A -R_y ] [ry]
+ * [x] = [R_x + P A' ]^{-1} [rx]
+ * [y] [ A -R_y ] [ry]
*
- * R_y = diag(rho_y_vec)
- *
* becomes:
*
- * x = (rho_x I + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry)
+ * x = (R_x + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry)
* y = R_y^{-1} (Ax - ry)
*
*/
scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s,
scs_float tol) {
@@ -297,24 +306,24 @@
/* use p->tmp here, and in mat_vec, can do both since they don't overlap */
/* b = [rx; ry] */
/* tmp = ry */
memcpy(p->tmp, &(b[p->n]), p->m * sizeof(scs_float));
/* tmp = R_y^{-1} * ry */
- scale_by_diag_r(p->tmp, p);
+ scale_by_r_y_inv(p->tmp, p);
/* b[:n] = rx + A' R_y^{-1} ry */
SCS(accum_by_atrans)(p->A, p->tmp, b);
/* set max_iters to 10 * n (though in theory n is enough for any tol) */
max_iters = 10 * p->n;
- /* solves (rho_x I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in
+ /* solves (R_x + P + A' R_y^{-1} A)x = b, s warm start, solution stored in
* b */
cg_its = pcg(p, s, b, max_iters, tol); /* b[:n] = x */
/* b[n:] = -ry */
SCS(scale_array)(&(b[p->n]), -1., p->m);
/* b[n:] = Ax - ry */
accum_by_a(p, b, &(b[p->n]));
/* b[n:] = R_y^{-1} (Ax - ry) = y */
- scale_by_diag_r(&(b[p->n]), p);
+ scale_by_r_y_inv(&(b[p->n]), p);
p->tot_cg_its += cg_its;
#if VERBOSITY > 1
scs_printf("tol %.3e\n", tol);
scs_printf("cg_its %i\n", (int)cg_its);
#endif