vendor/scs/linsys/cpu/indirect/private.c in scs-0.3.1 vs vendor/scs/linsys/cpu/indirect/private.c in scs-0.3.2

- old
+ new

@@ -15,37 +15,42 @@ p->tot_cg_its = 0; return str; } */ -/* set M = inv ( diag ( rho_x * I + P + A' R_y^{-1} A ) ) */ +/* Not possible to do this on the fly due to M_ii += a_i' (R_y)^-1 a_i */ +/* set M = inv ( diag ( R_x + P + A' R_y^{-1} A ) ) */ static void set_preconditioner(ScsLinSysWork *p) { scs_int i, k; scs_float *M = p->M; const ScsMatrix *A = p->A; const ScsMatrix *P = p->P; #if VERBOSITY > 0 scs_printf("getting pre-conditioner\n"); #endif + /* M_ii = (R_x)_i + P_ii + a_i' (R_y)^-1 a_i */ for (i = 0; i < A->n; ++i) { /* cols */ - M[i] = p->rho_x; - /* diag(A' R_y^{-1} A) */ + /* M_ii = (R_x)_i */ + M[i] = p->diag_r[i]; + /* M_ii += a_i' (R_y)^-1 a_i */ for (k = A->p[i]; k < A->p[i + 1]; ++k) { /* A->i[k] is row of entry k with value A->x[k] */ - M[i] += A->x[k] * A->x[k] / p->rho_y_vec[A->i[k]]; + M[i] += A->x[k] * A->x[k] / p->diag_r[A->n + A->i[k]]; } if (P) { for (k = P->p[i]; k < P->p[i + 1]; k++) { /* diagonal element only */ if (P->i[k] == i) { /* row == col */ + /* M_ii += P_ii */ M[i] += P->x[k]; break; } } } + /* finally invert for pre-conditioner */ M[i] = 1. / M[i]; } #if VERBOSITY > 0 scs_printf("finished getting pre-conditioner\n"); #endif @@ -109,36 +114,44 @@ scs_free(p); } } /* vec -> R_y^{-1} vec */ -static void scale_by_diag_r(scs_float *vec, ScsLinSysWork *p) { +static void scale_by_r_y_inv(scs_float *vec, ScsLinSysWork *p) { scs_int i; for (i = 0; i < p->m; ++i) { - vec[i] /= p->rho_y_vec[i]; + vec[i] /= p->diag_r[p->n + i]; } } +/* y += R_x * x */ +static void accum_by_r_x(scs_float *y, const scs_float *x, ScsLinSysWork *p) { + scs_int i; + for (i = 0; i < p->n; ++i) { + y[i] += p->diag_r[i] * x[i]; + } +} + /* we use a different accum_by_a here for speed */ static void accum_by_a(ScsLinSysWork *p, const scs_float *x, scs_float *y) { SCS(accum_by_atrans)(p->At, x, y); } -/* y = (rho_x * I + P + A' R_y^{-1} A) x */ +/* y = (R_x + P + A' R_y^{-1} A) x */ static void mat_vec(const ScsMatrix *A, const ScsMatrix *P, ScsLinSysWork *p, const scs_float *x, scs_float *y) { scs_float *z = p->tmp; memset(z, 0, A->m * sizeof(scs_float)); /* z = 0 */ memset(y, 0, A->n * sizeof(scs_float)); /* y = 0 */ if (P) { SCS(accum_by_p)(P, x, y); /* y = Px */ } accum_by_a(p, x, z); /* z = Ax */ - scale_by_diag_r(z, p); /* z = R_y^{-1} A x */ + scale_by_r_y_inv(z, p); /* z = R_y^{-1} A x */ SCS(accum_by_atrans)(A, z, y); /* y += A'z, y = Px + A' R_y^{-1} Ax */ - /* y = rho_x * x + Px + A' R_y^{-1} A x */ - SCS(add_scaled_array)(y, x, A->n, p->rho_x); + /* y = R_x * x + Px + A' R_y^{-1} A * x */ + accum_by_r_x(y, x, p); } static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n, ScsLinSysWork *pr) { scs_int i; @@ -147,40 +160,39 @@ z[i] = r[i] * M[i]; } } /* no need to update anything in this case */ -void SCS(update_lin_sys_rho_y_vec)(ScsLinSysWork *p, scs_float *rho_y_vec) { - p->rho_y_vec = rho_y_vec; /* this isn't needed but do it to be safe */ +void SCS(update_lin_sys_diag_r)(ScsLinSysWork *p, const scs_float *diag_r) { + p->diag_r = diag_r; /* this isn't needed but do it to be safe */ set_preconditioner(p); } ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P, - scs_float *rho_y_vec, scs_float rho_x) { + const scs_float *diag_r) { ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork)); p->A = A; p->P = P; p->m = A->m; p->n = A->n; - p->rho_x = rho_x; - p->p = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); - p->r = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); - p->Gp = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); - p->tmp = (scs_float *)scs_malloc((A->m) * sizeof(scs_float)); + p->p = (scs_float *)scs_calloc((A->n), sizeof(scs_float)); + p->r = (scs_float *)scs_calloc((A->n), sizeof(scs_float)); + p->Gp = (scs_float *)scs_calloc((A->n), sizeof(scs_float)); + p->tmp = (scs_float *)scs_calloc((A->m), sizeof(scs_float)); /* memory for A transpose */ - p->At = (ScsMatrix *)scs_malloc(sizeof(ScsMatrix)); + p->At = (ScsMatrix *)scs_calloc(1, sizeof(ScsMatrix)); p->At->m = A->n; p->At->n = A->m; - p->At->i = (scs_int *)scs_malloc((A->p[A->n]) * sizeof(scs_int)); - p->At->p = (scs_int *)scs_malloc((A->m + 1) * sizeof(scs_int)); - p->At->x = (scs_float *)scs_malloc((A->p[A->n]) * sizeof(scs_float)); + p->At->i = (scs_int *)scs_calloc((A->p[A->n]), sizeof(scs_int)); + p->At->p = (scs_int *)scs_calloc((A->m + 1), sizeof(scs_int)); + p->At->x = (scs_float *)scs_calloc((A->p[A->n]), sizeof(scs_float)); transpose(A, p); /* preconditioner memory */ - p->rho_y_vec = rho_y_vec; + p->diag_r = diag_r; p->z = (scs_float *)scs_calloc(A->n, sizeof(scs_float)); p->M = (scs_float *)scs_calloc(A->n, sizeof(scs_float)); set_preconditioner(p); p->tot_cg_its = 0; @@ -190,12 +202,11 @@ return SCS_NULL; } return p; } -/* solves (rho_x * I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in - * b */ +/* solves (R_x * I + P + A' R_y^{-1} A)x = b, s warm start, solution in b */ static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b, scs_int max_its, scs_float tol) { scs_int i, n = pr->n; scs_float ztr, ztr_prev, alpha; scs_float *p = pr->p; /* cg direction */ @@ -266,18 +277,16 @@ } /* solves Mx = b, for x but stores result in b */ /* s contains warm-start (if available) */ /* - * [x] = [rho_x I + P A' ]^{-1} [rx] - * [y] [ A -R_y ] [ry] + * [x] = [R_x + P A' ]^{-1} [rx] + * [y] [ A -R_y ] [ry] * - * R_y = diag(rho_y_vec) - * * becomes: * - * x = (rho_x I + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry) + * x = (R_x + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry) * y = R_y^{-1} (Ax - ry) * */ scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s, scs_float tol) { @@ -297,24 +306,24 @@ /* use p->tmp here, and in mat_vec, can do both since they don't overlap */ /* b = [rx; ry] */ /* tmp = ry */ memcpy(p->tmp, &(b[p->n]), p->m * sizeof(scs_float)); /* tmp = R_y^{-1} * ry */ - scale_by_diag_r(p->tmp, p); + scale_by_r_y_inv(p->tmp, p); /* b[:n] = rx + A' R_y^{-1} ry */ SCS(accum_by_atrans)(p->A, p->tmp, b); /* set max_iters to 10 * n (though in theory n is enough for any tol) */ max_iters = 10 * p->n; - /* solves (rho_x I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in + /* solves (R_x + P + A' R_y^{-1} A)x = b, s warm start, solution stored in * b */ cg_its = pcg(p, s, b, max_iters, tol); /* b[:n] = x */ /* b[n:] = -ry */ SCS(scale_array)(&(b[p->n]), -1., p->m); /* b[n:] = Ax - ry */ accum_by_a(p, b, &(b[p->n])); /* b[n:] = R_y^{-1} (Ax - ry) = y */ - scale_by_diag_r(&(b[p->n]), p); + scale_by_r_y_inv(&(b[p->n]), p); p->tot_cg_its += cg_its; #if VERBOSITY > 1 scs_printf("tol %.3e\n", tol); scs_printf("cg_its %i\n", (int)cg_its); #endif