vendor/scs/linsys/cpu/indirect/private.c in scs-0.2.3 vs vendor/scs/linsys/cpu/indirect/private.c in scs-0.3.0

- old
+ new

@@ -1,45 +1,54 @@ #include "private.h" +#include "linsys.h" +#include "util.h" +#include <limits.h> -#define CG_BEST_TOL 1e-9 -#define CG_MIN_TOL 1e-1 - -char *SCS(get_lin_sys_method)(const ScsMatrix *A, const ScsSettings *stgs) { - char *str = (char *)scs_malloc(sizeof(char) * 128); - sprintf(str, "sparse-indirect, nnz in A = %li, CG tol ~ 1/iter^(%2.2f)", - (long)A->p[A->n], stgs->cg_rate); - return str; +const char *SCS(get_lin_sys_method)() { + return "sparse-indirect"; } +/* char *SCS(get_lin_sys_summary)(ScsLinSysWork *p, const ScsInfo *info) { char *str = (char *)scs_malloc(sizeof(char) * 128); - sprintf(str, - "\tLin-sys: avg # CG iterations: %2.2f, avg solve time: %1.2es\n", - (scs_float)p->tot_cg_its / (info->iter + 1), - p->total_solve_time / (info->iter + 1) / 1e3); + sprintf(str, "lin-sys: avg cg its: %2.2f\n", + (scs_float)p->tot_cg_its / (info->iter + 1)); p->tot_cg_its = 0; - p->total_solve_time = 0; return str; } +*/ -/* M = inv ( diag ( RHO_X * I + A'A ) ) */ -static void get_preconditioner(const ScsMatrix *A, const ScsSettings *stgs, - ScsLinSysWork *p) { - scs_int i; +/* set M = inv ( diag ( rho_x * I + P + A' R_y^{-1} A ) ) */ +static void set_preconditioner(ScsLinSysWork *p) { + scs_int i, k; scs_float *M = p->M; + const ScsMatrix *A = p->A; + const ScsMatrix *P = p->P; -#if EXTRA_VERBOSE > 0 +#if VERBOSITY > 0 scs_printf("getting pre-conditioner\n"); #endif - for (i = 0; i < A->n; ++i) { - M[i] = 1 / (stgs->rho_x + - SCS(norm_sq)(&(A->x[A->p[i]]), A->p[i + 1] - A->p[i])); - /* M[i] = 1; */ + for (i = 0; i < A->n; ++i) { /* cols */ + M[i] = p->rho_x; + /* diag(A' R_y^{-1} A) */ + for (k = A->p[i]; k < A->p[i + 1]; ++k) { + /* A->i[k] is row of entry k with value A->x[k] */ + M[i] += A->x[k] * A->x[k] / p->rho_y_vec[A->i[k]]; + } + if (P) { + for (k = P->p[i]; k < P->p[i + 1]; k++) { + /* diagonal element only */ + if (P->i[k] == i) { /* row == col */ + M[i] += P->x[k]; + break; + } + } + } + M[i] = 1. / M[i]; } - -#if EXTRA_VERBOSE > 0 +#if VERBOSITY > 0 scs_printf("finished getting pre-conditioner\n"); #endif } static void transpose(const ScsMatrix *A, ScsLinSysWork *p) { @@ -52,19 +61,20 @@ scs_int *Ap = A->p; scs_int *Ai = A->i; scs_float *Ax = A->x; scs_int i, j, q, *z, c1, c2; -#if EXTRA_VERBOSE > 0 +#if VERBOSITY > 0 SCS(timer) transpose_timer; scs_printf("transposing A\n"); SCS(tic)(&transpose_timer); #endif z = (scs_int *)scs_calloc(m, sizeof(scs_int)); - for (i = 0; i < Ap[n]; i++) z[Ai[i]]++; /* row counts */ - SCS(cumsum)(Cp, z, m); /* row pointers */ + for (i = 0; i < Ap[n]; i++) + z[Ai[i]]++; /* row counts */ + SCS(cumsum)(Cp, z, m); /* row pointers */ for (j = 0; j < n; j++) { c1 = Ap[j]; c2 = Ap[j + 1]; for (i = c1; i < c2; i++) { @@ -74,11 +84,11 @@ z[Ai[i]]++; } } scs_free(z); -#if EXTRA_VERBOSE > 0 +#if VERBOSITY > 0 scs_printf("finished transposing A, time: %1.2es\n", SCS(tocq)(&transpose_timer) / 1e3); #endif } @@ -98,44 +108,63 @@ scs_free(p->M); scs_free(p); } } -/*y = (RHO_X * I + A'A)x */ -static void mat_vec(const ScsMatrix *A, const ScsSettings *s, ScsLinSysWork *p, - const scs_float *x, scs_float *y) { - scs_float *tmp = p->tmp; - memset(tmp, 0, A->m * sizeof(scs_float)); - SCS(accum_by_a)(A, p, x, tmp); - memset(y, 0, A->n * sizeof(scs_float)); - SCS(accum_by_atrans)(A, p, tmp, y); - SCS(add_scaled_array)(y, x, A->n, s->rho_x); +/* vec -> R_y^{-1} vec */ +static void scale_by_diag_r(scs_float *vec, ScsLinSysWork *p) { + scs_int i; + for (i = 0; i < p->m; ++i) { + vec[i] /= p->rho_y_vec[i]; + } } -void SCS(accum_by_atrans)(const ScsMatrix *A, ScsLinSysWork *p, - const scs_float *x, scs_float *y) { - SCS(_accum_by_atrans)(A->n, A->x, A->i, A->p, x, y); +/* we use a different accum_by_a here for speed */ +static void accum_by_a(ScsLinSysWork *p, const scs_float *x, scs_float *y) { + SCS(accum_by_atrans)(p->At, x, y); } -void SCS(accum_by_a)(const ScsMatrix *A, ScsLinSysWork *p, const scs_float *x, - scs_float *y) { - SCS(_accum_by_atrans)(p->At->n, p->At->x, p->At->i, p->At->p, x, y); +/* y = (rho_x * I + P + A' R_y^{-1} A) x */ +static void mat_vec(const ScsMatrix *A, const ScsMatrix *P, ScsLinSysWork *p, + const scs_float *x, scs_float *y) { + scs_float *z = p->tmp; + memset(z, 0, A->m * sizeof(scs_float)); /* z = 0 */ + memset(y, 0, A->n * sizeof(scs_float)); /* y = 0 */ + if (P) { + SCS(accum_by_p)(P, x, y); /* y = Px */ + } + accum_by_a(p, x, z); /* z = Ax */ + scale_by_diag_r(z, p); /* z = R_y^{-1} A x */ + SCS(accum_by_atrans)(A, z, y); /* y += A'z, y = Px + A' R_y^{-1} Ax */ + /* y = rho_x * x + Px + A' R_y^{-1} A x */ + SCS(add_scaled_array)(y, x, A->n, p->rho_x); } -static void apply_pre_conditioner(scs_float *M, scs_float *z, scs_float *r, - scs_int n, scs_float *ipzr) { +static void apply_pre_conditioner(scs_float *z, scs_float *r, scs_int n, + ScsLinSysWork *pr) { scs_int i; - *ipzr = 0; + scs_float *M = pr->M; for (i = 0; i < n; ++i) { z[i] = r[i] * M[i]; - *ipzr += z[i] * r[i]; } } -ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, - const ScsSettings *stgs) { +/* no need to update anything in this case */ +void SCS(update_lin_sys_rho_y_vec)(ScsLinSysWork *p, scs_float *rho_y_vec) { + p->rho_y_vec = rho_y_vec; /* this isn't needed but do it to be safe */ + set_preconditioner(p); +} + +ScsLinSysWork *SCS(init_lin_sys_work)(const ScsMatrix *A, const ScsMatrix *P, + scs_float *rho_y_vec, scs_float rho_x) { ScsLinSysWork *p = (ScsLinSysWork *)scs_calloc(1, sizeof(ScsLinSysWork)); + p->A = A; + p->P = P; + p->m = A->m; + p->n = A->n; + p->rho_x = rho_x; + p->p = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); p->r = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); p->Gp = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); p->tmp = (scs_float *)scs_malloc((A->m) * sizeof(scs_float)); @@ -147,110 +176,148 @@ p->At->p = (scs_int *)scs_malloc((A->m + 1) * sizeof(scs_int)); p->At->x = (scs_float *)scs_malloc((A->p[A->n]) * sizeof(scs_float)); transpose(A, p); /* preconditioner memory */ - p->z = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); - p->M = (scs_float *)scs_malloc((A->n) * sizeof(scs_float)); - get_preconditioner(A, stgs, p); + p->rho_y_vec = rho_y_vec; + p->z = (scs_float *)scs_calloc(A->n, sizeof(scs_float)); + p->M = (scs_float *)scs_calloc(A->n, sizeof(scs_float)); + set_preconditioner(p); - p->total_solve_time = 0; p->tot_cg_its = 0; if (!p->p || !p->r || !p->Gp || !p->tmp || !p->At || !p->At->i || !p->At->p || !p->At->x) { SCS(free_lin_sys_work)(p); return SCS_NULL; } return p; } -/* solves (I+A'A)x = b, s warm start, solution stored in b */ -static scs_int pcg(const ScsMatrix *A, const ScsSettings *stgs, - ScsLinSysWork *pr, const scs_float *s, scs_float *b, +/* solves (rho_x * I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in + * b */ +static scs_int pcg(ScsLinSysWork *pr, const scs_float *s, scs_float *b, scs_int max_its, scs_float tol) { - scs_int i, n = A->n; - scs_float ipzr, ipzr_old, alpha; + scs_int i, n = pr->n; + scs_float ztr, ztr_prev, alpha; scs_float *p = pr->p; /* cg direction */ scs_float *Gp = pr->Gp; /* updated CG direction */ scs_float *r = pr->r; /* cg residual */ scs_float *z = pr->z; /* for preconditioning */ - scs_float *M = pr->M; /* inverse diagonal preconditioner */ - if (s == SCS_NULL) { + const ScsMatrix *A = pr->A; + const ScsMatrix *P = pr->P; + + if (!s) { + /* take s = 0 */ + /* r = b */ memcpy(r, b, n * sizeof(scs_float)); + /* b = 0 */ memset(b, 0, n * sizeof(scs_float)); } else { - mat_vec(A, stgs, pr, s, r); - SCS(add_scaled_array)(r, b, n, -1); - SCS(scale_array)(r, -1, n); + /* r = Mat * s */ + mat_vec(A, P, pr, s, r); + /* r = Mat * s - b */ + SCS(add_scaled_array)(r, b, n, -1.); + /* r = b - Mat * s */ + SCS(scale_array)(r, -1., n); + /* b = s */ memcpy(b, s, n * sizeof(scs_float)); } /* check to see if we need to run CG at all */ - if (SCS(norm)(r, n) < MIN(tol, 1e-18)) { + if (CG_NORM(r, n) < MAX(tol, 1e-12)) { return 0; } - apply_pre_conditioner(M, z, r, n, &ipzr); + /* z = M r (M is inverse preconditioner) */ + apply_pre_conditioner(z, r, n, pr); + /* ztr = z'r */ + ztr = SCS(dot)(z, r, n); + /* p = z */ memcpy(p, z, n * sizeof(scs_float)); for (i = 0; i < max_its; ++i) { - mat_vec(A, stgs, pr, p, Gp); - alpha = ipzr / SCS(dot)(p, Gp, n); + /* Gp = Mat * p */ + mat_vec(A, P, pr, p, Gp); + /* alpha = z'r / p'G p */ + alpha = ztr / SCS(dot)(p, Gp, n); + /* b += alpha * p */ SCS(add_scaled_array)(b, p, n, alpha); + /* r -= alpha * G p */ SCS(add_scaled_array)(r, Gp, n, -alpha); - if (SCS(norm)(r, n) < tol) { -#if EXTRA_VERBOSE > 0 - scs_printf("tol: %.4e, resid: %.4e, iters: %li\n", tol, SCS(norm)(r, n), - (long)i + 1); +#if VERBOSITY > 3 + scs_printf("tol: %.4e, resid: %.4e, iters: %li\n", tol, CG_NORM(r, n), + (long)i + 1); #endif + if (CG_NORM(r, n) < tol) { return i + 1; } - ipzr_old = ipzr; - apply_pre_conditioner(M, z, r, n, &ipzr); - - SCS(scale_array)(p, ipzr / ipzr_old, n); - SCS(add_scaled_array)(p, z, n, 1); + /* z = M r (M is inverse preconditioner) */ + apply_pre_conditioner(z, r, n, pr); + ztr_prev = ztr; + /* ztr = z'r */ + ztr = SCS(dot)(z, r, n); + /* p = beta * p, where beta = ztr / ztr_prev */ + SCS(scale_array)(p, ztr / ztr_prev, n); + /* p = z + beta * p */ + SCS(add_scaled_array)(p, z, n, 1.); } return i; } -scs_int SCS(solve_lin_sys)(const ScsMatrix *A, const ScsSettings *stgs, - ScsLinSysWork *p, scs_float *b, const scs_float *s, - scs_int iter) { - scs_int cg_its; - SCS(timer) linsys_timer; - scs_float cg_tol = - SCS(norm)(b, A->n) * - (iter < 0 ? CG_BEST_TOL - : CG_MIN_TOL / POWF((scs_float)iter + 1, stgs->cg_rate)); +/* solves Mx = b, for x but stores result in b */ +/* s contains warm-start (if available) */ +/* + * [x] = [rho_x I + P A' ]^{-1} [rx] + * [y] [ A -R_y ] [ry] + * + * R_y = diag(rho_y_vec) + * + * becomes: + * + * x = (rho_x I + P + A' R_y^{-1} A)^{-1} (rx + A' R_y^{-1} ry) + * y = R_y^{-1} (Ax - ry) + * + */ +scs_int SCS(solve_lin_sys)(ScsLinSysWork *p, scs_float *b, const scs_float *s, + scs_float tol) { + scs_int cg_its, max_iters; - SCS(tic)(&linsys_timer); - /* solves Mx = b, for x but stores result in b */ - /* s contains warm-start (if available) */ - SCS(accum_by_atrans)(A, p, &(b[A->n]), b); - /* solves (I+A'A)x = b, s warm start, solution stored in b */ - cg_its = pcg(A, stgs, p, s, b, A->n, MAX(cg_tol, CG_BEST_TOL)); - SCS(scale_array)(&(b[A->n]), -1, A->m); - SCS(accum_by_a)(A, p, b, &(b[A->n])); + if (tol <= 0.) { + scs_printf("Warning: tol = %4f <= 0, likely compiled without setting " + "INDIRECT flag.\n", + tol); + } - if (iter >= 0) { - p->tot_cg_its += cg_its; + if (CG_NORM(b, p->n + p->m) <= 1e-12) { + memset(b, 0, (p->n + p->m) * sizeof(scs_float)); + return 0; } - p->total_solve_time += SCS(tocq)(&linsys_timer); -#if EXTRA_VERBOSE > 0 - scs_printf("linsys solve time: %1.2es\n", SCS(tocq)(&linsys_timer) / 1e3); + /* use p->tmp here, and in mat_vec, can do both since they don't overlap */ + /* b = [rx; ry] */ + /* tmp = ry */ + memcpy(p->tmp, &(b[p->n]), p->m * sizeof(scs_float)); + /* tmp = R_y^{-1} * ry */ + scale_by_diag_r(p->tmp, p); + /* b[:n] = rx + A' R_y^{-1} ry */ + SCS(accum_by_atrans)(p->A, p->tmp, b); + /* set max_iters to 10 * n (though in theory n is enough for any tol) */ + max_iters = 10 * p->n; + /* solves (rho_x I + P + A' R_y^{-1} A)x = b, s warm start, solution stored in + * b */ + cg_its = pcg(p, s, b, max_iters, tol); /* b[:n] = x */ + + /* b[n:] = -ry */ + SCS(scale_array)(&(b[p->n]), -1., p->m); + /* b[n:] = Ax - ry */ + accum_by_a(p, b, &(b[p->n])); + /* b[n:] = R_y^{-1} (Ax - ry) = y */ + scale_by_diag_r(&(b[p->n]), p); + p->tot_cg_its += cg_its; +#if VERBOSITY > 1 + scs_printf("tol %.3e\n", tol); + scs_printf("cg_its %i\n", (int)cg_its); #endif return 0; -} - -void SCS(normalize_a)(ScsMatrix *A, const ScsSettings *stgs, const ScsCone *k, - ScsScaling *scal) { - SCS(_normalize_a)(A, stgs, k, scal); -} - -void SCS(un_normalize_a)(ScsMatrix *A, const ScsSettings *stgs, - const ScsScaling *scal) { - SCS(_un_normalize_a)(A, stgs, scal); }