vendor/scs/src/aa.c in scs-0.2.3 vs vendor/scs/src/aa.c in scs-0.3.0

- old
+ new

@@ -1,52 +1,103 @@ +/* + * Anderson acceleration. + * + * x: input iterate + * x_prev: previous input iterate + * f: f(x) output of map f applied to x + * g: x - f (error) + * g_prev: previous error + * s: x - x_prev + * y: g - g_prev + * d: s - y = f - f_prev + * + * capital letters are the variables stacked columnwise + * idx tracks current index where latest quantities written + * idx cycles from left to right columns in matrix + * + * Type-I: + * return f = f - (S - Y) * ( S'Y + r I)^{-1} ( S'g ) + * + * Type-II: + * return f = f - (S - Y) * ( Y'Y + r I)^{-1} ( Y'g ) + * + */ + #include "aa.h" #include "scs_blas.h" -/* This file uses Anderson acceleration to improve the convergence of - * a fixed point mapping. - * At each iteration we need to solve a (small) linear system, we - * do this using LAPACK ?gesv. - */ +#define MAX(a, b) (((a) > (b)) ? (a) : (b)) +#define MIN(a, b) (((a) < (b)) ? (a) : (b)) +#define FILL_MEMORY_BEFORE_SOLVE (1) #ifndef USE_LAPACK -typedef void * ACCEL_WORK; +typedef void *ACCEL_WORK; -AaWork *aa_init(aa_int dim, aa_int aa_mem, aa_int type1) { return SCS_NULL; } -aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) { return 0; } -void aa_finish(AaWork *a) {} +AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization, + aa_float relaxation, aa_float safeguard_factor, + aa_float max_weight_norm, aa_int verbosity) { + return SCS_NULL; +} +aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) { + return 0; +} +aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) { + return 0; +} +void aa_finish(AaWork *a) { +} +void aa_reset(AaWork *a) { +} #else -/* contains the necessary parameters to perform aa at each step */ -struct ACCEL_WORK { - aa_int type1; /* bool, if true type 1 aa otherwise type 2 */ - aa_int k; /* aa memory */ - aa_int l; /* variable dimension */ - aa_int iter; /* current iteration */ +#if PROFILING > 0 - aa_float *x; /* x input to map*/ - aa_float *f; /* f(x) output of map */ - aa_float *g; /* x - f(x) */ +#define TIME_TIC \ + timer __t; \ + tic(&__t); +#define TIME_TOC toc(__func__, &__t); - /* from previous iteration */ - aa_float *g_prev; /* x - f(x) */ +#include <time.h> +typedef struct timer { + struct timespec tic; + struct timespec toc; +} timer; - aa_float *y; /* g - g_prev */ - aa_float *s; /* x - x_prev */ - aa_float *d; /* f - f_prev */ +void tic(timer *t) { + clock_gettime(CLOCK_MONOTONIC, &t->tic); +} - aa_float *Y; /* matrix of stacked y values */ - aa_float *S; /* matrix of stacked s values */ - aa_float *D; /* matrix of stacked d values = (S-Y) */ - aa_float *M; /* S'Y or Y'Y depending on type of aa */ +aa_float tocq(timer *t) { + struct timespec temp; - /* workspace variables */ - aa_float *work; - blas_int *ipiv; -}; + clock_gettime(CLOCK_MONOTONIC, &t->toc); + if ((t->toc.tv_nsec - t->tic.tv_nsec) < 0) { + temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec - 1; + temp.tv_nsec = 1e9 + t->toc.tv_nsec - t->tic.tv_nsec; + } else { + temp.tv_sec = t->toc.tv_sec - t->tic.tv_sec; + temp.tv_nsec = t->toc.tv_nsec - t->tic.tv_nsec; + } + return (aa_float)temp.tv_sec * 1e3 + (aa_float)temp.tv_nsec / 1e6; +} + +aa_float toc(const char *str, timer *t) { + aa_float time = tocq(t); + printf("%s - time: %8.4f milli-seconds.\n", str, time); + return time; +} + +#else + +#define TIME_TIC +#define TIME_TOC + +#endif + /* BLAS functions used */ aa_float BLAS(nrm2)(blas_int *n, aa_float *x, blas_int *incx); void BLAS(axpy)(blas_int *n, aa_float *a, const aa_float *x, blas_int *incx, aa_float *y, blas_int *incy); void BLAS(gemv)(const char *trans, const blas_int *m, const blas_int *n, @@ -57,151 +108,362 @@ blas_int *ipiv, aa_float *b, blas_int *ldb, blas_int *info); void BLAS(gemm)(const char *transa, const char *transb, blas_int *m, blas_int *n, blas_int *k, aa_float *alpha, aa_float *a, blas_int *lda, aa_float *b, blas_int *ldb, aa_float *beta, aa_float *c, blas_int *ldc); +void BLAS(scal)(const blas_int *n, const aa_float *a, aa_float *x, + const blas_int *incx); +/* This file uses Anderson acceleration to improve the convergence of + * a fixed point mapping. + * At each iteration we need to solve a (small) linear system, we + * do this using LAPACK ?gesv. + */ + +/* contains the necessary parameters to perform aa at each step */ +struct ACCEL_WORK { + aa_int type1; /* bool, if true type 1 aa otherwise type 2 */ + aa_int mem; /* aa memory */ + aa_int dim; /* variable dimension */ + aa_int iter; /* current iteration */ + aa_int verbosity; /* verbosity level, 0 is no printing */ + aa_int success; /* was the last AA step successful or not */ + + aa_float relaxation; /* relaxation x and f, beta in some papers */ + aa_float regularization; /* regularization */ + aa_float safeguard_factor; /* safeguard tolerance factor */ + aa_float max_weight_norm; /* maximum norm of AA weights */ + + aa_float *x; /* x input to map*/ + aa_float *f; /* f(x) output of map */ + aa_float *g; /* x - f(x) */ + aa_float norm_g; /* ||x - f(x)|| */ + + /* from previous iteration */ + aa_float *g_prev; /* x_prev - f(x_prev) */ + + aa_float *y; /* g - g_prev */ + aa_float *s; /* x - x_prev */ + aa_float *d; /* f - f_prev */ + + aa_float *Y; /* matrix of stacked y values */ + aa_float *S; /* matrix of stacked s values */ + aa_float *D; /* matrix of stacked d values = (S-Y) */ + aa_float *M; /* S'Y or Y'Y depending on type of aa */ + + /* workspace variables */ + aa_float *work; /* scratch space */ + blas_int *ipiv; /* permutation variable, not used after solve */ + + aa_float *x_work; /* workspace (= x) for when relaxation != 1.0 */ +}; + +/* add regularization dependent on Y and S matrices */ +static aa_float compute_regularization(AaWork *a, aa_int len) { + /* typically type-I does better with higher regularization than type-II */ + TIME_TIC + aa_float r, nrm_m; + blas_int btotal = (blas_int)(len * len), one = 1; + nrm_m = BLAS(nrm2)(&btotal, a->M, &one); + r = a->regularization * nrm_m; + if (a->verbosity > 2) { + printf("iter: %i, norm: M %.2e, r: %.2e\n", (int)a->iter, nrm_m, r); + } + TIME_TOC + return r; +} + /* sets a->M to S'Y or Y'Y depending on type of aa used */ -static void set_m(AaWork *a) { - blas_int bl = (blas_int)(a->l), bk = (blas_int)a->k; - aa_float onef = 1.0, zerof = 0.0; +/* M is len x len after this */ +static void set_m(AaWork *a, aa_int len) { + TIME_TIC + aa_int i; + blas_int bdim = (blas_int)(a->dim); + blas_int blen = (blas_int)len; + aa_float onef = 1.0, zerof = 0.0, r; + /* if len < mem this only uses len cols */ BLAS(gemm) - ("Trans", "No", &bk, &bk, &bl, &onef, a->type1 ? a->S : a->Y, &bl, a->Y, &bl, - &zerof, a->M, &bk); + ("Trans", "No", &blen, &blen, &bdim, &onef, a->type1 ? a->S : a->Y, &bdim, + a->Y, &bdim, &zerof, a->M, &blen); + if (a->regularization > 0) { + r = compute_regularization(a, len); + for (i = 0; i < len; ++i) { + a->M[i + len * i] += r; + } + } + TIME_TOC + return; } +/* initialize accel params, in particular x_prev, f_prev, g_prev */ +static void init_accel_params(const aa_float *x, const aa_float *f, AaWork *a) { + TIME_TIC + blas_int bdim = (blas_int)a->dim; + aa_float neg_onef = -1.0; + blas_int one = 1; + /* x_prev = x */ + memcpy(a->x, x, sizeof(aa_float) * a->dim); + /* f_prev = f */ + memcpy(a->f, f, sizeof(aa_float) * a->dim); + /* g_prev = x */ + memcpy(a->g_prev, x, sizeof(aa_float) * a->dim); + /* g_prev = x_prev - f_prev */ + BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g_prev, &one); + TIME_TOC +} + /* updates the workspace parameters for aa for this iteration */ -static void update_accel_params(const aa_float *x, const aa_float *f, - AaWork *a) { +static void update_accel_params(const aa_float *x, const aa_float *f, AaWork *a, + aa_int len) { /* at the start a->x = x_prev and a->f = f_prev */ - aa_int idx = a->iter % a->k; - aa_int l = a->l; - + TIME_TIC + aa_int idx = (a->iter - 1) % a->mem; blas_int one = 1; - blas_int bl = (blas_int)l; + blas_int bdim = (blas_int)a->dim; aa_float neg_onef = -1.0; /* g = x */ - memcpy(a->g, x, sizeof(aa_float) * l); + memcpy(a->g, x, sizeof(aa_float) * a->dim); /* s = x */ - memcpy(a->s, x, sizeof(aa_float) * l); + memcpy(a->s, x, sizeof(aa_float) * a->dim); /* d = f */ - memcpy(a->d, f, sizeof(aa_float) * l); - /* g -= f */ - BLAS(axpy)(&bl, &neg_onef, f, &one, a->g, &one); - /* s -= x_prev */ - BLAS(axpy)(&bl, &neg_onef, a->x, &one, a->s, &one); - /* d -= f_prev */ - BLAS(axpy)(&bl, &neg_onef, a->f, &one, a->d, &one); + memcpy(a->d, f, sizeof(aa_float) * a->dim); + /* g = x - f */ + BLAS(axpy)(&bdim, &neg_onef, f, &one, a->g, &one); + /* s = x - x_prev */ + BLAS(axpy)(&bdim, &neg_onef, a->x, &one, a->s, &one); + /* d = f - f_prev */ + BLAS(axpy)(&bdim, &neg_onef, a->f, &one, a->d, &one); /* g, s, d correct here */ /* y = g */ - memcpy(a->y, a->g, sizeof(aa_float) * l); - /* y -= g_prev */ - BLAS(axpy)(&bl, &neg_onef, a->g_prev, &one, a->y, &one); + memcpy(a->y, a->g, sizeof(aa_float) * a->dim); + /* y = g - g_prev */ + BLAS(axpy)(&bdim, &neg_onef, a->g_prev, &one, a->y, &one); /* y correct here */ /* copy y into idx col of Y */ - memcpy(&(a->Y[idx * l]), a->y, sizeof(aa_float) * l); + memcpy(&(a->Y[idx * a->dim]), a->y, sizeof(aa_float) * a->dim); /* copy s into idx col of S */ - memcpy(&(a->S[idx * l]), a->s, sizeof(aa_float) * l); + memcpy(&(a->S[idx * a->dim]), a->s, sizeof(aa_float) * a->dim); /* copy d into idx col of D */ - memcpy(&(a->D[idx * l]), a->d, sizeof(aa_float) * l); + memcpy(&(a->D[idx * a->dim]), a->d, sizeof(aa_float) * a->dim); - /* Y, S,D correct here */ + /* Y, S, D correct here */ - memcpy(a->f, f, sizeof(aa_float) * l); - memcpy(a->x, x, sizeof(aa_float) * l); + /* set a->f and a->x for next iter (x_prev and f_prev) */ + memcpy(a->f, f, sizeof(aa_float) * a->dim); + memcpy(a->x, x, sizeof(aa_float) * a->dim); + /* workspace for when relaxation != 1.0 */ + if (a->x_work) { + memcpy(a->x_work, x, sizeof(aa_float) * a->dim); + } + /* x, f correct here */ - /* set M = S'*Y */ - set_m(a); + memcpy(a->g_prev, a->g, sizeof(aa_float) * a->dim); + /* g_prev set for next iter here */ - /* M correct here */ + /* compute ||g|| = ||f - x|| */ + a->norm_g = BLAS(nrm2)(&bdim, a->g, &one); - memcpy(a->g_prev, a->g, sizeof(aa_float) * l); + TIME_TOC + return; +} - /* g_prev set for next iter here */ +/* f = (1-relaxation) * \sum_i a_i x_i + relaxation * \sum_i a_i f_i */ +static void relax(aa_float *f, AaWork *a, aa_int len) { + TIME_TIC + /* x_work = x - S * work */ + blas_int bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len; + aa_float onef = 1.0, neg_onef = -1.0; + aa_float one_m_relaxation = 1. - a->relaxation; + BLAS(gemv) + ("NoTrans", &bdim, &blen, &neg_onef, a->S, &bdim, a->work, &one, &onef, + a->x_work, &one); + /* f = relaxation * f */ + BLAS(scal)(&blen, &a->relaxation, f, &one); + /* f += (1 - relaxation) * x_work */ + BLAS(axpy)(&blen, &one_m_relaxation, a->x_work, &one, f, &one); + TIME_TOC } -/* solves the system of equations to perform the aa update +/* solves the system of equations to perform the AA update * at the end f contains the next iterate to be returned */ -static aa_int solve(aa_float *f, AaWork *a, aa_int len) { - blas_int info = -1, bl = (blas_int)(a->l), one = 1, blen = (blas_int)len, - bk = (blas_int)a->k; - aa_float neg_onef = -1.0, onef = 1.0, zerof = 0.0, nrm; +static aa_float solve(aa_float *f, AaWork *a, aa_int len) { + TIME_TIC + blas_int info = -1, bdim = (blas_int)(a->dim), one = 1, blen = (blas_int)len; + aa_float onef = 1.0, zerof = 0.0, neg_onef = -1.0, aa_norm; + /* work = S'g or Y'g */ BLAS(gemv) - ("Trans", &bl, &blen, &onef, a->type1 ? a->S : a->Y, &bl, a->g, &one, &zerof, - a->work, &one); - /* work = M \ work, where M = S'Y or M = Y'Y */ - BLAS(gesv)(&blen, &one, a->M, &bk, a->ipiv, a->work, &blen, &info); - nrm = BLAS(nrm2)(&bk, a->work, &one); - if (info < 0 || nrm >= MAX_AA_NRM) { - #if EXTRA_VERBOSE > 0 - scs_printf("Error in AA type %i, iter: %i, info: %i, norm %1.2e\n", - a->type1 ? 1 : 2, (int)a->iter, (int)info, nrm); - #endif - return -1; + ("Trans", &bdim, &blen, &onef, a->type1 ? a->S : a->Y, &bdim, a->g, &one, + &zerof, a->work, &one); + + /* work = M \ work, where update_accel_params has set M = S'Y or M = Y'Y */ + BLAS(gesv)(&blen, &one, a->M, &blen, a->ipiv, a->work, &blen, &info); + aa_norm = BLAS(nrm2)(&blen, a->work, &one); + if (a->verbosity > 1) { + printf("AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n", + a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm); } - /* if solve was successful then set f -= D * work */ + + /* info < 0 input error, input > 0 matrix is singular */ + if (info != 0 || aa_norm >= a->max_weight_norm) { + if (a->verbosity > 0) { + printf("Error in AA type %i, iter: %i, len %i, info: %i, aa_norm %.2e\n", + a->type1 ? 1 : 2, (int)a->iter, (int)len, (int)info, aa_norm); + } + a->success = 0; + /* reset aa for stability */ + aa_reset(a); + TIME_TOC + return -aa_norm; + } + + /* here work = gamma, ie, the correct AA shifted weights */ + /* if solve was successful compute new point */ + + /* first set f -= D * work */ BLAS(gemv) - ("NoTrans", &bl, &blen, &neg_onef, a->D, &bl, a->work, &one, &onef, f, &one); - return (aa_int)info; + ("NoTrans", &bdim, &blen, &neg_onef, a->D, &bdim, a->work, &one, &onef, f, + &one); + + /* if relaxation is not 1 then need to incorporate */ + if (a->relaxation != 1.0) { + relax(f, a, len); + } + + a->success = 1; /* this should be the only place we set success = 1 */ + TIME_TOC + return aa_norm; } /* * API functions below this line, see aa.h for descriptions. */ -AaWork *aa_init(aa_int l, aa_int aa_mem, aa_int type1) { +AaWork *aa_init(aa_int dim, aa_int mem, aa_int type1, aa_float regularization, + aa_float relaxation, aa_float safeguard_factor, + aa_float max_weight_norm, aa_int verbosity) { + TIME_TIC AaWork *a = (AaWork *)calloc(1, sizeof(AaWork)); if (!a) { - scs_printf("Failed to allocate memory for AA.\n"); + printf("Failed to allocate memory for AA.\n"); return (void *)0; } a->type1 = type1; a->iter = 0; - a->l = l; - a->k = aa_mem; - if (a->k <= 0) { + a->dim = dim; + a->mem = MIN(mem, dim); /* for rank stability */ + a->regularization = regularization; + a->relaxation = relaxation; + a->safeguard_factor = safeguard_factor; + a->max_weight_norm = max_weight_norm; + a->success = 0; + a->verbosity = verbosity; + if (a->mem <= 0) { return a; } - a->x = (aa_float *)calloc(a->l, sizeof(aa_float)); - a->f = (aa_float *)calloc(a->l, sizeof(aa_float)); - a->g = (aa_float *)calloc(a->l, sizeof(aa_float)); + a->x = (aa_float *)calloc(a->dim, sizeof(aa_float)); + a->f = (aa_float *)calloc(a->dim, sizeof(aa_float)); + a->g = (aa_float *)calloc(a->dim, sizeof(aa_float)); - a->g_prev = (aa_float *)calloc(a->l, sizeof(aa_float)); + a->g_prev = (aa_float *)calloc(a->dim, sizeof(aa_float)); - a->y = (aa_float *)calloc(a->l, sizeof(aa_float)); - a->s = (aa_float *)calloc(a->l, sizeof(aa_float)); - a->d = (aa_float *)calloc(a->l, sizeof(aa_float)); + a->y = (aa_float *)calloc(a->dim, sizeof(aa_float)); + a->s = (aa_float *)calloc(a->dim, sizeof(aa_float)); + a->d = (aa_float *)calloc(a->dim, sizeof(aa_float)); - a->Y = (aa_float *)calloc(a->l * a->k, sizeof(aa_float)); - a->S = (aa_float *)calloc(a->l * a->k, sizeof(aa_float)); - a->D = (aa_float *)calloc(a->l * a->k, sizeof(aa_float)); + a->Y = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float)); + a->S = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float)); + a->D = (aa_float *)calloc(a->dim * a->mem, sizeof(aa_float)); - a->M = (aa_float *)calloc(a->k * a->k, sizeof(aa_float)); - a->work = (aa_float *)calloc(a->k, sizeof(aa_float)); - a->ipiv = (blas_int *)calloc(a->k, sizeof(blas_int)); + a->M = (aa_float *)calloc(a->mem * a->mem, sizeof(aa_float)); + a->work = (aa_float *)calloc(MAX(a->mem, a->dim), sizeof(aa_float)); + a->ipiv = (blas_int *)calloc(a->mem, sizeof(blas_int)); + + if (relaxation != 1.0) { + a->x_work = (aa_float *)calloc(a->dim, sizeof(aa_float)); + } else { + a->x_work = 0; + } + TIME_TOC return a; } -aa_int aa_apply(aa_float *f, const aa_float *x, AaWork *a) { - if (a->k <= 0) { - return 0; +aa_float aa_apply(aa_float *f, const aa_float *x, AaWork *a) { + TIME_TIC + aa_float aa_norm = 0; + aa_int len = MIN(a->iter, a->mem); + a->success = 0; /* if we make an AA step we set this to 1 later */ + if (a->mem <= 0) { + TIME_TOC + return aa_norm; /* 0 */ } - update_accel_params(x, f, a); - if (a->iter++ == 0) { + if (a->iter == 0) { + /* if first iteration then seed params for next iter */ + init_accel_params(x, f, a); + a->iter++; + TIME_TOC + return aa_norm; /* 0 */ + } + /* set various accel quantities */ + update_accel_params(x, f, a, len); + + /* only perform solve steps when the memory is full */ + if (!FILL_MEMORY_BEFORE_SOLVE || a->iter >= a->mem) { + /* set M = S'Y or Y'Y depending on type of aa used */ + set_m(a, len); + /* solve linear system, new point overwrites f if successful */ + aa_norm = solve(f, a, len); + } + a->iter++; + TIME_TOC + return aa_norm; +} + +aa_int aa_safeguard(aa_float *f_new, aa_float *x_new, AaWork *a) { + TIME_TIC + blas_int bdim = (blas_int)a->dim; + blas_int one = 1; + aa_float neg_onef = -1.0; + aa_float norm_diff; + if (!a->success) { + /* last AA update was not successful, no need for safeguarding */ + TIME_TOC return 0; } - /* solve linear system, new point overwrites f if successful */ - return solve(f, a, MIN(a->iter - 1, a->k)); + + /* reset success indicator in case safeguarding called multiple times */ + a->success = 0; + + /* work = x_new */ + memcpy(a->work, x_new, a->dim * sizeof(aa_float)); + /* work = x_new - f_new */ + BLAS(axpy)(&bdim, &neg_onef, f_new, &one, a->work, &one); + /* norm_diff = || f_new - x_new || */ + norm_diff = BLAS(nrm2)(&bdim, a->work, &one); + /* g = f - x */ + if (norm_diff > a->safeguard_factor * a->norm_g) { + /* in this case we reject the AA step and reset */ + memcpy(f_new, a->f, a->dim * sizeof(aa_float)); + memcpy(x_new, a->x, a->dim * sizeof(aa_float)); + if (a->verbosity > 0) { + printf("AA rejection, iter: %i, norm_diff %.4e, prev_norm_diff %.4e\n", + (int)a->iter, norm_diff, a->norm_g); + } + aa_reset(a); + TIME_TOC + return -1; + } + TIME_TOC + return 0; } void aa_finish(AaWork *a) { if (a) { free(a->x); @@ -215,10 +477,23 @@ free(a->S); free(a->D); free(a->M); free(a->work); free(a->ipiv); + if (a->x_work) { + free(a->x_work); + } free(a); } + return; +} + +void aa_reset(AaWork *a) { + /* to reset we simply set a->iter = 0 */ + if (a->verbosity > 0) { + printf("AA reset.\n"); + } + a->iter = 0; + return; } #endif