/* * CRF1d context (forward-backward, viterbi, etc). * * Copyright (c) 2007-2010, Naoaki Okazaki * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * Neither the names of the authors nor the names of its contributors * may be used to endorse or promote products derived from this * software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ /* $Id$ */ #ifdef HAVE_CONFIG_H #include #endif/*HAVE_CONFIG_H*/ #include #include #include #include #include #include #include "crf1d.h" #include "vecmath.h" crf1d_context_t* crf1dc_new(int flag, int L, int T) { int ret = 0; crf1d_context_t* ctx = NULL; ctx = (crf1d_context_t*)calloc(1, sizeof(crf1d_context_t)); if (ctx != NULL) { ctx->flag = flag; ctx->num_labels = L; ctx->trans = (floatval_t*)calloc(L * L, sizeof(floatval_t)); if (ctx->trans == NULL) goto error_exit; if (ctx->flag & CTXF_MARGINALS) { ctx->exp_trans = (floatval_t*)_aligned_malloc((L * L + 4) * sizeof(floatval_t), 16); if (ctx->exp_trans == NULL) goto error_exit; ctx->mexp_trans = (floatval_t*)calloc(L * L, sizeof(floatval_t)); if (ctx->mexp_trans == NULL) goto error_exit; } if (ret = crf1dc_set_num_items(ctx, T)) { goto error_exit; } /* T gives the 'hint' for maximum length of items. */ ctx->num_items = 0; } return ctx; error_exit: crf1dc_delete(ctx); return NULL; } int crf1dc_set_num_items(crf1d_context_t* ctx, int T) { const int L = ctx->num_labels; ctx->num_items = T; if (ctx->cap_items < T) { free(ctx->backward_edge); free(ctx->mexp_state); _aligned_free(ctx->exp_state); free(ctx->scale_factor); free(ctx->row); free(ctx->beta_score); free(ctx->alpha_score); ctx->alpha_score = (floatval_t*)calloc(T * L, sizeof(floatval_t)); if (ctx->alpha_score == NULL) return CRFSUITEERR_OUTOFMEMORY; ctx->beta_score = (floatval_t*)calloc(T * L, sizeof(floatval_t)); if (ctx->beta_score == NULL) return CRFSUITEERR_OUTOFMEMORY; ctx->scale_factor = (floatval_t*)calloc(T, sizeof(floatval_t)); if (ctx->scale_factor == NULL) return CRFSUITEERR_OUTOFMEMORY; ctx->row = (floatval_t*)calloc(L, sizeof(floatval_t)); if (ctx->row == NULL) return CRFSUITEERR_OUTOFMEMORY; if (ctx->flag & CTXF_VITERBI) { ctx->backward_edge = (int*)calloc(T * L, sizeof(int)); if (ctx->backward_edge == NULL) return CRFSUITEERR_OUTOFMEMORY; } ctx->state = (floatval_t*)calloc(T * L, sizeof(floatval_t)); if (ctx->state == NULL) return CRFSUITEERR_OUTOFMEMORY; if (ctx->flag & CTXF_MARGINALS) { ctx->exp_state = (floatval_t*)_aligned_malloc((T * L + 4) * sizeof(floatval_t), 16); if (ctx->exp_state == NULL) return CRFSUITEERR_OUTOFMEMORY; ctx->mexp_state = (floatval_t*)calloc(T * L, sizeof(floatval_t)); if (ctx->mexp_state == NULL) return CRFSUITEERR_OUTOFMEMORY; } ctx->cap_items = T; } return 0; } void crf1dc_delete(crf1d_context_t* ctx) { if (ctx != NULL) { free(ctx->backward_edge); free(ctx->mexp_state); _aligned_free(ctx->exp_state); free(ctx->state); free(ctx->scale_factor); free(ctx->row); free(ctx->beta_score); free(ctx->alpha_score); free(ctx->mexp_trans); _aligned_free(ctx->exp_trans); free(ctx->trans); } free(ctx); } void crf1dc_reset(crf1d_context_t* ctx, int flag) { const int T = ctx->num_items; const int L = ctx->num_labels; if (flag & RF_STATE) { veczero(ctx->state, T*L); } if (flag & RF_TRANS) { veczero(ctx->trans, L*L); } if (ctx->flag & CTXF_MARGINALS) { veczero(ctx->mexp_state, T*L); veczero(ctx->mexp_trans, L*L); ctx->log_norm = 0; } } void crf1dc_exp_state(crf1d_context_t* ctx) { const int T = ctx->num_items; const int L = ctx->num_labels; veccopy(ctx->exp_state, ctx->state, L * T); vecexp(ctx->exp_state, L * T); } void crf1dc_exp_transition(crf1d_context_t* ctx) { const int L = ctx->num_labels; veccopy(ctx->exp_trans, ctx->trans, L * L); vecexp(ctx->exp_trans, L * L); } void crf1dc_alpha_score(crf1d_context_t* ctx) { int i, t; floatval_t sum, *cur = NULL; floatval_t *scale = &ctx->scale_factor[0]; const floatval_t *prev = NULL, *trans = NULL, *state = NULL; const int T = ctx->num_items; const int L = ctx->num_labels; /* Compute the alpha scores on nodes (0, *). alpha[0][j] = state[0][j] */ cur = ALPHA_SCORE(ctx, 0); state = EXP_STATE_SCORE(ctx, 0); veccopy(cur, state, L); sum = vecsum(cur, L); *scale = (sum != 0.) ? 1. / sum : 1.; vecscale(cur, *scale, L); ++scale; /* Compute the alpha scores on nodes (t, *). alpha[t][j] = state[t][j] * \sum_{i} alpha[t-1][i] * trans[i][j] */ for (t = 1;t < T;++t) { prev = ALPHA_SCORE(ctx, t-1); cur = ALPHA_SCORE(ctx, t); state = EXP_STATE_SCORE(ctx, t); veczero(cur, L); for (i = 0;i < L;++i) { trans = EXP_TRANS_SCORE(ctx, i); vecaadd(cur, prev[i], trans, L); } vecmul(cur, state, L); sum = vecsum(cur, L); *scale = (sum != 0.) ? 1. / sum : 1.; vecscale(cur, *scale, L); ++scale; } /* Compute the logarithm of the normalization factor here. norm = 1. / (C[0] * C[1] ... * C[T-1]) log(norm) = - \sum_{t = 0}^{T-1} log(C[t]). */ ctx->log_norm = -vecsumlog(ctx->scale_factor, T); } void crf1dc_beta_score(crf1d_context_t* ctx) { int i, t; floatval_t *cur = NULL; floatval_t *row = ctx->row; const floatval_t *next = NULL, *state = NULL, *trans = NULL; const int T = ctx->num_items; const int L = ctx->num_labels; const floatval_t *scale = &ctx->scale_factor[T-1]; /* Compute the beta scores at (T-1, *). */ cur = BETA_SCORE(ctx, T-1); vecset(cur, *scale, L); --scale; /* Compute the beta scores at (t, *). */ for (t = T-2;0 <= t;--t) { cur = BETA_SCORE(ctx, t); next = BETA_SCORE(ctx, t+1); state = EXP_STATE_SCORE(ctx, t+1); veccopy(row, next, L); vecmul(row, state, L); /* Compute the beta score at (t, i). */ for (i = 0;i < L;++i) { trans = EXP_TRANS_SCORE(ctx, i); cur[i] = vecdot(trans, row, L); } vecscale(cur, *scale, L); --scale; } } void crf1dc_marginals(crf1d_context_t* ctx) { int i, j, t; const int T = ctx->num_items; const int L = ctx->num_labels; /* Compute the model expectations of states. p(t,i) = fwd[t][i] * bwd[t][i] / norm = (1. / C[t]) * fwd'[t][i] * bwd'[t][i] */ for (t = 0;t < T;++t) { floatval_t *fwd = ALPHA_SCORE(ctx, t); floatval_t *bwd = BETA_SCORE(ctx, t); floatval_t *prob = STATE_MEXP(ctx, t); veccopy(prob, fwd, L); vecmul(prob, bwd, L); vecscale(prob, 1. / ctx->scale_factor[t], L); } /* Compute the model expectations of transitions. p(t,i,t+1,j) = fwd[t][i] * edge[i][j] * state[t+1][j] * bwd[t+1][j] / norm = (fwd'[t][i] / (C[0] ... C[t])) * edge[i][j] * state[t+1][j] * (bwd'[t+1][j] / (C[t+1] ... C[T-1])) * (C[0] * ... * C[T-1]) = fwd'[t][i] * edge[i][j] * state[t+1][j] * bwd'[t+1][j] The model expectation of a transition (i -> j) is the sum of the marginal probabilities p(t,i,t+1,j) over t. */ for (t = 0;t < T-1;++t) { floatval_t *fwd = ALPHA_SCORE(ctx, t); floatval_t *state = EXP_STATE_SCORE(ctx, t+1); floatval_t *bwd = BETA_SCORE(ctx, t+1); floatval_t *row = ctx->row; /* row[j] = state[t+1][j] * bwd'[t+1][j] */ veccopy(row, bwd, L); vecmul(row, state, L); for (i = 0;i < L;++i) { floatval_t *edge = EXP_TRANS_SCORE(ctx, i); floatval_t *prob = TRANS_MEXP(ctx, i); for (j = 0;j < L;++j) { prob[j] += fwd[i] * edge[j] * row[j]; } } } } floatval_t crf1dc_marginal_point(crf1d_context_t *ctx, int l, int t) { floatval_t *fwd = ALPHA_SCORE(ctx, t); floatval_t *bwd = BETA_SCORE(ctx, t); return fwd[l] * bwd[l] / ctx->scale_factor[t]; } floatval_t crf1dc_marginal_path(crf1d_context_t *ctx, const int *path, int begin, int end) { int t; /* Compute the marginal probability of a (partial) path. a = path[begin], b = path[begin+1], ..., y = path[end-2], z = path[end-1] fwd[begin][a] = (fwd'[begin][a] / (C[0] ... C[begin]) bwd[end-1][z] = (bwd'[end-1][z] / (C[end-1] ... C[T-1])) norm = 1 / (C[0] * ... * C[T-1]) p(a, b, ..., z) = fwd[begin][a] * edge[a][b] * state[begin+1][b] * ... * edge[y][z] * state[end-1][z] * bwd[end-1][z] / norm = fwd'[begin][a] * edge[a][b] * state[begin+1][b] * ... * edge[y][z] * state[end-1][z] * bwd'[end-1][z] * (C[begin+1] * ... * C[end-2]) */ floatval_t *fwd = ALPHA_SCORE(ctx, begin); floatval_t *bwd = BETA_SCORE(ctx, end-1); floatval_t prob = fwd[path[begin]] * bwd[path[end-1]] / ctx->scale_factor[begin]; for (t = begin;t < end-1;++t) { floatval_t *state = EXP_STATE_SCORE(ctx, t+1); floatval_t *edge = EXP_TRANS_SCORE(ctx, path[t]); prob *= (edge[path[t+1]] * state[path[t+1]] * ctx->scale_factor[t]); } return prob; } #if 0 /* Sigh, this was found to be slower than the forward-backward algorithm. */ #define ADJACENCY(ctx, i) \ (&MATRIX(ctx->adj, ctx->num_labels, 0, i)) void crf1dc_marginal_without_beta(crf1d_context_t* ctx) { int i, j, t; floatval_t *prob = NULL; floatval_t *row = ctx->row; const floatval_t *fwd = NULL; const int T = ctx->num_items; const int L = ctx->num_labels; /* Compute marginal probabilities of states at T-1 p(T-1,j) = fwd'[T-1][j] */ fwd = ALPHA_SCORE(ctx, T-1); prob = STATE_MEXP(ctx, T-1); veccopy(prob, fwd, L); /* Repeat the following computation for t = T-1,T-2, ..., 1. 1) Compute p(t-1,i,t,j) using p(t,j) 2) Compute p(t,i) using p(t-1,i,t,j) */ for (t = T-1;0 < t;--t) { fwd = ALPHA_SCORE(ctx, t-1); prob = STATE_MEXP(ctx, t); veczero(ctx->adj, L*L); veczero(row, L); /* Compute adj[i][j] and row[j]. adj[i][j] = fwd'[t-1][i] * edge[i][j] row[j] = \sum_{i} adj[i][j] */ for (i = 0;i < L;++i) { floatval_t *adj = ADJACENCY(ctx, i); floatval_t *edge = EXP_TRANS_SCORE(ctx, i); vecaadd(adj, fwd[i], edge, L); vecadd(row, adj, L); } /* Find z such that z * \sum_{i] adj[i][j] = p(t,j). Thus, z = p(t,j) / row[j]; we overwrite row with z. */ vecinv(row, L); vecmul(row, prob, L); /* Apply the partition factor z (row[j]) to adj[i][j]. */ for (i = 0;i < L;++i) { floatval_t *adj = ADJACENCY(ctx, i); vecmul(adj, row, L); } /* Now that adj[i][j] presents p(t-1,i,t,j), accumulate model expectations of transitions. */ for (i = 0;i < L;++i) { floatval_t *adj = ADJACENCY(ctx, i); floatval_t *prob = TRANS_MEXP(ctx, i); vecadd(prob, adj, L); } /* Compute the marginal probability of states at t-1. p(t-1,i) = \sum_{j} p(t-1,i,t,j) */ prob = STATE_MEXP(ctx, t-1); for (i = 0;i < L;++i) { floatval_t *adj = ADJACENCY(ctx, i); prob[i] = vecsum(adj, L); } } } #endif floatval_t crf1dc_score(crf1d_context_t* ctx, const int *labels) { int i, j, t; floatval_t ret = 0; const floatval_t *state = NULL, *cur = NULL, *trans = NULL; const int T = ctx->num_items; const int L = ctx->num_labels; /* Stay at (0, labels[0]). */ i = labels[0]; state = STATE_SCORE(ctx, 0); ret = state[i]; /* Loop over the rest of items. */ for (t = 1;t < T;++t) { j = labels[t]; trans = TRANS_SCORE(ctx, i); state = STATE_SCORE(ctx, t); /* Transit from (t-1, i) to (t, j). */ ret += trans[j]; ret += state[j]; i = j; } return ret; } floatval_t crf1dc_lognorm(crf1d_context_t* ctx) { return ctx->log_norm; } floatval_t crf1dc_viterbi(crf1d_context_t* ctx, int *labels) { int i, j, t; int *back = NULL; floatval_t max_score, score, *cur = NULL; const floatval_t *prev = NULL, *state = NULL, *trans = NULL; const int T = ctx->num_items; const int L = ctx->num_labels; /* This function assumes state and trans scores to be in the logarithm domain. */ /* Compute the scores at (0, *). */ cur = ALPHA_SCORE(ctx, 0); state = STATE_SCORE(ctx, 0); for (j = 0;j < L;++j) { cur[j] = state[j]; } /* Compute the scores at (t, *). */ for (t = 1;t < T;++t) { prev = ALPHA_SCORE(ctx, t-1); cur = ALPHA_SCORE(ctx, t); state = STATE_SCORE(ctx, t); back = BACKWARD_EDGE_AT(ctx, t); /* Compute the score of (t, j). */ for (j = 0;j < L;++j) { max_score = -FLOAT_MAX; for (i = 0;i < L;++i) { /* Transit from (t-1, i) to (t, j). */ trans = TRANS_SCORE(ctx, i); score = prev[i] + trans[j]; /* Store this path if it has the maximum score. */ if (max_score < score) { max_score = score; /* Backward link (#t, #j) -> (#t-1, #i). */ back[j] = i; } } /* Add the state score on (t, j). */ cur[j] = max_score + state[j]; } } /* Find the node (#T, #i) that reaches EOS with the maximum score. */ max_score = -FLOAT_MAX; prev = ALPHA_SCORE(ctx, T-1); for (i = 0;i < L;++i) { if (max_score < prev[i]) { max_score = prev[i]; labels[T-1] = i; /* Tag the item #T. */ } } /* Tag labels by tracing the backward links. */ for (t = T-2;0 <= t;--t) { back = BACKWARD_EDGE_AT(ctx, t+1); labels[t] = back[labels[t+1]]; } /* Return the maximum score (without the normalization factor subtracted). */ return max_score; } static void check_values(FILE *fp, floatval_t cv, floatval_t tv) { if (fabs(cv - tv) < 1e-9) { fprintf(fp, "OK (%f)\n", cv); } else { fprintf(fp, "FAIL: %f (%f)\n", cv, tv); } } void crf1dc_debug_context(FILE *fp) { int y1, y2, y3; floatval_t norm = 0; const int L = 3; const int T = 3; crf1d_context_t *ctx = crf1dc_new(CTXF_MARGINALS, L, T); floatval_t *trans = NULL, *state = NULL; floatval_t scores[3][3][3]; int labels[3]; /* Initialize the state scores. */ state = EXP_STATE_SCORE(ctx, 0); state[0] = .4; state[1] = .5; state[2] = .1; state = EXP_STATE_SCORE(ctx, 1); state[0] = .4; state[1] = .1; state[2] = .5; state = EXP_STATE_SCORE(ctx, 2); state[0] = .4; state[1] = .1; state[2] = .5; /* Initialize the transition scores. */ trans = EXP_TRANS_SCORE(ctx, 0); trans[0] = .3; trans[1] = .1; trans[2] = .4; trans = EXP_TRANS_SCORE(ctx, 1); trans[0] = .6; trans[1] = .2; trans[2] = .1; trans = EXP_TRANS_SCORE(ctx, 2); trans[0] = .5; trans[1] = .2; trans[2] = .1; ctx->num_items = ctx->cap_items; crf1dc_alpha_score(ctx); crf1dc_beta_score(ctx); /* Compute the score of every label sequence. */ for (y1 = 0;y1 < L;++y1) { floatval_t s1 = EXP_STATE_SCORE(ctx, 0)[y1]; for (y2 = 0;y2 < L;++y2) { floatval_t s2 = s1; s2 *= EXP_TRANS_SCORE(ctx, y1)[y2]; s2 *= EXP_STATE_SCORE(ctx, 1)[y2]; for (y3 = 0;y3 < L;++y3) { floatval_t s3 = s2; s3 *= EXP_TRANS_SCORE(ctx, y2)[y3]; s3 *= EXP_STATE_SCORE(ctx, 2)[y3]; scores[y1][y2][y3] = s3; } } } /* Compute the partition factor. */ norm = 0.; for (y1 = 0;y1 < L;++y1) { for (y2 = 0;y2 < L;++y2) { for (y3 = 0;y3 < L;++y3) { norm += scores[y1][y2][y3]; } } } /* Check the partition factor. */ fprintf(fp, "Check for the partition factor... "); check_values(fp, exp(ctx->log_norm), norm); /* Compute the sequence probabilities. */ for (y1 = 0;y1 < L;++y1) { for (y2 = 0;y2 < L;++y2) { for (y3 = 0;y3 < L;++y3) { floatval_t logp; labels[0] = y1; labels[1] = y2; labels[2] = y3; logp = crf1dc_score(ctx, labels) - crf1dc_lognorm(ctx); fprintf(fp, "Check for the sequence %d-%d-%d... ", y1, y2, y3); check_values(fp, exp(logp), scores[y1][y2][y3] / norm); } } } /* Compute the marginal probability at t=0 */ for (y1 = 0;y1 < L;++y1) { floatval_t a, b, c, s = 0.; for (y2 = 0;y2 < L;++y2) { for (y3 = 0;y3 < L;++y3) { s += scores[y1][y2][y3]; } } a = ALPHA_SCORE(ctx, 0)[y1]; b = BETA_SCORE(ctx, 0)[y1]; c = 1. / ctx->scale_factor[0]; fprintf(fp, "Check for the marginal probability (0,%d)... ", y1); check_values(fp, a * b * c, s / norm); } /* Compute the marginal probability at t=1 */ for (y2 = 0;y2 < L;++y2) { floatval_t a, b, c, s = 0.; for (y1 = 0;y1 < L;++y1) { for (y3 = 0;y3 < L;++y3) { s += scores[y1][y2][y3]; } } a = ALPHA_SCORE(ctx, 1)[y2]; b = BETA_SCORE(ctx, 1)[y2]; c = 1. / ctx->scale_factor[1]; fprintf(fp, "Check for the marginal probability (1,%d)... ", y2); check_values(fp, a * b * c, s / norm); } /* Compute the marginal probability at t=2 */ for (y3 = 0;y3 < L;++y3) { floatval_t a, b, c, s = 0.; for (y1 = 0;y1 < L;++y1) { for (y2 = 0;y2 < L;++y2) { s += scores[y1][y2][y3]; } } a = ALPHA_SCORE(ctx, 2)[y3]; b = BETA_SCORE(ctx, 2)[y3]; c = 1. / ctx->scale_factor[2]; fprintf(fp, "Check for the marginal probability (2,%d)... ", y3); check_values(fp, a * b * c, s / norm); } /* Compute the marginal probabilities of transitions. */ for (y1 = 0;y1 < L;++y1) { for (y2 = 0;y2 < L;++y2) { floatval_t a, b, s, t, p = 0.; for (y3 = 0;y3 < L;++y3) { p += scores[y1][y2][y3]; } a = ALPHA_SCORE(ctx, 0)[y1]; b = BETA_SCORE(ctx, 1)[y2]; s = EXP_STATE_SCORE(ctx, 1)[y2]; t = EXP_TRANS_SCORE(ctx, y1)[y2]; fprintf(fp, "Check for the marginal probability (0,%d)-(1,%d)... ", y1, y2); check_values(fp, a * t * s * b, p / norm); } } for (y2 = 0;y2 < L;++y2) { for (y3 = 0;y3 < L;++y3) { floatval_t a, b, s, t, p = 0.; for (y1 = 0;y1 < L;++y1) { p += scores[y1][y2][y3]; } a = ALPHA_SCORE(ctx, 1)[y2]; b = BETA_SCORE(ctx, 2)[y3]; s = EXP_STATE_SCORE(ctx, 2)[y3]; t = EXP_TRANS_SCORE(ctx, y2)[y3]; fprintf(fp, "Check for the marginal probability (1,%d)-(2,%d)... ", y2, y3); check_values(fp, a * t * s * b, p / norm); } } }