ext/numo/liblinear/src/linear.cpp in numo-liblinear-2.0.0 vs ext/numo/liblinear/src/linear.cpp in numo-liblinear-2.1.0

- old
+ new

@@ -54,22 +54,22 @@ while(x->index != -1) { ret += x->value*x->value; x++; } - return (ret); + return ret; } static double dot(const double *s, const feature_node *x) { double ret = 0; while(x->index != -1) { ret += s[x->index-1]*x->value; x++; } - return (ret); + return ret; } static double sparse_dot(const feature_node *x1, const feature_node *x2) { double ret = 0; @@ -87,11 +87,11 @@ ++x2; else ++x1; } } - return (ret); + return ret; } static void axpy(const double a, const feature_node *x, double *y) { while(x->index != -1) @@ -162,11 +162,11 @@ wTw -= w[w_size-1]*w[w_size-1]; for(i=0;i<l;i++) f += C_times_loss(i, wx[i]); f = f + 0.5 * wTw; - return(f); + return f; } int l2r_erm_fun::get_nr_variable(void) { return prob->n; @@ -874,40 +874,41 @@ // // where Qij = yi yj xi^T xj and // D is a diagonal matrix // // In L1-SVM case: -// upper_bound_i = Cp if y_i = 1 -// upper_bound_i = Cn if y_i = -1 -// D_ii = 0 +// upper_bound_i = Cp if y_i = 1 +// upper_bound_i = Cn if y_i = -1 +// D_ii = 0 // In L2-SVM case: -// upper_bound_i = INF -// D_ii = 1/(2*Cp) if y_i = 1 -// D_ii = 1/(2*Cn) if y_i = -1 +// upper_bound_i = INF +// D_ii = 1/(2*Cp) if y_i = 1 +// D_ii = 1/(2*Cn) if y_i = -1 // // Given: // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w // +// this function returns the number of iterations +// // See Algorithm 3 of Hsieh et al., ICML 2008 #undef GETI #define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) -static void solve_l2r_l1l2_svc( - const problem *prob, double *w, double eps, - double Cp, double Cn, int solver_type) +static int solve_l2r_l1l2_svc(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300) { int l = prob->l; int w_size = prob->n; + double eps = param->eps; + int solver_type = param->solver_type; int i, s, iter = 0; double C, d, G; double *QD = new double[l]; - int max_iter = 1000; int *index = new int[l]; double *alpha = new double[l]; schar *y = new schar[l]; int active_size = l; @@ -1022,11 +1023,12 @@ iter++; if(iter % 10 == 0) info("."); - if(PGmax_new - PGmin_new <= eps) + if(PGmax_new - PGmin_new <= eps && + fabs(PGmax_new) <= eps && fabs(PGmin_new) <= eps) { if(active_size == l) break; else { @@ -1044,12 +1046,10 @@ if (PGmin_old >= 0) PGmin_old = -INF; } info("\noptimization finished, #iter = %d\n",iter); - if (iter >= max_iter) - info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); // calculate objective value double v = 0; int nSV = 0; @@ -1066,10 +1066,12 @@ delete [] QD; delete [] alpha; delete [] y; delete [] index; + + return iter; } // A coordinate descent algorithm for // L1-loss and L2-loss epsilon-SVR dual problem @@ -1079,39 +1081,39 @@ // // where Qij = xi^T xj and // D is a diagonal matrix // // In L1-SVM case: -// upper_bound_i = C -// lambda_i = 0 +// upper_bound_i = C +// lambda_i = 0 // In L2-SVM case: -// upper_bound_i = INF -// lambda_i = 1/(2*C) +// upper_bound_i = INF +// lambda_i = 1/(2*C) // // Given: // x, y, p, C // eps is the stopping tolerance // // solution will be put in w // +// this function returns the number of iterations +// // See Algorithm 4 of Ho and Lin, 2012 #undef GETI #define GETI(i) (0) // To support weights for instances, use GETI(i) (i) -static void solve_l2r_l1l2_svr( - const problem *prob, double *w, const parameter *param, - int solver_type) +static int solve_l2r_l1l2_svr(const problem *prob, const parameter *param, double *w, int max_iter=300) { + const int solver_type = param->solver_type; int l = prob->l; double C = param->C; double p = param->p; int w_size = prob->n; double eps = param->eps; int i, s, iter = 0; - int max_iter = 1000; int active_size = l; int *index = new int[l]; double d, G, H; double Gmax_old = INF; @@ -1258,12 +1260,10 @@ Gmax_old = Gmax_new; } info("\noptimization finished, #iter = %d\n", iter); - if(iter >= max_iter) - info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n"); // calculate objective value double v = 0; int nSV = 0; for(i=0; i<w_size; i++) @@ -1280,10 +1280,12 @@ info("nSV = %d\n",nSV); delete [] beta; delete [] QD; delete [] index; + + return iter; } // A coordinate descent algorithm for // the dual of L2-regularized logistic regression problems @@ -1299,23 +1301,25 @@ // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w // +// this function returns the number of iterations +// // See Algorithm 5 of Yu et al., MLJ 2010 #undef GETI #define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) -void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn) +static int solve_l2r_lr_dual(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300) { int l = prob->l; int w_size = prob->n; + double eps = param->eps; int i, s, iter = 0; double *xTx = new double[l]; - int max_iter = 1000; int *index = new int[l]; double *alpha = new double[2*l]; // store alpha and C - alpha schar *y = new schar[l]; int max_inner_iter = 100; // for inner Newton double innereps = 1e-2; @@ -1426,12 +1430,10 @@ innereps = max(innereps_min, 0.1*innereps); } info("\noptimization finished, #iter = %d\n",iter); - if (iter >= max_iter) - info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n"); // calculate objective value double v = 0; for(i=0; i<w_size; i++) @@ -1444,10 +1446,12 @@ delete [] xTx; delete [] alpha; delete [] y; delete [] index; + + return iter; } // A coordinate descent algorithm for // L1-regularized L2-loss support vector classification // @@ -1457,25 +1461,26 @@ // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w // +// this function returns the number of iterations +// // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008) // // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1 // must have been added to the original data. (see -B and -R option) #undef GETI #define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) -static void solve_l1r_l2_svc( - problem *prob_col, double *w, double eps, - double Cp, double Cn, int regularize_bias) +static int solve_l1r_l2_svc(const problem *prob_col, const parameter* param, double *w, double Cp, double Cn, double eps) { int l = prob_col->l; int w_size = prob_col->n; + int regularize_bias = param->regularize_bias; int j, s, iter = 0; int max_iter = 1000; int active_size = w_size; int max_num_linesearch = 20; @@ -1745,10 +1750,12 @@ delete [] index; delete [] y; delete [] b; delete [] xj_sq; + + return iter; } // A coordinate descent algorithm for // L1-regularized logistic regression problems // @@ -1758,25 +1765,26 @@ // x, y, Cp, Cn // eps is the stopping tolerance // // solution will be put in w // +// this function returns the number of iterations +// // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008) // // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1 // must have been added to the original data. (see -B and -R option) #undef GETI #define GETI(i) (y[i]+1) // To support weights for instances, use GETI(i) (i) -static void solve_l1r_lr( - const problem *prob_col, double *w, double eps, - double Cp, double Cn, int regularize_bias) +static int solve_l1r_lr(const problem *prob_col, const parameter *param, double *w, double Cp, double Cn, double eps) { int l = prob_col->l; int w_size = prob_col->n; + int regularize_bias = param->regularize_bias; int j, s, newton_iter=0, iter=0; int max_newton_iter = 100; int max_iter = 1000; int max_num_linesearch = 20; int active_size; @@ -2141,10 +2149,12 @@ delete [] xTd; delete [] exp_wTx; delete [] exp_wTx_new; delete [] tau; delete [] D; + + return newton_iter; } struct heap { enum HEAP_TYPE { MIN, MAX }; int _size; @@ -2228,16 +2238,20 @@ // x, nu // eps is the stopping tolerance // // solution will be put in w and rho // +// this function returns the number of iterations +// // See Algorithm 7 in supplementary materials of Chou et al., SDM 2020. -static void solve_oneclass_svm(const problem *prob, double *w, double *rho, double eps, double nu) +static int solve_oneclass_svm(const problem *prob, const parameter *param, double *w, double *rho) { int l = prob->l; int w_size = prob->n; + double eps = param->eps; + double nu = param->nu; int i, j, s, iter = 0; double Gi, Gj; double Qij, quad_coef, delta, sum; double old_alpha_i; double *QD = new double[l]; @@ -2246,17 +2260,17 @@ double *alpha = new double[l]; int max_inner_iter; int max_iter = 1000; int active_size = l; - double negGmax; // max { -grad(f)_i | alpha_i < 1 } - double negGmin; // min { -grad(f)_i | alpha_i > 0 } + double negGmax; // max { -grad(f)_i | alpha_i < 1 } + double negGmin; // min { -grad(f)_i | alpha_i > 0 } int *most_violating_i = new int[l]; int *most_violating_j = new int[l]; - int n = (int)(nu*l); // # of alpha's at upper bound + int n = (int)(nu*l); // # of alpha's at upper bound for(i=0; i<n; i++) alpha[i] = 1; if (n<l) alpha[i] = nu*l-n; for(i=n+1; i<l; i++) @@ -2477,10 +2491,12 @@ delete [] G; delete [] index; delete [] alpha; delete [] most_violating_i; delete [] most_violating_j; + + return iter; } // transpose matrix X from row format to column format static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col) { @@ -2614,115 +2630,157 @@ free(data_label); } static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn) { - double eps = param->eps; + int solver_type = param->solver_type; + int dual_solver_max_iter = 300; + int iter; - int pos = 0; - int neg = 0; - for(int i=0;i<prob->l;i++) - if(prob->y[i] > 0) - pos++; - neg = prob->l - pos; - double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l; + bool is_regression = (solver_type==L2R_L2LOSS_SVR || + solver_type==L2R_L1LOSS_SVR_DUAL || + solver_type==L2R_L2LOSS_SVR_DUAL); - function *fun_obj=NULL; - switch(param->solver_type) + // Some solvers use Cp,Cn but not C array; extensions possible but no plan for now + double *C = new double[prob->l]; + double primal_solver_tol = param->eps; + if(is_regression) { - case L2R_LR: + for(int i=0;i<prob->l;i++) + C[i] = param->C; + } + else + { + int pos = 0; + for(int i=0;i<prob->l;i++) { - double *C = new double[prob->l]; - for(int i = 0; i < prob->l; i++) + if(prob->y[i] > 0) { - if(prob->y[i] > 0) - C[i] = Cp; - else - C[i] = Cn; + pos++; + C[i] = Cp; } - fun_obj=new l2r_lr_fun(prob, param, C); - NEWTON newton_obj(fun_obj, primal_solver_tol); + else + C[i] = Cn; + } + int neg = prob->l - pos; + primal_solver_tol = param->eps*max(min(pos,neg), 1)/prob->l; + } + + switch(solver_type) + { + case L2R_LR: + { + l2r_lr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); newton_obj.set_print_string(liblinear_print_string); newton_obj.newton(w); - delete fun_obj; - delete[] C; break; } case L2R_L2LOSS_SVC: { - double *C = new double[prob->l]; - for(int i = 0; i < prob->l; i++) - { - if(prob->y[i] > 0) - C[i] = Cp; - else - C[i] = Cn; - } - fun_obj=new l2r_l2_svc_fun(prob, param, C); - NEWTON newton_obj(fun_obj, primal_solver_tol); + l2r_l2_svc_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); newton_obj.set_print_string(liblinear_print_string); newton_obj.newton(w); - delete fun_obj; - delete[] C; break; } case L2R_L2LOSS_SVC_DUAL: - solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL); + { + iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + { + info("\nWARNING: reaching max number of iterations\nSwitching to use -s 2\n\n"); + // primal_solver_tol obtained from eps for dual may be too loose + primal_solver_tol *= 0.1; + l2r_l2_svc_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); + } break; + } case L2R_L1LOSS_SVC_DUAL: - solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL); + { + iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n"); break; + } case L1R_L2LOSS_SVC: { problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); - solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias); + solve_l1r_l2_svc(&prob_col, param, w, Cp, Cn, primal_solver_tol); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; break; } case L1R_LR: { problem prob_col; feature_node *x_space = NULL; transpose(prob, &x_space ,&prob_col); - solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias); + solve_l1r_lr(&prob_col, param, w, Cp, Cn, primal_solver_tol); delete [] prob_col.y; delete [] prob_col.x; delete [] x_space; break; } case L2R_LR_DUAL: - solve_l2r_lr_dual(prob, w, eps, Cp, Cn); + { + iter = solve_l2r_lr_dual(prob, param, w, Cp, Cn, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + { + info("\nWARNING: reaching max number of iterations\nSwitching to use -s 0\n\n"); + // primal_solver_tol obtained from eps for dual may be too loose + primal_solver_tol *= 0.1; + l2r_lr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); + } break; + } case L2R_L2LOSS_SVR: { - double *C = new double[prob->l]; - for(int i = 0; i < prob->l; i++) - C[i] = param->C; - - fun_obj=new l2r_l2_svr_fun(prob, param, C); - NEWTON newton_obj(fun_obj, param->eps); + l2r_l2_svr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); newton_obj.set_print_string(liblinear_print_string); newton_obj.newton(w); - delete fun_obj; - delete[] C; break; - } case L2R_L1LOSS_SVR_DUAL: - solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL); + { + iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster (also see FAQ)\n\n"); + break; + } case L2R_L2LOSS_SVR_DUAL: - solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL); + { + iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter); + if(iter >= dual_solver_max_iter) + { + info("\nWARNING: reaching max number of iterations\nSwitching to use -s 11\n\n"); + // primal_solver_tol obtained from eps for dual may be too loose + primal_solver_tol *= 0.001; + l2r_l2_svr_fun fun_obj(prob, param, C); + NEWTON newton_obj(&fun_obj, primal_solver_tol); + newton_obj.set_print_string(liblinear_print_string); + newton_obj.newton(w); + } break; + } default: fprintf(stderr, "ERROR: unknown solver_type\n"); break; } + + delete[] C; } // Calculate the initial C for parameter selection static double calc_start_C(const problem *prob, const parameter *param) { @@ -2766,11 +2824,11 @@ } return pow( 2, floor(log(min_C) / log(2.0)) ); } -static double calc_max_p(const problem *prob, const parameter *param) +static double calc_max_p(const problem *prob) { int i; double max_p = 0.0; for(i = 0; i < prob->l; i++) max_p = max(max_p, fabs(prob->y[i])); @@ -2936,11 +2994,11 @@ else if(check_oneclass_model(model_)) { model_->w = Malloc(double, w_size); model_->nr_class = 2; model_->label = NULL; - solve_oneclass_svm(prob, model_->w, &(model_->rho), param->eps, param->nu); + solve_oneclass_svm(prob, param, model_->w, &(model_->rho)); } else { int nr_class; int *label = NULL; @@ -3171,11 +3229,10 @@ { subprob[i].x[k] = prob->x[perm[j]]; subprob[i].y[k] = prob->y[perm[j]]; ++k; } - } struct parameter param_tmp = *param; *best_p = -1; if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC) @@ -3191,11 +3248,11 @@ *best_C = best_C_tmp; *best_score = best_score_tmp; } else if(param->solver_type == L2R_L2LOSS_SVR) { - double max_p = calc_max_p(prob, &param_tmp); + double max_p = calc_max_p(prob); int num_p_steps = 20; double max_C = 1048576; *best_score = INF; i = num_p_steps-1; @@ -3653,10 +3710,10 @@ return "eps <= 0"; if(param->C <= 0) return "C <= 0"; - if(param->p < 0) + if(param->p < 0 && param->solver_type == L2R_L2LOSS_SVR) return "p < 0"; if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM) return "prob->bias >=0, but this is ignored in ONECLASS_SVM";