linear.cpp in numo-liblinear-2.1.0

- old
+ new

@@ -54,22 +54,22 @@
 		while(x->index != -1)
 		{
 			ret += x->value*x->value;
 			x++;
 		}
-		return (ret);
+		return ret;
 	}
 
 	static double dot(const double *s, const feature_node *x)
 	{
 		double ret = 0;
 		while(x->index != -1)
 		{
 			ret += s[x->index-1]*x->value;
 			x++;
 		}
-		return (ret);
+		return ret;
 	}
 
 	static double sparse_dot(const feature_node *x1, const feature_node *x2)
 	{
 		double ret = 0;
@@ -87,11 +87,11 @@
 					++x2;
 				else
 					++x1;
 			}
 		}
-		return (ret);
+		return ret;
 	}
 
 	static void axpy(const double a, const feature_node *x, double *y)
 	{
 		while(x->index != -1)
@@ -162,11 +162,11 @@
 		wTw -= w[w_size-1]*w[w_size-1];
 	for(i=0;i<l;i++)
 		f += C_times_loss(i, wx[i]);
 	f = f + 0.5 * wTw;
 
-	return(f);
+	return f;
 }
 
 int l2r_erm_fun::get_nr_variable(void)
 {
 	return prob->n;
@@ -874,40 +874,41 @@
 //
 //  where Qij = yi yj xi^T xj and
 //  D is a diagonal matrix
 //
 // In L1-SVM case:
-// 		upper_bound_i = Cp if y_i = 1
-// 		upper_bound_i = Cn if y_i = -1
-// 		D_ii = 0
+//              upper_bound_i = Cp if y_i = 1
+//              upper_bound_i = Cn if y_i = -1
+//              D_ii = 0
 // In L2-SVM case:
-// 		upper_bound_i = INF
-// 		D_ii = 1/(2*Cp)	if y_i = 1
-// 		D_ii = 1/(2*Cn)	if y_i = -1
+//              upper_bound_i = INF
+//              D_ii = 1/(2*Cp) if y_i = 1
+//              D_ii = 1/(2*Cn) if y_i = -1
 //
 // Given:
 // x, y, Cp, Cn
 // eps is the stopping tolerance
 //
 // solution will be put in w
 //
+// this function returns the number of iterations
+//
 // See Algorithm 3 of Hsieh et al., ICML 2008
 
 #undef GETI
 #define GETI(i) (y[i]+1)
 // To support weights for instances, use GETI(i) (i)
 
-static void solve_l2r_l1l2_svc(
-	const problem *prob, double *w, double eps,
-	double Cp, double Cn, int solver_type)
+static int solve_l2r_l1l2_svc(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300)
 {
 	int l = prob->l;
 	int w_size = prob->n;
+	double eps = param->eps;
+	int solver_type = param->solver_type;
 	int i, s, iter = 0;
 	double C, d, G;
 	double *QD = new double[l];
-	int max_iter = 1000;
 	int *index = new int[l];
 	double *alpha = new double[l];
 	schar *y = new schar[l];
 	int active_size = l;
 
@@ -1022,11 +1023,12 @@
 
 		iter++;
 		if(iter % 10 == 0)
 			info(".");
 
-		if(PGmax_new - PGmin_new <= eps)
+		if(PGmax_new - PGmin_new <= eps &&
+			fabs(PGmax_new) <= eps && fabs(PGmin_new) <= eps)
 		{
 			if(active_size == l)
 				break;
 			else
 			{
@@ -1044,12 +1046,10 @@
 		if (PGmin_old >= 0)
 			PGmin_old = -INF;
 	}
 
 	info("\noptimization finished, #iter = %d\n",iter);
-	if (iter >= max_iter)
-		info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");
 
 	// calculate objective value
 
 	double v = 0;
 	int nSV = 0;
@@ -1066,10 +1066,12 @@
 
 	delete [] QD;
 	delete [] alpha;
 	delete [] y;
 	delete [] index;
+
+	return iter;
 }
 
 
 // A coordinate descent algorithm for
 // L1-loss and L2-loss epsilon-SVR dual problem
@@ -1079,39 +1081,39 @@
 //
 //  where Qij = xi^T xj and
 //  D is a diagonal matrix
 //
 // In L1-SVM case:
-// 		upper_bound_i = C
-// 		lambda_i = 0
+//              upper_bound_i = C
+//              lambda_i = 0
 // In L2-SVM case:
-// 		upper_bound_i = INF
-// 		lambda_i = 1/(2*C)
+//              upper_bound_i = INF
+//              lambda_i = 1/(2*C)
 //
 // Given:
 // x, y, p, C
 // eps is the stopping tolerance
 //
 // solution will be put in w
 //
+// this function returns the number of iterations
+//
 // See Algorithm 4 of Ho and Lin, 2012
 
 #undef GETI
 #define GETI(i) (0)
 // To support weights for instances, use GETI(i) (i)
 
-static void solve_l2r_l1l2_svr(
-	const problem *prob, double *w, const parameter *param,
-	int solver_type)
+static int solve_l2r_l1l2_svr(const problem *prob, const parameter *param, double *w, int max_iter=300)
 {
+	const int solver_type = param->solver_type;
 	int l = prob->l;
 	double C = param->C;
 	double p = param->p;
 	int w_size = prob->n;
 	double eps = param->eps;
 	int i, s, iter = 0;
-	int max_iter = 1000;
 	int active_size = l;
 	int *index = new int[l];
 
 	double d, G, H;
 	double Gmax_old = INF;
@@ -1258,12 +1260,10 @@
 
 		Gmax_old = Gmax_new;
 	}
 
 	info("\noptimization finished, #iter = %d\n", iter);
-	if(iter >= max_iter)
-		info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster\n\n");
 
 	// calculate objective value
 	double v = 0;
 	int nSV = 0;
 	for(i=0; i<w_size; i++)
@@ -1280,10 +1280,12 @@
 	info("nSV = %d\n",nSV);
 
 	delete [] beta;
 	delete [] QD;
 	delete [] index;
+
+	return iter;
 }
 
 
 // A coordinate descent algorithm for
 // the dual of L2-regularized logistic regression problems
@@ -1299,23 +1301,25 @@
 // x, y, Cp, Cn
 // eps is the stopping tolerance
 //
 // solution will be put in w
 //
+// this function returns the number of iterations
+//
 // See Algorithm 5 of Yu et al., MLJ 2010
 
 #undef GETI
 #define GETI(i) (y[i]+1)
 // To support weights for instances, use GETI(i) (i)
 
-void solve_l2r_lr_dual(const problem *prob, double *w, double eps, double Cp, double Cn)
+static int solve_l2r_lr_dual(const problem *prob, const parameter *param, double *w, double Cp, double Cn, int max_iter=300)
 {
 	int l = prob->l;
 	int w_size = prob->n;
+	double eps = param->eps;
 	int i, s, iter = 0;
 	double *xTx = new double[l];
-	int max_iter = 1000;
 	int *index = new int[l];
 	double *alpha = new double[2*l]; // store alpha and C - alpha
 	schar *y = new schar[l];
 	int max_inner_iter = 100; // for inner Newton
 	double innereps = 1e-2;
@@ -1426,12 +1430,10 @@
 			innereps = max(innereps_min, 0.1*innereps);
 
 	}
 
 	info("\noptimization finished, #iter = %d\n",iter);
-	if (iter >= max_iter)
-		info("\nWARNING: reaching max number of iterations\nUsing -s 0 may be faster (also see FAQ)\n\n");
 
 	// calculate objective value
 
 	double v = 0;
 	for(i=0; i<w_size; i++)
@@ -1444,10 +1446,12 @@
 
 	delete [] xTx;
 	delete [] alpha;
 	delete [] y;
 	delete [] index;
+
+	return iter;
 }
 
 // A coordinate descent algorithm for
 // L1-regularized L2-loss support vector classification
 //
@@ -1457,25 +1461,26 @@
 // x, y, Cp, Cn
 // eps is the stopping tolerance
 //
 // solution will be put in w
 //
+// this function returns the number of iterations
+//
 // See Yuan et al. (2010) and appendix of LIBLINEAR paper, Fan et al. (2008)
 //
 // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
 // must have been added to the original data. (see -B and -R option)
 
 #undef GETI
 #define GETI(i) (y[i]+1)
 // To support weights for instances, use GETI(i) (i)
 
-static void solve_l1r_l2_svc(
-	problem *prob_col, double *w, double eps,
-	double Cp, double Cn, int regularize_bias)
+static int solve_l1r_l2_svc(const problem *prob_col, const parameter* param, double *w, double Cp, double Cn, double eps)
 {
 	int l = prob_col->l;
 	int w_size = prob_col->n;
+	int regularize_bias = param->regularize_bias;
 	int j, s, iter = 0;
 	int max_iter = 1000;
 	int active_size = w_size;
 	int max_num_linesearch = 20;
 
@@ -1745,10 +1750,12 @@
 
 	delete [] index;
 	delete [] y;
 	delete [] b;
 	delete [] xj_sq;
+
+	return iter;
 }
 
 // A coordinate descent algorithm for
 // L1-regularized logistic regression problems
 //
@@ -1758,25 +1765,26 @@
 // x, y, Cp, Cn
 // eps is the stopping tolerance
 //
 // solution will be put in w
 //
+// this function returns the number of iterations
+//
 // See Yuan et al. (2011) and appendix of LIBLINEAR paper, Fan et al. (2008)
 //
 // To not regularize the bias (i.e., regularize_bias = 0), a constant feature = 1
 // must have been added to the original data. (see -B and -R option)
 
 #undef GETI
 #define GETI(i) (y[i]+1)
 // To support weights for instances, use GETI(i) (i)
 
-static void solve_l1r_lr(
-	const problem *prob_col, double *w, double eps,
-	double Cp, double Cn, int regularize_bias)
+static int solve_l1r_lr(const problem *prob_col, const parameter *param, double *w, double Cp, double Cn, double eps)
 {
 	int l = prob_col->l;
 	int w_size = prob_col->n;
+	int regularize_bias = param->regularize_bias;
 	int j, s, newton_iter=0, iter=0;
 	int max_newton_iter = 100;
 	int max_iter = 1000;
 	int max_num_linesearch = 20;
 	int active_size;
@@ -2141,10 +2149,12 @@
 	delete [] xTd;
 	delete [] exp_wTx;
 	delete [] exp_wTx_new;
 	delete [] tau;
 	delete [] D;
+
+	return newton_iter;
 }
 
 struct heap {
 	enum HEAP_TYPE { MIN, MAX };
 	int _size;
@@ -2228,16 +2238,20 @@
 // x, nu
 // eps is the stopping tolerance
 //
 // solution will be put in w and rho
 //
+// this function returns the number of iterations
+//
 // See Algorithm 7 in supplementary materials of Chou et al., SDM 2020.
 
-static void solve_oneclass_svm(const problem *prob, double *w, double *rho, double eps, double nu)
+static int solve_oneclass_svm(const problem *prob, const parameter *param, double *w, double *rho)
 {
 	int l = prob->l;
 	int w_size = prob->n;
+	double eps = param->eps;
+	double nu = param->nu;
 	int i, j, s, iter = 0;
 	double Gi, Gj;
 	double Qij, quad_coef, delta, sum;
 	double old_alpha_i;
 	double *QD = new double[l];
@@ -2246,17 +2260,17 @@
 	double *alpha = new double[l];
 	int max_inner_iter;
 	int max_iter = 1000;
 	int active_size = l;
 
-	double negGmax;			// max { -grad(f)_i | alpha_i < 1 }
-	double negGmin;			// min { -grad(f)_i | alpha_i > 0 }
+	double negGmax;                 // max { -grad(f)_i | alpha_i < 1 }
+	double negGmin;                 // min { -grad(f)_i | alpha_i > 0 }
 
 	int *most_violating_i = new int[l];
 	int *most_violating_j = new int[l];
 
-	int n = (int)(nu*l);		// # of alpha's at upper bound
+	int n = (int)(nu*l);            // # of alpha's at upper bound
 	for(i=0; i<n; i++)
 		alpha[i] = 1;
 	if (n<l)
 		alpha[i] = nu*l-n;
 	for(i=n+1; i<l; i++)
@@ -2477,10 +2491,12 @@
 	delete [] G;
 	delete [] index;
 	delete [] alpha;
 	delete [] most_violating_i;
 	delete [] most_violating_j;
+
+	return iter;
 }
 
 // transpose matrix X from row format to column format
 static void transpose(const problem *prob, feature_node **x_space_ret, problem *prob_col)
 {
@@ -2614,115 +2630,157 @@
 	free(data_label);
 }
 
 static void train_one(const problem *prob, const parameter *param, double *w, double Cp, double Cn)
 {
-	double eps = param->eps;
+	int solver_type = param->solver_type;
+	int dual_solver_max_iter = 300;
+	int iter;
 
-	int pos = 0;
-	int neg = 0;
-	for(int i=0;i<prob->l;i++)
-		if(prob->y[i] > 0)
-			pos++;
-	neg = prob->l - pos;
-	double primal_solver_tol = eps*max(min(pos,neg), 1)/prob->l;
+	bool is_regression = (solver_type==L2R_L2LOSS_SVR ||
+				solver_type==L2R_L1LOSS_SVR_DUAL ||
+				solver_type==L2R_L2LOSS_SVR_DUAL);
 
-	function *fun_obj=NULL;
-	switch(param->solver_type)
+	// Some solvers use Cp,Cn but not C array; extensions possible but no plan for now
+	double *C = new double[prob->l];
+	double primal_solver_tol = param->eps;
+	if(is_regression)
 	{
-		case L2R_LR:
+		for(int i=0;i<prob->l;i++)
+			C[i] = param->C;
+	}
+	else
+	{
+		int pos = 0;
+		for(int i=0;i<prob->l;i++)
 		{
-			double *C = new double[prob->l];
-			for(int i = 0; i < prob->l; i++)
+			if(prob->y[i] > 0)
 			{
-				if(prob->y[i] > 0)
-					C[i] = Cp;
-				else
-					C[i] = Cn;
+				pos++;
+				C[i] = Cp;
 			}
-			fun_obj=new l2r_lr_fun(prob, param, C);
-			NEWTON newton_obj(fun_obj, primal_solver_tol);
+			else
+				C[i] = Cn;
+		}
+		int neg = prob->l - pos;
+		primal_solver_tol = param->eps*max(min(pos,neg), 1)/prob->l;
+	}
+
+	switch(solver_type)
+	{
+		case L2R_LR:
+		{
+			l2r_lr_fun fun_obj(prob, param, C);
+			NEWTON newton_obj(&fun_obj, primal_solver_tol);
 			newton_obj.set_print_string(liblinear_print_string);
 			newton_obj.newton(w);
-			delete fun_obj;
-			delete[] C;
 			break;
 		}
 		case L2R_L2LOSS_SVC:
 		{
-			double *C = new double[prob->l];
-			for(int i = 0; i < prob->l; i++)
-			{
-				if(prob->y[i] > 0)
-					C[i] = Cp;
-				else
-					C[i] = Cn;
-			}
-			fun_obj=new l2r_l2_svc_fun(prob, param, C);
-			NEWTON newton_obj(fun_obj, primal_solver_tol);
+			l2r_l2_svc_fun fun_obj(prob, param, C);
+			NEWTON newton_obj(&fun_obj, primal_solver_tol);
 			newton_obj.set_print_string(liblinear_print_string);
 			newton_obj.newton(w);
-			delete fun_obj;
-			delete[] C;
 			break;
 		}
 		case L2R_L2LOSS_SVC_DUAL:
-			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L2LOSS_SVC_DUAL);
+		{
+			iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
+			if(iter >= dual_solver_max_iter)
+			{
+				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 2\n\n");
+				// primal_solver_tol obtained from eps for dual may be too loose
+				primal_solver_tol *= 0.1;
+				l2r_l2_svc_fun fun_obj(prob, param, C);
+				NEWTON newton_obj(&fun_obj, primal_solver_tol);
+				newton_obj.set_print_string(liblinear_print_string);
+				newton_obj.newton(w);
+			}
 			break;
+		}
 		case L2R_L1LOSS_SVC_DUAL:
-			solve_l2r_l1l2_svc(prob, w, eps, Cp, Cn, L2R_L1LOSS_SVC_DUAL);
+		{
+			iter = solve_l2r_l1l2_svc(prob, param, w, Cp, Cn, dual_solver_max_iter);
+			if(iter >= dual_solver_max_iter)
+				info("\nWARNING: reaching max number of iterations\nUsing -s 2 may be faster (also see FAQ)\n\n");			
 			break;
+		}
 		case L1R_L2LOSS_SVC:
 		{
 			problem prob_col;
 			feature_node *x_space = NULL;
 			transpose(prob, &x_space ,&prob_col);
-			solve_l1r_l2_svc(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
+			solve_l1r_l2_svc(&prob_col, param, w, Cp, Cn, primal_solver_tol);
 			delete [] prob_col.y;
 			delete [] prob_col.x;
 			delete [] x_space;
 			break;
 		}
 		case L1R_LR:
 		{
 			problem prob_col;
 			feature_node *x_space = NULL;
 			transpose(prob, &x_space ,&prob_col);
-			solve_l1r_lr(&prob_col, w, primal_solver_tol, Cp, Cn, param->regularize_bias);
+			solve_l1r_lr(&prob_col, param, w, Cp, Cn, primal_solver_tol);
 			delete [] prob_col.y;
 			delete [] prob_col.x;
 			delete [] x_space;
 			break;
 		}
 		case L2R_LR_DUAL:
-			solve_l2r_lr_dual(prob, w, eps, Cp, Cn);
+		{
+			iter = solve_l2r_lr_dual(prob, param, w, Cp, Cn, dual_solver_max_iter);
+			if(iter >= dual_solver_max_iter)
+			{
+				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 0\n\n");
+				// primal_solver_tol obtained from eps for dual may be too loose
+				primal_solver_tol *= 0.1;
+				l2r_lr_fun fun_obj(prob, param, C);
+				NEWTON newton_obj(&fun_obj, primal_solver_tol);
+				newton_obj.set_print_string(liblinear_print_string);
+				newton_obj.newton(w);
+			}
 			break;
+		}
 		case L2R_L2LOSS_SVR:
 		{
-			double *C = new double[prob->l];
-			for(int i = 0; i < prob->l; i++)
-				C[i] = param->C;
-
-			fun_obj=new l2r_l2_svr_fun(prob, param, C);
-			NEWTON newton_obj(fun_obj, param->eps);
+			l2r_l2_svr_fun fun_obj(prob, param, C);
+			NEWTON newton_obj(&fun_obj, primal_solver_tol);
 			newton_obj.set_print_string(liblinear_print_string);
 			newton_obj.newton(w);
-			delete fun_obj;
-			delete[] C;
 			break;
-
 		}
 		case L2R_L1LOSS_SVR_DUAL:
-			solve_l2r_l1l2_svr(prob, w, param, L2R_L1LOSS_SVR_DUAL);
+		{
+			iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
+			if(iter >= dual_solver_max_iter)
+				info("\nWARNING: reaching max number of iterations\nUsing -s 11 may be faster (also see FAQ)\n\n");			
+
 			break;
+		}
 		case L2R_L2LOSS_SVR_DUAL:
-			solve_l2r_l1l2_svr(prob, w, param, L2R_L2LOSS_SVR_DUAL);
+		{
+			iter = solve_l2r_l1l2_svr(prob, param, w, dual_solver_max_iter);
+			if(iter >= dual_solver_max_iter)
+			{
+				info("\nWARNING: reaching max number of iterations\nSwitching to use -s 11\n\n");
+				// primal_solver_tol obtained from eps for dual may be too loose
+				primal_solver_tol *= 0.001;
+				l2r_l2_svr_fun fun_obj(prob, param, C);
+				NEWTON newton_obj(&fun_obj, primal_solver_tol);
+				newton_obj.set_print_string(liblinear_print_string);
+				newton_obj.newton(w);
+			}
 			break;
+		}
 		default:
 			fprintf(stderr, "ERROR: unknown solver_type\n");
 			break;
 	}
+
+	delete[] C;
 }
 
 // Calculate the initial C for parameter selection
 static double calc_start_C(const problem *prob, const parameter *param)
 {
@@ -2766,11 +2824,11 @@
 	}
 
 	return pow( 2, floor(log(min_C) / log(2.0)) );
 }
 
-static double calc_max_p(const problem *prob, const parameter *param)
+static double calc_max_p(const problem *prob)
 {
 	int i;
 	double max_p = 0.0;
 	for(i = 0; i < prob->l; i++)
 		max_p = max(max_p, fabs(prob->y[i]));
@@ -2936,11 +2994,11 @@
 	else if(check_oneclass_model(model_))
 	{
 		model_->w = Malloc(double, w_size);
 		model_->nr_class = 2;
 		model_->label = NULL;
-		solve_oneclass_svm(prob, model_->w, &(model_->rho), param->eps, param->nu);
+		solve_oneclass_svm(prob, param, model_->w, &(model_->rho));
 	}
 	else
 	{
 		int nr_class;
 		int *label = NULL;
@@ -3171,11 +3229,10 @@
 		{
 			subprob[i].x[k] = prob->x[perm[j]];
 			subprob[i].y[k] = prob->y[perm[j]];
 			++k;
 		}
-
 	}
 
 	struct parameter param_tmp = *param;
 	*best_p = -1;
 	if(param->solver_type == L2R_LR || param->solver_type == L2R_L2LOSS_SVC)
@@ -3191,11 +3248,11 @@
 		*best_C = best_C_tmp;
 		*best_score = best_score_tmp;
 	}
 	else if(param->solver_type == L2R_L2LOSS_SVR)
 	{
-		double max_p = calc_max_p(prob, &param_tmp);
+		double max_p = calc_max_p(prob);
 		int num_p_steps = 20;
 		double max_C = 1048576;
 		*best_score = INF;
 
 		i = num_p_steps-1;
@@ -3653,10 +3710,10 @@
 		return "eps <= 0";
 
 	if(param->C <= 0)
 		return "C <= 0";
 
-	if(param->p < 0)
+	if(param->p < 0 && param->solver_type == L2R_L2LOSS_SVR)
 		return "p < 0";
 
 	if(prob->bias >= 0 && param->solver_type == ONECLASS_SVM)
 		return "prob->bias >=0, but this is ignored in ONECLASS_SVM";