/* RubySVM 1.0 by Rudi Cilibrasi (cilibrar@ofb.net) * Released under the GPL * Mon May 12 11:20:48 CEST 2003, * based on libsvm-2.4 */ #define obstack_chunk_alloc xmalloc #define obstack_chunk_free free #define HAVE_DEFINE_ALLOC_FUNCTION 1 #include "ruby.h" #include "node.h" #include #include #include #include #include #include VALUE mSVM, cSVMProblem, cSVMParameter, cSVMModel; static VALUE cMarshal; static int getSVCount(struct svm_model *m); struct RSVM_Problem { struct svm_problem prob; struct obstack xs, ys; int k; }; struct RSVM_Model { struct svm_model *m; }; struct RSVM_Parameter { struct svm_parameter p; }; VALUE svmpa_new(VALUE cl); /* * Converts a Ruby array of consecutive values into a list of * value-index svm_node's. */ struct svm_node *rubyArrayToNodelist(VALUE xs) { //struct obstack xso; struct svm_node *n; int i; int len = RARRAY(xs)->len; n = (struct svm_node *) calloc(sizeof(struct svm_node), len+1); for (i = 0; i < len; ++i) { n[i].value = 1; n[i].index = NUM2DBL(rb_ary_entry(xs, i)); } n[i].value = 0; n[i].index = -1; return n; } /* * Serializes an SVMParameter object */ VALUE svmpa_svm_dump(VALUE self, VALUE limit) { struct RSVM_Parameter *rp; VALUE obj = rb_ary_new(); Data_Get_Struct(self, struct RSVM_Parameter, rp); rb_ary_push(obj, INT2NUM(rp->p.svm_type)); rb_ary_push(obj, INT2NUM(rp->p.kernel_type)); rb_ary_push(obj, rb_float_new(rp->p.degree)); rb_ary_push(obj, rb_float_new(rp->p.gamma)); rb_ary_push(obj, rb_float_new(rp->p.coef0)); rb_ary_push(obj, rb_float_new(rp->p.cache_size)); rb_ary_push(obj, rb_float_new(rp->p.eps)); rb_ary_push(obj, rb_float_new(rp->p.C)); rb_ary_push(obj, rb_float_new(rp->p.nu)); rb_ary_push(obj, rb_float_new(rp->p.p)); rb_ary_push(obj, INT2NUM(rp->p.shrinking)); return rb_funcall(cMarshal, rb_intern("dump"), 1, obj); } /* * Deserializes an SVMParameter object */ VALUE svmpa_svm_load(VALUE kl, VALUE obj) { struct RSVM_Parameter *rp; printf("In load!\n"); VALUE self = svmpa_new(cSVMParameter); Data_Get_Struct(self, struct RSVM_Parameter, rp); printf("RP is %p\n", rp); obj = rb_funcall(cMarshal, rb_intern("load"), 1, obj); rp->p.svm_type = NUM2INT(rb_ary_entry(obj, 0)); printf("first!\n"); rp->p.kernel_type = NUM2INT(rb_ary_entry(obj, 1)); rp->p.degree = (int) NUM2DBL(rb_ary_entry(obj, 2)); rp->p.gamma = NUM2DBL(rb_ary_entry(obj, 3)); rp->p.coef0 = NUM2DBL(rb_ary_entry(obj, 4)); rp->p.cache_size = NUM2DBL(rb_ary_entry(obj, 5)); printf("midway!\n"); rp->p.eps = NUM2DBL(rb_ary_entry(obj, 6)); rp->p.C = NUM2DBL(rb_ary_entry(obj, 7)); rp->p.nu = NUM2DBL(rb_ary_entry(obj, 8)); rp->p.p = NUM2DBL(rb_ary_entry(obj, 9)); rp->p.shrinking = NUM2INT(rb_ary_entry(obj, 10)); printf("Never returned!\n"); return self; } /* * Gets gamma value, the exponent used in the kernel function */ VALUE svmpa_gamma(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.gamma); } /* * Sets gamma value, the exponent used in the kernel function */ VALUE svmpa_gammaeq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.gamma = NUM2DBL(eq); return Qnil; } /* * Gets coef0, the constant added in the polynomial kernel */ VALUE svmpa_coef0(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.coef0); } /* * Sets coef0, the constant added in the polynomial kernel */ VALUE svmpa_coef0eq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.coef0 = NUM2DBL(eq); return Qnil; } /* * Gets coef0, the constant added in the polynomial kernel */ VALUE svmpa_probability(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.probability); } /* * Sets probability */ VALUE svmpa_probabilityeq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.probability = NUM2DBL(eq); return Qnil; } /* * Gets cachesize, the number of megabytes of memory to use for the cache */ VALUE svmpa_cache_size(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.cache_size); } /* * Sets cachesize, the number of megabytes of memory to use for the cache */ VALUE svmpa_cache_sizeeq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.cache_size = NUM2DBL(eq); return Qnil; } /* * Gets eps, the tolerance of termination criterion */ VALUE svmpa_eps(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.eps); } /* * Sets eps, the tolerance of termination criterion */ VALUE svmpa_epseq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.eps = NUM2DBL(eq); return Qnil; } /* * Gets C, the cost parameter of C-SVC, epsilon-SVR, and nu-SVR */ VALUE svmpa_C(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.C); } /* * Sets C, the cost parameter of C-SVC, epsilon-SVR, and nu-SVR */ VALUE svmpa_Ceq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.C = NUM2DBL(eq); return Qnil; } /* * Gets nu, the SV-ratio parameter of nu-SVC, one-class SVM, and nu-SVR */ VALUE svmpa_nu(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.nu); } /* * Sets nu, the SV-ratio parameter of nu-SVC, one-class SVM, and nu-SVR */ VALUE svmpa_nueq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.nu = NUM2DBL(eq); return Qnil; } /* * Gets p, the zero-loss width zone in epsilon-insensitive SVR */ VALUE svmpa_p(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.p); } /* * Sets p, the zero-loss width zone in epsilon-insensitive SVR */ VALUE svmpa_peq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.p = NUM2DBL(eq); return Qnil; } /* * Gets degree, the degree of the kernel function */ VALUE svmpa_degree(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return rb_float_new(rp->p.degree); } /* * Sets degree, the degree of the kernel function */ VALUE svmpa_degreeeq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.degree = (int) NUM2DBL(eq); return Qnil; } /* * Gets kernel_type, which is one of: * * LINEAR * * POLY * * RBF * * SIGMOID */ VALUE svmpa_kernel_type(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return INT2FIX(rp->p.kernel_type); } /* * Sets kernel_type, which is one of: * * LINEAR * * POLY * * RBF * * SIGMOID */ VALUE svmpa_kernel_typeeq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.kernel_type = FIX2INT(eq); return Qnil; } /* * Gets svm_type, which is one of: * * C_SVC * * NU_SVC * * ONE_CLASS * * EPSILON_SVR * * NU_SVR */ VALUE svmpa_svm_type(VALUE self) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); return INT2FIX(rp->p.svm_type); } /* * Sets svm_type, which is one of: * * C_SVC * * NU_SVC * * ONE_CLASS * * EPSILON_SVR * * NU_SVR */ VALUE svmpa_svm_typeeq(VALUE self, VALUE eq) { struct RSVM_Parameter *rp; Data_Get_Struct(self, struct RSVM_Parameter, rp); rp->p.svm_type = FIX2INT(eq); return Qnil; } struct RSVM_Parameter *newParameter() { struct RSVM_Parameter *rp = (struct RSVM_Parameter *) calloc(sizeof(struct RSVM_Parameter), 1); rp->p.svm_type = C_SVC; rp->p.kernel_type = RBF; rp->p.degree = 3; rp->p.gamma = 0; rp->p.coef0 = 0; rp->p.nu = 0.5; rp->p.cache_size = 40; rp->p.C = 1; rp->p.eps = 1e-3; rp->p.p = 0.1; rp->p.shrinking = 1; rp->p.nr_weight = 0; return rp; } /* * Creates a new, empty SVMProblem object. */ struct RSVM_Problem *newProblem() { struct RSVM_Problem *rprob = (struct RSVM_Problem *) calloc(sizeof(struct RSVM_Problem), 1); rprob->prob.l = 0; rprob->prob.x = NULL; rprob->prob.y = NULL; obstack_init(&rprob->xs); obstack_init(&rprob->ys); return rprob; } /* * Adds an example to an SVMProblem given a target value and an input vector. */ void addExample(struct RSVM_Problem *rp, double y, struct svm_node *x) { obstack_grow(&rp->ys, &y, sizeof(double)); obstack_grow(&rp->xs, &x, sizeof(struct svm_node *)); } void syncProblem(struct RSVM_Problem *rp) { rp->prob.l = obstack_object_size(&rp->ys) / sizeof(double); rp->prob.y = (double *) obstack_base(&rp->ys); rp->prob.x = (struct svm_node **) obstack_base(&rp->xs); } /* * Frees an SVMModel */ static void svmpm_free(void *ptr) { struct RSVM_Model *rp = (struct RSVM_Model *) ptr; svm_destroy_model(rp->m); free(rp); } /* * Frees an SVMParameter */ static void svmpa_free(void *ptr) { struct RSVM_Parameter *rp = (struct RSVM_Parameter *) ptr; free(rp); } /* * Frees an SVMProblem */ static void svmpr_free(void *ptr) { struct RSVM_Problem *rp = (struct RSVM_Problem *) ptr; int i; syncProblem(rp); for (i = 0; i < rp->prob.l; ++i) free(rp->prob.x[i]); obstack_free(&(rp->xs),NULL); obstack_free(&(rp->ys),NULL); free(rp); } /* * Creates a new SVMParameter object. * Uses the following default values: * * svm_type = C_SVC * * kernel_type = RBF * * degree = 3 * * gamma = 1 / k (0 means this also) * * coef0 = 0 * * nu = 0.5 * * cache_size = 40 * * C = 1 * * eps = 1e-3 * * p = 0.1 * * shrinking = 1 * * nr_weight = 0 */ VALUE svmpa_new(VALUE cl) { struct RSVM_Parameter *rp = newParameter(); VALUE tdata = Data_Wrap_Struct(cl, 0, svmpa_free, rp); printf("In the new!!\n"); rb_obj_call_init(tdata, 0, NULL); return tdata; } #ifdef HAVE_DEFINE_ALLOC_FUNCTION static VALUE svmpa_allocate(VALUE kl) { return svmpa_new(kl); } #endif /* * Creates a new, empty SVMProblem object. */ VALUE svmpr_new(VALUE cl) { struct RSVM_Problem *rp = newProblem(); VALUE tdata = Data_Wrap_Struct(cl, 0, svmpr_free, rp); rb_obj_call_init(tdata, 0, NULL); return tdata; } /* * Trains an SVM according to a given problem set and parameter specification */ VALUE svmpm_new(VALUE cl, VALUE prob, VALUE par) { struct RSVM_Model *rp = (struct RSVM_Model *) calloc(sizeof(struct RSVM_Model), 1); struct RSVM_Problem *cpro; struct RSVM_Parameter *cpa; bool defgamma = false; VALUE tdata = Data_Wrap_Struct(cl, 0, svmpm_free, rp); Data_Get_Struct(prob, struct RSVM_Problem, cpro); Data_Get_Struct(par, struct RSVM_Parameter, cpa); syncProblem(cpro); if (cpa->p.gamma == 0) defgamma = true; if (defgamma) cpa->p.gamma = 1.0 / (double) cpro->k; cpa->p.probability = 1; rp->m = svm_train(&cpro->prob, &cpa->p); if (defgamma) cpa->p.gamma = 0; rb_obj_call_init(tdata, 0, NULL); return tdata; } static VALUE svmpm_predict_values(VALUE self, VALUE xs) { struct RSVM_Model *rp; double *pe; struct svm_node *x = rubyArrayToNodelist(xs); int i, nr_class, numvals; VALUE decvals; Data_Get_Struct(self, struct RSVM_Model, rp); nr_class = svm_get_nr_class(rp->m); decvals = rb_ary_new(); numvals = (nr_class * (nr_class - 1))/2; pe = (double *) calloc(numvals, sizeof(double)); svm_predict_values(rp->m, x, pe); for (i = 0; i < numvals; i += 1) rb_ary_push(decvals, rb_float_new(pe[i])); free(pe); return decvals; } static VALUE svmpm_predict_probability(VALUE self, VALUE xs) { double result; struct RSVM_Model *rp; double *pe; struct svm_node *x = rubyArrayToNodelist(xs); int i; VALUE probs, retval; retval = rb_ary_new(); Data_Get_Struct(self, struct RSVM_Model, rp); probs = rb_ary_new(); pe = (double *) calloc(svm_get_nr_class(rp->m), sizeof(double)); result = svm_predict_probability(rp->m, x, pe); for (i = 0; i < svm_get_nr_class(rp->m); i += 1) rb_ary_push(probs, rb_float_new(pe[i])); free(pe); rb_ary_push(retval, rb_float_new(result)); rb_ary_push(retval, probs); return retval; } /* * Predicts a value (regression or classification) based on an input vector */ static VALUE svmpm_predict(VALUE self, VALUE xs) { double result; struct RSVM_Model *rp; Data_Get_Struct(self, struct RSVM_Model, rp); struct svm_node *x = rubyArrayToNodelist(xs); result = svm_predict(rp->m, x); free(x); return rb_float_new(result); } static VALUE svmpm_save(VALUE self, VALUE filename) { int result; struct RSVM_Model *rp; Data_Get_Struct(self, struct RSVM_Model, rp); char *name = STR2CSTR(filename); result = svm_save_model(name, rp->m); return INT2FIX(result); } static VALUE svmpm_load(VALUE cl, VALUE filename) { struct RSVM_Model *rp = (struct RSVM_Model *) calloc(sizeof(struct RSVM_Model), 1); char *name = STR2CSTR(filename); struct svm_model * model = svm_load_model(name); rp->m = model; return Data_Wrap_Struct(cl, 0, svmpm_free, rp); } /* * Initializes an SVMModel */ static VALUE svmpm_init(VALUE self) { return self; } /* * Initializes an SVMParameter */ static VALUE svmpa_init(VALUE self) { return self; } /* * Initializes an SVMProblem */ static VALUE svmpr_init(VALUE self) { return self; } /* * Returns the number of samples in an SVMProblem */ static VALUE svmpr_size(VALUE self) { struct RSVM_Problem *rp; Data_Get_Struct(self, struct RSVM_Problem, rp); syncProblem(rp); return INT2FIX(rp->prob.l); } /* * Returns the number of Support Vectors in an SVMModel */ static VALUE svmpm_svcount(VALUE self) { struct RSVM_Model *rp; Data_Get_Struct(self, struct RSVM_Model, rp); return INT2FIX(getSVCount(rp->m)); } /* * Adds a training example to an SVMProblem */ static VALUE svmpr_addex(VALUE self, VALUE y, VALUE xs) { struct RSVM_Problem *rp; struct svm_node *fini; double yd; Data_Get_Struct(self, struct RSVM_Problem, rp); yd = NUM2DBL(y); fini = rubyArrayToNodelist(xs); addExample(rp, yd, fini); if (rp->k == 0) rp->k = RARRAY(xs)->len; return Qnil; } /* To be removed in next version */ struct svm_model { svm_parameter param; // parameter int nr_class; // number of classes, = 2 in regression/one class svm int l; // total #SV svm_node **SV; // SVs (SV[l]) double **sv_coef; // coefficients for SVs in decision functions (sv_coef[n-1][l]) double *rho; // constants in decision functions (rho[n*(n-1)/2]) // for classification only int *label; // label of each class (label[n]) int *nSV; // number of SVs for each class (nSV[n]) // nSV[0] + nSV[1] + ... + nSV[n-1] = l // XXX int free_sv; // 1 if svm_model is created by svm_load_model // 0 if svm_model is created by svm_train }; static int getSVCount(struct svm_model *m) { return m->l; } extern "C" { void Init_SVM(); }; void Init_SVM() { #ifdef QUIETFUNC svm_set_verbosity(0); #endif mSVM = rb_define_module("SVM"); cSVMProblem = rb_define_class_under(mSVM, "Problem", rb_cObject); cSVMParameter = rb_define_class_under(mSVM, "Parameter", rb_cObject); cSVMModel = rb_define_class_under(mSVM, "Model", rb_cObject); rb_define_singleton_method(cSVMProblem, "new", (VALUE (*) (...))svmpr_new, 0); rb_define_method(cSVMProblem, "initialize", (VALUE (*) (...))svmpr_init, 0); rb_define_method(cSVMProblem, "size", (VALUE (*) (...))svmpr_size, 0); rb_define_method(cSVMProblem, "addExample", (VALUE (*) (...))svmpr_addex, 2); rb_define_const(mSVM, "C_SVC", INT2FIX(C_SVC)); rb_define_const(mSVM, "NU_SVC", INT2FIX(NU_SVC)); rb_define_const(mSVM, "ONE_CLASS", INT2FIX(ONE_CLASS)); rb_define_const(mSVM, "EPSILON_SVR", INT2FIX(EPSILON_SVR)); rb_define_const(mSVM, "NU_SVR", INT2FIX(NU_SVR)); rb_define_const(mSVM, "LINEAR", INT2FIX(LINEAR)); rb_define_const(mSVM, "POLY", INT2FIX(POLY)); rb_define_const(mSVM, "RBF", INT2FIX(RBF)); rb_define_const(mSVM, "SIGMOID", INT2FIX(SIGMOID)); rb_define_singleton_method(cSVMParameter, "new", (VALUE (*) (...))svmpa_new, 0); rb_define_method(cSVMParameter, "degree", (VALUE (*) (...))svmpa_degree, 0); rb_define_method(cSVMParameter, "degree=", (VALUE (*) (...))svmpa_degreeeq, 1); rb_define_method(cSVMParameter, "gamma", (VALUE (*) (...))svmpa_gamma, 0); rb_define_method(cSVMParameter, "gamma=", (VALUE (*) (...))svmpa_gammaeq, 1); rb_define_method(cSVMParameter, "coef0", (VALUE (*) (...))svmpa_coef0, 0); rb_define_method(cSVMParameter, "coef0=", (VALUE (*) (...))svmpa_coef0eq, 1); rb_define_method(cSVMParameter, "probability", (VALUE (*) (...))svmpa_probability, 0); rb_define_method(cSVMParameter, "probability=", (VALUE (*) (...))svmpa_probabilityeq, 1); rb_define_method(cSVMParameter, "cache_size", (VALUE (*) (...))svmpa_cache_size, 0); rb_define_method(cSVMParameter, "cache_size=", (VALUE (*) (...))svmpa_cache_sizeeq, 1); rb_define_method(cSVMParameter, "eps", (VALUE (*) (...))svmpa_eps, 0); rb_define_method(cSVMParameter, "eps=", (VALUE (*) (...))svmpa_epseq, 1); rb_define_method(cSVMParameter, "C", (VALUE (*) (...))svmpa_C, 0); rb_define_method(cSVMParameter, "C=", (VALUE (*) (...))svmpa_Ceq, 1); rb_define_method(cSVMParameter, "nu", (VALUE (*) (...))svmpa_nu, 0); rb_define_method(cSVMParameter, "nu=", (VALUE (*) (...))svmpa_nueq, 1); rb_define_method(cSVMParameter, "p", (VALUE (*) (...))svmpa_p, 0); rb_define_method(cSVMParameter, "p=", (VALUE (*) (...))svmpa_peq, 1); rb_define_method(cSVMParameter, "kernel_type", (VALUE (*) (...))svmpa_kernel_type, 0); rb_define_method(cSVMParameter, "kernel_type=", (VALUE (*) (...))svmpa_kernel_typeeq, 1); rb_define_method(cSVMParameter, "svm_type", (VALUE (*) (...))svmpa_svm_type, 0); rb_define_method(cSVMParameter, "svm_type=", (VALUE (*) (...))svmpa_svm_typeeq, 1); /* rb_define_method(cSVMParameter, "_dump_data", (VALUE (*) (...))svmpa_svm_dump_data, 0); rb_define_method(cSVMParameter, "_load_data", (VALUE (*) (...))svmpa_svm_load_data, 1); */ rb_define_method(cSVMParameter, "_dump", (VALUE (*) (...))svmpa_svm_dump, 1); rb_define_singleton_method(cSVMParameter, "_load", (VALUE (*) (...))svmpa_svm_load, 1); #ifdef HAVE_DEFINE_ALLOC_FUNCTION rb_define_alloc_func(cSVMModel, svmpa_allocate); #endif /*rb_undef_alloc_func(cSVMModel); */ /* rb_add_method(cSVMModel, ID_ALLOCATOR, NEW_CFUNC(svmpa_allocate, 0), NOEX_PRIVATE | NOEX_CFUNC); */ /* rb_define_singleton_method(cSVMModel, "allocate", (VALUE (*) (...))svmpa_allocate, 1); rb_define_singleton_method(cSVMModel, "_alloc", (VALUE (*) (...))svmpa_allocate, 1); */ rb_define_singleton_method(cSVMModel, "new", (VALUE (*) (...))svmpm_new, 2); rb_define_method(cSVMModel, "predict", (VALUE (*) (...))svmpm_predict, 1); rb_define_method(cSVMModel, "predict_probability", (VALUE (*) (...))svmpm_predict_probability, 1); rb_define_method(cSVMModel, "predict_values", (VALUE (*) (...))svmpm_predict_values, 1); rb_define_method(cSVMModel, "svcount", (VALUE (*) (...))svmpm_svcount, 0); rb_define_method(cSVMModel, "save", (VALUE (*) (...))svmpm_save, 1); rb_define_singleton_method(cSVMModel, "load", (VALUE (*) (...))svmpm_load, 1); cMarshal = rb_const_get(rb_cObject, rb_intern("Marshal")); }