// Copyright (C) 2013 Davis E. King (davis@dlib.net)
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_
#define DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_

#include "vector_normalizer_frobmetric_abstract.h"
#include "../matrix.h"
#include "../optimization.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    // One training example for vector_normalizer_frobmetric::train().  It holds an
    // anchor vector together with the vectors that should end up near it and the
    // vectors that should end up far from it after the learned transform is applied.
    template <
        typename matrix_type
        >
    struct frobmetric_training_sample
    {
        matrix_type anchor_vect;
        std::vector<matrix_type> near_vects;
        std::vector<matrix_type> far_vects;

        // Each (near, far) pair forms one constraint triple, so the total number of
        // triples contributed by this sample is the product of the two list sizes.
        unsigned long num_triples (
        ) const { return near_vects.size() * far_vects.size(); }

        // Empties the near/far lists.  Note that anchor_vect is left unchanged.
        void clear()
        {
            near_vects.clear();
            far_vects.clear();
        }
    };

    // Serializes a frobmetric_training_sample to out using serialization format
    // version 1.
    template <
        typename matrix_type
        >
    void serialize(const frobmetric_training_sample<matrix_type>& item, std::ostream& out)
    {
        int version = 1;
        serialize(version, out);
        serialize(item.anchor_vect, out);
        serialize(item.near_vects, out);
        serialize(item.far_vects, out);
    }

    // Deserializes a frobmetric_training_sample from in.  Throws serialization_error
    // if the stored version number isn't 1.
    template <
        typename matrix_type
        >
    void deserialize(frobmetric_training_sample<matrix_type>& item, std::istream& in)
    {
        int version = 0;
        deserialize(version, in);
        if (version != 1)
            throw serialization_error("Unexpected version found while deserializing dlib::frobmetric_training_sample.");
        deserialize(item.anchor_vect, in);
        deserialize(item.near_vects, in);
        deserialize(item.far_vects, in);
    }

// ----------------------------------------------------------------------------------------

    // Learns a linear transformation (a Mahalanobis-style metric) from triples of
    // (anchor, near, far) vectors such that, after the transform, each anchor is
    // closer to its "near" vectors than to its "far" vectors.  See the _abstract.h
    // header for the formal contract.  After train() is called, operator() maps an
    // input vector x to tform*x-m.
    template <
        typename matrix_type
        >
    class vector_normalizer_frobmetric
    {
    public:
        typedef typename matrix_type::mem_manager_type mem_manager_type;
        typedef typename matrix_type::type scalar_type;
        typedef matrix_type result_type;

    private:
        // A training sample after precomputation: it stores anchor-near and
        // anchor-far difference vectors rather than the raw vectors (see train()).
        struct compact_frobmetric_training_sample
        {
            std::vector<matrix_type> near_vects;
            std::vector<matrix_type> far_vects;
        };

        // The dual objective function maximized by find_max_box_constrained() inside
        // train().  The argument u is the vector of dual variables, one per
        // constraint triple.  As a side effect, evaluating the objective also stores
        // the matrix Aminus (the negative semidefinite part of \hat A), which the
        // derivative object then reuses.
        struct objective
        {
            objective (
                const std::vector<compact_frobmetric_training_sample>& samples_,
                matrix<double,0,0,mem_manager_type>& Aminus_,
                const matrix<double,0,1,mem_manager_type>& bias_
            ) : samples(samples_), Aminus(Aminus_), bias(bias_) {}

            double operator()(const matrix<double,0,1,mem_manager_type>& u) const
            {
                long idx = 0;
                const long dims = samples[0].far_vects[0].size();
                // Here we compute \hat A from the paper, which we refer to as just A in
                // the code.
                matrix<double,0,0,mem_manager_type> A(dims,dims);
                A = 0;
                std::vector<double> ufar, unear;
                for (unsigned long i = 0; i < samples.size(); ++i)
                {
                    // Accumulate, for each near/far vector of this sample, the sum of
                    // the dual variables of all triples it participates in.  The
                    // triples are enumerated near-major (j outer, k inner), and idx
                    // walks through u in that same fixed order everywhere in this file.
                    ufar.assign(samples[i].far_vects.size(),0);
                    unear.assign(samples[i].near_vects.size(),0);
                    for (unsigned long j = 0; j < unear.size(); ++j)
                    {
                        for (unsigned long k = 0; k < ufar.size(); ++k)
                        {
                            const double val = u(idx++);
                            ufar[k] -= val;
                            unear[j] += val;
                        }
                    }
                    // A is a weighted sum of outer products of the difference vectors.
                    for (unsigned long j = 0; j < unear.size(); ++j)
                        A += unear[j]*samples[i].near_vects[j]*trans(samples[i].near_vects[j]);
                    for (unsigned long j = 0; j < ufar.size(); ++j)
                        A += ufar[j]*samples[i].far_vects[j]*trans(samples[i].far_vects[j]);
                }

                // Project A onto the negative semidefinite cone by clamping its
                // eigenvalues at 0 from above; the result is Aminus.
                eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(A));
                Aminus = ed.get_pseudo_v()*diagm(upperbound(ed.get_real_eigenvalues(),0))*trans(ed.get_pseudo_v());
                // Do this to avoid numeric instability later on since the above
                // computation can make Aminus slightly non-symmetric.
                Aminus = make_symmetric(Aminus);

                return dot(u,bias) - 0.5*sum(squared(Aminus));
            }

        private:
            const std::vector<compact_frobmetric_training_sample>& samples;
            matrix<double,0,0,mem_manager_type>& Aminus;
            const matrix<double,0,1,mem_manager_type>& bias;
        };

        // Gradient of the dual objective above with respect to u.  It relies on the
        // optimizer having just evaluated the objective at the same u so that the
        // shared Aminus matrix is current (see the comment in operator()).
        struct derivative
        {
            derivative (
                unsigned long num_triples_,
                const std::vector<compact_frobmetric_training_sample>& samples_,
                matrix<double,0,0,mem_manager_type>& Aminus_,
                const matrix<double,0,1,mem_manager_type>& bias_
            ) : num_triples(num_triples_), samples(samples_), Aminus(Aminus_), bias(bias_) {}

            matrix<double,0,1,mem_manager_type> operator()(const matrix<double,0,1,mem_manager_type>& ) const
            {
                // Note that Aminus is a function of u (the argument to this function), but
                // since Aminus will have been computed already by the most recent call to
                // the objective function we don't need to do anything with u.  We can just
                // use Aminus right away.
                matrix<double,0,1,mem_manager_type> grad(num_triples);
                long idx = 0;
                std::vector<double> ufar, unear;
                for (unsigned long i = 0; i < samples.size(); ++i)
                {
                    ufar.resize(samples[i].far_vects.size());
                    unear.resize(samples[i].near_vects.size());
                    // <Aminus, v*v'> for each difference vector v, computed once per
                    // vector and then combined per triple below.
                    for (unsigned long j = 0; j < unear.size(); ++j)
                        unear[j] = sum(pointwise_multiply(Aminus, samples[i].near_vects[j]*trans(samples[i].near_vects[j])));
                    for (unsigned long j = 0; j < ufar.size(); ++j)
                        ufar[j] = sum(pointwise_multiply(Aminus, samples[i].far_vects[j]*trans(samples[i].far_vects[j])));
                    // Same near-major triple enumeration order as in objective::operator().
                    for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
                    {
                        for (unsigned long k = 0; k < samples[i].far_vects.size(); ++k)
                        {
                            grad(idx) = bias(idx) + ufar[k]-unear[j];
                            idx++;
                        }
                    }
                }

                return grad;
            }

        private:
            const unsigned long num_triples;
            const std::vector<compact_frobmetric_training_sample>& samples;
            matrix<double,0,0,mem_manager_type>& Aminus;
            const matrix<double,0,1,mem_manager_type>& bias;
        };

        // Stop strategy for find_max_box_constrained().  It stops once every element
        // of the gradient that is not pinned at a box constraint boundary (i.e. not
        // pushing against the [0, C/num_triples] bounds) is below the user-supplied
        // epsilon, or once max_iter iterations have elapsed.
        class custom_stop_strategy
        {
        public:
            custom_stop_strategy(
                double C_,
                double eps_,
                bool be_verbose_,
                unsigned long max_iter_
            )
            {
                _c = C_;
                _cur_iter = 0;
                _gradient_thresh = eps_;
                _max_iter = max_iter_;
                _verbose = be_verbose_;
            }

            template <typename T>
            bool should_continue_search (
                const T& u,
                const double ,
                const T& grad
            )
            {
                ++_cur_iter;
                double max_gradient = 0;
                for (long i = 0; i < grad.size(); ++i)
                {
                    // An element pinned at a bound with the gradient pointing out of
                    // the feasible box can't make progress, so it is excluded from the
                    // convergence measure.  Each u(i) lives in [0, _c/grad.size()].
                    const bool at_lower_bound = (0 >= u(i) && grad(i) > 0);
                    const bool at_upper_bound = (_c/grad.size() <= u(i) && grad(i) < 0);
                    if (!at_lower_bound && !at_upper_bound)
                        max_gradient = std::max(std::abs(grad(i)), max_gradient);
                }

                if (_verbose)
                {
                    std::cout << "iteration: " << _cur_iter << " max_gradient: "<< max_gradient << std::endl;
                }

                // Only stop when the largest non-bound-constrained element of the gradient
                // is lower than the threshold.
                if (max_gradient < _gradient_thresh)
                    return false;

                // Check if we have hit the max allowable number of iterations.
                if (_cur_iter > _max_iter)
                {
                    return false;
                }

                return true;
            }

        private:
            bool _verbose;

            unsigned long _max_iter;
            unsigned long _cur_iter;
            double _c;
            double _gradient_thresh;
        };

    public:

        vector_normalizer_frobmetric (
        )
        {
            verbose = false;
            eps = 0.1;
            C = 1;
            max_iter = 5000;
            _use_identity_matrix_prior = false;
        }

        bool uses_identity_matrix_prior (
        ) const
        {
            return _use_identity_matrix_prior;
        }

        void set_uses_identity_matrix_prior (
            bool use_prior
        )
        {
            _use_identity_matrix_prior = use_prior;
        }

        void be_verbose(
        )
        {
            verbose = true;
        }

        // Sets the stopping threshold used by custom_stop_strategy.  Requires eps_ > 0.
        void set_epsilon (
            double eps_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(eps_ > 0,
                "\t void vector_normalizer_frobmetric::set_epsilon(eps_)"
                << "\n\t invalid inputs were given to this function"
                << "\n\t eps: " << eps_
            );
            eps = eps_;
        }

        double get_epsilon (
        ) const
        {
            return eps;
        }

        // Sets the box-constraint parameter C used by train().  Requires C_ > 0.
        void set_c (
            double C_
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(C_ > 0,
                "\t void vector_normalizer_frobmetric::set_c()"
                << "\n\t C_ must be greater than 0"
                << "\n\t C_: " << C_
                << "\n\t this: " << this
                );

            C = C_;
        }

        void set_max_iterations (
            unsigned long max_iterations
        )
        {
            max_iter = max_iterations;
        }

        unsigned long get_max_iterations (
        ) const
        {
            return max_iter;
        }

        double get_c (
        ) const
        {
            return C;
        }

        void be_quiet (
        )
        {
            verbose = false;
        }

        // Learns tform and m from the given samples.  Requires a nonempty sample set
        // where every vector is a column vector of the same nonzero dimension and
        // every sample contributes at least one triple.  On return, operator() maps
        // x to tform*x-m.
        void train (
            const std::vector<frobmetric_training_sample<matrix_type> >& samples
        )
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(samples.size() > 0,
                "\tvoid vector_normalizer_frobmetric::train()"
                << "\n\t you have to give a nonempty set of samples to this function"
                );

#ifdef ENABLE_ASSERTS
            {
                const long dims = samples[0].anchor_vect.size();
                DLIB_ASSERT(dims != 0,
                    "\tvoid vector_normalizer_frobmetric::train()"
                    << "\n\t The dimension of the input vectors can't be zero."
                    );

                for (unsigned long i = 0; i < samples.size(); ++i)
                {
                    DLIB_ASSERT(is_col_vector(samples[i].anchor_vect),
                        "\tvoid vector_normalizer_frobmetric::train()"
                        << "\n\t Invalid inputs were given to this function."
                        << "\n\t i: " << i
                        );

                    DLIB_ASSERT(samples[i].anchor_vect.size() == dims,
                        "\tvoid vector_normalizer_frobmetric::train()"
                        << "\n\t Invalid inputs were given to this function."
                        << "\n\t i:    " << i
                        << "\n\t dims: " << dims
                        << "\n\t samples[i].anchor_vect.size(): " << samples[i].anchor_vect.size()
                        );

                    DLIB_ASSERT(samples[i].num_triples() != 0,
                        "\tvoid vector_normalizer_frobmetric::train()"
                        << "\n\t It is illegal for a training sample to have no data in it"
                        << "\n\t i: " << i
                        );

                    for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
                    {
                        DLIB_ASSERT(is_col_vector(samples[i].near_vects[j]),
                            "\tvoid vector_normalizer_frobmetric::train()"
                            << "\n\t Invalid inputs were given to this function."
                            << "\n\t i: " << i
                            << "\n\t j: " << j
                            );

                        DLIB_ASSERT(samples[i].near_vects[j].size() == dims,
                            "\tvoid vector_normalizer_frobmetric::train()"
                            << "\n\t Invalid inputs were given to this function."
                            << "\n\t i:    " << i
                            << "\n\t j:    " << j
                            << "\n\t dims: " << dims
                            << "\n\t samples[i].near_vects[j].size(): " << samples[i].near_vects[j].size()
                            );
                    }
                    for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
                    {
                        DLIB_ASSERT(is_col_vector(samples[i].far_vects[j]),
                            "\tvoid vector_normalizer_frobmetric::train()"
                            << "\n\t Invalid inputs were given to this function."
                            << "\n\t i: " << i
                            << "\n\t j: " << j
                            );

                        DLIB_ASSERT(samples[i].far_vects[j].size() == dims,
                            "\tvoid vector_normalizer_frobmetric::train()"
                            << "\n\t Invalid inputs were given to this function."
                            << "\n\t i:    " << i
                            << "\n\t j:    " << j
                            << "\n\t dims: " << dims
                            << "\n\t samples[i].far_vects[j].size(): " << samples[i].far_vects[j].size()
                            );
                    }
                }
            }
#endif // end ENABLE_ASSERTS

            // compute the mean sample
            m = 0;
            for (unsigned long i = 0; i < samples.size(); ++i)
                m += samples[i].anchor_vect;
            m /= samples.size();

            DLIB_ASSERT(is_finite(m), "Some of the input vectors to vector_normalizer_frobmetric::train() have infinite or NaN values");

            // Now we need to find tform.  So we setup the optimization problem and run it
            // over the next few lines of code.
            unsigned long num_triples = 0;
            for (unsigned long i = 0; i < samples.size(); ++i)
                num_triples += samples[i].near_vects.size()*samples[i].far_vects.size();

            matrix<double,0,1,mem_manager_type> u(num_triples);
            matrix<double,0,1,mem_manager_type> bias(num_triples);
            u = 0;
            bias = 1;

            // precompute all the anchor_vect to far_vects/near_vects pairs
            std::vector<compact_frobmetric_training_sample> data(samples.size());
            unsigned long cnt = 0;
            std::vector<double> far_norm, near_norm;
            for (unsigned long i = 0; i < data.size(); ++i)
            {
                far_norm.clear();
                near_norm.clear();
                data[i].far_vects.reserve(samples[i].far_vects.size());
                data[i].near_vects.reserve(samples[i].near_vects.size());
                for (unsigned long j = 0; j < samples[i].far_vects.size(); ++j)
                {
                    data[i].far_vects.push_back(samples[i].anchor_vect - samples[i].far_vects[j]);
                    if (_use_identity_matrix_prior)
                        far_norm.push_back(length_squared(data[i].far_vects.back()));
                }
                for (unsigned long j = 0; j < samples[i].near_vects.size(); ++j)
                {
                    data[i].near_vects.push_back(samples[i].anchor_vect - samples[i].near_vects[j]);
                    if (_use_identity_matrix_prior)
                        near_norm.push_back(length_squared(data[i].near_vects.back()));
                }

                // Note that this loop only executes if _use_identity_matrix_prior == true.
                // In that case the per-triple bias accounts for the margin already
                // provided by the identity part of the metric.  The enumeration order
                // here must match the idx order used by objective and derivative.
                for (unsigned long j = 0; j < near_norm.size(); ++j)
                {
                    for (unsigned long k = 0; k < far_norm.size(); ++k)
                    {
                        bias(cnt++) = 1 - (far_norm[k] - near_norm[j]);
                    }
                }
            }

            // Now run the main part of the algorithm
            matrix<double,0,0,mem_manager_type> Aminus;
            find_max_box_constrained(lbfgs_search_strategy(10),
                                     custom_stop_strategy(C, eps, verbose, max_iter),
                                     objective(data, Aminus, bias),
                                     derivative(num_triples, data, Aminus, bias),
                                     u, 0, C/num_triples);

            // What we need is the optimal Aminus which is a function of u.  So we already
            // have what we need and just need to put it into tform.
            eigenvalue_decomposition<matrix<double,0,0,mem_manager_type> > ed(make_symmetric(-Aminus));
            matrix<double,0,1,mem_manager_type> eigs = ed.get_real_eigenvalues();

            // But first, discard the components that are zero to within the machine epsilon.
            const double tol = max(eigs)*std::numeric_limits<double>::epsilon();
            for (long i = 0; i < eigs.size(); ++i)
            {
                if (eigs(i) < tol)
                    eigs(i) = 0;
            }
            // tform is a matrix square root of -Aminus (plus the identity when the
            // prior is enabled), so that applying tform corresponds to using the
            // learned metric.
            if (_use_identity_matrix_prior)
                tform = matrix_cast<scalar_type>(identity_matrix(Aminus) + diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));
            else
                tform = matrix_cast<scalar_type>(diagm(sqrt(eigs))*trans(ed.get_pseudo_v()));

            // Pre-apply the transform to m so we don't have to do it inside operator()
            // every time it's called.
            m = tform*m;
        }

        long in_vector_size (
        ) const
        {
            return m.nr();
        }

        long out_vector_size (
        ) const
        {
            return m.nr();
        }

        // Returns tform*mean, i.e. the mean anchor vector already mapped through the
        // learned transform (see the end of train()).
        const matrix<scalar_type,0,1,mem_manager_type>& transformed_means (
        ) const
        {
            return m;
        }

        const matrix<scalar_type,0,0,mem_manager_type>& transform (
        ) const
        {
            return tform;
        }

        // Applies the learned normalization to x.  The returned reference points at
        // an internal temporary that is overwritten by the next call.
        const result_type& operator() (
            const matrix_type& x
        ) const
        {
            // make sure requires clause is not broken
            DLIB_ASSERT(in_vector_size() != 0 && in_vector_size() == x.size() &&
                is_col_vector(x) == true,
                "\tmatrix vector_normalizer_frobmetric::operator()"
                << "\n\t you have given invalid arguments to this function"
                << "\n\t in_vector_size(): " << in_vector_size()
                << "\n\t x.size():         " << x.size()
                << "\n\t is_col_vector(x): " << is_col_vector(x)
                << "\n\t this:             " << this
                );

            temp_out = tform*x-m;
            return temp_out;
        }

        template <typename mt>
        friend void deserialize (
            vector_normalizer_frobmetric<mt>& item,
            std::istream& in
        );

        template <typename mt>
        friend void serialize (
            const vector_normalizer_frobmetric<mt>& item,
            std::ostream& out
        );

    private:

        // ------------------- private data members -------------------

        matrix_type m;
        matrix<scalar_type,0,0,mem_manager_type> tform;
        bool verbose;
        double eps;
        double C;
        unsigned long max_iter;
        bool _use_identity_matrix_prior;

        // This is just a temporary variable that doesn't contribute to the
        // state of this object.
        mutable matrix_type temp_out;
    };

// ----------------------------------------------------------------------------------------

    // Serializes a vector_normalizer_frobmetric to out using serialization format
    // version 2 (version 2 added the _use_identity_matrix_prior flag).
    template <
        typename matrix_type
        >
    void serialize (
        const vector_normalizer_frobmetric<matrix_type>& item,
        std::ostream& out
    )
    {
        const int version = 2;
        serialize(version, out);

        serialize(item.m, out);
        serialize(item.tform, out);
        serialize(item.verbose, out);
        serialize(item.eps, out);
        serialize(item.C, out);
        serialize(item.max_iter, out);
        serialize(item._use_identity_matrix_prior, out);
    }

// ----------------------------------------------------------------------------------------

    // Deserializes a vector_normalizer_frobmetric from in.  Accepts versions 1 and 2;
    // version 1 streams predate the identity matrix prior option, so the flag
    // defaults to false for them.  Throws serialization_error on any other version.
    template <
        typename matrix_type
        >
    void deserialize (
        vector_normalizer_frobmetric<matrix_type>& item,
        std::istream& in
    )
    {
        int version = 0;
        deserialize(version, in);
        if (version != 1 && version != 2)
            throw serialization_error("Unsupported version found while deserializing dlib::vector_normalizer_frobmetric.");

        deserialize(item.m, in);
        deserialize(item.tform, in);
        deserialize(item.verbose, in);
        deserialize(item.eps, in);
        deserialize(item.C, in);
        deserialize(item.max_iter, in);
        if (version == 2)
            deserialize(item._use_identity_matrix_prior, in);
        else
            item._use_identity_matrix_prior = false;
    }

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_VECTOR_NORMALIZER_FRoBMETRIC_Hh_