// Copyright (C) 2011 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #undef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ #ifdef DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_ #include "structural_svm_object_detection_problem_abstract.h" #include "../image_processing/object_detector_abstract.h" #include "../image_processing/box_overlap_testing_abstract.h" #include "../image_processing/full_object_detection_abstract.h" namespace dlib { // ---------------------------------------------------------------------------------------- template < typename image_scanner_type > class structural_object_detection_trainer : noncopyable { /*! REQUIREMENTS ON image_scanner_type image_scanner_type must be an implementation of dlib/image_processing/scan_fhog_pyramid_abstract.h or dlib/image_processing/scan_image_custom_abstract.h or dlib/image_processing/scan_image_pyramid_abstract.h or dlib/image_processing/scan_image_boxes_abstract.h WHAT THIS OBJECT REPRESENTS This object is a tool for learning to detect objects in images based on a set of labeled images. The training procedure produces an object_detector which can be used to predict the locations of objects in new images. Note that this is just a convenience wrapper around the structural_svm_object_detection_problem to make it look similar to all the other trainers in dlib. !*/ public: typedef double scalar_type; typedef default_memory_manager mem_manager_type; typedef object_detector<image_scanner_type> trained_function_type; explicit structural_object_detection_trainer ( const image_scanner_type& scanner ); /*! requires - scanner.get_num_detection_templates() > 0 ensures - #get_c() == 1 - this object isn't verbose - #get_epsilon() == 0.1 - #get_num_threads() == 2 - #get_max_cache_size() == 5 - #get_match_eps() == 0.5 - #get_loss_per_missed_target() == 1 - #get_loss_per_false_alarm() == 1 - This object will attempt to learn a model for the given scanner object when train() is called. - #get_scanner() == scanner (note that only the "configuration" of scanner is copied. I.e. the copy is done using copy_configuration()) - #auto_set_overlap_tester() == true !*/ const image_scanner_type& get_scanner ( ) const; /*! ensures - returns the image scanner used by this object. !*/ bool auto_set_overlap_tester ( ) const; /*! ensures - if (this object will automatically determine an appropriate state for the overlap tester used for non-max suppression.) then - returns true - In this case, it is determined using the find_tight_overlap_tester() routine based on the truth_object_detections given to the structural_object_detection_trainer::train() method. - else - returns false !*/ void set_overlap_tester ( const test_box_overlap& tester ); /*! ensures - #get_overlap_tester() == tester - #auto_set_overlap_tester() == false !*/ test_box_overlap get_overlap_tester ( ) const; /*! requires - auto_set_overlap_tester() == false ensures - returns the overlap tester object which will be used to perform non-max suppression. In particular, this function returns the overlap tester which will populate the object_detector returned by train(). !*/ void set_num_threads ( unsigned long num ); /*! ensures - #get_num_threads() == num !*/ unsigned long get_num_threads ( ) const; /*! ensures - returns the number of threads used during training. You should usually set this equal to the number of processing cores on your machine. !*/ void set_epsilon ( scalar_type eps ); /*! requires - eps > 0 ensures - #get_epsilon() == eps !*/ const scalar_type get_epsilon ( ) const; /*! ensures - returns the error epsilon that determines when training should stop. Smaller values may result in a more accurate solution but take longer to train. You can think of this epsilon value as saying "solve the optimization problem until the average loss per sample is within epsilon of its optimal value". !*/ void set_max_cache_size ( unsigned long max_size ); /*! ensures - #get_max_cache_size() == max_size !*/ unsigned long get_max_cache_size ( ) const; /*! ensures - During training, this object basically runs the object detector on each image, over and over. To speed this up, it is possible to cache the results of these detector invocations. This function returns the number of cache elements per training sample kept in the cache. Note that a value of 0 means caching is not used at all. Note also that each cache element takes up about sizeof(double)*scanner.get_num_dimensions() memory (where scanner is the scanner given to this object's constructor). !*/ void be_verbose ( ); /*! ensures - This object will print status messages to standard out so that a user can observe the progress of the algorithm. !*/ void be_quiet ( ); /*! ensures - this object will not print anything to standard out !*/ void set_oca ( const oca& item ); /*! ensures - #get_oca() == item !*/ const oca get_oca ( ) const; /*! ensures - returns a copy of the optimizer used to solve the structural SVM problem. !*/ void set_c ( scalar_type C ); /*! requires - C > 0 ensures - #get_c() = C !*/ const scalar_type get_c ( ) const; /*! ensures - returns the SVM regularization parameter. It is the parameter that determines the trade-off between trying to fit the training data (i.e. minimize the loss) or allowing more errors but hopefully improving the generalization of the resulting detector. Larger values encourage exact fitting while smaller values of C may encourage better generalization. !*/ void set_match_eps ( double eps ); /*! requires - 0 < eps < 1 ensures - #get_match_eps() == eps !*/ double get_match_eps ( ) const; /*! ensures - returns the amount of alignment necessary for a detection to be considered as matching with a ground truth rectangle. If it doesn't match then it is considered to be a false alarm. To define this precisely, let A and B be two rectangles, then A and B match if and only if: A.intersect(B).area()/(A+B).area() > get_match_eps() !*/ double get_loss_per_missed_target ( ) const; /*! ensures - returns the amount of loss experienced for failing to detect one of the targets. If you care more about finding targets than having a low false alarm rate then you can increase this value. !*/ void set_loss_per_missed_target ( double loss ); /*! requires - loss > 0 ensures - #get_loss_per_missed_target() == loss !*/ double get_loss_per_false_alarm ( ) const; /*! ensures - returns the amount of loss experienced for emitting a false alarm detection. Or in other words, the loss for generating a detection that doesn't correspond to one of the truth rectangles. If you care more about having a low false alarm rate than finding all the targets then you can increase this value. !*/ void set_loss_per_false_alarm ( double loss ); /*! requires - loss > 0 ensures - #get_loss_per_false_alarm() == loss !*/ template < typename image_array_type > const trained_function_type train ( const image_array_type& images, const std::vector<std::vector<full_object_detection> >& truth_object_detections ) const; /*! requires - is_learning_problem(images, truth_object_detections) == true - it must be valid to pass images[0] into the image_scanner_type::load() method. (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) - for all valid i, j: - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() - all_parts_in_rect(truth_object_detections[i][j]) == true ensures - Uses the structural_svm_object_detection_problem to train an object_detector on the given images and truth_object_detections. - returns a function F with the following properties: - F(new_image) == A prediction of what objects are present in new_image. This is a set of rectangles indicating their positions. !*/ template < typename image_array_type > const trained_function_type train ( const image_array_type& images, const std::vector<std::vector<rectangle> >& truth_object_detections ) const; /*! requires - is_learning_problem(images, truth_object_detections) == true - it must be valid to pass images[0] into the image_scanner_type::load() method. (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) - get_scanner().get_num_movable_components_per_detection_template() == 0 ensures - This function is identical to the above train(), except that it converts each element of truth_object_detections into a full_object_detection by passing it to full_object_detection's constructor taking only a rectangle. Therefore, this version of train() is a convenience function for for the case where you don't have any movable components of the detection templates. !*/ template < typename image_array_type > const trained_function_type train ( const image_array_type& images, const std::vector<std::vector<full_object_detection> >& truth_object_detections, const std::vector<std::vector<rectangle> >& ignore, const test_box_overlap& ignore_overlap_tester = test_box_overlap() ) const; /*! requires - is_learning_problem(images, truth_object_detections) == true - it must be valid to pass images[0] into the image_scanner_type::load() method. (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) - ignore.size() == images.size() - for all valid i, j: - truth_object_detections[i][j].num_parts() == get_scanner().get_num_movable_components_per_detection_template() - all_parts_in_rect(truth_object_detections[i][j]) == true ensures - Uses the structural_svm_object_detection_problem to train an object_detector on the given images and truth_object_detections. - for all valid i: - Within images[i] any detections that match against a rectangle in ignore[i], according to ignore_overlap_tester, are ignored. That is, the optimizer doesn't care if the detector outputs a detection that matches any of the ignore rectangles or if it fails to output a detection for an ignore rectangle. Therefore, if there are objects in your dataset that you are unsure if you want to detect or otherwise don't care if the detector gets or doesn't then you can mark them with ignore rectangles and the optimizer will simply ignore them. - returns a function F with the following properties: - F(new_image) == A prediction of what objects are present in new_image. This is a set of rectangles indicating their positions. !*/ template < typename image_array_type > const trained_function_type train ( const image_array_type& images, const std::vector<std::vector<rectangle> >& truth_object_detections, const std::vector<std::vector<rectangle> >& ignore, const test_box_overlap& ignore_overlap_tester = test_box_overlap() ) const; /*! requires - is_learning_problem(images, truth_object_detections) == true - ignore.size() == images.size() - it must be valid to pass images[0] into the image_scanner_type::load() method. (also, image_array_type must be an implementation of dlib/array/array_kernel_abstract.h) - get_scanner().get_num_movable_components_per_detection_template() == 0 ensures - This function is identical to the above train(), except that it converts each element of truth_object_detections into a full_object_detection by passing it to full_object_detection's constructor taking only a rectangle. Therefore, this version of train() is a convenience function for for the case where you don't have any movable components of the detection templates. !*/ }; // ---------------------------------------------------------------------------------------- } #endif // DLIB_STRUCTURAL_OBJECT_DETECTION_TRAiNER_H_ABSTRACTh_