// Copyright (C) 2013  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

// NOTE: This is a documentation-only header.  The guard macro is #undef'ed and then
// immediately tested with #ifdef, so nothing below is ever seen by the compiler.  The
// declarations exist purely to specify the interface and its contracts.
#undef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_
#ifdef DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_

#include <vector>
#include "../matrix.h"
#include "../geometry.h"
#include "../image_processing/full_object_detection_abstract.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    class example_feature_extractor
    {
        /*!
            WHAT THIS OBJECT REPRESENTS
                This object defines the interface a feature extractor must implement if it
                is to be used with the scan_image_custom object defined at the bottom of
                this file.

                In this case, the purpose of a feature extractor is to associate a
                complete feature vector with each rectangle in an image.  In particular,
                each rectangle is scored by taking the dot product between this feature
                vector and a weight vector.  If this score is greater than a threshold then
                the rectangle is output as a detection.
        !*/

    public:

        template <
            typename image_type
            >
        void load (
            const image_type& image,
            std::vector<rectangle>& candidate_objects
        );
        /*!
            ensures
                - Loads the given image into this feature extractor.  This means that
                  subsequent calls to get_feature_vector() will return the feature vector
                  corresponding to locations in the image given to load().
                - #candidate_objects == a set of bounding boxes in the given image that
                  might contain objects of interest.  These are the locations that will be
                  checked for the presence of objects when this feature extractor is used
                  with the scan_image_custom object.
        !*/

        void copy_configuration (
            const example_feature_extractor& item
        );
        /*!
            ensures
                - Copies all the state information of item into *this, except for state
                  information populated by load().  More precisely, given two feature
                  extractor objects S1 and S2, the following sequence of instructions
                  should always result in both of them having the exact same state:
                    S2.copy_configuration(S1);
                    S1.load(img, temp);
                    S2.load(img, temp);
        !*/

        unsigned long get_num_dimensions (
        ) const;
        /*!
            ensures
                - returns the dimensionality of the feature vectors output by this object.
        !*/

        void get_feature_vector (
            const rectangle& obj,
            matrix<double,0,1>& psi
        ) const;
        /*!
            requires
                - psi.size() >= get_num_dimensions()
                  (i.e. psi must have preallocated its memory before this function is called)
            ensures
                - This function computes the feature vector associated with the given
                  rectangle in obj.  This rectangle is interpreted as a bounding box within
                  the last image given to this->load() and a feature vector describing that
                  bounding box is output into psi.
                - The feature vector is added into psi.  That is, it does not overwrite the
                  previous contents of psi, but instead, it adds the vector to psi.
                - The dimensionality of the vector added to psi is get_num_dimensions().
                  This means that elements of psi after psi(get_num_dimensions()-1) are not
                  modified.
                - #psi.size() == psi.size()
                  (i.e. this function does not change the size of the psi vector)
        !*/

        double compute_object_score (
            const matrix<double,0,1>& w,
            const rectangle& obj
        ) const;
        /*!
            requires
                - w.size() >= get_num_dimensions()
            ensures
                - This function returns the dot product between the feature vector for
                  object box obj and the given w vector.  That is, this function computes
                  the same number as the following code snippet:
                    matrix<double,0,1> psi(w.size());
                    psi = 0;
                    get_feature_vector(obj, psi);
                    return dot(psi, w);
                  The point of the compute_object_score() routine is to compute this dot
                  product in a much more efficient way than directly calling
                  get_feature_vector() and dot().  Therefore, compute_object_score() is an
                  optional function.  If you can't think of a faster way to compute these
                  scores then do not implement compute_object_score() and the
                  scan_image_custom object will simply compute these scores for you.
                  However, it is often the case that there is something clever you can do
                  to make this computation faster.  If that is the case, then you can
                  provide an implementation of this function with your feature extractor
                  and then scan_image_custom will use it instead of using the default
                  calculation method shown in the above code snippet.
        !*/

    };

// ----------------------------------------------------------------------------------------

    void serialize(
        const example_feature_extractor& item,
        std::ostream& out
    );
    /*!
        provides serialization support
    !*/

    void deserialize(
        example_feature_extractor& item,
        std::istream& in
    );
    /*!
        provides deserialization support
    !*/

// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------
// ----------------------------------------------------------------------------------------

    template <
        typename Feature_extractor_type
        >
    class scan_image_custom : noncopyable
    {
        /*!
            REQUIREMENTS ON Feature_extractor_type
                - must be an object with an interface compatible with the
                  example_feature_extractor defined at the top of this file.

            INITIAL VALUE
                - is_loaded_with_image() == false

            WHAT THIS OBJECT REPRESENTS
                This object is a tool for running a classifier over an image with the goal
                of localizing each object present.  The localization is in the form of the
                bounding box around each object of interest.

                Unlike the scan_image_pyramid and scan_image_boxes objects, this image
                scanner delegates all the work of constructing the object feature vector to
                its Feature_extractor_type template argument.  That is, scan_image_custom
                simply asks the supplied feature extractor what boxes in the image we
                should investigate and then asks the feature extractor for the complete
                feature vector for each box.  That is, scan_image_custom does not apply any
                kind of pyramiding or other higher level processing to the features coming
                out of the feature extractor.  That means that when you use
                scan_image_custom it is completely up to you to define the feature vector
                used with each image box.

            THREAD SAFETY
                Concurrent access to an instance of this object is not safe and should be
                protected by a mutex lock except for the case where you are copying the
                configuration (via copy_configuration()) of a scan_image_custom object to
                many other threads.  In this case, it is safe to copy the configuration of
                a shared object so long as no other operations are performed on it.
        !*/

    public:

        typedef matrix<double,0,1> feature_vector_type;
        typedef Feature_extractor_type feature_extractor_type;

        scan_image_custom (
        );
        /*!
            ensures
                - this object is properly initialized
        !*/

        template <
            typename image_type
            >
        void load (
            const image_type& img
        );
        /*!
            requires
                - image_type must be a type with the following properties:
                    - image_type objects can be loaded into Feature_extractor_type
                      objects via Feature_extractor_type::load().
            ensures
                - #is_loaded_with_image() == true
                - Calls get_feature_extractor().load() on the given image.  That is, we
                  will have loaded the image into the feature extractor in this
                  scan_image_custom object.  We will also have stored the candidate
                  object locations generated by the feature extractor and will scan
                  over them when this->detect() is called.
                - This object is ready to run a classifier over img to detect object
                  locations.  Call detect() to do this.
        !*/

        bool is_loaded_with_image (
        ) const;
        /*!
            ensures
                - returns true if this object has been loaded with an image to process and
                  false otherwise.
        !*/

        const feature_extractor_type& get_feature_extractor (
        ) const;
        /*!
            ensures
                - returns a const reference to the feature_extractor_type object used
                  internally for local feature extraction.
        !*/

        void copy_configuration(
            const feature_extractor_type& fe
        );
        /*!
            ensures
                - This function performs the equivalent of
                  get_feature_extractor().copy_configuration(fe) (i.e. this function allows
                  you to configure the parameters of the underlying feature extractor used
                  by a scan_image_custom object)
        !*/

        void copy_configuration (
            const scan_image_custom& item
        );
        /*!
            ensures
                - Copies all the state information of item into *this, except for state
                  information populated by load().  More precisely, given two
                  scan_image_custom objects S1 and S2, the following sequence of
                  instructions should always result in both of them having the exact same
                  state:
                    S2.copy_configuration(S1);
                    S1.load(img);
                    S2.load(img);
        !*/

        long get_num_dimensions (
        ) const;
        /*!
            ensures
                - returns the number of dimensions in the feature vector for a candidate
                  object location.  That is, this function returns
                  get_feature_extractor().get_num_dimensions().
        !*/

        void detect (
            const feature_vector_type& w,
            std::vector<std::pair<double, rectangle> >& dets,
            const double thresh
        ) const;
        /*!
            requires
                - w.size() >= get_num_dimensions()
                - is_loaded_with_image() == true
            ensures
                - Scans over all the candidate object locations produced by the feature
                  extractor during image loading and stores all detections into #dets.
                - for all valid i:
                    - #dets[i].second == The candidate object location which produced this
                      detection.  This rectangle gives the location of the detection.
                    - #dets[i].first == The score for this detection.  This value is equal
                      to dot(w, feature vector for this candidate object location).
                    - #dets[i].first >= thresh
                - #dets will be sorted in descending order.
                  (i.e. #dets[i].first >= #dets[j].first for all i and j where j > i)
                - Elements of w beyond index get_num_dimensions()-1 are ignored.  I.e. only
                  the first get_num_dimensions() are used.
                - Note that no form of non-max suppression is performed.  If a location
                  has a score >= thresh then it is reported in #dets.
        !*/

        void get_feature_vector (
            const full_object_detection& obj,
            feature_vector_type& psi
        ) const;
        /*!
            requires
                - obj.num_parts() == 0
                - is_loaded_with_image() == true
                - psi.size() >= get_num_dimensions()
                  (i.e. psi must have preallocated its memory before this function is called)
            ensures
                - This function allows you to determine the feature vector used for a
                  candidate object location output from detect().  Note that this vector is
                  added to psi.  Note also that you must use get_full_object_detection() to
                  convert a rectangle from detect() into the needed full_object_detection.
                - The dimensionality of the vector added to psi is get_num_dimensions().
                  This means that elements of psi after psi(get_num_dimensions()-1) are not
                  modified.
                - Since scan_image_custom only searches a limited set of object locations,
                  not all possible rectangles can be output by detect().  So in the case
                  where obj.get_rect() could not arise from a call to detect(), this
                  function will map obj.get_rect() to the nearest possible rectangle and
                  then add the feature vector for the mapped rectangle into #psi.
                - get_best_matching_rect(obj.get_rect()) == the rectangle obj.get_rect()
                  gets mapped to for feature extraction.
        !*/

        full_object_detection get_full_object_detection (
            const rectangle& rect,
            const feature_vector_type& w
        ) const;
        /*!
            ensures
                - returns full_object_detection(rect)
                  (This function is here only for compatibility with the
                  scan_image_pyramid object)
        !*/

        const rectangle get_best_matching_rect (
            const rectangle& rect
        ) const;
        /*!
            requires
                - is_loaded_with_image() == true
            ensures
                - Since scan_image_custom only searches a limited set of object locations,
                  not all possible rectangles can be represented.  Therefore, this function
                  allows you to supply a rectangle and obtain the nearest possible
                  candidate object location rectangle.
        !*/

        unsigned long get_num_detection_templates (
        ) const { return 1; }
        /*!
            ensures
                - returns 1.  Note that this function is here only for compatibility with
                  the scan_image_pyramid object.  Notionally, its return value indicates
                  that a scan_image_custom object is always ready to detect objects once
                  an image has been loaded.
        !*/

        unsigned long get_num_movable_components_per_detection_template (
        ) const { return 0; }
        /*!
            ensures
                - returns 0.  Note that this function is here only for compatibility with
                  the scan_image_pyramid object.  Its return value means that this object
                  does not support using movable part models.
        !*/

    };

// ----------------------------------------------------------------------------------------

    template <typename T>
    void serialize (
        const scan_image_custom<T>& item,
        std::ostream& out
    );
    /*!
        provides serialization support
    !*/

    template <typename T>
    void deserialize (
        scan_image_custom<T>& item,
        std::istream& in
    );
    /*!
        provides deserialization support
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_SCAN_IMAGE_CuSTOM_ABSTRACT_Hh_