// Copyright (C) 2013  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.

// The guard macro is undefined immediately before it is tested, so nothing between the
// #ifdef and the matching #endif is ever compiled.  This file exists only to document
// the interface (declarations plus requires/ensures contracts) of the FHOG routines.
#undef DLIB_fHOG_ABSTRACT_Hh_
#ifdef DLIB_fHOG_ABSTRACT_Hh_

#include "../matrix/matrix_abstract.h"
#include "../array2d/array2d_kernel_abstract.h"
#include "../array/array_kernel_abstract.h"
#include "../image_processing/generic_image.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename image_type,
        typename T,
        typename mm
        >
    void extract_fhog_features(
        const image_type& img,
        array2d<matrix<T,31,1>,mm>& hog,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h
            - T should be float or double
        ensures
            - This function implements the HOG feature extraction method described in
              the paper:
                Object Detection with Discriminatively Trained Part Based Models by
                P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
                IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
              This means that it takes an input image img and outputs Felzenszwalb's
              31 dimensional version of HOG features, which are stored into #hog.
            - The input image is broken into cells that are cell_size by cell_size pixels
              and within each cell we compute a 31 dimensional FHOG vector.  This vector
              describes the gradient structure within the cell.
            - A common task is to convolve each channel of the hog image with a linear
              filter.  This is made more convenient if the contents of #hog includes extra
              rows and columns of zero padding along the borders.  This extra padding
              allows for more efficient convolution code since the code does not need to
              perform expensive boundary checking.  Therefore, you can set
              filter_rows_padding and filter_cols_padding to indicate the size of the
              filter you wish to use and this function will ensure #hog has the appropriate
              extra zero padding along the borders.  In particular, it will include the
              following extra padding:
                - (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog.
                - (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog.
                - filter_rows_padding/2 extra rows of zeros on the bottom of #hog.
                - filter_cols_padding/2 extra columns of zeros on the right of #hog.
              Therefore, the extra padding is done such that functions like
              spatially_filter_image() apply their filters to the entire content containing
              area of a hog image (note that you should use the following planar version of
              extract_fhog_features() instead of the interlaced version if you want to use
              spatially_filter_image() on a hog image).
            - #hog.nr() == max(round(img.nr()/(double)cell_size)-2,0) + filter_rows_padding-1.
            - #hog.nc() == max(round(img.nc()/(double)cell_size)-2,0) + filter_cols_padding-1.
              (i.e. Each output dimension is roughly 1/cell_size the original size but
              there is a one cell_size border all around the image that is lost and then we
              add on any additional padding that is requested.)
            - for all valid r and c:
                - #hog[r][c] == the FHOG vector describing the cell centered at the pixel location
                  fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type,
        typename T,
        typename mm1,
        typename mm2
        >
    void extract_fhog_features(
        const image_type& img,
        dlib::array<array2d<T,mm1>,mm2>& hog,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h
            - T should be float or double
        ensures
            - This function is identical to the above extract_fhog_features() routine
              except that it outputs the results in a planar format rather than the
              interlaced format used above.  That is, each element of the hog vector is
              placed into one of 31 images inside #hog.  To be precise, if vhog is the
              output of the above interlaced version of extract_fhog_features() then we
              will have, for all valid r and c:
                - #hog[i][r][c] == vhog[r][c](i)
                  (where 0 <= i < 31)
            - #hog.size() == 31
            - for all valid i:
                - #hog[i].nr() == #hog[0].nr()
                - #hog[i].nc() == #hog[0].nc()
              (i.e. all 31 output planes have the same dimensions)
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type
        >
    matrix<double,0,1> extract_fhog_features(
        const image_type& img,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h
        ensures
            - This function calls the above extract_fhog_features() routine and simply
              packages the entire output into a dlib::matrix.  The matrix is constructed
              using the planar version of extract_fhog_features() and then each output
              plane is converted into a column vector and subsequently all 31 column
              vectors are concatenated together and returned.
            - Each plane is converted into a column vector using reshape_to_column_vector(),
              and is therefore represented in row major order inside the returned vector.
            - If H is the array<array2d<double>> object output by the planar
              extract_fhog_features() then the returned vector is composed by concatenating
              H[0], then H[1], then H[2], and so on in ascending index order.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type,
        typename T
        >
    void extract_fhog_features(
        const image_type& img,
        matrix<T,0,1>& feats,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h
            - T is float, double, or long double
        ensures
            - This function is identical to the above version of extract_fhog_features()
              that returns a matrix<double,0,1> except that it returns the matrix here
              through a reference argument instead of returning it by value.
    !*/

// ----------------------------------------------------------------------------------------

    inline point image_to_fhog (
        point p,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - When using extract_fhog_features(), each FHOG cell is extracted from a
              certain region in the input image.  image_to_fhog() returns the identity of
              the FHOG cell containing the image pixel at location p.  Or in other words,
              let P == image_to_fhog(p) and hog be a FHOG feature map output by
              extract_fhog_features(), then hog[P.y()][P.x()] == the FHOG vector/cell
              containing the point p in the input image.  Note that some image points
              might not have corresponding feature locations.  E.g. border points or points
              outside the image.  In these cases the returned point will be outside the
              input image.
              (NOTE(review): the returned point is expressed in FHOG coordinates, so this
              presumably means it falls outside the content containing area of the FHOG
              feature map rather than outside img -- confirm against the implementation.)
            - Note that you should use the same values of cell_size, filter_rows_padding,
              and filter_cols_padding that you used with extract_fhog_features().
    !*/

// ----------------------------------------------------------------------------------------

    inline rectangle image_to_fhog (
        const rectangle& rect,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - maps a rectangle from image space to fhog space.  In particular this function returns:
              rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
                        image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
    !*/

// ----------------------------------------------------------------------------------------

    inline point fhog_to_image (
        point p,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
              corresponding original input pixel.  Note that since FHOG images are
              spatially downsampled by aggregation into cells the mapping is not totally
              invertible (i.e. many image pixels share one FHOG cell).  Therefore, the
              returned location will be the center of the cell in the original image that
              contained the FHOG vector at position p.  Moreover, cell_size,
              filter_rows_padding, and filter_cols_padding should be set to the values used
              by the call to extract_fhog_features().
            - Mapping from fhog space to image space is an invertible transformation.  That
              is, for any point P we have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding),
                                                             cell_size,filter_rows_padding,filter_cols_padding).
    !*/

// ----------------------------------------------------------------------------------------

    inline rectangle fhog_to_image (
        const rectangle& rect,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - maps a rectangle from fhog space to image space.  In particular this function returns:
              rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding),
                        fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
            - Mapping from fhog space to image space is an invertible transformation.  That
              is, for any rectangle R we have R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
                                                                 cell_size,filter_rows_padding,filter_cols_padding).
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T,
        typename mm1,
        typename mm2
        >
    matrix<unsigned char> draw_fhog(
        const dlib::array<array2d<T,mm1>,mm2>& hog,
        const long cell_draw_size = 15,
        const float min_response_threshold = 0.0
    );
    /*!
        requires
            - cell_draw_size > 0
            - hog.size() == 31
        ensures
            - Interprets hog as a FHOG feature map output by extract_fhog_features() and
              converts it into an image suitable for display on the screen.  In particular,
              we draw all the hog cells into a grayscale image in a way that shows the
              magnitude and orientation of the gradient energy in each cell.  The result is
              then returned.
            - The size of the cells in the output image will be rendered as cell_draw_size
              pixels wide and tall.
            - HOG cells with a response value less than min_response_threshold are not
              drawn.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        >
    matrix<unsigned char> draw_fhog (
        const std::vector<matrix<T> >& hog,
        const long cell_draw_size = 15,
        const float min_response_threshold = 0.0
    );
    /*!
        requires
            - cell_draw_size > 0
            - hog.size() == 31
        ensures
            - This function just converts the given hog object into an array<array2d<T>>
              and passes it to the above draw_fhog() routine and returns the results.
            - HOG cells with a response value less than min_response_threshold are not
              drawn.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T,
        typename mm
        >
    matrix<unsigned char> draw_fhog(
        const array2d<matrix<T,31,1>,mm>& hog,
        const long cell_draw_size = 15,
        const float min_response_threshold = 0.0
    );
    /*!
        requires
            - cell_draw_size > 0
        ensures
            - Interprets hog as a FHOG feature map output by extract_fhog_features() and
              converts it into an image suitable for display on the screen.  In particular,
              we draw all the hog cells into a grayscale image in a way that shows the
              magnitude and orientation of the gradient energy in each cell.  The result is
              then returned.
            - The size of the cells in the output image will be rendered as cell_draw_size
              pixels wide and tall.
            - HOG cells with a response value less than min_response_threshold are not
              drawn.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_fHOG_ABSTRACT_Hh_