// dlib C++ Library - fhog

// Copyright (C) 2013  Davis E. King (davis@dlib.net)
// License: Boost Software License   See LICENSE.txt for the full license.
#undef DLIB_fHOG_ABSTRACT_Hh_
#ifdef DLIB_fHOG_ABSTRACT_Hh_

#include "../matrix/matrix_abstract.h"
#include "../array2d/array2d_kernel_abstract.h"
#include "../array/array_kernel_abstract.h"
#include "../image_processing/generic_image.h"

namespace dlib
{

// ----------------------------------------------------------------------------------------

    template <
        typename image_type, 
        typename T, 
        typename mm
        >
    void extract_fhog_features(
        const image_type& img, 
        array2d<matrix<T,31,1>,mm>& hog, 
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h 
            - T should be float or double
        ensures
            - This function implements the HOG feature extraction method described in 
              the paper:
                Object Detection with Discriminatively Trained Part Based Models by
                P. Felzenszwalb, R. Girshick, D. McAllester, D. Ramanan
                IEEE Transactions on Pattern Analysis and Machine Intelligence, Vol. 32, No. 9, Sep. 2010
              This means that it takes an input image img and outputs Felzenszwalb's
              31-dimensional version of HOG features, which are stored into #hog.  This
              is the "interlaced" output format: every element of #hog holds the complete
              31-dimensional vector for one cell.
            - The input image is broken into cells that are cell_size by cell_size pixels
              and within each cell we compute a 31-dimensional FHOG vector.  This vector
              describes the gradient structure within the cell.  
            - A common task is to convolve each channel of the hog image with a linear
              filter.  This is made more convenient if the contents of #hog include extra
              rows and columns of zero padding along the borders.  This extra padding
              allows for more efficient convolution code since the code does not need to
              perform expensive boundary checking.  Therefore, you can set
              filter_rows_padding and filter_cols_padding to indicate the size of the
              filter you wish to use and this function will ensure #hog has the appropriate
              extra zero padding along the borders.  In particular, it will include the
              following extra padding (all divisions are integer divisions):
                - (filter_rows_padding-1)/2 extra rows of zeros on the top of #hog.
                - (filter_cols_padding-1)/2 extra columns of zeros on the left of #hog.
                - filter_rows_padding/2 extra rows of zeros on the bottom of #hog.
                - filter_cols_padding/2 extra columns of zeros on the right of #hog.
              With the default padding values of 1 all four of these amounts are 0, so
              no zero padding is added.  In general the top and bottom padding total
              filter_rows_padding-1 rows and the left and right padding total
              filter_cols_padding-1 columns, which are the terms that appear in the size
              formulas below.
              The extra padding is done such that functions like
              spatially_filter_image() apply their filters to the entire content-containing
              area of a hog image (note that you should use the following planar version of
              extract_fhog_features() instead of the interlaced version if you want to use
              spatially_filter_image() on a hog image).
            - #hog.nr() == max(round(img.nr()/(double)cell_size)-2,0) + filter_rows_padding-1.
            - #hog.nc() == max(round(img.nc()/(double)cell_size)-2,0) + filter_cols_padding-1.
              (i.e. each output dimension is roughly 1/cell_size the original size but
              there is a one cell_size border all around the image that is lost and then we
              add on any additional padding that is requested.)
            - for all valid r and c:
                - #hog[r][c] == the FHOG vector describing the cell centered at the pixel location 
                  fhog_to_image(point(c,r),cell_size,filter_rows_padding,filter_cols_padding) in img.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type,
        typename T, 
        typename mm1, 
        typename mm2
        >
    void extract_fhog_features(
        const image_type& img, 
        dlib::array<array2d<T,mm1>,mm2>& hog, 
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h 
            - T should be float or double
        ensures
            - This function is identical to the above extract_fhog_features() routine
              except that it outputs the results in a planar format rather than the
              interlaced format used above.  That is, each element of the hog vector is
              placed into one of 31 images inside #hog (one image per FHOG dimension).
              To be precise, if vhog is the output of the above interlaced version of
              extract_fhog_features(), called with the same img, cell_size,
              filter_rows_padding, and filter_cols_padding, then we will have, for all
              valid r and c:
                - #hog[i][r][c] == vhog[r][c](i)
                  (where 0 <= i < 31)
            - This planar format is the one to use if you want to run
              spatially_filter_image() on the individual hog channels.
            - #hog.size() == 31
            - for all valid i:
                - #hog[i].nr() == #hog[0].nr()
                - #hog[i].nc() == #hog[0].nc()
                  (i.e. all 31 output planes have the same dimensions)
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type
        >
    matrix<double,0,1> extract_fhog_features(
        const image_type& img, 
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h 
        ensures
            - This function calls the above extract_fhog_features() routine and simply
              packages the entire output into a single dlib::matrix column vector.  The
              matrix is constructed using the planar version of extract_fhog_features()
              and then each output plane is converted into a column vector and
              subsequently all 31 column vectors are concatenated together and returned.
            - Each plane is converted into a column vector using reshape_to_column_vector(), 
              and is therefore represented in row-major order inside the returned vector.  
            - If H is the array<array2d<double>> object output by the planar
              extract_fhog_features() then the returned vector is composed by concatenating
              H[0], then H[1], then H[2], and so on in ascending index order.  Since all
              31 planes have the same dimensions, the returned vector therefore contains
              31*H[0].nr()*H[0].nc() elements.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename image_type,
        typename T
        >
    void extract_fhog_features(
        const image_type& img, 
        matrix<T,0,1>& feats,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
            - image_type == an image object that implements the interface defined in
              dlib/image_processing/generic_image.h 
            - T is float, double, or long double
        ensures
            - This function is identical to the above version of extract_fhog_features()
              that returns a matrix<double,0,1> except that it returns the matrix here
              through a reference argument instead of returning it by value.  That is,
              the concatenated feature vector is stored into #feats, using the same
              plane ordering and row-major layout as described above.
    !*/

// ----------------------------------------------------------------------------------------

    inline point image_to_fhog (
        point p,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - Maps a pixel location in the input image to the location of the FHOG cell
              that contains it.
            - When using extract_fhog_features(), each FHOG cell is extracted from a
              certain region in the input image.  image_to_fhog() returns the identity of
              the FHOG cell containing the image pixel at location p.  Or in other words,
              let P == image_to_fhog(p) and hog be a FHOG feature map output by
              extract_fhog_features(), then hog[P.y()][P.x()] == the FHOG vector/cell
              containing the point p in the input image.  Note that some image points
              might not have corresponding feature locations.  E.g. border points or points
              outside the image.  In these cases the returned point will be outside the
              input image.
              NOTE(review): the returned point indexes the FHOG feature map, so "outside
              the input image" presumably means the point is not a valid location in the
              feature map.  Confirm against the implementation before relying on it.
            - Note that you should use the same values of cell_size, filter_rows_padding,
              and filter_cols_padding that you used with extract_fhog_features().
    !*/

// ----------------------------------------------------------------------------------------

    inline rectangle image_to_fhog (
        const rectangle& rect,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - Maps a rectangle from image space to fhog space by mapping its top left and
              bottom right corners with the point version of image_to_fhog().  In
              particular this function returns:
              rectangle(image_to_fhog(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), 
                        image_to_fhog(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
            - As with the point version, cell_size, filter_rows_padding, and
              filter_cols_padding should be the values used with extract_fhog_features().
    !*/

// ----------------------------------------------------------------------------------------

    inline point fhog_to_image (
        point p,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - Maps a pixel in a FHOG image (produced by extract_fhog_features()) back to the
              corresponding original input pixel.  Note that since FHOG images are
              spatially downsampled by aggregation into cells, the image-to-FHOG mapping
              is many-to-one (all the pixels of a cell map to the same FHOG location) and
              so it is not totally invertible.  Therefore, the returned location will be
              the center of the cell in the original image that contained the FHOG vector
              at position p.  Moreover, cell_size, filter_rows_padding, and
              filter_cols_padding should be set to the values used by the call to
              extract_fhog_features().
            - In the other direction, mapping from fhog space to image space is an
              invertible transformation.  That is, going from fhog space to image space
              and back always returns the point you started with, so for any point P we
              have P == image_to_fhog(fhog_to_image(P,cell_size,filter_rows_padding,filter_cols_padding),
                                      cell_size,filter_rows_padding,filter_cols_padding).
    !*/

// ----------------------------------------------------------------------------------------

    inline rectangle fhog_to_image (
        const rectangle& rect,
        int cell_size = 8,
        int filter_rows_padding = 1,
        int filter_cols_padding = 1
    );
    /*!
        requires
            - cell_size > 0
            - filter_rows_padding > 0
            - filter_cols_padding > 0
        ensures
            - Maps a rectangle from fhog space to image space by mapping its top left and
              bottom right corners with the point version of fhog_to_image().  In
              particular this function returns:
              rectangle(fhog_to_image(rect.tl_corner(),cell_size,filter_rows_padding,filter_cols_padding), 
                        fhog_to_image(rect.br_corner(),cell_size,filter_rows_padding,filter_cols_padding))
            - Mapping from fhog space to image space is an invertible transformation.  That
              is, going from fhog space to image space and back always returns the
              rectangle you started with, so for any rectangle R we have
              R == image_to_fhog(fhog_to_image(R,cell_size,filter_rows_padding,filter_cols_padding),
                                 cell_size,filter_rows_padding,filter_cols_padding).
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T, 
        typename mm1, 
        typename mm2
        >
    matrix<unsigned char> draw_fhog(
        const dlib::array<array2d<T,mm1>,mm2>& hog,
        const long cell_draw_size = 15,
        const float min_response_threshold = 0.0
    );
    /*!
        requires
            - cell_draw_size > 0
            - hog.size() == 31
              (i.e. hog is in the planar format output by extract_fhog_features(), with
              one image per FHOG dimension)
        ensures
            - Interprets hog as a FHOG feature map output by extract_fhog_features() and
              converts it into an image suitable for display on the screen.  In particular,
              we draw all the hog cells into a grayscale image in a way that shows the
              magnitude and orientation of the gradient energy in each cell.  The result is
              then returned.
            - Each hog cell will be rendered in the output image as a square that is
              cell_draw_size pixels wide and tall.
            - HOG cells with a response value less than min_response_threshold are not
              drawn.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T
        >
    matrix<unsigned char> draw_fhog (
        const std::vector<matrix<T> >& hog,
        const long cell_draw_size = 15,
        const float min_response_threshold = 0.0
    );
    /*!
        requires
            - cell_draw_size > 0
            - hog.size() == 31
              (i.e. hog holds one matrix per FHOG dimension, like the planar format
              output by extract_fhog_features())
        ensures
            - This is a convenience overload for hog feature maps stored as a
              std::vector of matrix objects.  It just converts the given hog object into
              an array<array2d<T>> and passes it to the above draw_fhog() routine and
              returns the results.
            - HOG cells with a response value less than min_response_threshold are not
              drawn.
    !*/

// ----------------------------------------------------------------------------------------

    template <
        typename T, 
        typename mm
        >
    matrix<unsigned char> draw_fhog(
        const array2d<matrix<T,31,1>,mm>& hog,
        const long cell_draw_size = 15,
        const float min_response_threshold = 0.0
    );
    /*!
        requires
            - cell_draw_size > 0
        ensures
            - Interprets hog as a FHOG feature map output by extract_fhog_features() (in
              the interlaced format, where each element of hog is a 31-dimensional
              vector) and converts it into an image suitable for display on the screen.
              In particular, we draw all the hog cells into a grayscale image in a way
              that shows the magnitude and orientation of the gradient energy in each
              cell.  The result is then returned.
            - Each hog cell will be rendered in the output image as a square that is
              cell_draw_size pixels wide and tall.
            - HOG cells with a response value less than min_response_threshold are not
              drawn.
    !*/

// ----------------------------------------------------------------------------------------

}

#endif // DLIB_fHOG_ABSTRACT_Hh_