d0/df8/samples_2cpp_2train_HOG_8cpp-example.html

#include "opencv2/imgproc.hpp"

#include "opencv2/highgui.hpp"

#include "opencv2/ml.hpp"

#include "opencv2/objdetect.hpp"

#include "opencv2/videoio.hpp"

#include <iostream>

#include <time.h>


using namespace cv;

using namespace cv::ml;

using namespace std;


// Forward declarations of the helpers defined later in this file.

// Returns the single support vector of a trained SVM followed by -rho, as a flat float vector.
vector< float > get_svm_detector( const Ptr< SVM >& svm );

// Packs the given row/column vectors into trainData, one sample per row (CV_32FC1).
void convert_to_ml( const std::vector< Mat > & train_samples, Mat& trainData );

// Reads every file matched by dirname and appends the non-empty images to img_lst;
// the definition below supplies the default showImages = false.
void load_images( const String & dirname, vector< Mat > & img_lst, bool showImages );

// Appends one randomly placed crop of the given size from each image strictly larger than size.
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );

// Appends the HOG descriptor of the centered wsize window of each large-enough image
// (and of its horizontal mirror when use_flip is set).
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip );

// Loads a detector from obj_det_filename and displays its detections on video frames
// or on the images matched by test_dir.
void test_trained_detector( String obj_det_filename, String test_dir, String videofilename );


/*
 * Build the flat coefficient vector that is handed to HOGDescriptor::setSVMDetector.
 *
 * svm : trained SVM whose decision function 0 consists of exactly one CV_32F
 *       support vector with a single alpha equal to 1 (all asserted below).
 *
 * Returns the sv.cols weights of that support vector followed by -rho.
 */
vector< float > get_svm_detector( const Ptr< SVM >& svm )
{
    // Support vectors and decision function (alpha weights, indices, offset rho).
    Mat sv = svm->getSupportVectors();
    Mat alpha, svidx;
    const double rho = svm->getDecisionFunction( 0, alpha, svidx );
    const int sv_total = sv.rows;

    CV_Assert( alpha.total() == 1 && svidx.total() == 1 && sv_total == 1 );
    CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
               (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
    CV_Assert( sv.type() == CV_32F );

    // Copy the weights of the only support vector, then append the negated offset.
    const float* const weights = sv.ptr< float >();
    vector< float > coefficients( weights, weights + sv.cols );
    coefficients.push_back( (float)-rho );
    return coefficients;
}


/*

* Convert training/testing set to be used by OpenCV Machine Learning algorithms.

* TrainData is a matrix of size (#samples x max(#cols,#rows) per samples), in 32FC1.

* Transposition of samples are made if needed.

*/

void convert_to_ml( const vector< Mat > & train_samples, Mat& trainData )

{

 //--Convert data

 const int rows = (int)train_samples.size();

 const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows );

 Mat tmp( 1, cols, CV_32FC1 );

 trainData = Mat( rows, cols, CV_32FC1 );


 for( size_t i = 0 ; i < train_samples.size(); ++i )

 {

 CV_Assert( train_samples[i].cols == 1 || train_samples[i].rows == 1 );


 if( train_samples[i].cols == 1 )

 {

 transpose( train_samples[i], tmp );

 tmp.copyTo( trainData.row( (int)i ) );

 }

 else if( train_samples[i].rows == 1 )

 {

 train_samples[i].copyTo( trainData.row( (int)i ) );

 }

 }

}


void load_images( const String & dirname, vector< Mat > & img_lst, bool showImages = false )

{

 vector< String > files;

 glob( dirname, files );


 for ( size_t i = 0; i < files.size(); ++i )

 {

 Mat img = imread( files[i] ); // load the image

 if ( img.empty() )

 {

 cout << files[i] << " is invalid!" << endl; // invalid image, skip it.

 continue;

 }


 if ( showImages )

 {

 imshow( "image", img );

 waitKey( 1 );

 }

 img_lst.push_back( img );

 }

}


void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size )

{

 Rect box;

 box.width = size.width;

 box.height = size.height;


 srand( (unsigned int)time( NULL ) );


 for ( size_t i = 0; i < full_neg_lst.size(); i++ )

 if ( full_neg_lst[i].cols > box.width && full_neg_lst[i].rows > box.height )

 {

 box.x = rand() % ( full_neg_lst[i].cols - box.width );

 box.y = rand() % ( full_neg_lst[i].rows - box.height );

 Mat roi = full_neg_lst[i]( box );

 neg_lst.push_back( roi.clone() );

 }

}


void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip )

{

 HOGDescriptor hog;

 hog.winSize = wsize;

 Mat gray;

 vector< float > descriptors;


 for( size_t i = 0 ; i < img_lst.size(); i++ )

 {

 if ( img_lst[i].cols >= wsize.width && img_lst[i].rows >= wsize.height )

 {

 Rect r = Rect(( img_lst[i].cols - wsize.width ) / 2,

 ( img_lst[i].rows - wsize.height ) / 2,

 wsize.width,

 wsize.height);

 cvtColor( img_lst[i](r), gray, COLOR_BGR2GRAY );

 hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ) );

 gradient_lst.push_back( Mat( descriptors ).clone() );

 if ( use_flip )

 {

 flip( gray, gray, 1 );

 hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ) );

 gradient_lst.push_back( Mat( descriptors ).clone() );

 }

 }

 }

}


/*
 * Run a saved HOG detector on a video stream or on a set of images and show the result.
 *
 * obj_det_filename : file the HOGDescriptor is loaded from; also used (prefixed
 *                    with "testing ") as the window title.
 * test_dir         : pattern passed to glob(); the matched images are used when
 *                    no video capture is open.
 * videofilename    : empty for none, a single digit for a camera index, or any
 *                    other string handed to VideoCapture::open.
 *
 * Returns when the detector cannot be loaded, when no further frame/image is
 * available, or when ESC (key code 27) is pressed.
 */
void test_trained_detector( String obj_det_filename, String test_dir, String videofilename )
{
    cout << "Testing trained detector..." << endl;

    HOGDescriptor hog;
    // load() reports failure through its bool result; without this check a missing
    // or invalid file would go unnoticed and every image would show no detections.
    if ( !hog.load( obj_det_filename ) )
    {
        cerr << "Could not load detector from " << obj_det_filename << endl;
        return;
    }

    vector< String > files;
    glob( test_dir, files );

    int delay = 0;
    VideoCapture cap;

    if ( videofilename != "" )
    {
        // isdigit() is only defined for unsigned char values (and EOF), hence the cast.
        if ( videofilename.size() == 1 && isdigit( (unsigned char)videofilename[0] ) )
            cap.open( videofilename[0] - '0' );
        else
            cap.open( videofilename );
    }

    obj_det_filename = "testing " + obj_det_filename;
    namedWindow( obj_det_filename, WINDOW_NORMAL );

    for( size_t i=0;; i++ )
    {
        Mat img;

        if ( cap.isOpened() )
        {
            cap >> img;
            delay = 1; // keep the video running instead of waiting for a key
        }
        else if( i < files.size() )
        {
            img = imread( files[i] );
        }

        // End of stream, end of the file list, or an unreadable image.
        if ( img.empty() )
        {
            return;
        }

        vector< Rect > detections;
        vector< double > foundWeights;

        hog.detectMultiScale( img, detections, foundWeights );
        for ( size_t j = 0; j < detections.size(); j++ )
        {
            // Green intensity grows with the square of the detection weight.
            Scalar color = Scalar( 0, foundWeights[j] * foundWeights[j] * 200, 0 );
            rectangle( img, detections[j], color, img.cols / 400 + 1 );
        }

        imshow( obj_det_filename, img );

        if( waitKey( delay ) == 27 )
        {
            return;
        }
    }
}


/*
 * Train a HOG + linear SVM detector from positive/negative image directories,
 * optionally retrain it with the false detections found on the negative images,
 * save it to a file and finally run it on the test images / video.
 *
 * With -t only the test step is run on an already saved detector.
 * Returns 0 on success and 1 on missing arguments or unusable input images.
 */
int main( int argc, char** argv )
{
    const char* keys =
    {
        "{help h| | show help message}"
        "{pd | | path of directory contains positive images}"
        "{nd | | path of directory contains negative images}"
        "{td | | path of directory contains test images}"
        "{tv | | test video file name}"
        "{dw | | width of the detector}"
        "{dh | | height of the detector}"
        "{f |false| indicates if the program will generate and use mirrored samples or not}"
        "{d |false| train twice}"
        "{t |false| test a trained detector}"
        "{v |false| visualize training steps}"
        "{fn |my_detector.yml| file name of trained SVM}"
    };

    CommandLineParser parser( argc, argv, keys );

    if ( parser.has( "help" ) )
    {
        parser.printMessage();
        return 0;
    }

    String pos_dir = parser.get< String >( "pd" );
    String neg_dir = parser.get< String >( "nd" );
    String test_dir = parser.get< String >( "td" );
    String obj_det_filename = parser.get< String >( "fn" );
    String videofilename = parser.get< String >( "tv" );
    int detector_width = parser.get< int >( "dw" );
    int detector_height = parser.get< int >( "dh" );
    bool test_detector = parser.get< bool >( "t" );
    bool train_twice = parser.get< bool >( "d" );
    bool visualization = parser.get< bool >( "v" );
    bool flip_samples = parser.get< bool >( "f" );

    // Test-only mode: no training, just run the saved detector.
    if ( test_detector )
    {
        test_trained_detector( obj_det_filename, test_dir, videofilename );
        return 0;
    }

    if( pos_dir.empty() || neg_dir.empty() )
    {
        parser.printMessage();
        cout << "Wrong number of parameters.\n\n"
             << "Example command line:\n" << argv[0] << " -dw=64 -dh=128 -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n"
             << "\nExample command line for testing trained detector:\n" << argv[0] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos" << endl;
        return 1;
    }

    vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;
    vector< int > labels;

    clog << "Positive images are being loaded..." ;
    load_images( pos_dir, pos_lst, visualization );
    if ( pos_lst.size() > 0 )
    {
        clog << "...[done] " << pos_lst.size() << " files." << endl;
    }
    else
    {
        clog << "no image in " << pos_dir <<endl;
        return 1;
    }

    Size pos_image_size = pos_lst[0].size();

    if ( detector_width && detector_height )
    {
        pos_image_size = Size( detector_width, detector_height );
    }
    else
    {
        // Without an explicit detector size every positive image must have the
        // same size; the window is that size rounded down to a multiple of 8.
        for ( size_t i = 0; i < pos_lst.size(); ++i )
        {
            if( pos_lst[i].size() != pos_image_size )
            {
                cout << "All positive images should be same size!" << endl;
                return 1;
            }
        }
        pos_image_size = pos_image_size / 8 * 8;
    }

    clog << "Negative images are being loaded...";
    load_images( neg_dir, full_neg_lst, visualization );
    clog << "...[done] " << full_neg_lst.size() << " files." << endl;

    clog << "Negative images are being processed...";
    sample_neg( full_neg_lst, neg_lst, pos_image_size );
    clog << "...[done] " << neg_lst.size() << " files." << endl;

    clog << "Histogram of Gradients are being calculated for positive images...";
    computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
    size_t positive_count = gradient_lst.size();
    // computeHOGs() skips images smaller than the window, so an oversized
    // -dw/-dh can leave no positive sample at all; training would be meaningless.
    if ( positive_count == 0 )
    {
        clog << "no positive image is at least as large as the detector window ("
             << pos_image_size.width << "x" << pos_image_size.height << ")" << endl;
        return 1;
    }
    labels.assign( positive_count, +1 );
    clog << "...[done] ( positive images count : " << positive_count << " )" << endl;

    clog << "Histogram of Gradients are being calculated for negative images...";
    computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
    size_t negative_count = gradient_lst.size() - positive_count;
    labels.insert( labels.end(), negative_count, -1 );
    CV_Assert( positive_count < labels.size() ); // at least one negative sample is required
    clog << "...[done] ( negative images count : " << negative_count << " )" << endl;

    Mat train_data;
    convert_to_ml( gradient_lst, train_data );

    clog << "Training SVM...";
    Ptr< SVM > svm = SVM::create();
    /* Default values to train SVM */
    svm->setCoef0( 0.0 );
    svm->setDegree( 3 );
    svm->setTermCriteria( TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 1e-3 ) );
    svm->setGamma( 0 );
    svm->setKernel( SVM::LINEAR );
    svm->setNu( 0.5 );
    svm->setP( 0.1 ); // for EPSILON_SVR, epsilon in loss function?
    svm->setC( 0.01 ); // From paper, soft classifier
    svm->setType( SVM::EPS_SVR ); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task
    svm->train( train_data, ROW_SAMPLE, labels );
    clog << "...[done]" << endl;

    if ( train_twice )
    {
        clog << "Testing trained detector on negative images. This might take a few minutes...";
        HOGDescriptor my_hog;
        my_hog.winSize = pos_image_size;

        // Set the trained svm to my_hog
        my_hog.setSVMDetector( get_svm_detector( svm ) );

        vector< Rect > detections;
        vector< double > foundWeights;

        for ( size_t i = 0; i < full_neg_lst.size(); i++ )
        {
            if ( full_neg_lst[i].cols >= pos_image_size.width && full_neg_lst[i].rows >= pos_image_size.height )
                my_hog.detectMultiScale( full_neg_lst[i], detections, foundWeights );
            else
                detections.clear();

            // Every detection on a negative image is a false positive: add it as
            // an extra negative sample, resized to the detector window.
            // NOTE(review): detections come from rescaled, rounded windows and are
            // presumably not guaranteed to lie fully inside the image; an
            // out-of-range ROI would throw, so clip first and skip degenerate results.
            const Rect image_bounds( 0, 0, full_neg_lst[i].cols, full_neg_lst[i].rows );
            for ( size_t j = 0; j < detections.size(); j++ )
            {
                const Rect clipped = detections[j] & image_bounds;
                if ( clipped.width <= 0 || clipped.height <= 0 )
                    continue;

                Mat detection = full_neg_lst[i]( clipped ).clone();
                resize( detection, detection, pos_image_size, 0, 0, INTER_LINEAR_EXACT);
                neg_lst.push_back( detection );
            }

            if ( visualization )
            {
                for ( size_t j = 0; j < detections.size(); j++ )
                {
                    rectangle( full_neg_lst[i], detections[j], Scalar( 0, 255, 0 ), 2 );
                }
                imshow( "testing trained detector on negative images", full_neg_lst[i] );
                waitKey( 5 );
            }
        }
        clog << "...[done]" << endl;

        // Rebuild the whole training set with the enlarged negative list.
        gradient_lst.clear();
        clog << "Histogram of Gradients are being calculated for positive images...";
        computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );
        positive_count = gradient_lst.size();
        clog << "...[done] ( positive count : " << positive_count << " )" << endl;

        clog << "Histogram of Gradients are being calculated for negative images...";
        computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );
        negative_count = gradient_lst.size() - positive_count;
        clog << "...[done] ( negative count : " << negative_count << " )" << endl;

        labels.clear();
        labels.assign(positive_count, +1);
        labels.insert(labels.end(), negative_count, -1);

        clog << "Training SVM again...";
        convert_to_ml( gradient_lst, train_data );
        svm->train( train_data, ROW_SAMPLE, labels );
        clog << "...[done]" << endl;
    }

    // Store the final detector and try it on the test data.
    HOGDescriptor hog;
    hog.winSize = pos_image_size;
    hog.setSVMDetector( get_svm_detector( svm ) );
    hog.save( obj_det_filename );

    test_trained_detector( obj_det_filename, test_dir, videofilename );

    return 0;
}

cv::CommandLineParser
Designed for command line parsing.
Definition utility.hpp:820

cv::Mat
n-dimensional dense array class
Definition mat.hpp:812

cv::Mat::clone
CV_NODISCARD_STD Mat clone() const
Creates a full copy of the array and the underlying data.

cv::Mat::row
Mat row(int y) const
Creates a matrix header for the specified matrix row.

cv::Mat::ptr
uchar * ptr(int i0=0)
Returns a pointer to the specified matrix row.

cv::Mat::at
_Tp & at(int i0=0)
Returns a reference to the specified array element.

cv::Mat::cols
int cols
Definition mat.hpp:2138

cv::Mat::total
size_t total() const
Returns the total number of array elements.

cv::Mat::empty
bool empty() const
Returns true if the array has no elements.

cv::Mat::rows
int rows
the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
Definition mat.hpp:2138

cv::Mat::type
int type() const
Returns the type of a matrix element.

cv::Mat::push_back
void push_back(const _Tp &elem)
Adds elements to the bottom of the matrix.

cv::Rect_
Template class for 2D rectangles.
Definition types.hpp:444

cv::Rect_::x
_Tp x
x coordinate of the top-left corner
Definition types.hpp:480

cv::Rect_::y
_Tp y
y coordinate of the top-left corner
Definition types.hpp:481

cv::Rect_::width
_Tp width
width of the rectangle
Definition types.hpp:482

cv::Rect_::height
_Tp height
height of the rectangle
Definition types.hpp:483

cv::Scalar_< double >

cv::Size_
Template class for specifying the size of an image or rectangle.
Definition types.hpp:335

cv::Size_::height
_Tp height
the height
Definition types.hpp:363

cv::Size_::width
_Tp width
the width
Definition types.hpp:362

cv::TermCriteria
The class defining termination criteria for iterative algorithms.
Definition types.hpp:886

cv::VideoCapture
Class for video capturing from video files, image sequences or cameras.
Definition videoio.hpp:731

cv::VideoCapture::open
virtual bool open(const String &filename, int apiPreference=CAP_ANY)
Opens a video file or a capturing device or an IP video stream for video capturing.

cv::VideoCapture::isOpened
virtual bool isOpened() const
Returns true if video capturing has been initialized already.

cv::flip
void flip(InputArray src, OutputArray dst, int flipCode)
Flips a 2D array around vertical, horizontal, or both axes.

cv::String
std::string String
Definition cvstd.hpp:151

cv::Ptr
std::shared_ptr< _Tp > Ptr
Definition cvstd_wrapper.hpp:23

CV_64F
#define CV_64F
Definition interface.h:79

CV_32FC1
#define CV_32FC1
Definition interface.h:118

CV_32F
#define CV_32F
Definition interface.h:78

CV_Assert
#define CV_Assert(expr)
Checks a condition at runtime and throws exception if it fails.
Definition base.hpp:342

cv::glob
void glob(String pattern, std::vector< String > &result, bool recursive=false)

cv::imshow
void imshow(const String &winname, InputArray mat)
Displays an image in the specified window.

cv::waitKey
int waitKey(int delay=0)
Waits for a pressed key.

cv::namedWindow
void namedWindow(const String &winname, int flags=WINDOW_AUTOSIZE)
Creates a window.

cv::imread
CV_EXPORTS_W Mat imread(const String &filename, int flags=IMREAD_COLOR)
Loads an image from a file.

cv::cvtColor
void cvtColor(InputArray src, OutputArray dst, int code, int dstCn=0)
Converts an image from one color space to another.

cv::rectangle
void rectangle(InputOutputArray img, Point pt1, Point pt2, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
Draws a simple, thick, or filled up-right rectangle.

cv::resize
void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation=INTER_LINEAR)
Resizes an image.

highgui.hpp

main
int main(int argc, char *argv[])
Definition highgui_qt.cpp:3

imgproc.hpp

ml.hpp

cv::gapi::streaming::size
GOpaque< Size > size(const GMat &src)
Gets dimensions from Mat.

cv::ml
Definition ml.hpp:75

cv
"black box" representation of the file storage associated with a file on disk.
Definition core.hpp:102

std
STL namespace.

objdetect.hpp

cv::HOGDescriptor
Implementation of HOG (Histogram of Oriented Gradients) descriptor and object detector.
Definition objdetect.hpp:403

cv::HOGDescriptor::compute
virtual void compute(InputArray img, std::vector< float > &descriptors, Size winStride=Size(), Size padding=Size(), const std::vector< Point > &locations=std::vector< Point >()) const
Computes HOG descriptors of given image.

cv::HOGDescriptor::save
virtual void save(const String &filename, const String &objname=String()) const
saves HOGDescriptor parameters and coefficients for the linear SVM classifier to a file

cv::HOGDescriptor::setSVMDetector
virtual void setSVMDetector(InputArray svmdetector)
Sets coefficients for the linear SVM classifier.

cv::HOGDescriptor::winSize
Size winSize
Detection window size. Align to block size and block stride. Default value is Size(64,128).
Definition objdetect.hpp:621

cv::HOGDescriptor::load
virtual bool load(const String &filename, const String &objname=String())
loads HOGDescriptor parameters and coefficients for the linear SVM classifier from a file

cv::HOGDescriptor::detectMultiScale
virtual void detectMultiScale(InputArray img, std::vector< Rect > &foundLocations, std::vector< double > &foundWeights, double hitThreshold=0, Size winStride=Size(), Size padding=Size(), double scale=1.05, double groupThreshold=2.0, bool useMeanshiftGrouping=false) const
Detects objects of different sizes in the input image. The detected objects are returned as a list of rectangles.

videoio.hpp