d0/df8/samples_2cpp_2train_HOG_8cpp-example.html

#include "opencv2/imgproc.hpp"

#include "opencv2/highgui.hpp"

#include "opencv2/ml.hpp"

#include "opencv2/objdetect.hpp"

#include "opencv2/videoio.hpp"

#include <iostream>

#include <time.h>


using namespace cv;

using namespace cv::ml;

using namespace std;


// Forward declarations of the helper functions defined later in this file,
// so that they can be used before their definitions.

// Builds the coefficient vector passed to HOGDescriptor::setSVMDetector from a trained SVM.
vector< float > get_svm_detector( const Ptr< SVM >& svm );

// Packs a list of row/column vector samples into one CV_32FC1 matrix (one sample per row).
void convert_to_ml( const std::vector< Mat > & train_samples, Mat& trainData );

// Loads every readable image matched by glob(dirname) into img_lst.
void load_images( const String & dirname, vector< Mat > & img_lst, bool showImages );

// Crops one randomly placed window of the given size out of each sufficiently large image.
void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size );

// Computes HOG descriptors for the centred window of each image (optionally also for its mirror image).
void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip );

// Loads a saved detector and runs it on a video source or on the images of a directory.
void test_trained_detector( String obj_det_filename, String test_dir, String videofilename );


/**
 * Builds the coefficient vector handed to HOGDescriptor::setSVMDetector from
 * a trained SVM.
 *
 * The returned vector contains the elements of the single support vector
 * followed by -rho, where rho is the value returned by
 * SVM::getDecisionFunction for decision function 0.
 *
 * The function asserts that the model has exactly one support vector, one
 * alpha coefficient equal to 1, and support vectors stored as CV_32F.
 */
vector< float > get_svm_detector( const Ptr< SVM >& svm )
{
    // Fetch the support vectors.
    Mat support_vectors = svm->getSupportVectors();
    const int support_count = support_vectors.rows;

    // Fetch the decision function.
    Mat alpha, sv_indices;
    const double rho = svm->getDecisionFunction( 0, alpha, sv_indices );

    CV_Assert( alpha.total() == 1 && sv_indices.total() == 1 && support_count == 1 );
    CV_Assert( (alpha.type() == CV_64F && alpha.at<double>(0) == 1.) ||
               (alpha.type() == CV_32F && alpha.at<float>(0) == 1.f) );
    CV_Assert( support_vectors.type() == CV_32F );

    // Copy the first (and only) row, then append the negated rho as the last element.
    const float* weights = support_vectors.ptr< float >();
    vector< float > hog_detector( weights, weights + support_vectors.cols );
    hog_detector.push_back( (float)-rho );
    return hog_detector;
}


/*

* 將訓練/測試集轉換為OpenCV機器學習演算法可用的格式。

* TrainData是一個大小為(#樣本數 x 每樣本最大(列數,行數))的矩陣，格式為32FC1。

* 如果需要，將對樣本進行轉置。

*/

void convert_to_ml( const vector< Mat > & train_samples, Mat& trainData )

{

    //--Convert data

    const int rows = (int)train_samples.size();

    const int cols = (int)std::max( train_samples[0].cols, train_samples[0].rows );

    Mat tmp( 1, cols, CV_32FC1 );

trainData = Mat( rows, cols, CV_32FC1 );


    for( size_t i = 0 ; i < train_samples.size(); ++i )

    {

        CV_Assert( train_samples[i].cols == 1 || train_samples[i].rows == 1 );


        if( train_samples[i].cols == 1 )

        {

transpose( train_samples[i], tmp );

tmp.copyTo( trainData.row( (int)i ) );

        }

        else if( train_samples[i].rows == 1 )

        {

train_samples[i].copyTo( trainData.row( (int)i ) );

        }

    }

}


void load_images( const String & dirname, vector< Mat > & img_lst, bool showImages = false )

{

vector< String > files;

    glob( dirname, files );


    for ( size_t i = 0; i < files.size(); ++i )

    {

        Mat img = imread( files[i] ); // 載入影像

        if ( img.empty() )

        {

cout << files[i] << " 是無效的！" << endl; // 無效影像，跳過。

            continue;

        }


        if ( showImages )

        {

            imshow( "image", img );

            waitKey( 1 );

        }

img_lst.push_back( img );

    }

}


void sample_neg( const vector< Mat > & full_neg_lst, vector< Mat > & neg_lst, const Size & size )

{

    Rect box;

box.width = size.width;

box.height = size.height;


srand( (unsigned int)time( NULL ) );


    for ( size_t i = 0; i < full_neg_lst.size(); i++ )

        if ( full_neg_lst[i].cols > box.width && full_neg_lst[i].rows > box.height )

        {

box.x = rand() % ( full_neg_lst[i].cols - box.width );

box.y = rand() % ( full_neg_lst[i].rows - box.height );

            Mat roi = full_neg_lst[i]( box );

neg_lst.push_back( roi.clone() );

        }

}


void computeHOGs( const Size wsize, const vector< Mat > & img_lst, vector< Mat > & gradient_lst, bool use_flip )

{

    HOGDescriptor hog;

hog.winSize = wsize;

    Mat gray;

vector< float > descriptors;


    for( size_t i = 0 ; i < img_lst.size(); i++ )

    {

        if ( img_lst[i].cols >= wsize.width && img_lst[i].rows >= wsize.height )

        {

            Rect r = Rect(( img_lst[i].cols - wsize.width ) / 2,

( img_lst[i].rows - wsize.height ) / 2,

wsize.width,

wsize.height);

            cvtColor( img_lst[i](r), gray, COLOR_BGR2GRAY );

hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ) );

gradient_lst.push_back( Mat( descriptors ).clone() );

            if ( use_flip )

            {

                flip( gray, gray, 1 );

hog.compute( gray, descriptors, Size( 8, 8 ), Size( 0, 0 ) );

gradient_lst.push_back( Mat( descriptors ).clone() );

            }

        }

    }

}


/**
 * Loads a detector saved by HOGDescriptor::save and runs it interactively.
 *
 * @param obj_det_filename  file the detector is loaded from; also used
 *                          (with a prefix) as the window title.
 * @param test_dir          glob pattern / directory of test images, used
 *                          when no video source could be opened.
 * @param videofilename     video file name, or a single digit selecting a
 *                          capture device; empty to use test_dir.
 *
 * Every frame/image is shown with the detections drawn on it. The function
 * returns when the input is exhausted, when ESC (key code 27) is pressed, or
 * immediately (after printing an error) when the detector cannot be loaded.
 */
void test_trained_detector( String obj_det_filename, String test_dir, String videofilename )
{
    cout << "正在測試訓練好的檢測器..." << endl;
    HOGDescriptor hog;
    // load() reports failure through its return value; without a detector
    // there is nothing to test, so stop instead of continuing silently.
    if ( !hog.load( obj_det_filename ) )
    {
        cerr << "無法載入檢測器檔案：" << obj_det_filename << endl;
        return;
    }

    vector< String > files;
    glob( test_dir, files );

    int delay = 0;
    VideoCapture cap;

    if ( videofilename != "" )
    {
        // A single digit is interpreted as a capture device index.
        // isdigit() requires a value representable as unsigned char, so cast
        // to avoid undefined behaviour on negative char values.
        if ( videofilename.size() == 1 && isdigit( static_cast< unsigned char >( videofilename[0] ) ) )
            cap.open( videofilename[0] - '0' );
        else
            cap.open( videofilename );
    }

    obj_det_filename = "測試 " + obj_det_filename;
    namedWindow( obj_det_filename, WINDOW_NORMAL );

    for( size_t i=0;; i++ )
    {
        Mat img;

        if ( cap.isOpened() )
        {
            cap >> img;
            delay = 1; // keep video playing instead of waiting for a key press
        }
        else if( i < files.size() )
        {
            img = imread( files[i] );
        }

        // End of video, end of file list, or unreadable image.
        if ( img.empty() )
        {
            return;
        }

        vector< Rect > detections;
        vector< double > foundWeights;

        hog.detectMultiScale( img, detections, foundWeights );
        for ( size_t j = 0; j < detections.size(); j++ )
        {
            // Green intensity grows with the square of the detection weight.
            Scalar color = Scalar( 0, foundWeights[j] * foundWeights[j] * 200, 0 );
            rectangle( img, detections[j], color, img.cols / 400 + 1 );
        }

        imshow( obj_det_filename, img );

        if( waitKey( delay ) == 27 )
        {
            return;
        }
    }
}


// Program entry point.
//
// Parses the command line and then either
//  - only tests an already saved detector (option "t"), or
//  - loads positive and negative images, computes their HOG descriptors,
//    trains an SVM on them, optionally (option "d") runs the detector on the
//    negative images and retrains with the resulting detections added as
//    extra negatives, saves the detector to the file given by "fn" and
//    finally tests it.
//
// Returns 0 on success and 1 when no positive image could be loaded; several
// error paths terminate through exit() instead.
int main( int argc, char** argv )

{

    const char* keys =

    {

        "{help h| | 顯示幫助資訊}"

        "{pd | | 包含正樣本影像的目錄路徑}"

        "{nd | | 包含負樣本影像的目錄路徑}"

        "{td | | 包含測試影像的目錄路徑}"

        "{tv | | 測試影片檔名}"

        "{dw | | 檢測器寬度}"

        "{dh | | 檢測器高度}"

        "{f |false| 指示程式是否生成並使用映象樣本}"

        "{d |false| 訓練兩次}"

        "{t |false| 測試已訓練的檢測器}"

        "{v |false| 視覺化訓練步驟}"

        "{fn |my_detector.yml| 訓練好的SVM檔名}"

    };


    CommandLineParser parser( argc, argv, keys );


    if ( parser.has( "help" ) )

    {

parser.printMessage();

exit( 0 );

    }


    String pos_dir = parser.get< String >( "pd" );

    String neg_dir = parser.get< String >( "nd" );

    String test_dir = parser.get< String >( "td" );

    String obj_det_filename = parser.get< String >( "fn" );

    String videofilename = parser.get< String >( "tv" );

    int detector_width = parser.get< int >( "dw" );

    int detector_height = parser.get< int >( "dh" );

    bool test_detector = parser.get< bool >( "t" );

    bool train_twice = parser.get< bool >( "d" );

    bool visualization = parser.get< bool >( "v" );

    bool flip_samples = parser.get< bool >( "f" );


    // Test-only mode: run the saved detector and quit without training.
    if ( test_detector )

    {

test_trained_detector( obj_det_filename, test_dir, videofilename );

exit( 0 );

    }


    // Training needs both a positive and a negative image directory.
    if( pos_dir.empty() || neg_dir.empty() )

    {

parser.printMessage();

cout << "引數數量錯誤。\n\n"

<< "命令列示例：\n" << argv[0] << " -dw=64 -dh=128 -pd=/INRIAPerson/96X160H96/Train/pos -nd=/INRIAPerson/neg -td=/INRIAPerson/Test/pos -fn=HOGpedestrian64x128.xml -d\n"

<< "\n測試訓練好的檢測器的命令列示例：\n" << argv[0] << " -t -fn=HOGpedestrian64x128.xml -td=/INRIAPerson/Test/pos";

exit( 1 );

    }


vector< Mat > pos_lst, full_neg_lst, neg_lst, gradient_lst;

vector< int > labels;


clog << "正在載入正樣本影像..." ;

load_images( pos_dir, pos_lst, visualization );

    if ( pos_lst.size() > 0 )

    {

clog << "...[完成] " << pos_lst.size() << " 個檔案。" << endl;

    }

    else

    {

clog << "沒有影像在 " << pos_dir <<endl;

        return 1;

    }


    // Default detection window: the size of the first positive image.
    Size pos_image_size = pos_lst[0].size();


    // An explicit window size (both "dw" and "dh" non-zero) overrides it.
    if ( detector_width && detector_height )

    {

pos_image_size = Size( detector_width, detector_height );

    }

    else

    {

        // Without an explicit size, all positive images must share one size.
        for ( size_t i = 0; i < pos_lst.size(); ++i )

        {

            if( pos_lst[i].size() != pos_image_size )

            {

cout << "所有正樣本影像的尺寸應相同！" << endl;

exit( 1 );

            }

        }

// Divide by 8 and multiply by 8 again (rounds each dimension down to a
// multiple of 8 when the size components are integers).
pos_image_size = pos_image_size / 8 * 8;

    }


clog << "正在載入負樣本影像...";

load_images( neg_dir, full_neg_lst, visualization );

clog << "...[完成] " << full_neg_lst.size() << " 個檔案。" << endl;


clog << "正在處理負樣本影像...";

sample_neg( full_neg_lst, neg_lst, pos_image_size );

clog << "...[完成] " << neg_lst.size() << " 個檔案。" << endl;


clog << "正在為正樣本影像計算梯度直方圖...";

computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );

    // Descriptors of positive samples come first in gradient_lst; label them +1.
    size_t positive_count = gradient_lst.size();

labels.assign( positive_count, +1 );

clog << "...[完成] ( 正樣本影像計數： " << positive_count << " )" << endl;


clog << "正在為負樣本影像計算梯度直方圖...";

computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );

    // Everything appended after the positives is a negative sample; label it -1.
    size_t negative_count = gradient_lst.size() - positive_count;

labels.insert( labels.end(), negative_count, -1 );

    // Fails when no negative descriptor was produced.
    CV_Assert( positive_count < labels.size() );

clog << "...[完成] ( 負樣本影像計數： " << negative_count << " )" << endl;


    Mat train_data;

convert_to_ml( gradient_lst, train_data );


clog << "正在訓練SVM...";

    Ptr< SVM > svm = SVM::create();

    /* Default values to train SVM */

svm->setCoef0( 0.0 );

svm->setDegree( 3 );

svm->setTermCriteria( TermCriteria(TermCriteria::MAX_ITER + TermCriteria::EPS, 1000, 1e-3 ) );

svm->setGamma( 0 );

svm->setKernel( SVM::LINEAR );

svm->setNu( 0.5 );

svm->setP( 0.1 ); // for EPSILON_SVR, epsilon in loss function?

svm->setC( 0.01 ); // From paper, soft classifier

svm->setType( SVM::EPS_SVR ); // C_SVC; // EPSILON_SVR; // may be also NU_SVR; // do regression task

svm->train( train_data, ROW_SAMPLE, labels );

clog << "...[完成]" << endl;


    // Optional second pass: every detection on a negative image is resized to
    // the window size and added to neg_lst, then the SVM is trained again.
    if ( train_twice )

    {

clog << "正在負樣本影像上測試訓練好的檢測器。這可能需要幾分鐘...";

        HOGDescriptor my_hog;

my_hog.winSize = pos_image_size;


        // Set the trained svm to my_hog

my_hog.setSVMDetector( get_svm_detector( svm ) );


vector< Rect > detections;

vector< double > foundWeights;


        for ( size_t i = 0; i < full_neg_lst.size(); i++ )

        {

            // Only run the detector on images at least as large as the window.
            if ( full_neg_lst[i].cols >= pos_image_size.width && full_neg_lst[i].rows >= pos_image_size.height )

my_hog.detectMultiScale( full_neg_lst[i], detections, foundWeights );

            else

detections.clear();


            // Add each detected region, resized to the window size, to the negatives.
            for ( size_t j = 0; j < detections.size(); j++ )

            {

                Mat detection = full_neg_lst[i]( detections[j] ).clone();

                resize( detection, detection, pos_image_size, 0, 0, INTER_LINEAR_EXACT);

neg_lst.push_back( detection );

            }


            if ( visualization )

            {

                // Note: the rectangles are drawn directly into full_neg_lst[i].
                for ( size_t j = 0; j < detections.size(); j++ )

                {

                    rectangle( full_neg_lst[i], detections[j], Scalar( 0, 255, 0 ), 2 );

                }

                imshow( "在負樣本影像上測試訓練好的檢測器", full_neg_lst[i] );

                waitKey( 5 );

            }

        }

clog << "...[完成]" << endl;


// Recompute all descriptors and labels from scratch with the enlarged negative set.
gradient_lst.clear();

clog << "正在為正樣本影像計算梯度直方圖...";

computeHOGs( pos_image_size, pos_lst, gradient_lst, flip_samples );

positive_count = gradient_lst.size();

clog << "...[完成] ( 正樣本計數： " << positive_count << " )" << endl;


clog << "正在為負樣本影像計算梯度直方圖...";

computeHOGs( pos_image_size, neg_lst, gradient_lst, flip_samples );

negative_count = gradient_lst.size() - positive_count;

clog << "...[完成] ( 負樣本計數： " << negative_count << " )" << endl;


labels.clear();

labels.assign(positive_count, +1);

labels.insert(labels.end(), negative_count, -1);


clog << "再次訓練SVM...";

convert_to_ml( gradient_lst, train_data );

svm->train( train_data, ROW_SAMPLE, labels );

clog << "...[完成]" << endl;

    }


    // Store the trained coefficients in a HOGDescriptor and save it to disk.
    HOGDescriptor hog;

hog.winSize = pos_image_size;

hog.setSVMDetector( get_svm_detector( svm ) );

hog.save( obj_det_filename );


// Finally run the freshly saved detector on the test data.
test_trained_detector( obj_det_filename, test_dir, videofilename );


    return 0;

}

bool empty() const
如果陣列沒有元素，則返回 true。
int64_t int64

cv::Mat
n 維密集陣列類
定義 mat.hpp:830

cv::Mat::clone
CV_NODISCARD_STD Mat clone() const
建立陣列及其底層資料的完整副本。

cv::Mat::row
Mat row(int y) const
為指定的矩陣行建立矩陣頭。

cv::Mat::ptr
uchar * ptr(int i0=0)
返回指向指定矩陣行的指標。

cv::Mat::at
_Tp & at(int i0=0)
返回指定陣列元素的引用。

cv::Mat::cols
int cols
定義 mat.hpp:2165

cv::Mat::total
size_t total() const
返回陣列元素的總數。

定義 interface.h:61
cv::getTickFrequency
double getTickFrequency()

cv::Mat::rows
int rows
矩陣的行數和列數，或當矩陣維度超過2時為 (-1, -1)
定義 mat.hpp:2165

cv::Mat::type
int type() const
返回矩陣元素的型別。

cv::Rect_
2D 矩形的模板類。
定義 types.hpp:444

cv::Rect_::x
_Tp x
左上角的 x 座標
定義 types.hpp:487

cv::Rect_::y
_Tp y
左上角的 y 座標
定義 types.hpp:488

cv::Rect_::width
_Tp width
矩形的寬度
定義 types.hpp:489

cv::Rect_::height
_Tp height
矩形的高度
定義 types.hpp:490

cv::Scalar_< double >

cv::Size_
用於指定影像或矩形大小的模板類。
定義 types.hpp:335

cv::Size_::height
_Tp height
高度
定義 types.hpp:363

cv::Size_::width
_Tp width
寬度
定義 types.hpp:362

cv::TermCriteria
定義迭代演算法終止標準的類。
定義 types.hpp:893

cv::VideoCapture
用於從影片檔案、影像序列或攝像機捕獲影片的類。
定義 videoio.hpp:772

cv::VideoCapture::open
virtual bool open(const String &filename, int apiPreference=CAP_ANY)
開啟影片檔案、捕獲裝置或IP影片流進行影片捕獲。

cv::VideoCapture::isOpened
virtual bool isOpened() const
如果影片捕獲已初始化，則返回 true。

cv::flip
void flip(InputArray src, OutputArray dst, int flipCode)
沿垂直、水平或兩個軸翻轉 2D 陣列。

cv::String
std::string String
定義 cvstd.hpp:151

cv::Ptr
std::shared_ptr< _Tp > Ptr
定義 cvstd_wrapper.hpp:23

CV_64F
#define CV_64F
定義 interface.h:79

CV_32FC1
#define CV_32FC1
定義 interface.h:118

CV_32F
#define CV_32F
定義 interface.h:78

CV_Assert
#define CV_Assert(expr)
在執行時檢查條件，如果失敗則丟擲異常。
定義 base.hpp:423

cv::glob
void glob(String pattern, std::vector< String > &result, bool recursive=false)
在目錄中搜索與指定模式匹配的檔案。

cv::imshow
void imshow(const String &winname, InputArray mat)
在指定視窗中顯示影像。

cv::waitKey
int waitKey(int delay=0)
等待按鍵按下。

cv::namedWindow
void namedWindow(const String &winname, int flags=WINDOW_AUTOSIZE)
建立視窗。

cv::imread
CV_EXPORTS_W Mat imread(const String &filename, int flags=IMREAD_COLOR_BGR)
從檔案載入影像。

cv::cvtColor
void cvtColor(InputArray src, OutputArray dst, int code, int dstCn=0, AlgorithmHint hint=cv::ALGO_HINT_DEFAULT)
將影像從一個顏色空間轉換為另一個顏色空間。

cv::rectangle
void rectangle(InputOutputArray img, Point pt1, Point pt2, const Scalar &color, int thickness=1, int lineType=LINE_8, int shift=0)
繪製一個簡單、粗或填充的矩形。

cv::resize
void resize(InputArray src, OutputArray dst, Size dsize, double fx=0, double fy=0, int interpolation=INTER_LINEAR)
調整影像大小。

highgui.hpp

main
int main(int argc, char *argv[])
定義 highgui_qt.cpp:3

imgproc.hpp

ml.hpp

cv::gapi::streaming::size
GOpaque< Size > size(const GMat &src)
從 Mat 獲取維度。

cv::ml
定義 ml.hpp:75

cv
定義 core.hpp:107

std
STL 名稱空間。

objdetect.hpp

cv::HOGDescriptor
HOG（方向梯度直方圖）描述符和物件檢測器的實現。
定義 objdetect.hpp:403

cv::HOGDescriptor::compute
virtual void compute(InputArray img, std::vector< float > &descriptors, Size winStride=Size(), Size padding=Size(), const std::vector< Point > &locations=std::vector< Point >()) const
計算給定影像的 HOG 描述符。

cv::HOGDescriptor::save
virtual void save(const String &filename, const String &objname=String()) const
將HOGDescriptor引數和線性SVM分類器的係數儲存到檔案

cv::HOGDescriptor::setSVMDetector
virtual void setSVMDetector(InputArray svmdetector)
設定線性 SVM 分類器的係數。

cv::HOGDescriptor::winSize
Size winSize
檢測視窗大小。對齊到塊大小和塊步長。預設值為 Size(64,128)。
定義 objdetect.hpp:621

cv::HOGDescriptor::load
virtual bool load(const String &filename, const String &objname=String())
從檔案載入HOGDescriptor引數和線性SVM分類器的係數

cv::HOGDescriptor::detectMultiScale
virtual void detectMultiScale(InputArray img, std::vector< Rect > &foundLocations, std::vector< double > &foundWeights, double hitThreshold=0, Size winStride=Size(), Size padding=Size(), double scale=1.05, double groupThreshold=2.0, bool useMeanshiftGrouping=false) const
在輸入影像中檢測不同大小的物件。檢測到的物件以矩形列表形式返回。

videoio.hpp