OpenCV HOG PeopleDetector: From Training to Detection

[Original article: http://blog.youtueye.com/work/opencv-hog-peopledetector-trainning.html]

OpenCV 2.0 and later ship with a pedestrian-detection sample. Taking OpenCV 2.2 as the example, the demo program is located at /samples/cpp/peopledetect.cpp under the OpenCV installation directory.

Let's first look at the sample code:

#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
 
#include <stdio.h>
#include <string.h>
#include <ctype.h>
 
using namespace cv;
using namespace std;
 
void help()
{
     printf (
             "\nDemonstrate the use of the HoG descriptor using\n"
             "  HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
             "Usage:\n"
             "./peopledetect (<image_filename> | <image_list>.txt)\n\n" );
}
 
int main( int argc, char ** argv)
{
     Mat img;
     FILE * f = 0;
     char _filename[1024];
 
     if ( argc == 1 )
     {
         printf ( "Usage: peopledetect (<image_filename> | <image_list>.txt)\n" );
         return 0;
     }
     img = imread(argv[1]);
 
     if ( img.data )
     {
         strcpy (_filename, argv[1]);
     }
     else
     {
         f = fopen (argv[1], "rt" );
         if (!f)
         {
             fprintf ( stderr, "ERROR: the specified file could not be loaded\n" );
             return -1;
         }
     }
 
     HOGDescriptor hog;
     hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
     namedWindow( "people detector" , 1);
 
     for (;;)
     {
         char * filename = _filename;
         if (f)
         {
             if (! fgets (filename, ( int ) sizeof (_filename)-2, f))
                 break ;
             //while(*filename && isspace(*filename))
             //  ++filename;
             if (filename[0] == '#' )
                 continue ;
             int l = strlen (filename);
             while (l > 0 && isspace (filename[l-1]))
                 --l;
             filename[l] = '\0' ;
             img = imread(filename);
         }
         printf ( "%s:\n" , filename);
         if (!img.data)
             continue ;
 
         fflush (stdout);
         vector<Rect> found, found_filtered;
         double t = ( double )getTickCount();
         // run the detector with default parameters. to get a higher hit-rate
         // (and more false alarms, respectively), decrease the hitThreshold and
         // groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
         hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
         t = ( double )getTickCount() - t;
         printf ( "tdetection time = %gms\n" , t*1000./cv::getTickFrequency());
         size_t i, j;
         for ( i = 0; i < found.size(); i++ )
         {
             Rect r = found[i];
             for ( j = 0; j < found.size(); j++ )
                 if ( j != i && (r & found[j]) == r)
                     break ;
             if ( j == found.size() )
                 found_filtered.push_back(r);
         }
         for ( i = 0; i < found_filtered.size(); i++ )
         {
             Rect r = found_filtered[i];
             // the HOG detector returns slightly larger rectangles than the real objects.
             // so we slightly shrink the rectangles to get a nicer output.
             r.x += cvRound(r.width*0.1);
             r.width = cvRound(r.width*0.8);
             r.y += cvRound(r.height*0.07);
             r.height = cvRound(r.height*0.8);
             rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 3);
         }
         imshow( "people detector" , img);
         int c = waitKey(0) & 255;
         if ( c == 'q' || c == 'Q' || !f)
             break ;
     }
     if (f)
         fclose (f);
     return 0;
}
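As the comment in the loop notes, the call to detectMultiScale is where you trade hit rate against false alarms. The following is a minimal, self-contained sketch of the same call with looser settings (the specific parameter values here are illustrative choices, not taken from the OpenCV sample): a denser window stride and a lower grouping threshold keep more candidate windows, at the cost of more false positives.

#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <cstdio>
#include <vector>

using namespace cv;

int main(int argc, char** argv)
{
    if( argc < 2 )
    {
        std::printf("Usage: detect_loose <image>\n");
        return 0;
    }
    Mat img = imread(argv[1]);
    if( !img.data )
        return -1;

    HOGDescriptor hog;
    hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());

    std::vector<Rect> found;
    // Looser than the sample: hitThreshold 0, a 4x4 window stride,
    // and groupThreshold 1 keep more detections (and more false alarms).
    hog.detectMultiScale(img, found, 0, Size(4,4), Size(32,32), 1.05, 1);
    std::printf("%d detections\n", (int)found.size());
    return 0;
}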

You will notice that, unlike the face-detection samples, this code never loads a classifier (model) file from disk; instead it calls

hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());

to load a built-in default model. The HOG implementation lives in /modules/objdetect/hog.cpp; the code starting at line 907 reads:

vector< float > HOGDescriptor::getDefaultPeopleDetector()
{
     static const float detector[] = {
        0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
        0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
        0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
        0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
        -0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
        -0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
        -3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
        0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
        0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
        0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
        0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
        0.09879354f, 0.05362710f, -0.06745391f, -7.01260753e-003f,....

Clearly, the model data used for pedestrian detection is hard-coded as a constant array in the source. But where does this model data come from? And if I want to train on my own samples and then use the resulting model for detection, how do I do that? Let's start with the algorithm that OpenCV's pedestrian detector is based on.
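Before digging into training, a quick sanity check (a minimal sketch, not part of the original post) shows how much data is hidden in that constant array: the default detector holds 3780 SVM weights for the 64x128 window plus one bias term, 3781 values in all, which matches the classifier->length() = 3780 + 1 figure that comes up again in experiment 4 below.

#include "opencv2/objdetect/objdetect.hpp"
#include <cstdio>
#include <vector>

int main()
{
    // The default 64x128 people detector: 3780 SVM weights plus a trailing bias term.
    std::vector<float> d = cv::HOGDescriptor::getDefaultPeopleDetector();
    std::printf("default detector size = %d\n", (int)d.size());  // expected: 3781
    return 0;
}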

The algorithm behind OpenCV's pedestrian detector comes from the CVPR 2005 paper by Navneet Dalal and Bill Triggs, Histograms of Oriented Gradients for Human Detection; introductions are available in both English and Chinese. The authors' institute, INRIA (the French National Institute for Research in Computer Science and Control), released the source code of this algorithm as the INRIA Object Detection and Localization Toolkit.

So, does OpenCV's PeopleDetector use this algorithm as-is? To find out, I first compared the model data of the two: download OLTbinaries.zip, locate the file /HOG/model_4BiSVMLight.alt (binary data), and dump its contents to text with a small program based on the released source learcode.zip (the dumping code appears in experiment 4 below). The dump looks like this:

0.05359386 -0.14721455 -0.05532170 0.05077307 0.11547081 -0.04268804 0.04635834 -0.05468199 0.08232084 0.10424068
-0.02294518 0.01108519 0.01378693 0.11193510 0.01268418 0.08528346 -0.06309239 0.13054632 0.08100729 -0.05209739
-0.04315529 0.09341384 0.11035026 -0.07596217 -0.05517511 -0.04465296 0.02947334 0.04555536 -0.00355954 0.07818956
0.07730991 0.07890716 0.06222893 0.09001381 -0.03574381 0.03414327 0.05677258 -0.04773581 0.03746637 -0.03521175
0.06955440 -0.03849038 0.01052293 0.01736112 0.10867710 0.08748853 0.00329740 0.10907028 0.07913758 0.10393069
0.02091867 0.11594022 0.13182420 0.09879354 0.05362710 -0.06745391 -0.00701261 0.00524702 0.03236255 0.01407916
0.02207983 0.02537322 0.04547948 0.07200756 0.03129894 -0.06274468 0.02107014 0.06035208 0.08636236 0.00453164
0.02193363 0.02309801 0.05568166 -0.02645093 0.04448695 0.02837519 0.08975695 0.04461517 0.08975355 0.07514391
0.02306982 0.10410084 0.06368385 0.05943465 0.00458421 0.05220337 0.06675851 0.08358569 0.06712102 0.06559004
-0.03930481 -0.00915937 -0.05897915 0.02816453 0.05032348 0.06780671 0.03377650 -0.00060942 -0.01795146 -0.03083684
-0.01302475 -0.02972313 0.00788707 -0.03525961 -0.00250398 0.05245084 0.11791293 -0.02167498 0.05299332 0.06640524
0.05190265 -0.00827317 0.03033127 0.05842173 -0.00401050 -0.00625106 0.05862958 -0.02465461 0.05546781 -0.08228195 ....

The values are exactly the same as those in HOGDescriptor::getDefaultPeopleDetector()! So you can train your own samples with the OLT above, substitute the resulting model data for the values in getDefaultPeopleDetector(), and then detect whatever target you trained for.

To verify that this idea actually works, I ran some experiments on Ubuntu 10.04 with g++ 4.4.5. The steps are as follows:

  1. Download the release build of the toolkit: OLTbinaries.zip
  2. Download the sample set: INRIAPerson.tar
  3. Create two folders, test and train, under OLTbinaries/. Copy INRIAperson/Test/neg into test/ and INRIAperson/Train/neg into train/; copy INRIAperson/test_64x128_H96 into test/ and rename it pos, and copy INRIAperson/train_64x128_H96 into train/ and rename it pos.
  4. Write the names of all image files in test/neg and test/pos to neg.list and pos.list respectively, and place both lists under test/; do the same under train/.
    amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./neg > neg.list
    amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./pos > pos.list
  5. The sample data is now ready. Adjust the relevant parameters in OLTbinaries/runall.sh (see the notes below) and run the script; after about an hour a model_4BiSVMLight.alt file will appear under OLTbinaries/HOG/, and your model data is stored in that file. At this point you have successfully trained a model.

Notes:

  • In line 5 of runall.sh, adjust the counts to match your positive and negative samples:
    HardOption=" --poscases 2416 --negcases 1218 "
  • In line 21 of runall.sh, set the directory holding your samples (InDir) and the output directory (OutDir):
    OutDir=./HOG
    InDir=./
    OutFile=$OutDir/record
    CMDLINE=$OutDir/record
  • Training produces roughly 2 GB of temporary files in OutDir (=./HOG), so make sure there is enough disk space; delete them when training finishes, keeping only model_4BiSVMLight.alt.
  • The whole training run goes through four stages, each announced by a line of screen output. The most likely error is a wrong sample file path; if necessary, use absolute image paths in pos.list and neg.list.
    First iteration complete
    Hard examples created
    Doing second learning
    Second iteration complete
  • If you use your own samples, remember to adjust the other parameters as well (details left for further study), such as the positive-sample window size:
    WIDTH=64; export WIDTH
    HEIGHT=128; export HEIGHT

With a model in hand, how do you run detection? You can try the following experiments:

  1. Use the classify_rhog binary in OLTbinaries/bin/: classify_rhog [image to detect in] [text file to save detection results] [model file] -i [image file with detections drawn]
    amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/bin$ ./classify_rhog  person-1.jpg  result.txt model_4BiSVMLight.alt -i result.jpg

    Result:

  2. Use the library in OLTbinaries/lib/:
    /*
      * =============================================
      *
      *       Filename:  lib-detector.cpp
      *
      *    Description:  Code to detect object
      *
      *       Compiler:  gcc
      *
      *         Author:  Amadeu zou
      *         URL:  www.youtueye.com
      *
      * =============================================
      */
    #include <cv.h>
    #include <highgui.h>

    #include <cstring>   // added: strcpy
    #include <string>
    #include <list>      // added: std::list<DetectedRegion>
    #include <iostream>
    #include <algorithm>
    #include <iterator>

    #include <X11/Xlib.h>
    #include <Imlib2.h>

    #include <lear/interface/windetect.h> // change this path as appropriate.
    using namespace std;
     
    std::list<DetectedRegion> detector( char * imgf, WinDetectClassify windetect, LinearClassify* classifier)
    {
         std::list<DetectedRegion> detections;
         // read image
         Imlib_Image image = imlib_load_image(imgf);
         // if the load was successful
         if (image)
         { // set the image we loaded as the current context image to work on
             imlib_context_set_image(image);
         } else {
             //std::cerr << "Unable to read image: " << argv[1] << std::endl;
             return detections;
         }
         int width  = imlib_image_get_width(),
             height = imlib_image_get_height();
     
         typedef unsigned char uchar;
         DATA32* data = imlib_image_get_data_for_reading_only();
         uchar* imagedata = new uchar[3*width*height* sizeof (uchar)];
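         // unpack Imlib2's 32-bit ARGB pixels into a packed 8-bit RGB buffer (alpha is dropped)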
         for ( int j= 0; j< height; ++j)
         for ( int i= 0; i< width; ++i) {
             uchar* pixel = imagedata+(i+j*width)*3;
             int argb = data[i+j*width];
             pixel[0] = static_cast <uchar>((argb & 0x00FF0000)>>16);
             pixel[1] = static_cast <uchar>((argb & 0x0000FF00)>> 8);
             pixel[2] = static_cast <uchar>((argb & 0x000000FF)    );
         }
         imlib_free_image();
     
         // now get detections
         windetect.test(*classifier, detections, imagedata, width, height);
     
         delete [] imagedata;
         return detections;
    }
     
    int main( int argc, char ** argv) {
     
         if (argc != 4) {
             std::cout << "Error" << std::endl;
             return 0;
         }
     
         char modelpath[256];
         strcpy (modelpath,argv[2]);
         string model_file(modelpath) ;
         // initialize the person detector. All default parameters are set for person detector.
         WinDetectClassify windetect; // use default person detector.
         RHOGDenseParam desc;
         LinearClassify* classifier = NULL; // initialize it to 64x128 person detector.
         classifier = new LinearClassify(model_file, 0);
         windetect.init(&desc); // initialize the descriptor computation
     
         std::list<DetectedRegion> detections;
         detections = detector(argv[1], windetect, classifier);
     
         //draw region in image
         IplImage* img = cvLoadImage(argv[1],1);
         for (list<DetectedRegion>::iterator itor=detections.begin();itor!=detections.end();++itor)
         {
              cvRectangle(img,cvPoint(itor->x,itor->y),cvPoint(itor->x+itor->width,itor->y+itor->height),cvScalar(0,0,255),2);
     
         }
         cvSaveImage(argv[3],img);
         cvReleaseImage(&img);
         //print detections
         std::copy(detections.begin(), detections.end(), std::ostream_iterator<DetectedRegion>(std::cout, "\n" ));
     
         return 0;
    }

    Compile:

    g++ `pkg-config --cflags --libs opencv` -O3 -o lib-detector lib-detector.cpp   -I.  -I /usr/include -L. -lcmdline -lcvip -lclassifier -llearutil -lboost_filesystem-gcc  -lblitz -L /usr/lib -lImlib2 -lfreetype -lz -L /usr/X11R6/lib -lX11 -lXext -ldl -lm

    Result:

    amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/lib$ ./lib-detector person-1.jpg model_4BiSVMLight.alt result.jpg
        298    215    145    290    2.2674 1.10256009e-01
         13      9    237    475   3.71704 1.31164089e-01
        234     -7    230    460   3.59693 1.35627717e-01
  3. Use the source classify_rhog.cpp in learcode/app/: see learcode/README for configuration and usage.
  4. Copy the model data into OpenCV's getDefaultPeopleDetector(). That requires recompiling and relinking objdetect, so it is hardly worth doing. The data can be read from the model as follows (a total of classifier->length() = 3780 weights, plus 1 more value, classifier->linearbias):
    // headers added so the snippet compiles on its own; windetect.h comes from the lear toolkit
    #include <cstdio>
    #include <iostream>
    #include <string>
    #include <lear/interface/windetect.h>

    using namespace std;

    int main(int argc, char** argv)
    {
        string model_file = "model_4BiSVMLight.alt";

        LinearClassify* classifier = new LinearClassify(model_file, 0);

        cout << "classifier->length() : " << classifier->length() << endl;

        // dump the linear SVM weights, 10 per line, followed by the bias term
        FILE* fptr = fopen("hog.txt", "w");
        for (int i = 0; i < classifier->length(); i++) {
            fprintf(fptr, "%1.8f ", classifier->linearwt[i]);
            if (0 == (i+1) % 10) fprintf(fptr, "\n");
        }
        fprintf(fptr, "%1.8f ", classifier->linearbias);
        fclose(fptr);

        return 0;
    }
  5. Have OpenCV load the model directly: replace the vector passed to hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector()) at line 49 of peopledetect.cpp with your own model data, loaded from model_4BiSVMLight.alt. The loading code follows LinearClassify::LinearClassify(std::string& modelfile, const int verbose) at line 1175 of learcode/lib/windetect.cpp:
    /*
      * =====================================================================================
      *
      *       Filename:  model-detector.cpp
      *
      *    Description:  Code to detect object
      *
      *       Compiler:  gcc
      *
      *         Author:  Amadeu zou
      *         URL:  www.youtueye.com
      *
      * =====================================================================================
      */
    #include <fstream>
    #include <iostream>
    #include <vector>
     
    #include <stdio.h>
    #include <string.h>
    #include <ctype.h>
     
    #include "opencv2/imgproc/imgproc.hpp"
    #include "opencv2/objdetect/objdetect.hpp"
    #include "opencv2/highgui/highgui.hpp"
     
    using namespace std;
    using namespace cv;
     
    vector< float > load_lear_model( const char * model_file)
    {
          vector< float >  detector;
          FILE *modelfl;
          if ((modelfl = fopen (model_file, "rb" )) == NULL)
          {
              cout<< "Unable to open the modelfile" <<endl;
              return detector;
          }
     
          char version_buffer[10];
          if (! fread (&version_buffer, sizeof ( char ),10,modelfl))
          {
              cout<< "Unable to read version" <<endl;
              return

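Instead of re-implementing the binary SVMlight reader, a simpler route is to skip the binary format entirely: read the 3780 weights plus the bias from the text dump and hand the resulting vector<float> to HOGDescriptor::setSVMDetector, so no OpenCV rebuild is needed. The following is a minimal sketch of that idea, under the assumption that hog.txt from experiment 4 is available; the file and output names are otherwise illustrative.

// Sketch: load a detector dumped to text (experiment 4 above) and use it with OpenCV's HOGDescriptor.
// Assumes hog.txt contains the 3780 weights followed by the bias, whitespace-separated.
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"

#include <fstream>
#include <iostream>
#include <vector>

using namespace cv;
using namespace std;

int main(int argc, char** argv)
{
    if( argc < 2 )
    {
        cout << "Usage: model-peopledetect <image> [hog.txt]" << endl;
        return 0;
    }
    const char* model_txt = argc > 2 ? argv[2] : "hog.txt";

    // read every float in the file: 3780 weights + 1 bias = 3781 values
    vector<float> detector;
    ifstream fin(model_txt);
    float v;
    while( fin >> v )
        detector.push_back(v);
    cout << "loaded " << detector.size() << " coefficients" << endl;

    HOGDescriptor hog;             // default 64x128 window matches the INRIA model
    hog.setSVMDetector(detector);  // used instead of getDefaultPeopleDetector()

    Mat img = imread(argv[1]);
    if( !img.data )
        return -1;

    vector<Rect> found;
    hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
    for( size_t i = 0; i < found.size(); i++ )
        rectangle(img, found[i].tl(), found[i].br(), Scalar(0,255,0), 2);

    imwrite("result.jpg", img);
    cout << found.size() << " detections written to result.jpg" << endl;
    return 0;
}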