转载自该文章,格式已作修整。
以现在使用的OpenCV 2.4.10为例,行人检测的Demo在“D:\opencv\sources\samples\cpp\peopledetect.cpp”下,源代码如下所示:
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/highgui/highgui.hpp"
#include <stdio.h>
#include <string.h>
#include <ctype.h>
using namespace cv;
using namespace std;
// static void help()
// {
// printf(
// "\nDemonstrate the use of the HoG descriptor using\n"
// " HOGDescriptor::hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());\n"
// "Usage:\n"
// "./peopledetect (<image_filename> | <image_list>.txt)\n\n");
// }
int main(int argc, char** argv)
{
Mat img;
FILE* f = 0;
char _filename[1024];
if( argc == 1 )
{
printf("Usage: peopledetect ( | .txt)\n");
return 0;
}
img = imread(argv[1]);
if( img.data )
{
strcpy(_filename, argv[1]);
}
else
{
f = fopen(argv[1], "rt");
if(!f)
{
fprintf( stderr, "ERROR: the specified file could not be loaded\n");
return -1;
}
}
HOGDescriptor hog;
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
namedWindow("people detector", 1);
for(;;)
{
char* filename = _filename;
if(f)
{
if(!fgets(filename, (int)sizeof(_filename)-2, f))
break;
//while(*filename && isspace(*filename))
// ++filename;
if(filename[0] == '#')
continue;
int l = (int)strlen(filename);
while(l > 0 && isspace(filename[l-1]))
--l;
filename[l] = '\0';
img = imread(filename);
}
printf("%s:\n", filename);
if(!img.data)
continue;
fflush(stdout);
vector found, found_filtered;
double t = (double)getTickCount();
// run the detector with default parameters. to get a higher hit-rate
// (and more false alarms, respectively), decrease the hitThreshold and
// groupThreshold (set groupThreshold to 0 to turn off the grouping completely).
hog.detectMultiScale(img, found, 0, Size(8,8), Size(32,32), 1.05, 2);
t = (double)getTickCount() - t;
printf("tdetection time = %gms\n", t*1000./cv::getTickFrequency());
size_t i, j;
for( i = 0; i < found.size(); i++ )
{
Rect r = found[i];
for( j = 0; j < found.size(); j++ )
if( j != i && (r & found[j]) == r)
break;
if( j == found.size() )
found_filtered.push_back(r);
}
for( i = 0; i < found_filtered.size(); i++ )
{
Rect r = found_filtered[i];
// the HOG detector returns slightly larger rectangles than the real objects.
// so we slightly shrink the rectangles to get a nicer output.
r.x += cvRound(r.width*0.1);
r.width = cvRound(r.width*0.8);
r.y += cvRound(r.height*0.07);
r.height = cvRound(r.height*0.8);
rectangle(img, r.tl(), r.br(), cv::Scalar(0,255,0), 3);
}
imshow("people detector", img);
int c = waitKey(0) & 255;
if( c == 'q' || c == 'Q' || !f)
break;
}
if(f)
fclose(f);
return 0;
}
在该Demo里,没有类似人脸检测时的分类器(模型)的载入过程,而是用下面的语句载入默认模型:
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
vector<float> HOGDescriptor::getDefaultPeopleDetector()
{
static const float detector[] = {
0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
-0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
-0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
-3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
1)这些模型数据是如何得来的?
2)如果使用我自己的样本进行trainning,使用得到的model去detect该如何操作?
首先,从OpenCV所使用到的行人检测算法说起。OpenCV行人检测所用到的算法源自Navneet Dalal和Bill Triggs在2005年CVPR上的文章Histograms of Oriented Gradients for Human Detection。作者所在的研究机构(INRIA:French National Institute for Research in Computer Science and Control,法国国家信息与自动化研究所)发布了这套算法的源码INRIA Object Detection and Localization Toolkit。
那么,OpenCV是否原封不动的使用了这套算法呢?为了求证该问题,笔者比较了两者的模型数据,下载OLTbinaries.zip 找到 /HOG/model_4BiSVMLight.alt 文件(二进制形式的数据),用代码将其中数据保存为文本格式(这个要根据上面的源码learncode),其内容如下:
0.05359386f, -0.14721455f, -0.05532170f, 0.05077307f,
0.11547081f, -0.04268804f, 0.04635834f, -0.05468199f, 0.08232084f,
0.10424068f, -0.02294518f, 0.01108519f, 0.01378693f, 0.11193510f,
0.01268418f, 0.08528346f, -0.06309239f, 0.13054633f, 0.08100729f,
-0.05209739f, -0.04315529f, 0.09341384f, 0.11035026f, -0.07596218f,
-0.05517511f, -0.04465296f, 0.02947334f, 0.04555536f,
-3.55954492e-003f, 0.07818956f, 0.07730991f, 0.07890715f, 0.06222893f,
0.09001380f, -0.03574381f, 0.03414327f, 0.05677258f, -0.04773581f,
0.03746637f, -0.03521175f, 0.06955440f, -0.03849038f, 0.01052293f,
0.01736112f, 0.10867710f, 0.08748853f, 3.29739624e-003f, 0.10907028f,
0.07913758f, 0.10393070f, 0.02091867f, 0.11594022f, 0.13182420f,
由此可见,两者的数据一模一样。那么你使用OLT去trainning你的样本,把得到的model替换
getDefaultPeopleDetector() 中的数据,就可以进行你自己的目标检测了!
---------------------------------------------------------------------------------------------------------
训练样本
为了验证这一想法的正确性和可行性,笔者进行了实验,使用的环境为 Ubuntu 10.04、g++ 4.4.5:
具体实现步骤如下:
1)下载release版本的程序:OLTbinaries.zip
2)下载样本:INRIAPerson.tar
3)在OLTbinaries/ 下建立两个文件夹 test, train,将INRIAperson/Test/neg拷贝到 test/ 下,INRIAperson/Train/neg拷贝到 train/ 下;将INRIAperson/test_64x128_H96拷贝到 test/ 下,重命名为 pos, INRIAperson/train_64x128_H96 拷贝到 train/ 下,重命名为 pos;
4)将 test/neg, test/pos 各自文件夹中的所有图片文件名分别输出到 neg.list, pos.list,并放置在 test/ 下,同样的操作在 train/。
amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./neg > neg.list
amadeuzou@ubuntu:~/Projects/opencv/OLTbinaries/test$ ls ./pos > pos.list
5)到这里,样本数据便准备好了,只要修改 OLTbinaries/runall.sh 相关参数,然后运行这些脚本,一个小时左右的时间,便会在 OLTbinaries/HOG 下产生一个 model_4BiSVMLight.alt文件,模型数据便保存在这里,到这里,你便成功 trainning 了一个 model。
注意事项:
HardOption=" --poscases 2416 --negcases 12180 "
OutDir=HOG
OutFile=$OutDir/record
CMDLINE=$OutDir/record
First
iteration complete
Hard
examples created
Doing
second learning
Second
iteration complete
WIDTH=64; export WIDTH
HEIGHT=128; export HEIGHT