最近在做人头统计方面的研究,尝试了多种办法,其中包括Adaboost+Haar特征、HOG特征+线性SVM两大模型。SVM+HOG的模型之前多数被应用于行人检测,我在做人头统计的过程中考虑到人头的边缘形状比较明显,图像梯度比较容易提取,所以将该方法搬到人头统计上来,效果还不错。不足之处是多尺度的HOG计算太慢了,难以达到实时性要求,所以我采用的多数是单尺度检测(64*64的固定窗口大小)。
我现在的工作只是做了个入门,本文意在抛砖引玉,希望感兴趣的小伙伴能够一起研究。
1、预处理
从视频中采集了1416个人头正样本,以及1957个负样本。正样本主要就是人头部(头发)的图像,负样本是不包括人头目标的图像。然后将它们统一归一化到64*64的大小(我这里为了简化训练过程,将人头图像的大小设置为和窗口大小一致)。
2、人头训练
开发环境是winxp+vs2008+opencv2.3.1。训练代码主要包括MySVM.h,global.h和global.cpp三个文件。其中MySVM.h是SVM类定义文件,global.h和global.cpp分别是全局函数声明和定义文件。
(1)MySVM.h如下:
#pragma once #ifndef _MYSVM_H_ #define _MYSVM_H_ #include <opencv2/opencv.hpp> #include <opencv2/core/core.hpp> #include <opencv2/ml/ml.hpp> #include <opencv2/highgui/highgui.hpp> using namespace cv; class MySVM: public CvSVM { public: int get_alpha_count() { return this->sv_total; } int get_sv_dim() { return this->var_all; } int get_sv_count() { return this->decision_func->sv_count; } double* get_alpha() { return this->decision_func->alpha; } float** get_sv() { return this->sv; } float get_rho() { return this->decision_func->rho; } }; #endif(2)global.h如下:
#pragma once
#ifndef GLOBAL_H_
#define GLOBAL_H_

#include "MySVM.h"
#include <fstream>
#include <iostream>
#include <ctime>

using namespace std;

// Function: Train
// Purpose:  compute the HOG feature of every sample image and train an SVM on them.
// Parameters:
//   const char* positivePath:        path prefix of the positive samples
//   int pCount:                      number of positive samples
//   const char* negativePath:        path prefix of the negative samples
//   int nCount:                      number of negative samples
//   const char* classifierSavePath:  where the trained classifier is saved
//   const char* detectorSavePath:    where the detector file is saved
// Returns bool: whether training succeeded (true: success, false: failure)
bool Train(const char* positivePath, int pCount, const char* negativePath, int nCount,
           const char* classifierSavePath, const char* detectorSavePath);

// Function: CalDimension
// Purpose:  compute the HOG feature dimension of one detection window.
// Parameters:
//   CvSize winSize:      window size
//   CvSize blockSize:    block size
//   CvSize blockStride:  block stride
//   CvSize cellSize:     cell size
//   int nbins:           number of orientation bins
// Returns int: the HOG feature dimension
// Details of the computation: http://blog.csdn.net/carson2005/article/details/7782726
// Details of the parameters:  http://blog.csdn.net/raodotcong/article/details/6239431
int CalDimension(CvSize winSize, CvSize blockSize, CvSize blockStride, CvSize cellSize, int nbins);

// Function: DetectMulti
// Purpose:  run multi-scale SVM+HOG detection on the frames of a test video.
// Parameters:
//   const char* detectorSavePath:  path of the saved detector file
//   const char* testPath:          path of the test video
// Returns bool: whether detection succeeded (true: success, false: failure)
bool DetectMulti(const char* detectorSavePath, const char* testPath);

// Function: DetectSingle
// Purpose:  run single-scale SVM+HOG detection on the frames of a test video.
// Parameters:
//   const char* detectorSavePath:  path of the saved detector file
//   const char* testPath:          path of the test video
// Returns bool: whether detection succeeded (true: success, false: failure)
bool DetectSingle(const char* detectorSavePath, const char* testPath);

#endif // GLOBAL_H_
(3)global.cpp如下:
#include "global.h"

#include <cfloat>
#include <cstdio>
#include <cstring>
#include <sstream>
#include <string>
#include <vector>

/////////////////////// Parameter settings ///////////////////////
CvSize winSize = cvSize(64, 64);     // detection window; equal to the training sample size
CvSize blockSize = cvSize(16, 16);   // block size
CvSize blockStride = cvSize(8, 8);   // block stride
CvSize winStride = cvSize(8, 8);     // window stride
CvSize cellSize = cvSize(8, 8);      // cell size
int nbins = 9;                       // number of gradient orientation bins (9 is the usual choice)

/////////////////////// Function definitions ///////////////////////

// Computes the HOG feature dimension of one window:
// (blocks per window) * (cells per block) * (bins per cell).
int CalDimension(CvSize winSize, CvSize blockSize, CvSize blockStride, CvSize cellSize, int nbins)
{
    // Number of block positions along the height and width of one window.
    int hBlockNum = (winSize.height - blockSize.height) / blockStride.height + 1;
    int wBlockNum = (winSize.width - blockSize.width) / blockStride.width + 1;

    // Number of cells along the height and width of one block.
    int hCellNum = blockSize.height / cellSize.height;
    int wCellNum = blockSize.width / cellSize.width;

    return (hBlockNum * wBlockNum) * (hCellNum * wCellNum) * nbins;
}

// Loads "<dir><index>.bmp" for index in [0, count), computes the HOG
// descriptor of each image and appends it as the next free row of
// `features`, writing `label` to the same row of `labels`.
// Images that cannot be read, or whose size/descriptor length does not match
// the detection window, are reported and skipped; because rows are packed,
// a skipped image leaves no empty row behind.
// `tag` prefixes the error messages; `printIndex` additionally prints the
// sample index in the load-error message.
// Returns the number of rows appended; *nextRow is advanced accordingly.
static int AppendSamples(const char* dir, int count, float label,
                         const char* tag, bool printIndex,
                         const cv::HOGDescriptor& hog, int dim,
                         CvMat* features, CvMat* labels, int* nextRow)
{
    int appended = 0;
    for (int i = 0; i < count; i++)
    {
        // Build the path with a stream instead of sprintf into a fixed buffer.
        std::ostringstream pathBuilder;
        pathBuilder << dir << i << ".bmp";
        const std::string imgPath = pathBuilder.str();

        cv::Mat img = cv::imread(imgPath);
        if (img.data == NULL)
        {
            std::cout << tag << " image sample load error: ";
            if (printIndex)
            {
                std::cout << i << " ";
            }
            std::cout << imgPath << std::endl;
            continue;
        }

        // A sample of any other size yields a descriptor whose length is not
        // `dim`, which would overflow the feature row.
        if (img.cols != winSize.width || img.rows != winSize.height)
        {
            std::cout << tag << " image sample size error: " << imgPath << std::endl;
            continue;
        }

        std::vector<float> featureVec;
        hog.compute(img, featureVec, winStride);
        if (static_cast<int>(featureVec.size()) != dim)
        {
            std::cout << tag << " image sample size error: " << imgPath << std::endl;
            continue;
        }

        const int row = *nextRow;
        for (int j = 0; j < dim; j++)
        {
            CV_MAT_ELEM(*features, float, row, j) = featureVec[j];
        }
        labels->data.fl[row] = label;

        ++(*nextRow);
        ++appended;
    }
    return appended;
}

// Trains a linear C-SVC on the HOG features of the positive (label 1) and
// negative (label -1) samples, saves the classifier to classifierSavePath
// and writes the flattened detector (one weight per line followed by rho) to
// detectorSavePath.
// Returns false when the sample counts are not positive, when no positive or
// no negative sample could be loaded, when training fails or yields no
// support vector, or when the detector file cannot be opened for writing.
bool Train(const char* positivePath, int pCount, const char* negativePath, int nCount,
           const char* classifierSavePath, const char* detectorSavePath)
{
    std::cout<<"******************** Train ********************"<<std::endl;

    // HOG feature dimension of one window.
    const int dim = CalDimension(winSize, blockSize, blockStride, cellSize, nbins);
    const int totalCount = pCount + nCount;

    std::cout<<"1: Start trainning for SVM:"<<std::endl;
    std::cout<<"total samples: "<<totalCount<<std::endl;
    std::cout<<"positive samples: "<<pCount<<std::endl;
    std::cout<<"negative samples: "<<nCount<<std::endl;
    std::cout<<"feature dimension is: "<<dim<<std::endl<<std::endl;

    if (pCount <= 0 || nCount <= 0 || dim <= 0)
    {
        std::cout<<"invalid sample count or feature dimension."<<std::endl;
        return false;
    }

    // One row per sample. For 64*128 samples a row has 3780 features,
    // for 64*64 samples it has 1764.
    CvMat* sampleFeaturesMat = cvCreateMat(totalCount, dim, CV_32FC1);
    cvSetZero(sampleFeaturesMat);
    CvMat* sampleLabelMat = cvCreateMat(totalCount, 1, CV_32FC1); // sample labels
    cvSetZero(sampleLabelMat);

    // The descriptor configuration is the same for every sample.
    const cv::HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
    int usedRows = 0;

    // Positive samples.
    std::cout<<"2: Start to train positive samples:"<<std::endl;
    const int posUsed = AppendSamples(positivePath, pCount, 1.0f, "positive", true,
                                      hog, dim, sampleFeaturesMat, sampleLabelMat, &usedRows);
    std::cout<<"End of training for positive samples."<<std::endl<<std::endl;

    // Negative samples.
    std::cout<<"3: Start to train negative samples: "<<std::endl;
    const int negUsed = AppendSamples(negativePath, nCount, -1.0f, "negative", false,
                                      hog, dim, sampleFeaturesMat, sampleLabelMat, &usedRows);
    std::cout<<"End of training for negative samples."<<std::endl<<std::endl;

    if (posUsed == 0 || negUsed == 0)
    {
        std::cout<<"no usable positive or negative samples."<<std::endl;
        cvReleaseMat(&sampleFeaturesMat);
        cvReleaseMat(&sampleLabelMat);
        return false;
    }

    // SVM training.
    std::cout<<"4: Start to train SVM classifier: "<<std::endl;
    CvSVMParams params;
    int iteration = 1000;
    double penaltyFactor = 0.01;
    params.svm_type = CvSVM::C_SVC;
    params.kernel_type = CvSVM::LINEAR;
    params.term_crit = cvTermCriteria(CV_TERMCRIT_ITER, iteration, FLT_EPSILON);
    params.C = penaltyFactor;

    std::cout<<"svm_type: C_SVC\nkernel_type: LINEAR\ntermination type: CV_TERMCRIT_ITER"
        <<"\ntermination iteration: "<<iteration<<"\ntermination epsilon: "<<FLT_EPSILON
        <<"\npenalty factor: "<<penaltyFactor<<std::endl;

    // Train only on the rows that were actually filled, so that skipped
    // samples do not show up as zero vectors with label 0.
    CvMat usedFeatures;
    CvMat usedLabels;
    cvGetRows(sampleFeaturesMat, &usedFeatures, 0, usedRows);
    cvGetRows(sampleLabelMat, &usedLabels, 0, usedRows);

    MySVM svm;
    const bool trained = svm.train(&usedFeatures, &usedLabels, NULL, NULL, params);
    cvReleaseMat(&sampleFeaturesMat);
    cvReleaseMat(&sampleLabelMat);
    if (!trained)
    {
        std::cout<<"SVM training failed."<<std::endl;
        return false;
    }
    svm.save(classifierSavePath); // persist the trained classifier

    const int supportVectorSize = svm.get_support_vector_count();
    std::cout<<"\nsupport vector size of SVM:"<<supportVectorSize<<std::endl;
    std::cout<<"End of training SVM classifier."<<std::endl<<std::endl;

    const double* alphaArr = svm.get_alpha();
    if (supportVectorSize <= 0 || alphaArr == NULL)
    {
        std::cout<<"SVM has no support vectors."<<std::endl;
        return false;
    }

    // Flatten the linear SVM into one weight vector: re = -(alpha * sv).
    std::cout<<"5. Save SVM detector file: "<<std::endl;
    CvMat* sv = cvCreateMat(supportVectorSize, dim, CV_32FC1);  // all support vectors, one per row
    CvMat* alp = cvCreateMat(1, supportVectorSize, CV_32FC1);   // alpha coefficients
    CvMat* re = cvCreateMat(1, dim, CV_32FC1);                  // resulting weights
    cvSetZero(sv);
    cvSetZero(re);

    for (int i = 0; i < supportVectorSize; i++)
    {
        memcpy(sv->data.fl + i * dim, svm.get_support_vector(i), dim * sizeof(float));
        alp->data.fl[i] = static_cast<float>(alphaArr[i]);
    }

    cvMatMul(alp, sv, re);

    for (int i = 0; i < dim; i++)
    {
        re->data.fl[i] *= -1;
    }

    // Save as text: `dim` weights, one per line, then rho without a newline.
    bool saved = false;
    FILE* fp = fopen(detectorSavePath, "wb");
    if (fp != NULL)
    {
        for (int i = 0; i < dim; i++)
        {
            fprintf(fp, "%f \n", re->data.fl[i]);
        }
        float rho = svm.get_rho();
        fprintf(fp, "%f", rho);
        fclose(fp);
        saved = true;
    }

    cvReleaseMat(&sv);
    cvReleaseMat(&alp);
    cvReleaseMat(&re);

    if (!saved)
    {
        return false;
    }

    std::cout<<"Save "<<detectorSavePath<<" OK!"<<std::endl;
    return true;
}

// Reads whitespace-separated floats from detectorSavePath into `detector`.
// Returns false when the file cannot be opened. Reading stops at the first
// failed extraction, so a missing file or trailing whitespace can neither
// loop forever nor duplicate the last value.
static bool LoadDetector(const char* detectorSavePath, std::vector<float>& detector)
{
    std::ifstream fileIn(detectorSavePath, std::ios::in);
    if (!fileIn.is_open())
    {
        return false;
    }

    float val = 0.0f;
    while (fileIn >> val)
    {
        detector.push_back(val);
    }
    return true;
}

// Shared body of DetectMulti / DetectSingle: plays the video at testPath,
// runs the HOG detector loaded from detectorSavePath on every frame, draws
// the hits in red and prints per-frame and average detection time.
// multiScale selects detectMultiScale (true) or detect (false).
// Pressing ESC stops playback early.
static bool RunDetection(const char* detectorSavePath, const char* testPath, bool multiScale)
{
    CvCapture* cap = cvCreateFileCapture(testPath);
    if (!cap)
    {
        std::cout<<"avi file load error..."<<std::endl;
        return false;
    }

    // The detector must hold one weight per HOG feature, optionally followed by rho.
    std::vector<float> x;
    const size_t dim = static_cast<size_t>(
        CalDimension(winSize, blockSize, blockStride, cellSize, nbins));
    if (!LoadDetector(detectorSavePath, x) || (x.size() != dim && x.size() != dim + 1))
    {
        std::cout<<"detector file load error: "<<detectorSavePath<<std::endl;
        cvReleaseCapture(&cap);
        return false;
    }

    cv::HOGDescriptor hog(winSize, blockSize, blockStride, cellSize, nbins);
    hog.setSVMDetector(x);

    cvNamedWindow("img", 0);
    cvNamedWindow("video", 0);

    std::vector<cv::Rect> foundRects;   // multi-scale hits
    std::vector<cv::Point> foundPoints; // single-scale hits: top-left corners of the windows
    int frameCount = 0;
    double timeSum = 0.0;
    IplImage* img = NULL;

    while ((img = cvQueryFrame(cap)) != NULL)
    {
        cvShowImage("video", img);
        frameCount++;

        // Only the detection call itself is timed.
        const double begin = clock();
        if (multiScale)
        {
            hog.detectMultiScale(img, foundRects, 0, winStride, cv::Size(0,0), 1.05, 2);
        }
        else
        {
            hog.detect(img, foundPoints, 0, winStride, cvSize(0,0));
        }
        const double end = clock();

        const double diff = (end-begin)/CLOCKS_PER_SEC*1000;
        timeSum += diff;
        std::cout<< "Detection time is: "<<diff<<"ms"<<std::endl;

        // Single-scale hits are window corners; give them the window size.
        std::vector<cv::Rect> boxes;
        if (multiScale)
        {
            boxes = foundRects;
        }
        else
        {
            for (size_t i = 0; i < foundPoints.size(); i++)
            {
                boxes.push_back(cv::Rect(foundPoints[i].x, foundPoints[i].y,
                                         winSize.width, winSize.height));
            }
        }

        for (size_t i = 0; i < boxes.size(); i++)
        {
            cvRectangle(img, cvPoint(boxes[i].x, boxes[i].y),
                cvPoint(boxes[i].x + boxes[i].width, boxes[i].y + boxes[i].height),
                CV_RGB(255,0,0), 2);
        }

        cvShowImage("img", img);
        if (cvWaitKey(1) == 27)
        {
            break;
        }
    }

    cvReleaseCapture(&cap);

    // Avoid dividing by zero when the video produced no frame.
    const double average = (frameCount > 0) ? timeSum / frameCount : 0.0;
    std::cout<< "Average detection time is: "<<average<<"ms"<<std::endl;
    return true;
}

// Multi-scale detection using HOGDescriptor::detectMultiScale.
// Returns false when the video or the detector file cannot be loaded.
bool DetectMulti(const char* detectorSavePath, const char* testPath)
{
    std::cout<<"\n******************** Detection Multi********************"<<std::endl;
    return RunDetection(detectorSavePath, testPath, true);
}

// Single-scale detection using HOGDescriptor::detect.
// Returns false when the video or the detector file cannot be loaded.
bool DetectSingle(const char* detectorSavePath, const char* testPath)
{
    std::cout<<"\n******************** Detection Single********************"<<std::endl;
    return RunDetection(detectorSavePath, testPath, false);
}
3、人头检测
主文件main.cpp如下:
#include "global.h"

// SVM classifier file
const char* classifierSavePath = ".\\HOG_SVM.xml";
// HOG detector file
const char* detectorSavePath = ".\\HogDetector.txt";

// Path prefixes of the positive and negative samples
const char* positivePath = ".\\pos_64_64\\";
const char* negativePath = ".\\neg_64_64\\";

// Number of positive and negative samples
const int pCount = 1416;
const int nCount = 1957;

// Path of the test video
const char* testVideoPath = ".\\test.avi";

// Trains the detector, then runs single-scale detection on the test video.
// Returns -1 as soon as one of the two stages reports failure, 0 otherwise.
int main(int argc, char* argv[])
{
    // Stage 1: training.
    const bool trained = Train(positivePath, pCount, negativePath, nCount,
                               classifierSavePath, detectorSavePath);
    if (!trained)
    {
        cout<<"Train error!\n";
        return -1;
    }

    // Stage 2: single-scale detection.
    const bool detected = DetectSingle(detectorSavePath, testVideoPath);
    if (!detected)
    {
        cout<<"Detection error!\n";
        return -1;
    }

    // Multi-scale detection (disabled):
    //if (!DetectMulti(detectorSavePath, testVideoPath))
    //{
    //    cout<<"Detection error!\n";
    //    return -1;
    //}

    system("pause");
    return 0;
}
采集528幅图像,使用xml分类器文件进行分类,识别准确率一般在99%左右;但是在对视频(CIF)做检测的时候,由于采用的是滑动窗口的检测机制,准确率达不到这么高,大概在95%上下,每帧检测时间大概是31ms。除此之外,高分辨率视频的检测速度以及误检仍然是问题:误检主要是将静止物体识别为人头,或者是受到阴影的干扰;将非人头的运动物体识别为人头的情况比较少。下一步打算引入其他特征做特征融合,或者结合背景建模去除静止物体等方法进行尝试,此外阴影消除算法也在考虑之列。
运行时间截图:
说明:
1、训练(training)时一般把样本(sample)的大小设置成与检测窗口大小一致,因此一开始可能需要先对样本做resize(为了处理多尺度问题,可以使用multi-scale hog feature,然后用PCA降维)。
2、最后检测出来的目标矩形框可能有多个,需要采用矩形合并等方法来处理。比如多个目标框相互嵌套时,如果其中一个矩形框的中心落在另一个矩形框中,则将这两个矩形框合并,直到最后合并为一个矩形框。这里的校正系数group_threshold(参考groupRectangles()函数)也能起到辅助寻找最合适的目标矩形框的作用。
3、因为hog检测出的矩形框比实际人体框要稍微大些,所以需要对这些矩形框大小尺寸做一些调整,比如更改参数scale0的值。