This week I studied the HOG pedestrian detection algorithm. Reading the source code felt quite unfamiliar at first, but after working through it carefully I gained a much better understanding of how the algorithm is actually implemented.
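Before going through the source, here is a minimal sketch (not part of hog.cpp) of the typical calling sequence that the analysis below assumes: construct a HOGDescriptor, load the built-in linear SVM coefficients, and run detectMultiScale on a single image. The file name and the parameter values are only illustrative.

#include <opencv2/opencv.hpp>
#include <vector>

int main()
{
    cv::Mat img = cv::imread("test.jpg");   // placeholder input image
    cv::HOGDescriptor hog;                   // defaults: 64x128 window, 16x16 block, 8x8 stride/cell, 9 bins
    hog.setSVMDetector(cv::HOGDescriptor::getDefaultPeopleDetector());

    std::vector<cv::Rect> found;
    // window stride 8x8, padding 32x32, scale step 1.05, grouping threshold 2 (commonly used values)
    hog.detectMultiScale(img, found, 0, cv::Size(8,8), cv::Size(32,32), 1.05, 2);

    for( size_t i = 0; i < found.size(); i++ )
        cv::rectangle(img, found[i], cv::Scalar(0,255,0), 2);
    return 0;
}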
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  License Agreement: For Open Source Computer Vision Library
//  Copyright (C) 2000-2008, Intel Corporation; Copyright (C) 2009, Willow Garage Inc.
//  (Full BSD-style license text of the original hog.cpp omitted here.)
//
//M*/

#include <stdio.h>
#include "precomp.hpp"
#include <iterator>
#ifdef HAVE_IPP
#include "ipp.h"
#endif

/****************************************************************************************\
    The code below is implementation of HOG (Histogram-of-Oriented Gradients)
    descriptor and object detection, introduced by Navneet Dalal and Bill Triggs.
    The computed feature vectors are compatible with the
    INRIA Object Detection and Localization Toolkit
    (http://pascal.inrialpes.fr/soft/olt/)
\****************************************************************************************/

// Typical HOG calling sequence: construct a HOGDescriptor (window/block/cell parameters plus the
// svmDetector coefficients), read in a single image, and call the detect method to obtain the set
// of rectangles (hits) where pedestrians may be present.
// Topics covered below: Gaussian weighting, trilinear-interpolation weighting, and how the block
// caching mechanism is implemented.
// Functions involved in the algorithm:
// getBlock(): given a block's position inside the sliding window and a pointer to the image's HOG
//             cache, returns the information needed to compute the HOG features of that block.
// normalizeBlockHistogram(): normalizes the partial HOG descriptor obtained for a block; the
//             normalization actually has two stages, see the code for details.
// windowsInImage(): given the test image size and the sliding-window stride, returns how many
//             windows fit horizontally and vertically at this level.
// getWindow(): returns the rectangle of a single sliding window.
// compute(): the function that actually computes the HOG descriptor; it is used both at test time
//             and at training time.
// detect(): the function used to detect objects; it is called internally by detectMultiScale().
// For the analysis we assume a 128x64 window, a 16x16 block, an 8x8 cell and a winStride of 8, so
// each block contains 4 cells. The window contains (128-16+8)/8 x (64-16+8)/8 = 15x7 = 105 blocks.
// A cell histogram has 9 bins, so the descriptor of a whole window has 105x4x9 = 3780 dimensions.

namespace cv
{

// Returns the length of the histogram descriptor of one window (3780 for the parameters above).
size_t HOGDescriptor::getDescriptorSize() const
{
    CV_Assert(blockSize.width % cellSize.width == 0 &&
        blockSize.height % cellSize.height == 0);
    CV_Assert((winSize.width - blockSize.width) % blockStride.width == 0 &&
        (winSize.height - blockSize.height) % blockStride.height == 0 );
    return (size_t)nbins*
        (blockSize.width/cellSize.width)*
        (blockSize.height/cellSize.height)*
        ((winSize.width - blockSize.width)/blockStride.width + 1)*
        ((winSize.height - blockSize.height)/blockStride.height + 1);
}

double HOGDescriptor::getWinSigma() const
{
    // winSigma is the sigma used when computing the Gaussian weights; the default works out to 4.
    return winSigma >= 0 ? winSigma : (blockSize.width + blockSize.height)/8.;
}

bool HOGDescriptor::checkDetectorSize() const
{
    size_t detectorSize = svmDetector.size(), descriptorSize = getDescriptorSize();
    return detectorSize == 0 ||
        detectorSize == descriptorSize ||
        detectorSize == descriptorSize + 1; // one element longer than the descriptor means a bias term
}

void HOGDescriptor::setSVMDetector(InputArray _svmDetector)
{
    // convertTo changes the Mat's properties, e.g. its depth or number of channels.
    _svmDetector.getMat().convertTo(svmDetector, CV_32F);
    CV_Assert( checkDetectorSize() );
}

#define CV_TYPE_NAME_HOG_DESCRIPTOR "opencv-object-detector-hog"

// Reads the contents of a file node into the class member variables.
bool HOGDescriptor::read(FileNode& obj)
{
    if( !obj.isMap() )
        return false;
    FileNodeIterator it = obj["winSize"].begin();
    // Read winSize, blockSize, blockStride, cellSize, nbins, derivAperture, winSigma,
    // histogramNormType, L2HysThreshold, gammaCorrection and nlevels in turn.
    it >> winSize.width >> winSize.height;
    it = obj["blockSize"].begin();
    it >> blockSize.width >> blockSize.height;
    it = obj["blockStride"].begin();
    it >> blockStride.width >> blockStride.height;
    it = obj["cellSize"].begin();
    it >> cellSize.width >> cellSize.height;
    obj["nbins"] >> nbins;
    obj["derivAperture"] >> derivAperture;
    obj["winSigma"] >> winSigma;
    obj["histogramNormType"] >> histogramNormType;
    obj["L2HysThreshold"] >> L2HysThreshold;
    obj["gammaCorrection"] >> gammaCorrection;
    obj["nlevels"] >> nlevels;

    FileNode vecNode = obj["SVMDetector"];
    if( vecNode.isSeq() )
    {
        vecNode >> svmDetector;
        CV_Assert(checkDetectorSize());
    }
    return true;
}

// Writes the descriptor's member variables into the file storage fs; each value is preceded by its
// name, so these nodes are of mapping type.
void HOGDescriptor::write(FileStorage& fs, const String& objName) const
{
    if( !objName.empty() )
        fs << objName;

    fs << "{" CV_TYPE_NAME_HOG_DESCRIPTOR
    << "winSize" << winSize
    << "blockSize" << blockSize
    << "blockStride" << blockStride
    << "cellSize" << cellSize
    << "nbins" << nbins
    << "derivAperture" << derivAperture
    << "winSigma" << getWinSigma()
    << "histogramNormType" << histogramNormType
    << "L2HysThreshold" << L2HysThreshold
    << "gammaCorrection" << gammaCorrection
    << "nlevels" << nlevels;

    if( !svmDetector.empty() ) fs
<< "SVMDetector" << svmDetector; fs << "}"; } //从给定的文件中读取参数。 bool HOGDescriptor::load(const String& filename, const String& objname) { FileStorage fs(filename, FileStorage::READ); FileNode obj = !objname.empty() ? fs[objname] : fs.getFirstTopLevelNode(); return read(obj); } //将文件的参数以文件借点的形式写入文件中。 void HOGDescriptor::save(const String& filename, const String& objName) const { FileStorage fs(filename, FileStorage::WRITE); write(fs, !objName.empty() ? objName : FileStorage::getDefaultObjectName(filename)); } //复制HOG描述子到c中。 void HOGDescriptor::copyTo(HOGDescriptor& c) const { c.winSize = winSize; c.blockSize = blockSize; c.blockStride = blockStride; c.cellSize = cellSize; c.nbins = nbins; c.derivAperture = derivAperture; c.winSigma = winSigma; c.histogramNormType = histogramNormType; c.L2HysThreshold = L2HysThreshold; c.gammaCorrection = gammaCorrection; c.svmDetector = svmDetector; c.nlevels = nlevels; } //完成梯度幅度图和梯度方向值的计算。利用mapBuffer计算拓展图像与原始图像映射之间的关系。 void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle, Size paddingTL, Size paddingBR) const { CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 ); Size gradsize(img.cols + paddingTL.width + paddingBR.width, img.rows + paddingTL.height + paddingBR.height); grad.create(gradsize, CV_32FC2); // <magnitude*(1-alpha), magnitude*alpha> qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation Size wholeSize; Point roiofs; img.locateROI(wholeSize, roiofs); int i, x, y; int cn = img.channels(); Mat_<float> _lut(1, 256); const float* lut = &_lut(0,0); if( gammaCorrection ) for( i = 0; i < 256; i++ ) _lut(0,i) = std::sqrt((float)i); else for( i = 0; i < 256; i++ ) _lut(0,i) = (float)i; AutoBuffer<int> mapbuf(gradsize.width + gradsize.height + 4); int* xmap = (int*)mapbuf + 1; int* ymap = xmap + gradsize.width + 2; const int borderType = (int)BORDER_REFLECT_101; //利用borderInterpolate函数完成拓展图像和原始图像位置映射关系。 for( x = -1; x < gradsize.width + 1; x++ ) xmap[x] = borderInterpolate(x - paddingTL.width + roiofs.x, wholeSize.width, borderType) - roiofs.x; for( y = -1; y < gradsize.height + 1; y++ ) ymap[y] = borderInterpolate(y - paddingTL.height + roiofs.y, wholeSize.height, borderType) - roiofs.y; // x- & y- derivatives for the whole row //创建连续内存空间_dbuf,完成对dx、dy、mag、angle的存放;利用angleScale系数将(0,2pi)划分为 //18个单元,之后取整并利用hidx-=nbins操作,将其归拢到九个单元中。 int width = gradsize.width; AutoBuffer<float> _dbuf(width*4); float* dbuf = _dbuf; Mat Dx(1, width, CV_32F, dbuf); Mat Dy(1, width, CV_32F, dbuf + width); Mat Mag(1, width, CV_32F, dbuf + width*2); Mat Angle(1, width, CV_32F, dbuf + width*3); int _nbins = nbins; float angleScale = (float)(_nbins/CV_PI); #ifdef HAVE_IPP Mat lutimg(img.rows,img.cols,CV_MAKETYPE(CV_32F,cn)); Mat hidxs(1, width, CV_32F); Ipp32f* pHidxs = (Ipp32f*)hidxs.data; Ipp32f* pAngles = (Ipp32f*)Angle.data; IppiSize roiSize; roiSize.width = img.cols; roiSize.height = img.rows; for( y = 0; y < roiSize.height; y++ ) { const uchar* imgPtr = img.data + y*img.step; float* imglutPtr = (float*)(lutimg.data + y*lutimg.step); for( x = 0; x < roiSize.width*cn; x++ ) { imglutPtr[x] = lut[imgPtr[x]]; } } #endif for( y = 0; y < gradsize.height; y++ ) { #ifdef HAVE_IPP const float* imgPtr = (float*)(lutimg.data + lutimg.step*ymap[y]); const float* prevPtr = (float*)(lutimg.data + lutimg.step*ymap[y-1]); const float* nextPtr = (float*)(lutimg.data + lutimg.step*ymap[y+1]); #else const uchar* imgPtr = img.data + img.step*ymap[y]; const uchar* prevPtr = img.data + img.step*ymap[y-1]; const uchar* nextPtr = img.data + 
img.step*ymap[y+1]; #endif float* gradPtr = (float*)grad.ptr(y); uchar* qanglePtr = (uchar*)qangle.ptr(y); if( cn == 1 ) { for( x = 0; x < width; x++ ) { int x1 = xmap[x]; #ifdef HAVE_IPP dbuf[x] = (float)(imgPtr[xmap[x+1]] - imgPtr[xmap[x-1]]); dbuf[width + x] = (float)(nextPtr[x1] - prevPtr[x1]); #else dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]); dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]); #endif } } else { for( x = 0; x < width; x++ ) { int x1 = xmap[x]*3; float dx0, dy0, dx, dy, mag0, mag; #ifdef HAVE_IPP const float* p2 = imgPtr + xmap[x+1]*3; const float* p0 = imgPtr + xmap[x-1]*3; dx0 = p2[2] - p0[2]; dy0 = nextPtr[x1+2] - prevPtr[x1+2]; mag0 = dx0*dx0 + dy0*dy0; dx = p2[1] - p0[1]; dy = nextPtr[x1+1] - prevPtr[x1+1]; mag = dx*dx + dy*dy; if( mag0 < mag ) { dx0 = dx; dy0 = dy; mag0 = mag; } dx = p2[0] - p0[0]; dy = nextPtr[x1] - prevPtr[x1]; mag = dx*dx + dy*dy; #else const uchar* p2 = imgPtr + xmap[x+1]*3; const uchar* p0 = imgPtr + xmap[x-1]*3; dx0 = lut[p2[2]] - lut[p0[2]]; dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]]; mag0 = dx0*dx0 + dy0*dy0; dx = lut[p2[1]] - lut[p0[1]]; dy = lut[nextPtr[x1+1]] - lut[prevPtr[x1+1]]; mag = dx*dx + dy*dy; if( mag0 < mag ) { dx0 = dx; dy0 = dy; mag0 = mag; } dx = lut[p2[0]] - lut[p0[0]]; dy = lut[nextPtr[x1]] - lut[prevPtr[x1]]; mag = dx*dx + dy*dy; #endif if( mag0 < mag ) { dx0 = dx; dy0 = dy; mag0 = mag; } dbuf[x] = dx0; dbuf[x+width] = dy0; } } #ifdef HAVE_IPP ippsCartToPolar_32f((const Ipp32f*)Dx.data, (const Ipp32f*)Dy.data, (Ipp32f*)Mag.data, pAngles, width); for( x = 0; x < width; x++ ) { if(pAngles[x] < 0.f) pAngles[x] += (Ipp32f)(CV_PI*2.); } ippsNormalize_32f(pAngles, pAngles, width, 0.5f/angleScale, 1.f/angleScale); ippsFloor_32f(pAngles,(Ipp32f*)hidxs.data,width); ippsSub_32f_I((Ipp32f*)hidxs.data,pAngles,width); ippsMul_32f_I((Ipp32f*)Mag.data,pAngles,width); ippsSub_32f_I(pAngles,(Ipp32f*)Mag.data,width); ippsRealToCplx_32f((Ipp32f*)Mag.data,pAngles,(Ipp32fc*)gradPtr,width); #else //cartToPolar()函数是计算2个矩阵对应元素的幅度和角度,最后一个参数表示是否角度使用角度表示,这里false //表示不用角度表示,即用弧度表示。如果只需计算2个矩阵对应元素的幅度图像,可以采用magnitute()函数。 //-pi/2<Angle<pi/2; cartToPolar( Dx, Dy, Mag, Angle, false ); #endif for( x = 0; x < width; x++ ) { #ifdef HAVE_IPP int hidx = (int)pHidxs[x]; #else float mag = dbuf[x+width*2], angle = dbuf[x+width*3]*angleScale - 0.5f; int hidx = cvFloor(angle); angle -= hidx; gradPtr[x*2] = mag*(1.f - angle); gradPtr[x*2+1] = mag*angle; #endif if( hidx < 0 ) hidx += _nbins; else if( hidx >= _nbins ) hidx -= _nbins; assert( (unsigned)hidx < (unsigned)_nbins ); qanglePtr[x*2] = (uchar)hidx; hidx++; hidx &= hidx < _nbins ? 
-1 : 0; qanglePtr[x*2+1] = (uchar)hidx; } } } struct HOGCache { /* 结构体BlockData中有2个变量。1个BlockData结构体是对应的一个block数据。 histOfs和imgOffset.其中histOfs表示为该block对整个滑动窗口内hog描述算子 的贡献那部分向量的起始位置;imgOffset为该block在滑动窗口图片中的坐标(当然是指左上角坐标)。 */ struct BlockData { BlockData() : histOfs(0), imgOffset() {} int histOfs; Point imgOffset; }; /*结构体PixData中有5个变量,1个PixData结构体是对应的block中1个像素点的数据。其中gradOfs表示 该点的梯度幅度在滑动窗口图片梯度幅度图中的位置坐标;qangleOfs表示该点的梯度角度在滑动窗口 图片梯度角度图中的位置坐标;histOfs[]表示该像素点对1个或2个或4个cell贡献的hog描述子向量的 起始位置坐标(比较抽象,需要看源码才懂)。histWeight[]表示该像素点对1个或2个或4个cell贡献 的权重。gradWeight表示该点本身由于处在block中位置的不同因而对梯度直方图贡献也不同,其权值 按照二维高斯分布(以block中心为二维高斯的中心)来决定。 */ struct PixData { size_t gradOfs, qangleOfs; int histOfs[4]; float histWeights[4]; float gradWeight; }; HOGCache(); HOGCache(const HOGDescriptor* descriptor, const Mat& img, Size paddingTL, Size paddingBR, bool useCache, Size cacheStride); virtual ~HOGCache() {}; virtual void init(const HOGDescriptor* descriptor, const Mat& img, Size paddingTL, Size paddingBR, bool useCache, Size cacheStride); Size windowsInImage(Size imageSize, Size winStride) const; Rect getWindow(Size imageSize, Size winStride, int idx) const; const float* getBlock(Point pt, float* buf); virtual void normalizeBlockHistogram(float* histogram) const; vector<PixData> pixData; vector<BlockData> blockData; bool useCache; vector<int> ymaxCached; Size winSize, cacheStride; Size nblocks, ncells; int blockHistogramSize; int count1, count2, count4; Point imgoffset; Mat_<float> blockCache; Mat_<uchar> blockCacheFlags; Mat grad, qangle; const HOGDescriptor* descriptor; }; HOGCache::HOGCache() { useCache = false; blockHistogramSize = count1 = count2 = count4 = 0; descriptor = 0; } HOGCache::HOGCache(const HOGDescriptor* _descriptor, const Mat& _img, Size _paddingTL, Size _paddingBR, bool _useCache, Size _cacheStride) { init(_descriptor, _img, _paddingTL, _paddingBR, _useCache, _cacheStride); } //完成对HOGCache的初始化工作,包括对梯度幅值、方向值的计算,对blockData及pixData的计算过程 void HOGCache::init(const HOGDescriptor* _descriptor, const Mat& _img, Size _paddingTL, Size _paddingBR, bool _useCache, Size _cacheStride) { descriptor = _descriptor; cacheStride = _cacheStride; useCache = _useCache; descriptor->computeGradient(_img, grad, qangle, _paddingTL, _paddingBR); imgoffset = _paddingTL; winSize = descriptor->winSize; Size blockSize = descriptor->blockSize; Size blockStride = descriptor->blockStride; Size cellSize = descriptor->cellSize; int i, j, nbins = descriptor->nbins; int rawBlockSize = blockSize.width*blockSize.height; nblocks = Size((winSize.width - blockSize.width)/blockStride.width + 1, (winSize.height - blockSize.height)/blockStride.height + 1); ncells = Size(blockSize.width/cellSize.width, blockSize.height/cellSize.height); blockHistogramSize = ncells.width*ncells.height*nbins; if( useCache ) { Size cacheSize((grad.cols - blockSize.width)/cacheStride.width+1, (winSize.height/cacheStride.height)+1); blockCache.create(cacheSize.height, cacheSize.width*blockHistogramSize); blockCacheFlags.create(cacheSize); size_t cacheRows = blockCache.rows; ymaxCached.resize(cacheRows); for(size_t ii = 0; ii < cacheRows; ii++ ) ymaxCached[ii] = -1; } Mat_<float> weights(blockSize); float sigma = (float)descriptor->getWinSigma(); float scale = 1.f/(sigma*sigma*2); for(i = 0; i < blockSize.height; i++) for(j = 0; j < blockSize.width; j++) { float di = i - blockSize.height*0.5f; float dj = j - blockSize.width*0.5f; weights(i,j) = std::exp(-(di*di + dj*dj)*scale); } blockData.resize(nblocks.width*nblocks.height); 
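    // Note on the weights(i,j) table computed above: it is a 2-D Gaussian centred on the block,
    //     weights(i,j) = exp( -((i - blockSize.height/2)^2 + (j - blockSize.width/2)^2) / (2*sigma^2) ),
    // with sigma = getWinSigma() = (blockSize.width + blockSize.height)/8 = 4 for the default 16x16
    // block, so pixels near the block centre get a larger gradWeight than pixels near its border.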
//这里*3不是表示三通道而是划分为三类像素区分存放。这里申请的内存空间远远超过实际所需,但为了计算方便 //需先如此申请,后再进行压缩。 pixData.resize(rawBlockSize*3); // Initialize 2 lookup tables, pixData & blockData. // Here is why: // // The detection algorithm runs in 4 nested loops (at each pyramid layer): // loop over the windows within the input image // loop over the blocks within each window // loop over the cells within each block // loop over the pixels in each cell // // As each of the loops runs over a 2-dimensional array, // we could get 8(!) nested loops in total, which is very-very slow. // // To speed the things up, we do the following: // 1. loop over windows is unrolled in the HOGDescriptor::{compute|detect} methods; // inside we compute the current search window using getWindow() method. // Yes, it involves some overhead (function call + couple of divisions), // but it's tiny in fact. // 2. loop over the blocks is also unrolled. Inside we use pre-computed blockData[j] // to set up gradient and histogram pointers. // 3. loops over cells and pixels in each cell are merged // (since there is no overlap between cells, each pixel in the block is processed once) // and also unrolled. Inside we use PixData[k] to access the gradient values and // update the histogram // count1 = count2 = count4 = 0; for( j = 0; j < blockSize.width; j++ ) for( i = 0; i < blockSize.height; i++ ) { PixData* data = 0; float cellX = (j+0.5f)/cellSize.width - 0.5f; float cellY = (i+0.5f)/cellSize.height - 0.5f; int icellX0 = cvFloor(cellX); int icellY0 = cvFloor(cellY); int icellX1 = icellX0 + 1, icellY1 = icellY0 + 1; cellX -= icellX0; cellY -= icellY0; if( (unsigned)icellX0 < (unsigned)ncells.width && (unsigned)icellX1 < (unsigned)ncells.width ) { if( (unsigned)icellY0 < (unsigned)ncells.height && (unsigned)icellY1 < (unsigned)ncells.height ) { //即一个区域内进行直方图统计,最多包含四个cell的不同直方图,histOfs[i]表示每个区域中的 //第i个直方图在整个block直方图存储空间中的距离原始位置的偏置。 //当前像素对哪个直方图做出贡献 data = &pixData[rawBlockSize*2 + (count4++)]; data->histOfs[0] = (icellX0*ncells.height + icellY0)*nbins; data->histWeights[0] = (1.f - cellX)*(1.f - cellY); data->histOfs[1] = (icellX1*ncells.height + icellY0)*nbins; data->histWeights[1] = cellX*(1.f - cellY); data->histOfs[2] = (icellX0*ncells.height + icellY1)*nbins; data->histWeights[2] = (1.f - cellX)*cellY; data->histOfs[3] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[3] = cellX*cellY; } else { data = &pixData[rawBlockSize + (count2++)]; if( (unsigned)icellY0 < (unsigned)ncells.height ) { icellY1 = icellY0; cellY = 1.f - cellY; } data->histOfs[0] = (icellX0*ncells.height + icellY1)*nbins; data->histWeights[0] = (1.f - cellX)*cellY; data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[1] = cellX*cellY; data->histOfs[2] = data->histOfs[3] = 0; data->histWeights[2] = data->histWeights[3] = 0; } } else { if( (unsigned)icellX0 < (unsigned)ncells.width ) { icellX1 = icellX0; cellX = 1.f - cellX; } if( (unsigned)icellY0 < (unsigned)ncells.height && (unsigned)icellY1 < (unsigned)ncells.height ) { data = &pixData[rawBlockSize + (count2++)]; data->histOfs[0] = (icellX1*ncells.height + icellY0)*nbins; data->histWeights[0] = cellX*(1.f - cellY); data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[1] = cellX*cellY; data->histOfs[2] = data->histOfs[3] = 0; data->histWeights[2] = data->histWeights[3] = 0; } else { data = &pixData[count1++]; if( (unsigned)icellY0 < (unsigned)ncells.height ) { icellY1 = icellY0; cellY = 1.f - cellY; } data->histOfs[0] = (icellX1*ncells.height + icellY1)*nbins; data->histWeights[0] 
= cellX*cellY; data->histOfs[1] = data->histOfs[2] = data->histOfs[3] = 0; data->histWeights[1] = data->histWeights[2] = data->histWeights[3] = 0; } } //grad记录每个像素所属bin对应的权重的矩阵,为幅值乘以权重,这个权重包括高斯权重、三次差值的权重。 //qangle记录每个像素角度所属的bin序号的矩阵,均为2通道,为了线性 data->gradOfs = (grad.cols*i + j)*2; data->qangleOfs = (qangle.cols*i + j)*2; data->gradWeight = weights(i,j); } assert( count1 + count2 + count4 == rawBlockSize ); // defragment pixData //重新整理数据使其连贯存储,节省了2/3的内存空间 for( j = 0; j < count2; j++ ) pixData[j + count1] = pixData[j + rawBlockSize]; for( j = 0; j < count4; j++ ) pixData[j + count1 + count2] = pixData[j + rawBlockSize*2]; count2 += count1; count4 += count2; //上面是初始化pixData,下面开始初始化blockData // initialize blockData for( j = 0; j < nblocks.width; j++ ) for( i = 0; i < nblocks.height; i++ ) { //histOfs表示该block对检测窗口贡献的hog描述变量起点在整个变量中的坐标 BlockData& data = blockData[j*nblocks.height + i]; data.histOfs = (j*nblocks.height + i)*blockHistogramSize; //imgOffset表示该block的左上角在检测窗口中的坐标 data.imgOffset = Point(j*blockStride.width,i*blockStride.height); } } //pt为该block左上角在滑动窗口中的坐标,buf为指向检测窗口中blockData的指针 //函数返回一个指向block描述子的指针。 const float* HOGCache::getBlock(Point pt, float* buf) { float* blockHist = buf; assert(descriptor != 0); Size blockSize = descriptor->blockSize; pt += imgoffset; CV_Assert( (unsigned)pt.x <= (unsigned)(grad.cols - blockSize.width) && (unsigned)pt.y <= (unsigned)(grad.rows - blockSize.height) ); if( useCache ) { CV_Assert( pt.x % cacheStride.width == 0 && pt.y % cacheStride.height == 0 ); //cacheIdx表示的是block个数的坐标 Point cacheIdx(pt.x/cacheStride.width, (pt.y/cacheStride.height) % blockCache.rows); //ymaxCached的长度为一个检测窗口垂直方向上容纳的block个数 if( pt.y != ymaxCached[cacheIdx.y] ) { Mat_<uchar> cacheRow = blockCacheFlags.row(cacheIdx.y); cacheRow = (uchar)0; ymaxCached[cacheIdx.y] = pt.y; } blockHist = &blockCache[cacheIdx.y][cacheIdx.x*blockHistogramSize]; uchar& computedFlag = blockCacheFlags(cacheIdx.y, cacheIdx.x); if( computedFlag != 0 ) return blockHist; computedFlag = (uchar)1; // set it at once, before actual computing } int k, C1 = count1, C2 = count2, C4 = count4; const float* gradPtr = (const float*)(grad.data + grad.step*pt.y) + pt.x*2; const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2; CV_Assert( blockHist != 0 ); #ifdef HAVE_IPP ippsZero_32f(blockHist,blockHistogramSize); #else for( k = 0; k < blockHistogramSize; k++ ) blockHist[k] = 0.f; #endif const PixData* _pixData = &pixData[0]; for( k = 0; k < C1; k++ ) { const PixData& pk = _pixData[k]; //一个像素包含:gradOfs,qangleOfs,grandWeight,histOfs[4],histWeight[4]这5个属性 const float* a = gradPtr + pk.gradOfs; float w = pk.gradWeight*pk.histWeights[0]; const uchar* h = qanglePtr + pk.qangleOfs; int h0 = h[0], h1 = h[1]; float* hist = blockHist + pk.histOfs[0]; float t0 = hist[h0] + a[0]*w; float t1 = hist[h1] + a[1]*w; hist[h0] = t0; hist[h1] = t1; } for( ; k < C2; k++ ) { const PixData& pk = _pixData[k]; const float* a = gradPtr + pk.gradOfs; float w, t0, t1, a0 = a[0], a1 = a[1]; const uchar* h = qanglePtr + pk.qangleOfs; int h0 = h[0], h1 = h[1]; float* hist = blockHist + pk.histOfs[0]; w = pk.gradWeight*pk.histWeights[0]; t0 = hist[h0] + a0*w; t1 = hist[h1] + a1*w; hist[h0] = t0; hist[h1] = t1; hist = blockHist + pk.histOfs[1]; w = pk.gradWeight*pk.histWeights[1]; t0 = hist[h0] + a0*w; t1 = hist[h1] + a1*w; hist[h0] = t0; hist[h1] = t1; } for( ; k < C4; k++ ) { const PixData& pk = _pixData[k]; const float* a = gradPtr + pk.gradOfs; float w, t0, t1, a0 = a[0], a1 = a[1]; const uchar* h = qanglePtr + pk.qangleOfs; int h0 = h[0], 
h1 = h[1]; float* hist = blockHist + pk.histOfs[0]; w = pk.gradWeight*pk.histWeights[0]; t0 = hist[h0] + a0*w; t1 = hist[h1] + a1*w; hist[h0] = t0; hist[h1] = t1; hist = blockHist + pk.histOfs[1]; w = pk.gradWeight*pk.histWeights[1]; t0 = hist[h0] + a0*w; t1 = hist[h1] + a1*w; hist[h0] = t0; hist[h1] = t1; hist = blockHist + pk.histOfs[2]; w = pk.gradWeight*pk.histWeights[2]; t0 = hist[h0] + a0*w; t1 = hist[h1] + a1*w; hist[h0] = t0; hist[h1] = t1; hist = blockHist + pk.histOfs[3]; w = pk.gradWeight*pk.histWeights[3]; t0 = hist[h0] + a0*w; t1 = hist[h1] + a1*w; hist[h0] = t0; hist[h1] = t1; } normalizeBlockHistogram(blockHist); return blockHist; } void HOGCache::normalizeBlockHistogram(float* _hist) const { float* hist = &_hist[0]; #ifdef HAVE_IPP size_t sz = blockHistogramSize; #else size_t i, sz = blockHistogramSize; #endif float sum = 0; #ifdef HAVE_IPP ippsDotProd_32f(hist,hist,sz,&sum); #else for( i = 0; i < sz; i++ ) sum += hist[i]*hist[i]; #endif float scale = 1.f/(std::sqrt(sum)+sz*0.1f), thresh = (float)descriptor->L2HysThreshold; #ifdef HAVE_IPP ippsMulC_32f_I(scale,hist,sz); ippsThreshold_32f_I( hist, sz, thresh, ippCmpGreater ); ippsDotProd_32f(hist,hist,sz,&sum); #else for( i = 0, sum = 0; i < sz; i++ ) { hist[i] = std::min(hist[i]*scale, thresh); sum += hist[i]*hist[i]; } #endif scale = 1.f/(std::sqrt(sum)+1e-3f); #ifdef HAVE_IPP ippsMulC_32f_I(scale,hist,sz); #else for( i = 0; i < sz; i++ ) hist[i] *= scale; #endif } //返回测试图片中水平方向和垂直方向共有多少个检测窗口 Size HOGCache::windowsInImage(Size imageSize, Size winStride) const { return Size((imageSize.width - winSize.width)/winStride.width + 1, (imageSize.height - winSize.height)/winStride.height + 1); } //给定图片的大小,已经检测窗口滑动的大小和测试图片中的检测窗口的索引,得到该索引处检测窗口的 //尺寸,包括坐标信息。 Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx) const { int nwindowsX = (imageSize.width - winSize.width)/winStride.width + 1; int y = idx / nwindowsX; int x = idx - nwindowsX*y; return Rect( x*winStride.width, y*winStride.height, winSize.width, winSize.height ); } //完成对整幅图像中所有扫描窗口,或指定扫描窗口的特征向量的计算。首先初始化HOGCache, //之后遍历所有扫描窗口,在每个扫描窗口内遍历所有block并通过调用getBlock函数进行特征 //向量的快速计算。 void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors, Size winStride, Size padding, const vector<Point>& locations) const { if( winStride == Size() ) winStride = cellSize; Size cacheStride(gcd(winStride.width, blockStride.width), gcd(winStride.height, blockStride.height)); size_t nwindows = locations.size(); padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width); padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height); Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2); HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride); if( !nwindows ) nwindows = cache.windowsInImage(paddedImgSize, winStride).area(); const HOGCache::BlockData* blockData = &cache.blockData[0]; int nblocks = cache.nblocks.area(); int blockHistogramSize = cache.blockHistogramSize; size_t dsize = getDescriptorSize(); descriptors.resize(dsize*nwindows); for( size_t i = 0; i < nwindows; i++ ) { float* descriptor = &descriptors[i*dsize]; Point pt0; if( !locations.empty() ) { pt0 = locations[i]; if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width || pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height ) continue; } else { pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding); CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 
0); } for( int j = 0; j < nblocks; j++ ) { const HOGCache::BlockData& bj = blockData[j]; Point pt = pt0 + bj.imgOffset; float* dst = descriptor + bj.histOfs; const float* src = cache.getBlock(pt, dst); if( src != dst ) #ifdef HAVE_IPP ippsCopy_32f(src,dst,blockHistogramSize); #else for( int k = 0; k < blockHistogramSize; k++ ) dst[k] = src[k]; #endif } } } //过程与compute()基本类似,不同在于,在计算机扫描窗口特征向量过程中利用svmCec进行 //计算,并最终判定当前扫描窗口内是否有人。 void HOGDescriptor::detect(const Mat& img, vector<Point>& hits, vector<double>& weights, double hitThreshold, Size winStride, Size padding, const vector<Point>& locations) const { hits.clear(); if( svmDetector.empty() ) return; if( winStride == Size() ) winStride = cellSize; Size cacheStride(gcd(winStride.width, blockStride.width), gcd(winStride.height, blockStride.height)); size_t nwindows = locations.size(); padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width); padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height); Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2); //调用detect函数的内部:初始化实例化一个HOGCache(完成单幅图像的梯度幅度图及梯度方向图的计算, //对blockData,pixData进行初始化工作);对指定扫描窗口或遍历所有扫描窗口(取决于locations是否为空) //计算扫描窗口对应特征向量;将计算得到的特征向量与svmVec做乘累加;利用阈值对结果进行判定,如果 //超过阈值,则说明当前窗口内含有行人,将该扫描窗口的相关信息进行保留;继续进行下一个窗口扫描, //直到结束。 HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride); if( !nwindows ) nwindows = cache.windowsInImage(paddedImgSize, winStride).area(); const HOGCache::BlockData* blockData = &cache.blockData[0]; int nblocks = cache.nblocks.area(); int blockHistogramSize = cache.blockHistogramSize; size_t dsize = getDescriptorSize(); double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0; vector<float> blockHist(blockHistogramSize); for( size_t i = 0; i < nwindows; i++ ) { Point pt0; if( !locations.empty() ) { pt0 = locations[i]; if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width || pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height ) continue; } else { pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding); CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0); } double s = rho; const float* svmVec = &svmDetector[0]; #ifdef HAVE_IPP int j; #else int j, k; #endif for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize ) { const HOGCache::BlockData& bj = blockData[j]; Point pt = pt0 + bj.imgOffset; const float* vec = cache.getBlock(pt, &blockHist[0]); #ifdef HAVE_IPP Ipp32f partSum; ippsDotProd_32f(vec,svmVec,blockHistogramSize,&partSum); s += (double)partSum; #else for( k = 0; k <= blockHistogramSize - 4; k += 4 ) s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] + vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3]; for( ; k < blockHistogramSize; k++ ) s += vec[k]*svmVec[k]; #endif } if( s >= hitThreshold ) { hits.push_back(pt0); weights.push_back(s); } } } void HOGDescriptor::detect(const Mat& img, vector<Point>& hits, double hitThreshold, Size winStride, Size padding, const vector<Point>& locations) const { vector<double> weightsV; detect(img, hits, weightsV, hitThreshold, winStride, padding, locations); } class HOGInvoker : public ParallelLoopBody { public: HOGInvoker( const HOGDescriptor* _hog, const Mat& _img, double _hitThreshold, Size _winStride, Size _padding, const double* _levelScale, std::vector<Rect> * _vec, Mutex* _mtx, std::vector<double>* _weights=0, std::vector<double>* _scales=0 ) { hog = _hog; img = _img; hitThreshold = _hitThreshold; winStride = _winStride; padding = _padding; 
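        // HOGInvoker stores everything detect() needs (image, thresholds, strides, the scale table
        // and the output vectors) so that parallel_for_ can run one detection pass per image scale
        // on worker threads; results are pushed into the shared vectors while holding mtx.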
levelScale = _levelScale; vec = _vec; weights = _weights; scales = _scales; mtx = _mtx; } void operator()( const Range& range ) const { int i, i1 = range.start, i2 = range.end; double minScale = i1 > 0 ? levelScale[i1] : i2 > 1 ? levelScale[i1+1] : std::max(img.cols, img.rows); Size maxSz(cvCeil(img.cols/minScale), cvCeil(img.rows/minScale)); Mat smallerImgBuf(maxSz, img.type()); vector<Point> locations; vector<double> hitsWeights; for( i = i1; i < i2; i++ ) { double scale = levelScale[i]; //缩小图像尺寸 ,方便detectMultiScale Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale)); Mat smallerImg(sz, img.type(), smallerImgBuf.data); if( sz == img.size() ) smallerImg = Mat(sz, img.type(), img.data, img.step); else resize(img, smallerImg, sz); hog->detect(smallerImg, locations, hitsWeights, hitThreshold, winStride, padding); Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale)); mtx->lock(); for( size_t j = 0; j < locations.size(); j++ ) { vec->push_back(Rect(cvRound(locations[j].x*scale), cvRound(locations[j].y*scale), scaledWinSize.width, scaledWinSize.height)); if (scales) { scales->push_back(scale); } } mtx->unlock(); if (weights && (!hitsWeights.empty())) { mtx->lock(); for (size_t j = 0; j < locations.size(); j++) { weights->push_back(hitsWeights[j]); } mtx->unlock(); } } } const HOGDescriptor* hog; Mat img; double hitThreshold; Size winStride; Size padding; const double* levelScale; std::vector<Rect>* vec; std::vector<double>* weights; std::vector<double>* scales; Mutex* mtx; }; void HOGDescriptor::detectMultiScale( const Mat& img, vector<Rect>& foundLocations, vector<double>& foundWeights, double hitThreshold, Size winStride, Size padding, double scale0, double finalThreshold, bool useMeanshiftGrouping) const { double scale = 1.; int levels = 0; vector<double> levelScale; for( levels = 0; levels < nlevels; levels++ ) { levelScale.push_back(scale); if( cvRound(img.cols/scale) < winSize.width || cvRound(img.rows/scale) < winSize.height || scale0 <= 1 ) break; scale *= scale0; } levels = std::max(levels, 1); levelScale.resize(levels); std::vector<Rect> allCandidates; std::vector<double> tempScales; std::vector<double> tempWeights; std::vector<double> foundScales; Mutex mtx; parallel_for_(Range(0, (int)levelScale.size()), HOGInvoker(this, img, hitThreshold, winStride, padding, &levelScale[0], &allCandidates, &mtx, &tempWeights, &tempScales)); std::copy(tempScales.begin(), tempScales.end(), back_inserter(foundScales)); foundLocations.clear(); std::copy(allCandidates.begin(), allCandidates.end(), back_inserter(foundLocations)); foundWeights.clear(); std::copy(tempWeights.begin(), tempWeights.end(), back_inserter(foundWeights)); if ( useMeanshiftGrouping ) { groupRectangles_meanshift(foundLocations, foundWeights, foundScales, finalThreshold, winSize); } else { groupRectangles(foundLocations, foundWeights, (int)finalThreshold, 0.2); } } void HOGDescriptor::detectMultiScale(const Mat& img, vector<Rect>& foundLocations, double hitThreshold, Size winStride, Size padding, double scale0, double finalThreshold, bool useMeanshiftGrouping) const { vector<double> foundWeights; detectMultiScale(img, foundLocations, foundWeights, hitThreshold, winStride, padding, scale0, finalThreshold, useMeanshiftGrouping); } typedef RTTIImpl<HOGDescriptor> HOGRTTI; CvType hog_type( CV_TYPE_NAME_HOG_DESCRIPTOR, HOGRTTI::isInstance, HOGRTTI::release, HOGRTTI::read, HOGRTTI::write, HOGRTTI::clone); class HOGConfInvoker : public ParallelLoopBody { public: HOGConfInvoker( const 
HOGDescriptor* _hog, const Mat& _img, double _hitThreshold, Size _padding, std::vector<DetectionROI>* locs, std::vector<Rect>* _vec, Mutex* _mtx ) { hog = _hog; img = _img; hitThreshold = _hitThreshold; padding = _padding; locations = locs; vec = _vec; mtx = _mtx; } void operator()( const Range& range ) const { int i, i1 = range.start, i2 = range.end; Size maxSz(cvCeil(img.cols/(*locations)[0].scale), cvCeil(img.rows/(*locations)[0].scale)); Mat smallerImgBuf(maxSz, img.type()); vector<Point> dets; for( i = i1; i < i2; i++ ) { double scale = (*locations)[i].scale; Size sz(cvRound(img.cols / scale), cvRound(img.rows / scale)); Mat smallerImg(sz, img.type(), smallerImgBuf.data); if( sz == img.size() ) smallerImg = Mat(sz, img.type(), img.data, img.step); else resize(img, smallerImg, sz); hog->detectROI(smallerImg, (*locations)[i].locations, dets, (*locations)[i].confidences, hitThreshold, Size(), padding); Size scaledWinSize = Size(cvRound(hog->winSize.width*scale), cvRound(hog->winSize.height*scale)); mtx->lock(); for( size_t j = 0; j < dets.size(); j++ ) { vec->push_back(Rect(cvRound(dets[j].x*scale), cvRound(dets[j].y*scale), scaledWinSize.width, scaledWinSize.height)); } mtx->unlock(); } } const HOGDescriptor* hog; Mat img; double hitThreshold; std::vector<DetectionROI>* locations; Size padding; std::vector<Rect>* vec; Mutex* mtx; }; void HOGDescriptor::detectROI(const cv::Mat& img, const vector<cv::Point> &locations, CV_OUT std::vector<cv::Point>& foundLocations, CV_OUT std::vector<double>& confidences, double hitThreshold, cv::Size winStride, cv::Size padding) const { foundLocations.clear(); confidences.clear(); if( svmDetector.empty() ) return; if( locations.empty() ) return; if( winStride == Size() ) winStride = cellSize; Size cacheStride(gcd(winStride.width, blockStride.width), gcd(winStride.height, blockStride.height)); size_t nwindows = locations.size(); padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width); padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height); Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2); // HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride); HOGCache cache(this, img, padding, padding, true, cacheStride); if( !nwindows ) nwindows = cache.windowsInImage(paddedImgSize, winStride).area(); const HOGCache::BlockData* blockData = &cache.blockData[0]; int nblocks = cache.nblocks.area(); int blockHistogramSize = cache.blockHistogramSize; size_t dsize = getDescriptorSize(); double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0; vector<float> blockHist(blockHistogramSize); for( size_t i = 0; i < nwindows; i++ ) { Point pt0; pt0 = locations[i]; if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width || pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height ) { // out of image confidences.push_back(-10.0); continue; } double s = rho; const float* svmVec = &svmDetector[0]; int j, k; for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize ) { const HOGCache::BlockData& bj = blockData[j]; Point pt = pt0 + bj.imgOffset; // need to devide this into 4 parts! 
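            // The unrolled loop below accumulates the linear SVM response for this window: each
            // block's histogram (blockHistogramSize = 36 floats for the default 2x2 cells x 9 bins)
            // is dot-multiplied with the matching slice of svmDetector, four terms at a time;
            // the bias rho was already added when s was initialised.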
const float* vec = cache.getBlock(pt, &blockHist[0]); for( k = 0; k <= blockHistogramSize - 4; k += 4 ) s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] + vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3]; for( ; k < blockHistogramSize; k++ ) s += vec[k]*svmVec[k]; } // cv::waitKey(); confidences.push_back(s); if( s >= hitThreshold ) foundLocations.push_back(pt0); } } void HOGDescriptor::detectMultiScaleROI(const cv::Mat& img, CV_OUT std::vector<cv::Rect>& foundLocations, std::vector<DetectionROI>& locations, double hitThreshold, int groupThreshold) const { std::vector<Rect> allCandidates; Mutex mtx; parallel_for_(Range(0, (int)locations.size()), HOGConfInvoker(this, img, hitThreshold, Size(8, 8), &locations, &allCandidates, &mtx)); foundLocations.resize(allCandidates.size()); std::copy(allCandidates.begin(), allCandidates.end(), foundLocations.begin()); cv::groupRectangles(foundLocations, groupThreshold, 0.2); } void HOGDescriptor::readALTModel(std::string modelfile) { // read model from SVMlight format.. FILE *modelfl; if ((modelfl = fopen(modelfile.c_str(), "rb")) == NULL) { std::string eerr("file not exist"); std::string efile(__FILE__); std::string efunc(__FUNCTION__); throw Exception(CV_StsError, eerr, efile, efunc, __LINE__); } char version_buffer[10]; if (!fread (&version_buffer,sizeof(char),10,modelfl)) { std::string eerr("version?"); std::string efile(__FILE__); std::string efunc(__FUNCTION__); throw Exception(CV_StsError, eerr, efile, efunc, __LINE__); } if(strcmp(version_buffer,"V6.01")) { std::string eerr("version doesnot match"); std::string efile(__FILE__); std::string efunc(__FUNCTION__); throw Exception(CV_StsError, eerr, efile, efunc, __LINE__); } /* read version number */ int version = 0; if (!fread (&version,sizeof(int),1,modelfl)) { throw Exception(); } if (version < 200) { std::string eerr("version doesnot match"); std::string efile(__FILE__); std::string efunc(__FUNCTION__); throw Exception(); } int kernel_type; size_t nread; nread=fread(&(kernel_type),sizeof(int),1,modelfl); {// ignore these int poly_degree; nread=fread(&(poly_degree),sizeof(int),1,modelfl); double rbf_gamma; nread=fread(&(rbf_gamma),sizeof(double), 1, modelfl); double coef_lin; nread=fread(&(coef_lin),sizeof(double),1,modelfl); double coef_const; nread=fread(&(coef_const),sizeof(double),1,modelfl); int l; nread=fread(&l,sizeof(int),1,modelfl); char* custom = new char[l]; nread=fread(custom,sizeof(char),l,modelfl); delete[] custom; } int totwords; nread=fread(&(totwords),sizeof(int),1,modelfl); {// ignore these int totdoc; nread=fread(&(totdoc),sizeof(int),1,modelfl); int sv_num; nread=fread(&(sv_num), sizeof(int),1,modelfl); } double linearbias; nread=fread(&linearbias, sizeof(double), 1, modelfl); std::vector<float> detector; detector.clear(); if(kernel_type == 0) { /* linear kernel */ /* save linear wts also */ double *linearwt = new double[totwords+1]; int length = totwords; nread = fread(linearwt, sizeof(double), totwords + 1, modelfl); if(nread != static_cast<size_t>(length) + 1) { delete [] linearwt; throw Exception(); } for(int i = 0; i < length; i++) detector.push_back((float)linearwt[i]); detector.push_back((float)-linearbias); setSVMDetector(detector); delete [] linearwt; } else { throw Exception(); } fclose(modelfl); } void HOGDescriptor::groupRectangles(vector<cv::Rect>& rectList, vector<double>& weights, int groupThreshold, double eps) const { if( groupThreshold <= 0 || rectList.empty() ) { return; } CV_Assert(rectList.size() == weights.size()); vector<int> labels; int nclasses = 
partition(rectList, labels, SimilarRects(eps)); vector<cv::Rect_<double> > rrects(nclasses); vector<int> numInClass(nclasses, 0); vector<double> foundWeights(nclasses, DBL_MIN); vector<double> totalFactorsPerClass(nclasses, 1); int i, j, nlabels = (int)labels.size(); for( i = 0; i < nlabels; i++ ) { int cls = labels[i]; rrects[cls].x += rectList[i].x; rrects[cls].y += rectList[i].y; rrects[cls].width += rectList[i].width; rrects[cls].height += rectList[i].height; foundWeights[cls] = max(foundWeights[cls], weights[i]); numInClass[cls]++; } for( i = 0; i < nclasses; i++ ) { // find the average of all ROI in the cluster cv::Rect_<double> r = rrects[i]; double s = 1.0/numInClass[i]; rrects[i] = cv::Rect_<double>(cv::saturate_cast<double>(r.x*s), cv::saturate_cast<double>(r.y*s), cv::saturate_cast<double>(r.width*s), cv::saturate_cast<double>(r.height*s)); } rectList.clear(); weights.clear(); for( i = 0; i < nclasses; i++ ) { cv::Rect r1 = rrects[i]; int n1 = numInClass[i]; double w1 = foundWeights[i]; if( n1 <= groupThreshold ) continue; // filter out small rectangles inside large rectangles for( j = 0; j < nclasses; j++ ) { int n2 = numInClass[j]; if( j == i || n2 <= groupThreshold ) continue; cv::Rect r2 = rrects[j]; int dx = cv::saturate_cast<int>( r2.width * eps ); int dy = cv::saturate_cast<int>( r2.height * eps ); if( r1.x >= r2.x - dx && r1.y >= r2.y - dy && r1.x + r1.width <= r2.x + r2.width + dx && r1.y + r1.height <= r2.y + r2.height + dy && (n2 > std::max(3, n1) || n1 < 3) ) break; } if( j == nclasses ) { rectList.push_back(r1); weights.push_back(w1); } } } } //参考文献: //http://blog.csdn.net/pp5576155/article/details/7029699#1536434-tsina-1-47472-66a1f5d8f89e9ad52626f6f40fdeadaa //http://www.cnblogs.com/tornadomeet/archive/2012/08/15/2640754.html //http://blog.csdn.net/antter0510/article/details/20565045