// First, about the HOG algorithm:
#include "_cvaux.h"
/*****************************************************************************************
struct CV_EXPORTS HOGDescriptor
{
public:
enum { L2Hys=0 };
HOGDescriptor() : winSize(64,128), blockSize(16,16), blockStride(8,8),
cellSize(8,8), nbins(9), derivAperture(1), winSigma(-1),
histogramNormType(L2Hys), L2HysThreshold(0.2), gammaCorrection(true)
{}
HOGDescriptor(Size _winSize, Size _blockSize, Size _blockStride,
Size _cellSize, int _nbins, int _derivAperture=1, double _winSigma=-1,
int _histogramNormType=L2Hys, double _L2HysThreshold=0.2, bool _gammaCorrection=false)
: winSize(_winSize), blockSize(_blockSize), blockStride(_blockStride), cellSize(_cellSize),
nbins(_nbins), derivAperture(_derivAperture), winSigma(_winSigma),
histogramNormType(_histogramNormType), L2HysThreshold(_L2HysThreshold),
gammaCorrection(_gammaCorrection)
{}
HOGDescriptor(const String& filename)
{
load(filename);
}
virtual ~HOGDescriptor() {}
size_t getDescriptorSize() const;
bool checkDetectorSize() const;
double getWinSigma() const;
virtual void setSVMDetector(const vector<float>& _svmdetector);
virtual bool load(const String& filename, const String& objname=String());
virtual void save(const String& filename, const String& objname=String()) const;
virtual void compute(const Mat& img,
vector<float>& descriptors,
Size winStride=Size(), Size padding=Size(),
const vector<Point>& locations=vector<Point>()) const;
virtual void detect(const Mat& img, vector<Point>& foundLocations,
double hitThreshold=0, Size winStride=Size(),
Size padding=Size(),
const vector<Point>& searchLocations=vector<Point>()) const;
virtual void detectMultiScale(const Mat& img, vector<Rect>& foundLocations,
double hitThreshold=0, Size winStride=Size(),
Size padding=Size(), double scale=1.05,
int groupThreshold=2) const;
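//NOTE: this parameter is named angleOfs here but qangle in the definition of computeGradient further down. The two names suggest different meanings, so angleOfs is probably a typo and qangle is the name to prefer.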
virtual void computeGradient(const Mat& img, Mat& grad, Mat& angleOfs,
Size paddingTL=Size(), Size paddingBR=Size()) const;
static vector<float> getDefaultPeopleDetector();
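Size winSize;              // detection window size
Size blockSize;            // block size
Size blockStride;          // step between neighbouring blocks, horizontally and vertically
Size cellSize;             // cell size
int nbins;                 // number of orientation bins per histogram
int derivAperture;         // purpose unclear to the annotator; only stored, loaded and saved in the code below
double winSigma;           // sigma of the Gaussian weighting window (negative means "derive it from blockSize", see getWinSigma)
int histogramNormType;     // histogram normalization type, see the paper
double L2HysThreshold;     // clipping threshold of the L2-Hys normalization (0.2 by default)
bool gammaCorrection;      // whether gamma correction is applied
vector<float> svmDetector; // detector coefficients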
};
**********************************************************************************/
namespace cv
{
// Returns the number of floats in the descriptor of ONE detection window:
//   nbins * (cells per block) * (blocks per window).
// With the default parameters this is 9 * (2*2) * (7*15) = 3780.
// checkDetectorSize() additionally accepts a detector with one more element than this.
// Asserts that blocks are made of whole cells and that the window is tiled
// by whole block strides.
size_t HOGDescriptor::getDescriptorSize() const
{
    CV_Assert(blockSize.width % cellSize.width == 0 &&
        blockSize.height % cellSize.height == 0);
    CV_Assert((winSize.width - blockSize.width) % blockStride.width == 0 &&
        (winSize.height - blockSize.height) % blockStride.height == 0 );

    const int cellsPerBlockX = blockSize.width/cellSize.width;
    const int cellsPerBlockY = blockSize.height/cellSize.height;
    const int blocksPerWinX = (winSize.width - blockSize.width)/blockStride.width + 1;
    const int blocksPerWinY = (winSize.height - blockSize.height)/blockStride.height + 1;

    // Accumulate in size_t, multiplying in the same left-to-right order as
    // the single-expression form.
    size_t total = (size_t)nbins;
    total *= cellsPerBlockX;
    total *= cellsPerBlockY;
    total *= blocksPerWinX;
    total *= blocksPerWinY;
    return total;
}
// Returns the sigma of the Gaussian weighting window.
// A non-negative winSigma is returned as is; otherwise (the default is -1)
// the value is derived from the block size: (width + height) / 8, i.e. 4 for
// the default 16x16 block.
double HOGDescriptor::getWinSigma() const
{
    if( winSigma >= 0 )
        return winSigma;
    return (blockSize.width + blockSize.height)/8.;
}
bool HOGDescriptor::checkDetectorSize() const
{
//size_t:unsigned int
size_t detectorSize = svmDetector.size(), descriptorSize = getDescriptorSize();
//三种情况任意一种为true则表达式为true,实际上是最后一种
return detectorSize == 0 ||
detectorSize == descriptorSize ||
detectorSize == descriptorSize + 1;
}
void HOGDescriptor::setSVMDetector(const vector<float>& _svmDetector)
{
svmDetector = _svmDetector;
CV_Assert( checkDetectorSize() );
}
bool HOGDescriptor::load(const String& filename, const String& objname)
{
//XML/YML文件存储
FileStorage fs(filename, FileStorage::READ);
//objname为空,!1=0,选择fs.getFirstTopLevelNode();否则为fs[objname]
//注意到FileStorage中[]重载了:FileNode operator[](const string& nodename)(returns the top-level node by name )
FileNode obj = !objname.empty() ? fs[objname] : fs.getFirstTopLevelNode();
if( !obj.isMap() )
return false;
FileNodeIterator it = obj["winSize"].begin();
it >> winSize.width >> winSize.height;
it = obj["blockSize"].begin();
it >> blockSize.width >> blockSize.height;
it = obj["blockStride"].begin();
it >> blockStride.width >> blockStride.height;
it = obj["cellSize"].begin();
it >> cellSize.width >> cellSize.height;
obj["nbins"] >> nbins;
obj["derivAperture"] >> derivAperture;
obj["winSigma"] >> winSigma;
obj["histogramNormType"] >> histogramNormType;
obj["L2HysThreshold"] >> L2HysThreshold;
obj["gammaCorrection"] >> gammaCorrection;
FileNode vecNode = obj["SVMDetector"];
if( vecNode.isSeq() )
{
vecNode >> svmDetector;
CV_Assert(checkDetectorSize());
}
return true;
}
void HOGDescriptor::save(const String& filename, const String& objName) const
{
FileStorage fs(filename, FileStorage::WRITE);
//空的对象名则取默认名,输出有一定格式,对象名后紧接{
fs << (!objName.empty() ? objName : FileStorage::getDefaultObjectName(filename)) << "{";
//之后依次为:
fs << "winSize" << winSize
<< "blockSize" << blockSize
<< "blockStride" << blockStride
<< "cellSize" << cellSize
<< "nbins" << nbins
<< "derivAperture" << derivAperture
<< "winSigma" << getWinSigma()
<< "histogramNormType" << histogramNormType
<< "L2HysThreshold" << L2HysThreshold
<< "gammaCorrection" << gammaCorrection;
if( !svmDetector.empty() )
fs << "SVMDetector" << "[:" << svmDetector << "]";
//注意还要输出"}"
fs << "}";
}
//img:原始图像
//grad:记录每个像素所属bin对应的权重的矩阵,为幅值乘以权值
//这个权值是关键,也很复杂:包括高斯权重,三次插值的权重,在本函数中先值考虑幅值和相邻bin间的插值权重
//qangle:记录每个像素角度所属的bin序号的矩阵,均为2通道,为了线性插值
//paddingTL:Top和Left扩充像素数
//paddingBR:类似同上
//功能:计算img经扩张后的图像中每个像素的梯度和角度
void HOGDescriptor::computeGradient(const Mat& img, Mat& grad, Mat& qangle,
Size paddingTL, Size paddingBR) const
{
//先判断是否为单通道的灰度或者3通道的图像
CV_Assert( img.type() == CV_8U || img.type() == CV_8UC3 );
//计算gradient的图的大小,由64*128==》112*160,则会产生5*7=35个窗口(windowstride:8)
//每个窗口105个block,105*36=3780维特征向量
//paddingTL.width=16,paddingTL.height=24
Size gradsize(img.cols + paddingTL.width + paddingBR.width,
img.rows + paddingTL.height + paddingBR.height);
//注意grad和qangle是2通道的矩阵,为3D-trilinear插值中的orientation维度,另两维为坐标x与y
grad.create(gradsize, CV_32FC2); // <magnitude*(1-alpha), magnitude*alpha>
qangle.create(gradsize, CV_8UC2); // [0..nbins-1] - quantized gradient orientation
//wholeSize为parent matrix大小,不是扩展后gradsize的大小
//roiofs即为img在parent matrix中的偏置
//对于正样本img=parent matrix;但对于负样本img是从parent img中抽取的10个随机位置
//至于OpenCv具体是怎么操作,使得img和parent img相联系,不是很了解
//wholeSize与roiofs仅在padding时有用,可以不管,就认为传入的img==parent img,是否是从parent img中取出无所谓
Size wholeSize;
Point roiofs;
img.locateROI(wholeSize, roiofs);
int i, x, y;
int cn = img.channels();
//产生1行256列的向量,lut为列向量头地址
Mat_<float> _lut(1, 256);
const float* lut = &_lut(0,0);
//gamma校正,作者的编程思路很有意思
//初看不知道这怎么会与图像的gamma校正有关系,压根img都没出现,看到后面大家会豁然开朗的
if( gammaCorrection )
for( i = 0; i < 256; i++ )
_lut(0,i) = std::sqrt((float)i);
else
for( i = 0; i < 256; i++ )
_lut(0,i) = (float)i;
//开辟空间存xmap和ymap,其中各占gradsize.width+2和gradsize.height+2空间
//+2是为了计算dx,dy时用[-1,0,1]算子,即使在扩充图像中,其边缘计算梯度时还是要再额外加一个像素的
//作者很喜欢直接用内存地址及之间的关系,初看是有点头大的
//另外再说说xmap与ymap的作用:其引入是因为img图像需要扩充到gradsize大小
//如果我们计算img中位于(-5,-6)像素时,需要将基于img的(-5,-6)坐标,映射为基于grad和qangle的坐标(xmap,ymap)
AutoBuffer<int> mapbuf(gradsize.width + gradsize.height + 4);
int* xmap = (int*)mapbuf + 1;
int* ymap = xmap + gradsize.width + 2;
// BORDER_REFLECT_101:(左插值)gfedcb|abcdefgh(原始像素)|gfedcba(右插值),一种插值模式 const int borderType = (int)BORDER_REFLECT_101;
//borderInterpolate函数完成两项操作,一是利用插值扩充img,二是返回x-paddingTL.width+roiofs.x映射后的坐标xmap
//例如,ximg=x(取0)-paddingTL.width(取24)+roiofs.x(取0)=-24 ==>xmap[0]=0
//即img中x=-24,映射到grad中xmap=0,并且存在xmap[0]中,至于borderInterpolate的具体操作可以不必细究
for( x = -1; x < gradsize.width + 1; x++ )
xmap[x] = borderInterpolate(x - paddingTL.width + roiofs.x,
wholeSize.width, borderType);
for( y = -1; y < gradsize.height + 1; y++ )
ymap[y] = borderInterpolate(y - paddingTL.height + roiofs.y,
wholeSize.height, borderType);
// x- & y- derivatives for the whole row
// 由于后面的循环是以行为单位,每次循环内存重复使用,所以只要记录一行的信息而不是整个矩阵
int width = gradsize.width;
AutoBuffer<float> _dbuf(width*4);
float* dbuf = _dbuf;
//注意到内存的连续性方便之后的编程
Mat Dx(1, width, CV_32F, dbuf);
Mat Dy(1, width, CV_32F, dbuf + width);
Mat Mag(1, width, CV_32F, dbuf + width*2);
Mat Angle(1, width, CV_32F, dbuf + width*3);
int _nbins = nbins;
float angleScale = (float)(_nbins/CV_PI);//9/pi
for( y = 0; y < gradsize.height; y++ )
{
//指向每行的第一个元素,img.data为矩阵的第一个元素地址
const uchar* imgPtr = img.data + img.step*ymap[y];
const uchar* prevPtr = img.data + img.step*ymap[y-1];
const uchar* nextPtr = img.data + img.step*ymap[y+1];
float* gradPtr = (float*)grad.ptr(y);
uchar* qanglePtr = (uchar*)qangle.ptr(y);
//1通道
if( cn == 1 )
{
for( x = 0; x < width; x++ )
{
int x1 = xmap[x];
//imgPtr指向img第y行首元素,imgPtr[x]即表示第(x,y)像素,其亮度值位于0~255,对应lut[0]~lut[255]
//即若像素亮度为120,则对应lut[120],若有gamma校正,lut[120]=sqrt(120)
//由于补充了虚拟像素,即在imgPtr[-1]无法表示gradsize中-1位置元素,而需要有个转换
//imgPtr[-1-paddingTL.width+roiofs.x],即imgPtr[xmap[-1]],即gradsize中-1位置元素为img中xmap[-1]位置的元素
dbuf[x] = (float)(lut[imgPtr[xmap[x+1]]] - lut[imgPtr[xmap[x-1]]]);
//由于内存的连续性,隔width,即存Dy
dbuf[width + x] = (float)(lut[nextPtr[x1]] - lut[prevPtr[x1]]);
}
}
else
//3通道,3通道中取最大值
{
for( x = 0; x < width; x++ )
{
int x1 = xmap[x]*3;
const uchar* p2 = imgPtr + xmap[x+1]*3;
const uchar* p0 = imgPtr + xmap[x-1]*3;
float dx0, dy0, dx, dy, mag0, mag;
dx0 = lut[p2[2]] - lut[p0[2]];
dy0 = lut[nextPtr[x1+2]] - lut[prevPtr[x1+2]];
mag0 = dx0*dx0 + dy0*dy0;
dx = lut[p2[1]] - lut[p0[1]];
dy = lut[nextPtr[x1+1]] - lut[prevPtr[x1+1]];
mag = dx*dx + dy*dy;
if( mag0 < mag )
{
dx0 = dx;
dy0 = dy;
mag0 = mag;
}
dx = lut[p2[0]] - lut[p0[0]];
dy = lut[nextPtr[x1]] - lut[prevPtr[x1]];
mag = dx*dx + dy*dy;
if( mag0 < mag )
{
dx0 = dx;
dy0 = dy;
mag0 = mag;
}
dbuf[x] = dx0;
dbuf[x+width] = dy0;
}
}
//函数 cvCartToPolar 计算二维向量(x(I),y(I))的长度,角度:
//magnitude(I) = sqrt(x(I)2 + y(I)2),angle(I) = atan(y(I) / x(I)),注意属于-pi/2~pi/2
cartToPolar( Dx, Dy, Mag, Angle, false );
for( x = 0; x < width; x++ )
{
float mag = dbuf[x+width*2];
float angle = dbuf[x+width*3]*angleScale - 0.5f;//-5<=angle<=4
//判断angle属于哪个bin
int hidx = cvFloor(angle);
angle -= hidx;
//hidx=-5~-1===>4~8
if( hidx < 0 )
hidx += _nbins;
else if( hidx >= _nbins )
hidx -= _nbins;
//检测是否<9
assert( (unsigned)hidx < (unsigned)_nbins );
qanglePtr[x*2] = (uchar)hidx;
hidx++;
//hidx = hidx & 1111 1111 当hidx<nbins,即hidx=hidx
//hidx = hidx & 0000 0000 当hidx>=nbins,即hidx=0
//注意到nbins=9时,hidx最大值只为8
hidx &= hidx < _nbins ? -1 : 0;
//qangle两通道分别存放相邻的两个bin
qanglePtr[x*2+1] = (uchar)hidx;
//幅度,注意此时的0<angle<1,由于hidx = cvFloor(angle),angle -= hidx;
gradPtr[x*2] = mag*(1.f - angle);
gradPtr[x*2+1] = mag*angle;
}
}
}
// Helper that owns the gradient images of one (padded) input image together
// with the lookup tables needed to turn them into block histograms.
// With the default descriptor parameters a window holds 105 blocks and each
// block histogram holds 36 values.
struct HOGCache
{
    // Per-block entry of the window layout table (filled by init()).
    struct BlockData
    {
        BlockData() : histOfs(0), imgOffset() {}

        // Offset of this block's histogram inside the window descriptor.
        int histOfs;
        // Position of the block's top-left corner inside the window.
        Point imgOffset;
    };

    // Per-pixel entry of the block layout table (filled by init()).
    // A pixel contributes to 1, 2 or 4 cell histograms of its block, depending
    // on where it lies relative to the cell centres; unused slots of
    // histOfs/histWeights are set to 0.
    struct PixData
    {
        // Offset of the pixel, relative to the block's top-left corner, in
        // the 2-channel grad image (in floats).
        size_t gradOfs;
        // Same for the 2-channel qangle image (in bytes).
        size_t qangleOfs;
        // Offsets of the affected cell histograms inside the block histogram.
        int histOfs[4];
        // Spatial (bilinear) interpolation weight for each affected cell.
        float histWeights[4];
        // Gaussian weight of the pixel, determined by its position in the block.
        float gradWeight;
    };

    HOGCache();
    HOGCache(const HOGDescriptor* descriptor,
             const Mat& img, Size paddingTL, Size paddingBR,
             bool useCache, Size cacheStride);
    virtual ~HOGCache() {}

    virtual void init(const HOGDescriptor* descriptor,
                      const Mat& img, Size paddingTL, Size paddingBR,
                      bool useCache, Size cacheStride);

    // Number of windows that fit horizontally and vertically in an image.
    Size windowsInImage(Size imageSize, Size winStride) const;
    // Rectangle of the idx-th window (row-major order) for the given stride.
    Rect getWindow(Size imageSize, Size winStride, int idx) const;

    // Returns the normalized histogram of the block whose top-left corner is
    // pt (image coordinates, before padding). buf is the caller's storage,
    // used when the block cache is disabled.
    const float* getBlock(Point pt, float* buf);
    virtual void normalizeBlockHistogram(float* histogram) const;

    vector<PixData> pixData;
    vector<BlockData> blockData;

    // --- block cache: lets overlapping windows share computed blocks ---
    bool useCache;            // whether computed block histograms are kept
    vector<int> ymaxCached;   // per cache row: the y coordinate it currently holds
    Size winSize;
    Size cacheStride;
    Size nblocks;             // blocks per window (x, y)
    Size ncells;              // cells per block (x, y)
    int blockHistogramSize;   // floats per block histogram
    // Number of pixels contributing to 1, 2 and 4 cells. After init() they
    // are cumulative end indices into pixData.
    int count1;
    int count2;
    int count4;
    Point imgoffset;          // top-left padding, added to block positions
    Mat_<float> blockCache;       // cached block histograms, one row of blocks per matrix row
    Mat_<uchar> blockCacheFlags;  // 1 if the matching blockCache entry is valid, 0 otherwise
    Mat grad;
    Mat qangle;
    const HOGDescriptor* descriptor;
};
// Creates an empty cache: caching disabled, no descriptor attached and all
// counters zero. init() must be called before the object is used.
HOGCache::HOGCache()
    : useCache(false),
      blockHistogramSize(0),
      count1(0), count2(0), count4(0),
      descriptor(0)
{
}
// Convenience constructor: forwards all arguments to init(), which computes
// the gradients of _img and builds the lookup tables.
HOGCache::HOGCache(const HOGDescriptor* _descriptor,
                   const Mat& _img, Size _paddingTL, Size _paddingBR,
                   bool _useCache, Size _cacheStride)
{
    init(_descriptor, _img,
         _paddingTL, _paddingBR,
         _useCache, _cacheStride);
}
// Prepares the cache for one image. No histogram is computed here; the
// function only
//   1. computes the gradient images (grad, qangle) of the padded image,
//   2. builds pixData: for every pixel of a block, which cell histograms it
//      contributes to, with which weights, and where its gradient is stored,
//   3. builds blockData: for every block of a window, its position in the
//      window and the offset of its histogram in the window descriptor,
//   4. sets up the optional block cache and the remaining members.
// Parameters:
//   _descriptor  - descriptor whose geometry and parameters are used.
//   _img         - image to process.
//   _paddingTL   - padding added at the left/top.
//   _paddingBR   - padding added at the right/bottom.
//   _useCache    - whether getBlock() may keep and reuse block histograms.
//   _cacheStride - grid step of cached blocks; getBlock() asserts that block
//                  positions are multiples of it when the cache is enabled.
void HOGCache::init(const HOGDescriptor* _descriptor,
const Mat& _img, Size _paddingTL, Size _paddingBR,
bool _useCache, Size _cacheStride)
{
descriptor = _descriptor;
cacheStride = _cacheStride;
useCache = _useCache;
// Fills the members grad and qangle for the padded image.
descriptor->computeGradient(_img, grad, qangle, _paddingTL, _paddingBR);
imgoffset = _paddingTL;// added to block positions in getBlock()
winSize = descriptor->winSize;// 64x128 with the HOGDescriptor defaults
Size blockSize = descriptor->blockSize;// 16x16 with the defaults
Size blockStride = descriptor->blockStride;// 8x8 with the defaults
Size cellSize = descriptor->cellSize;// 8x8 with the defaults
Size winSize = descriptor->winSize;// local copy; shadows the member assigned above (same value)
int i, j, nbins = descriptor->nbins;// 9 with the defaults
int rawBlockSize = blockSize.width*blockSize.height;// pixels per block: 16*16=256 with the defaults
nblocks = Size((winSize.width - blockSize.width)/blockStride.width + 1,
(winSize.height - blockSize.height)/blockStride.height + 1);// blocks per window: 7x15=105 with the defaults
ncells = Size(blockSize.width/cellSize.width, blockSize.height/cellSize.height);// cells per block: 2x2=4 with the defaults
blockHistogramSize = ncells.width*ncells.height*nbins;// floats per block: 2*2*9=36 with the defaults
// Block cache.
// When windows are scanned over an image, neighbouring windows share most of
// their blocks. With the cache enabled, getBlock() stores every block
// histogram it computes and returns the stored copy when the same block is
// requested again.
// Layout set up below:
//  - blockCache has one row per cached row of blocks and, in each row,
//    blockHistogramSize floats for every block position along x;
//  - blockCacheFlags has one byte per cached block (non-zero = valid);
//  - ymaxCached[r] is the y coordinate currently held by cache row r
//    (-1 = nothing cached yet).
// The number of cache rows is winSize.height/cacheStride.height + 1.
// getBlock() selects the row as (pt.y/cacheStride.height) % blockCache.rows,
// so rows are reused cyclically: when a row is requested for a different y
// than the one it holds, its flags are cleared and it is refilled.
if( useCache )
{
// grad was sized by computeGradient():
// grad.cols = img.cols + paddingTL.width + paddingBR.width
Size cacheSize((grad.cols - blockSize.width)/cacheStride.width+1,
(winSize.height/cacheStride.height)+1);
blockCache.create(cacheSize.height, cacheSize.width*blockHistogramSize);
blockCacheFlags.create(cacheSize);
// This i is local to the if-block and hides the outer int i.
size_t i, cacheRows = blockCache.rows;
ymaxCached.resize(cacheRows);
for( i = 0; i < cacheRows; i++ )
ymaxCached[i] = -1;
}
Mat_<float> weights(blockSize);
// sigma is 4 with the defaults, see HOGDescriptor::getWinSigma()
float sigma = (float)descriptor->getWinSigma();
float scale = 1.f/(sigma*sigma*2);
// 2-D Gaussian weight of every pixel of a block, centred on the block centre:
// exp(-(di^2 + dj^2) / (2*sigma^2))
for(i = 0; i < blockSize.height; i++)
for(j = 0; j < blockSize.width; j++)
{
float di = i - blockSize.height*0.5f;
float dj = j - blockSize.width*0.5f;
weights(i,j) = std::exp(-(di*di + dj*dj)*scale);
}
blockData.resize(nblocks.width*nblocks.height);// one entry per block of a window
// Three regions of rawBlockSize entries each: pixels contributing to 1 cell
// start at 0, to 2 cells at rawBlockSize, to 4 cells at rawBlockSize*2.
// (The factor 3 is the number of regions, not a channel count.)
// The regions are compacted into one contiguous run further down.
pixData.resize(rawBlockSize*3);
// Initialize 2 lookup tables, pixData & blockData.
// Here is why:
//
// The detection algorithm runs in 4 nested loops (at each pyramid layer):
// loop over the windows within the input image
// loop over the blocks within each window
// loop over the cells within each block
// loop over the pixels in each cell
//
// As each of the loops runs over a 2-dimensional array,
// we could get 8(!) nested loops in total, which is very-very slow.
//
// To speed the things up, we do the following:
// 1. loop over windows is unrolled in the HOGDescriptor::{compute|detect} methods;
// inside we compute the current search window using getWindow() method.
// Yes, it involves some overhead (function call + couple of divisions),
// but it's tiny in fact.
// 2. loop over the blocks is also unrolled. Inside we use pre-computed blockData[j]
// to set up gradient and histogram pointers.
// 3. loops over cells and pixels in each cell are merged
// (since there is no overlap between cells, each pixel in the block is processed once)
// and also unrolled. Inside we use PixData[k] to access the gradient values and
// update the histogram
// In short: init() builds the tables, getWindow() and getBlock() look them up.
count1 = count2 = count4 = 0;
// Visit every pixel (j = column, i = row) of a block; 16x16 with the defaults.
for( j = 0; j < blockSize.width; j++ )
for( i = 0; i < blockSize.height; i++ )
{
PixData* data = 0;
// Position of the pixel centre in cell units, measured from the centre of
// the first cell.
float cellX = (j+0.5f)/cellSize.width - 0.5f;
float cellY = (i+0.5f)/cellSize.height - 0.5f;
int icellX0 = cvFloor(cellX);
int icellY0 = cvFloor(cellY);
int icellX1 = icellX0 + 1, icellY1 = icellY0 + 1;
// Keep only the fractional parts; they are the interpolation factors
// towards cell (icellX1, icellY1).
cellX -= icellX0;
cellY -= icellY0;
// icell?0 / icell?1 are the indices of the two neighbouring cells along each
// axis. Near the block border one of them lies outside the block (-1 or
// ncells). The unsigned comparisons below reject both negative and too-large
// indices with a single test.
// Depending on how many of the neighbours exist, the pixel contributes to
//   4 cells - both x neighbours and both y neighbours are inside the block,
//   2 cells - both neighbours are inside along one axis only,
//   1 cell  - only one neighbour is inside along each axis.
// Each histOfs is (cell column * ncells.height + cell row) * nbins, i.e. the
// cell histograms are stored column by column; with the defaults the offsets
// are 0, 9, 18 and 27.
if( (unsigned)icellX0 < (unsigned)ncells.width &&
(unsigned)icellX1 < (unsigned)ncells.width )
{
if( (unsigned)icellY0 < (unsigned)ncells.height &&
(unsigned)icellY1 < (unsigned)ncells.height )
{
// 4-cell case: full bilinear interpolation in x and y.
data = &pixData[rawBlockSize*2 + (count4++)];
data->histOfs[0] = (icellX0*ncells.height + icellY0)*nbins;
data->histWeights[0] = (1.f - cellX)*(1.f - cellY);
data->histOfs[1] = (icellX1*ncells.height + icellY0)*nbins;
data->histWeights[1] = cellX*(1.f - cellY);
data->histOfs[2] = (icellX0*ncells.height + icellY1)*nbins;
data->histWeights[2] = (1.f - cellX)*cellY;
data->histOfs[3] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[3] = cellX*cellY;
}
else
{
// 2-cell case, two neighbours along x, one valid cell row.
data = &pixData[rawBlockSize + (count2++)];
if( (unsigned)icellY0 < (unsigned)ncells.height )
{
// The valid row is icellY0: use it as icellY1 and flip the factor so that
// cellY is the weight of the valid row.
icellY1 = icellY0;
cellY = 1.f - cellY;
}
// Both cells share the same y weight (cellY); the x weights are
// (1 - cellX) for the left cell and cellX for the right one.
data->histOfs[0] = (icellX0*ncells.height + icellY1)*nbins;
data->histWeights[0] = (1.f - cellX)*cellY;
data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[1] = cellX*cellY;
data->histOfs[2] = data->histOfs[3] = 0;
data->histWeights[2] = data->histWeights[3] = 0;
}
}
else
{
// Only one valid cell column.
if( (unsigned)icellX0 < (unsigned)ncells.width )
{
// The valid column is icellX0: use it as icellX1 and flip the factor.
icellX1 = icellX0;
cellX = 1.f - cellX;
}
if( (unsigned)icellY0 < (unsigned)ncells.height &&
(unsigned)icellY1 < (unsigned)ncells.height )
{
// 2-cell case, two neighbours along y.
data = &pixData[rawBlockSize + (count2++)];
data->histOfs[0] = (icellX1*ncells.height + icellY0)*nbins;
data->histWeights[0] = cellX*(1.f - cellY);
data->histOfs[1] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[1] = cellX*cellY;
data->histOfs[2] = data->histOfs[3] = 0;
data->histWeights[2] = data->histWeights[3] = 0;
}
else
{
// 1-cell case: a single valid cell.
data = &pixData[count1++];
if( (unsigned)icellY0 < (unsigned)ncells.height )
{
icellY1 = icellY0;
cellY = 1.f - cellY;
}
data->histOfs[0] = (icellX1*ncells.height + icellY1)*nbins;
data->histWeights[0] = cellX*cellY;
data->histOfs[1] = data->histOfs[2] = data->histOfs[3] = 0;
data->histWeights[1] = data->histWeights[2] = data->histWeights[3] = 0;
}
}
// Offsets of the pixel relative to the block's top-left corner; *2 because
// grad and qangle have two channels.
data->gradOfs = (grad.cols*i + j)*2;
data->qangleOfs = (qangle.cols*i + j)*2;
data->gradWeight = weights(i,j);
}
assert( count1 + count2 + count4 == rawBlockSize );// every pixel of the block was classified exactly once
// defragment pixData: move the 2-cell and 4-cell entries right behind the
// 1-cell entries so that all rawBlockSize entries are contiguous.
for( j = 0; j < count2; j++ )
pixData[j + count1] = pixData[j + rawBlockSize];
for( j = 0; j < count4; j++ )
pixData[j + count1 + count2] = pixData[j + rawBlockSize*2];
// Turn the counts into cumulative end indices, as used by getBlock().
count2 += count1;
count4 += count2;
// Initialize blockData: blocks are enumerated column by column.
for( j = 0; j < nblocks.width; j++ )
for( i = 0; i < nblocks.height; i++ )
{
BlockData& data = blockData[j*nblocks.height + i];
// histOfs: offset of this block's histogram in the window descriptor.
data.histOfs = (j*nblocks.height + i)*blockHistogramSize;
data.imgOffset = Point(j*blockStride.width,i*blockStride.height);
}
}
// Returns the normalized histogram of a single block.
//   pt  - top-left corner of the block in image coordinates (before
//         padding); may be negative as long as it stays inside the padding.
//   buf - caller-provided storage for blockHistogramSize floats; used only
//         when the block cache is disabled.
// The returned pointer is buf when the cache is disabled and points into
// blockCache otherwise (in which case the block may not be recomputed at all).
const float* HOGCache::getBlock(Point pt, float* buf)
{
    float* blockHist = buf;
    assert(descriptor != 0);

    Size blockSize = descriptor->blockSize;
    // Convert from image coordinates to coordinates of the padded
    // gradient image.
    pt += imgoffset;

    CV_Assert( (unsigned)pt.x <= (unsigned)(grad.cols - blockSize.width) &&
               (unsigned)pt.y <= (unsigned)(grad.rows - blockSize.height) );

    if( useCache )
    {
        // Cached blocks live on the cacheStride grid.
        CV_Assert( pt.x % cacheStride.width == 0 &&
                   pt.y % cacheStride.height == 0 );

        const int cacheCol = pt.x/cacheStride.width;
        // Cache rows are reused cyclically.
        const int cacheRowIdx = (pt.y/cacheStride.height) % blockCache.rows;

        // The row currently holds blocks of another y: invalidate it.
        if( pt.y != ymaxCached[cacheRowIdx] )
        {
            Mat_<uchar> flagsRow = blockCacheFlags.row(cacheRowIdx);
            flagsRow = (uchar)0;
            ymaxCached[cacheRowIdx] = pt.y;
        }

        blockHist = &blockCache[cacheRowIdx][cacheCol*blockHistogramSize];
        uchar& alreadyComputed = blockCacheFlags(cacheRowIdx, cacheCol);
        if( alreadyComputed != 0 )
            return blockHist;
        alreadyComputed = (uchar)1; // set it at once, before actual computing
    }

    // Top-left pixel of the block in the 2-channel grad / qangle images.
    const float* gradPtr = (const float*)(grad.data + grad.step*pt.y) + pt.x*2;
    const uchar* qanglePtr = qangle.data + qangle.step*pt.y + pt.x*2;

    CV_Assert( blockHist != 0 );

    for( int b = 0; b < blockHistogramSize; b++ )
        blockHist[b] = 0.f;

    // pixData is split into three consecutive runs: pixels voting into one,
    // two and four cell histograms. count1/count2/count4 are the cumulative
    // end indices of those runs.
    const PixData* table = &pixData[0];
    const int runEnd[3] = { count1, count2, count4 };
    const int cellsPerPixel[3] = { 1, 2, 4 };

    int k = 0;
    for( int run = 0; run < 3; run++ )
    {
        const int ncontrib = cellsPerPixel[run];
        for( ; k < runEnd[run]; k++ )
        {
            const PixData& pk = table[k];

            // The two magnitude parts and the two bins they go to.
            const float* a = gradPtr + pk.gradOfs;
            const float a0 = a[0], a1 = a[1];
            const uchar* h = qanglePtr + pk.qangleOfs;
            const int h0 = h[0], h1 = h[1];

            for( int c = 0; c < ncontrib; c++ )
            {
                float* hist = blockHist + pk.histOfs[c];
                // Gaussian weight times spatial interpolation weight.
                const float w = pk.gradWeight*pk.histWeights[c];
                // Both sums are formed before either is stored.
                const float t0 = hist[h0] + a0*w;
                const float t1 = hist[h1] + a1*w;
                hist[h0] = t0;
                hist[h1] = t1;
            }
        }
    }

    normalizeBlockHistogram(blockHist);

    return blockHist;
}
// Normalizes one block histogram in place, in three passes:
//   1. scale by 1 / (L2 norm + 0.1 * histogram length),
//   2. clip every value at descriptor->L2HysThreshold (0.2 by default),
//   3. scale again by 1 / (L2 norm of the clipped values + 1e-3).
void HOGCache::normalizeBlockHistogram(float* _hist) const
{
    float* const bins = _hist;
    const size_t len = blockHistogramSize;

    // Pass 1: squared L2 norm of the raw histogram.
    float energy = 0;
    for( size_t k = 0; k < len; k++ )
        energy += bins[k]*bins[k];

    float norm = 1.f/(std::sqrt(energy)+len*0.1f);
    const float clip = (float)descriptor->L2HysThreshold;

    // Pass 2: scale, clip, and accumulate the new squared norm.
    energy = 0;
    for( size_t k = 0; k < len; k++ )
    {
        bins[k] = std::min(bins[k]*norm, clip);
        energy += bins[k]*bins[k];
    }

    // Pass 3: renormalize the clipped histogram.
    norm = 1.f/(std::sqrt(energy)+1e-3f);
    for( size_t k = 0; k < len; k++ )
        bins[k] *= norm;
}
// Returns how many detection windows fit into an image of the given size
// when the window is moved by winStride: (count along x, count along y).
Size HOGCache::windowsInImage(Size imageSize, Size winStride) const
{
    const int countX = (imageSize.width - winSize.width)/winStride.width + 1;
    const int countY = (imageSize.height - winSize.height)/winStride.height + 1;
    return Size(countX, countY);
}
// Returns the rectangle of the idx-th detection window, with windows numbered
// row by row, for the given image size and window stride.
Rect HOGCache::getWindow(Size imageSize, Size winStride, int idx) const
{
    const int windowsPerRow = (imageSize.width - winSize.width)/winStride.width + 1;
    const int row = idx / windowsPerRow;   // integer division
    const int col = idx % windowsPerRow;
    return Rect( col*winStride.width, row*winStride.height, winSize.width, winSize.height );
}
// Computes HOG descriptors for one or more windows of an image.
//
//   img         - input image (see computeGradient() for the accepted types).
//   descriptors - output; resized to getDescriptorSize() * (number of windows),
//                 one descriptor per window, stored back to back.
//   winStride   - step between windows when scanning; Size() selects cellSize.
//   padding     - border added on every side of the image; rounded up to a
//                 multiple of the cache stride.
//   locations   - explicit top-left corners of the windows to describe. When
//                 empty, the whole padded image is scanned with winStride.
//
// A location that does not lie inside the padded image is skipped: its slot
// in descriptors is left as resize() left it.
void HOGDescriptor::compute(const Mat& img, vector<float>& descriptors,
                            Size winStride, Size padding,
                            const vector<Point>& locations) const
{
    // A default-constructed stride means "use the cell size" (8x8 by default).
    if( winStride == Size() )
        winStride = cellSize;

    // Grid on which blocks can be shared between windows.
    Size cacheStride(gcd(winStride.width, blockStride.width),
                     gcd(winStride.height, blockStride.height));

    size_t nwindows = locations.size();

    // alignSize(sz, n) returns the smallest multiple of n that is not less
    // than sz; it is used here to round the (non-negative) padding up to the
    // cache grid.
    // FIX: the second line of this explanation used to sit in the code
    // without a comment marker, which made the function fail to compile.
    padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
    padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
    Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);

    // The block cache is enabled only when the whole image is scanned
    // (no explicit locations), because only then do windows share blocks
    // on a regular grid.
    HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);

    if( !nwindows )
        nwindows = cache.windowsInImage(paddedImgSize, winStride).area();

    const HOGCache::BlockData* blockData = &cache.blockData[0];

    int nblocks = cache.nblocks.area();
    int blockHistogramSize = cache.blockHistogramSize;
    size_t dsize = getDescriptorSize(); // floats per window (3780 with the defaults)
    descriptors.resize(dsize*nwindows);

    // Layout of descriptors: nwindows segments of dsize floats, each made of
    // nblocks block histograms of blockHistogramSize floats.
    for( size_t i = 0; i < nwindows; i++ )
    {
        float* descriptor = &descriptors[i*dsize];

        Point pt0;
        if( !locations.empty() )
        {
            // Explicit location: skip it when the window would leave the
            // padded image.
            pt0 = locations[i];
            if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
                pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
                continue;
        }
        else
        {
            // Scanning mode: i-th window of the padded image, converted back
            // to image coordinates.
            pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);
            CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
        }

        for( int j = 0; j < nblocks; j++ )
        {
            const HOGCache::BlockData& bj = blockData[j];
            // Block position in the image = window position + block offset
            // inside the window.
            Point pt = pt0 + bj.imgOffset;

            // dst is this block's slot in the output. getBlock() writes
            // there directly unless it serves the block from its cache, in
            // which case the histogram has to be copied over.
            float* dst = descriptor + bj.histOfs;
            const float* src = cache.getBlock(pt, dst);
            if( src != dst )
                for( int k = 0; k < blockHistogramSize; k++ )
                    dst[k] = src[k];
        }
    }
}
//hits:检测图像中存在目标的区域的坐标
//hitThreshold:为目标的阈值
//img:不要求为64*128
//处理固定尺度上目标的检测,detectMultiScale中Scale循环,每个循环中调用detect
void HOGDescriptor::detect(const Mat& img,
vector<Point>& hits, double hitThreshold,
Size winStride, Size padding, const vector<Point>& locations) const
{
hits.clear();
if( svmDetector.empty() )
return;
if( winStride == Size() )
winStride = cellSize;
Size cacheStride(gcd(winStride.width, blockStride.width),
gcd(winStride.height, blockStride.height));
size_t nwindows = locations.size();
padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width);
padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2);
HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
if( !nwindows )
nwindows = cache.windowsInImage(paddedImgSize, winStride).area();
const HOGCache::BlockData* blockData = &cache.blockData[0];
int nblocks = cache.nblocks.area();
int blockHistogramSize = cache.blockHistogramSize;
size_t dsize = getDescriptorSize();
double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
vector<float> blockHist(blockHistogramSize);
for( size_t i = 0; i < nwindows; i++ )
{
Point pt0;
if( !locations.empty() )
{
pt0 = locations[i];
if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
continue;
}
else
{
pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);
CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
}
double s = rho;
const float* svmVec = &svmDetector[0];
int j, k;
for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
{
const HOGCache::BlockData& bj = blockData[j];
Point pt = pt0 + bj.imgOffset;
const float* vec = cache.getBlock(pt, &blockHist[0]);
//分两步,考虑到检测算子中的偏置
for( k = 0; k <= blockHistogramSize - 4; k += 4 )
s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
for( ; k < blockHistogramSize; k++ )
s += vec[k]*svmVec[k];
}
if( s >= hitThreshold )
hits.push_back(pt0);
}
}
// Per-thread working set of detectMultiScale().
struct HOGThreadData
{
    // Detections accumulated by this thread, already scaled back to the
    // coordinates of the original image.
    vector<Rect> rectangles;

    // Window positions reported by detect() for the level being processed.
    vector<Point> locations;

    // Buffer the resized pyramid levels are built in.
    Mat smallerImgBuf;
};
// Detects objects of different sizes by running detect() on a pyramid of
// progressively smaller copies of the image.
//
//   img            - image to search.
//   foundLocations - output rectangles in the coordinates of img, after
//                    grouping. Cleared first.
//   hitThreshold, winStride, padding - forwarded to detect().
//   scale0         - ratio between consecutive pyramid levels; with
//                    scale0 <= 1 only the original scale is processed.
//   groupThreshold - forwarded to groupRectangles().
//
// The image is only ever shrunk, so objects smaller than the detection
// window cannot be found.
void HOGDescriptor::detectMultiScale(
    const Mat& img, vector<Rect>& foundLocations,
    double hitThreshold, Size winStride, Size padding,
    double scale0, int groupThreshold) const
{
    double scale = 1.;
    foundLocations.clear();
    int i, levels = 0;
    const int maxLevels = 64;

    // One working set per thread, each with a buffer large enough for the
    // biggest pyramid level.
    int t, nthreads = getNumThreads();
    vector<HOGThreadData> threadData(nthreads);

    for( t = 0; t < nthreads; t++ )
        threadData[t].smallerImgBuf.create(img.size(), img.type());

    // Collect the scale of every level. The loop stops at the first scale at
    // which the shrunk image is smaller than the window; that scale is
    // dropped by the resize() below, except that at least one level
    // (scale 1) is always kept.
    vector<double> levelScale(maxLevels);
    for( levels = 0; levels < maxLevels; levels++ )
    {
        levelScale[levels] = scale;
        if( cvRound(img.cols/scale) < winSize.width ||
            cvRound(img.rows/scale) < winSize.height ||
            scale0 <= 1 )
            break;
        scale *= scale0;
    }
    levels = std::max(levels, 1);
    levelScale.resize(levels);

    // Outer loop over the pyramid levels (parallel when OpenMP is enabled).
#ifdef _OPENMP
    #pragma omp parallel for num_threads(nthreads) schedule(dynamic)
#endif // _OPENMP
    for( i = 0; i < levels; i++ )
    {
        // Working set of the thread executing this iteration.
        HOGThreadData& tdata = threadData[getThreadNum()];
        double scale = levelScale[i];
        Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale));
        Mat smallerImg(sz, img.type(), tdata.smallerImgBuf.data);

        // At the original size just reference the input pixels; otherwise
        // shrink the image into the thread's buffer.
        if( sz == img.size() )
            smallerImg = Mat(sz, img.type(), img.data, img.step);
        else
            resize(img, smallerImg, sz);

        // Single-scale detection on this level.
        detect(smallerImg, tdata.locations, hitThreshold, winStride, padding);

        // Map the hits back to the original image: position and window size
        // are both multiplied by the level's scale.
        Size scaledWinSize = Size(cvRound(winSize.width*scale), cvRound(winSize.height*scale));
        for( size_t j = 0; j < tdata.locations.size(); j++ )
            tdata.rectangles.push_back(Rect(
                cvRound(tdata.locations[j].x*scale),
                cvRound(tdata.locations[j].y*scale),
                scaledWinSize.width, scaledWinSize.height));
    }
    // FIX: a stray closing brace used to follow the loop above. It ended the
    // function early and left the code below outside of any function.

    // Gather the rectangles found by all threads.
    for( t = 0; t < nthreads; t++ )
    {
        HOGThreadData& tdata = threadData[t];
        std::copy(tdata.rectangles.begin(), tdata.rectangles.end(),
                  std::back_inserter(foundLocations));
    }

    // Merge the raw rectangles into the final detections.
    groupRectangles(foundLocations, groupThreshold, 0.2);
}
// Returns the built-in detector coefficients.
// In this listing the coefficient table is a two-element placeholder
// ({0,0}), so the returned vector has two zero elements.
vector<float> HOGDescriptor::getDefaultPeopleDetector()
{
    static const float detector[] = {0,0};
    const size_t count = sizeof(detector)/sizeof(detector[0]);
    const float* const first = detector;
    return vector<float>(first, first + count);
}
}
// The above is an annotated walk-through of the HOG code. Questions are welcome, and so are corrections if anything is wrong.