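// A good blog post on HOG: http://www.cnblogs.com/tornadomeet/archive/2012/08/15/2640754.html
// Besides HOG itself, the key parts of the code below are the sliding window and the scale transformation.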
void HOGDescriptor::detectMultiScale(
const Mat& img, vector& foundLocations,
double hitThreshold, Size winStride, Size padding,
double scale0, int groupThreshold) const
{
double scale = 1.;
foundLocations.clear();
int i, levels = 0;
const int maxLevels = 64;
int t, nthreads = getNumThreads();
vector threadData(nthreads);
for( t = 0; t < nthreads; t++ )
threadData[t].smallerImgBuf.create(img.size(), img.type()); //为什么每个线程数据存放的img都是和图像一样大小呢?
vector levelScale(maxLevels); // 记录尺度缩放比例,这里只是放大窗口,或者说缩小了图像大小
for( levels = 0; levels < maxLevels; levels++ )
{
levelScale[levels] = scale;
if( cvRound(img.cols/scale) < winSize.width || // 等价于 img.cols < winSize.width*scale,这个条件是说,窗口大小大于了图像大小。
cvRound(img.rows/scale) < winSize.height ||
scale0 <= 1 )
break;
scale *= scale0; // scale0 := 1.05 ,输入值,按这个比例进行缩放
}
levels = std::max(levels, 1);
levelScale.resize(levels); // vector容器的一种操作
{// 用一组大括号比较好
#ifdef _OPENMP // 通过宏来控制更好,不仅仅要打开OpenMP,还需要宏的定义
#pragma omp parallel for num_threads(nthreads) schedule(dynamic)
#endif // _OPENMP
for( i = 0; i < levels; i++ )// 若是多线程,可按不同尺度并行计算,否则就是一个尺度一个尺度的计算
{
HOGThreadData& tdata = threadData[getThreadNum()]; // 获取线程号;getNumThreads()为获取总线程数
double scale = levelScale[i];
Size sz(cvRound(img.cols/scale), cvRound(img.rows/scale)); // 图像新的大小
Mat smallerImg(sz, img.type(), tdata.smallerImgBuf.data); // 暂且理解smallerImg与tdata.smallerImgBuf.data 共享同一个地址空间
if( sz == img.size() )
smallerImg = Mat(sz, img.type(), img.data, img.step); // 不用resize
else
resize(img, smallerImg, sz); // resize到指定的大小,这时就把img数据拷贝到了tdata.smallerImgBuf.data
detect(smallerImg, tdata.locations, hitThreshold, winStride, padding);
// 窗口大小映射到相应尺度下的窗口大小
Size scaledWinSize = Size(cvRound(winSize.width*scale), cvRound(winSize.height*scale));
// 记录检测到的矩形区域
for( size_t j = 0; j < tdata.locations.size(); j++ )
tdata.rectangles.push_back(Rect(
cvRound(tdata.locations[j].x*scale), // 左上角坐标恢复到源大小图像的位置
cvRound(tdata.locations[j].y*scale),
scaledWinSize.width, scaledWinSize.height));
}
}
// 将
for( t = 0; t < nthreads; t++ )
{
HOGThreadData& tdata = threadData[t];
std::copy(tdata.rectangles.begin(), tdata.rectangles.end(),//如果要把一个序列(sequence)拷贝到一个容器(container)中去,通常用std::copy算法
std::back_inserter(foundLocations)); // back_inserter是迭代器,将元素插入到容器尾部
}
groupRectangles(foundLocations, groupThreshold, 0.2);// 将重复的矩形框进行合并
}
// Returns how many sliding-window positions fit along each axis of an image of
// size imageSize when the window (winSize) advances by winStride per step.
Size HOGCache::windowsInImage(Size imageSize, Size winStride) const
{
    Size counts;
    // Steps that fit after placing the first window, plus that first window.
    counts.width  = (imageSize.width  - winSize.width )/winStride.width  + 1;
    counts.height = (imageSize.height - winSize.height)/winStride.height + 1;
    return counts;
}
void HOGDescriptor::detect(const Mat& img,
vector& hits, double hitThreshold,
Size winStride, Size padding , const vector& locations/*默认创建一个空的vector*/) const
{
hits.clear();
if( svmDetector.empty() )
return;
if( winStride == Size() )
winStride = cellSize; // cellSize = 8*8 ,padding = 32*32
Size cacheStride(gcd(winStride.width, blockStride.width),
gcd(winStride.height, blockStride.height));
size_t nwindows = locations.size(); // nwindows = 0
padding.width = (int)alignSize(std::max(padding.width, 0), cacheStride.width); // 不理解。暂且理解为扩充后的padding大小
padding.height = (int)alignSize(std::max(padding.height, 0), cacheStride.height);
Size paddedImgSize(img.cols + padding.width*2, img.rows + padding.height*2); // 扩充后的图像大小
HOGCache cache(this, img, padding, padding, nwindows == 0, cacheStride);
if( !nwindows )
nwindows = cache.windowsInImage(paddedImgSize, winStride).area(); // 这里得到的就是滑动窗口的数量,具体windowsInImage函数;注意nWindows是size类型
const HOGCache::BlockData* blockData = &cache.blockData[0];
int nblocks = cache.nblocks.area();
int blockHistogramSize = cache.blockHistogramSize;
size_t dsize = getDescriptorSize();
double rho = svmDetector.size() > dsize ? svmDetector[dsize] : 0;
vector blockHist(blockHistogramSize);
// 按每个滑动窗口进行计算
for( size_t i = 0; i < nwindows; i++ )
{
Point pt0;
if( !locations.empty() )
{
pt0 = locations[i];
if( pt0.x < -padding.width || pt0.x > img.cols + padding.width - winSize.width ||
pt0.y < -padding.height || pt0.y > img.rows + padding.height - winSize.height )
continue;
}
else
{
pt0 = cache.getWindow(paddedImgSize, winStride, (int)i).tl() - Point(padding);
CV_Assert(pt0.x % cacheStride.width == 0 && pt0.y % cacheStride.height == 0);
}
double s = rho;
const float* svmVec = &svmDetector[0];
int j, k;
for( j = 0; j < nblocks; j++, svmVec += blockHistogramSize )
{
const HOGCache::BlockData& bj = blockData[j];
Point pt = pt0 + bj.imgOffset;
const float* vec = cache.getBlock(pt, &blockHist[0]);
for( k = 0; k <= blockHistogramSize - 4; k += 4 )
s += vec[k]*svmVec[k] + vec[k+1]*svmVec[k+1] +
vec[k+2]*svmVec[k+2] + vec[k+3]*svmVec[k+3];
for( ; k < blockHistogramSize; k++ )
s += vec[k]*svmVec[k];
}
if( s >= hitThreshold )
hits.push_back(pt0);
}
}