在人体检测系统中进行HOG计算时一般使用L2-norm,Dalal的文章也验证了对于人体检测系统使用L2-norm的时候效果最好。
1.第一步的作用在于将图像规范化,通过两个方面GAMMA和COLOUR, GAMMA方面的话,其规范化后图像中的参量可以被直接提取出来,方便后面的操作,颜色的规范化则是去除图像中光强值同时保留颜色值,例如去除阴影或者光强变化的像素。
在低FPPW(false positives per window,每窗口误检率)的情况下,平方根形式的GAMMA压缩能够提高检测表现;而对数(LOG)压缩过强,反而会降低表现。
2.梯度的计算方式直接影响识别的表现,不同的梯度计算方法在FPPW上的表现不同。总体而言,较为简单的梯度算子(例如一维中心差分模板[-1,0,1])能够获得更好的效果。此外,对于彩色图像,则是对每一个颜色通道独立计算梯度,并取其中范数(幅值)最大的一个作为该像素的梯度向量。求导不仅能够捕捉人物轮廓信息,也能进一步削弱光照差异。
3.这个模块的主要目的在于让每个像素按照其梯度方向进行加权投票,并在局部空间区域(CELL)内按方向区间(bin)累计投票;投票的权重用来反映该像素梯度幅值的大小。
4.针对图像中前景和背景之间对比度及光照的不同,运用归一化使得信息得以统一:将相邻的若干个单元(CELL)组成块(BLOCK),把块内各单元的直方图串接成向量并进行归一化(最关键部分)。块与块之间是相互重叠的,这样每个单元的信息会出现在多个块的向量中,使得最终的特征能够反映更多的图像信息。这可以大幅度提高图像识别能力。
5.在64×128的人物检测窗口中,人体四周留有约16个像素的边界(margin),这些上下文信息大大降低了识别的错误率;而这一步就是将检测窗口内所有重叠块的HOG描述子收集并整理成最终的特征向量。
6.将之前整理好的特征向量送入SVM进行分类,来判断其是否是人物。
Hog.h
#pragma once

#include <cassert>
#include <cstddef>
#include <map>
#include <utility>
#include <vector>

#define PI 3.1416
typedef unsigned char BYTE;
#define GradType BYTE
#define MagType BYTE
#define FeaType double

/// Cache of per-block feature buffers keyed by the block's (y, x) offset.
///
/// The cache OWNS every buffer handed to AddBlock(): buffers must have been
/// allocated with `new double[]` and are released with `delete[]` by
/// deleteBlock(), deleteAllBlocks() or the destructor.
class BlockManager
{
private:
    typedef std::pair<int, int> Key;
    typedef std::map<Key, double*> Cache;

    Cache cache;
    int level;

    // Copying would make two managers delete[] the same buffers, so the
    // copy operations are declared private and left undefined.
    BlockManager(const BlockManager&);
    BlockManager& operator=(const BlockManager&);

public:
    BlockManager() : level(0) {}

    /// Returns true when a buffer is cached for block offset (y, x).
    bool find(int y, int x)
    {
        return cache.find(Key(y, x)) != cache.end();
    }

    /// Returns the cached buffer for (y, x), or NULL when none is cached.
    /// Unlike map::operator[], the lookup never inserts an empty entry.
    double* GetBlockData(int y, int x)
    {
        const Cache::iterator it = cache.find(Key(y, x));
        return it == cache.end() ? NULL : it->second;
    }

    /// Stores `data` for (y, x) and takes ownership of it. When the key is
    /// already present the existing entry is kept and `data` is NOT stored.
    void AddBlock(int y, int x, double* data)
    {
        cache.insert(Cache::value_type(Key(y, x), data));
    }

    /// Records the pyramid level the cached blocks belong to.
    void SetLevel(const int lev) { level = lev; }

    /// Frees and removes the buffer cached for (y, x); no-op when absent.
    void deleteBlock(int y, int x)
    {
        const Cache::iterator it = cache.find(Key(y, x));
        if (it == cache.end())
            return;
        delete[] it->second;
        cache.erase(it);
    }

    /// Frees every cached buffer and empties the cache.
    void deleteAllBlocks()
    {
        // Free all buffers first, then clear once. Erasing inside the loop
        // and restarting from begin() made the loop increment skip (and
        // leak) every other entry.
        for (Cache::iterator it = cache.begin(); it != cache.end(); ++it)
            delete[] it->second;
        cache.clear();
    }

    ~BlockManager() { deleteAllBlocks(); }
};

/// HOG (histogram of oriented gradients) feature extractor for a sliding
/// detection window over a grey-scale image.
class Hog
{
public:
    int img_width;       // width of the image being scanned
    int img_height;      // height of the image being scanned
    int window_width;    // detection window width (e.g. 64)
    int window_height;   // detection window height (e.g. 128)
    int CellSize;        // cell edge length in pixels (e.g. 8)
    int blkcell;         // block edge length measured in cells (e.g. 2 => 2x2 cells)
    int blocksize;       // block edge length in pixels (CellSize * blkcell)
    int blockSkipStep;   // block stride inside the window; 8 with blocksize 16 gives
                         // 1/2 overlap, 4 gives 3/4 overlap (more blocks, more work)
    int windowSkipStep;  // stride of the detection window over the image
    int m_histBin;       // number of orientation bins covering 0..180 degrees (e.g. 9)
    int win_fea_dim;     // number of feature values in one detection window
    int xblkSkipStepNum; // number of block positions per window, horizontally
    int yblkSkipStepNum; // number of block positions per window, vertically

private:
    int max_pyramid_height;     // number of pyramid levels that can hold a window
    int current_pyramid_height; // current pyramid level, starting at 1
    double ratio;               // scale factor between pyramid levels
    bool isGaussianWeight;      // whether Gaussian weighting is enabled
    BYTE* RGBdata;              // interleaved 3-channel image, owned (delete[])
    BYTE* greydata;             // grey image, owned (delete[])
    GradType* grad;             // per-pixel gradient magnitude, owned
    MagType* theta;             // per-pixel gradient angle in degrees, owned
    // Block buffers of the current window. The pointers are borrowed from
    // `blockmanager`, which owns and frees them.
    std::vector < FeaType* > windowHOGFeature;
    BlockManager blockmanager;

    bool GetImgData();
    void Gamma();
    void RGB2Grey();
    // The definition takes the row offset first, then the column offset.
    double* GetBlkFeature(int offsetY, int offsetX);
    double GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy);
    void NextPyramid();

public:
    GradType* get_grad() { return grad; }
    MagType* get_mag() { return theta; }
    void ComputeGradient();

    /// Builds an extractor for a winW x winH window. The image size starts at
    /// 64x128 and can be changed afterwards with set_img_size().
    Hog(const int winW, const int winH, const int CellSize, const int blkcell,
        const int blockSkipStep, const int windowSkipStep, const int m_histBin,
        const double rat)
        : img_width(64), img_height(128),
          window_width(winW), window_height(winH),
          CellSize(CellSize), blkcell(blkcell), blocksize(CellSize * blkcell),
          blockSkipStep(blockSkipStep), windowSkipStep(windowSkipStep),
          m_histBin(m_histBin),
          win_fea_dim(0), xblkSkipStepNum(0), yblkSkipStepNum(0),
          max_pyramid_height(0), current_pyramid_height(1), ratio(rat),
          isGaussianWeight(false),
          // Owning pointers start as NULL so that the destructor and
          // ComputeGradient() never delete[] an indeterminate pointer.
          RGBdata(NULL), greydata(NULL), grad(NULL), theta(NULL)
    {
        assert(blockSkipStep > 0);

        // Count how many times the image can be halved while a full window
        // still fits inside it.
        int ww = img_width;
        int hh = img_height;
        while (ww >= window_width && hh >= window_height)
        {
            ww = ww / 2;
            hh = hh / 2;
            max_pyramid_height++;
        }

        // Integer division already truncates, so no floor() is needed.
        xblkSkipStepNum = (window_width - blocksize) / blockSkipStep + 1;
        yblkSkipStepNum = (window_height - blocksize) / blockSkipStep + 1;
        win_fea_dim = xblkSkipStepNum * yblkSkipStepNum * blkcell * blkcell * m_histBin;
        assert(max_pyramid_height >= 1);
    }

    int getwindow_width() { return window_width; }
    int getwindow_height() { return window_height; }
    void GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg);
    void L2Normalize(double* vec, int length);
    void set_img_size(const int h, const int w) { img_height = h; img_width = w; }
    void SingleScaleDetect();
    void MultiScaleDetect();

    /// Stores `src` as the grey image. The object takes ownership: `src` is
    /// released with delete[] in the destructor, so it must come from
    /// `new BYTE[]`. A previously stored buffer is not freed here.
    void setgreyData(BYTE* src) { this->greydata = src; }

    GradType* get_grad_data() { return grad; }
    void writeHogFea2File();
    std::vector < FeaType* > getwindowHOGFeature() { return windowHOGFeature; }

    ~Hog()
    {
        // The buffers referenced by windowHOGFeature are owned by
        // blockmanager and freed by its destructor; deleting them here as
        // well would free them twice.
        delete[] RGBdata;
        delete[] greydata;
        delete[] grad;
        delete[] theta;
    }
};
// Hog.cpp
#include "stdafx.h"
#include "Hog.h"

#include <cassert>
#include <cfloat>
#include <cmath>
#include <cstring>
#include <fstream>
#include <limits>

namespace
{
// Adds one pixel's vote to a cell histogram: `binWeight` of the weighted
// magnitude goes to `bin`, the remainder to `nextBin`. `wx` and `wy` are the
// spatial weights of this cell (pass 1.0 when an axis is not interpolated).
void AddOrientationVote(double* cellHist, int bin, int nextBin, double magnitude,
                        double binWeight, double wx, double wy)
{
    cellHist[bin] += magnitude * binWeight * wx * wy;
    cellHist[nextBin] += magnitude * (1.0 - binWeight) * wx * wy;
}
} // namespace

/// Writes every block feature of the current window to "example.txt",
/// one value per line after a one-line banner.
void Hog::writeHogFea2File()
{
    std::ofstream myfile("example.txt");
    if (!myfile)
        return;
    myfile << "Writing HOG Feature to File.\n";

    // Sizes come from the configuration instead of the former hard-coded
    // 105 blocks x 36 values, which only matched a 64x128 window with
    // 2x2 cells of 8 pixels, 9 bins and a block stride of 8.
    const int valuesPerBlock = blkcell * blkcell * m_histBin;
    assert(windowHOGFeature.size() ==
           static_cast<std::size_t>(xblkSkipStepNum * yblkSkipStepNum));

    for (std::size_t z = 0; z < windowHOGFeature.size(); ++z)
        for (int i = 0; i < valuesPerBlock; ++i)
            myfile << windowHOGFeature[z][i] << '\n';
}

/// Computes per-pixel gradient magnitude (`grad`) and unsigned orientation in
/// degrees (`theta`) from `greydata` with central differences. Border pixels
/// copy the value of their nearest interior neighbour.
void Hog::ComputeGradient()
{
    assert(greydata != NULL);

    delete[] grad;
    grad = NULL;
    delete[] theta;
    theta = NULL;

    const int pixelCount = img_height * img_width;
    grad = new GradType[pixelCount]();  // value-initialised to zero
    theta = new MagType[pixelCount]();

    // Central differences need a neighbour on both sides.
    if (img_width < 3 || img_height < 3)
        return;

    const double gradMax = std::numeric_limits<GradType>::max();

    // Interior pixels only: the former loops ran up to the last row/column
    // and read one row/column past the end of greydata.
    for (int i = 1; i < img_height - 1; i++)
    {
        for (int j = 1; j < img_width - 1; j++)
        {
            const int idx = i * img_width + j;
            const double dx = greydata[idx + 1] - greydata[idx - 1];
            const double dy = greydata[idx + img_width] - greydata[idx - img_width];

            // Saturate: the magnitude can reach ~360 for 8-bit input, and
            // converting an out-of-range double to GradType is undefined.
            double magnitude = std::sqrt(dx * dx + dy * dy);
            if (magnitude > gradMax)
                magnitude = gradMax;
            grad[idx] = static_cast<GradType>(magnitude);

            // Fold the signed angle into [0, PI], then convert to degrees.
            double angle = std::atan2(dy, dx);
            if (angle < 0)
                angle += PI;
            if (angle > PI)
                angle -= PI;
            this->theta[idx] = static_cast<MagType>(angle * 180 / PI);
        }
    }

    // Top and bottom rows take the values of the adjacent row.
    const int lastRow = img_height - 1;
    for (int j = 0; j < img_width; j++)
    {
        grad[j] = grad[img_width + j];
        this->theta[j] = this->theta[img_width + j];
        grad[lastRow * img_width + j] = grad[(lastRow - 1) * img_width + j];
        this->theta[lastRow * img_width + j] = this->theta[(lastRow - 1) * img_width + j];
    }

    // Left and right columns take the values of the adjacent column; running
    // after the row pass also fills the four corners.
    const int lastCol = img_width - 1;
    for (int i = 0; i < img_height; i++)
    {
        grad[i * img_width] = grad[i * img_width + 1];
        this->theta[i * img_width] = this->theta[i * img_width + 1];
        grad[i * img_width + lastCol] = grad[i * img_width + lastCol - 1];
        this->theta[i * img_width + lastCol] = this->theta[i * img_width + lastCol - 1];
    }
}

/// Scales `vec` in place by 1 / sqrt(sum of squares + FLT_EPSILON).
void Hog::L2Normalize(double* vec, int length)
{
    double squaredNorm = 0;
    for (int i = 0; i < length; i++)
        squaredNorm += vec[i] * vec[i];

    const double scale = 1.0 / std::sqrt(squaredNorm + FLT_EPSILON);
    for (int i = 0; i < length; i++)
        vec[i] *= scale;
}

/// Builds the L2-normalised histogram vector of the block whose top-left
/// pixel is (offsetY_againstImg, offsetX_againstImg), stores it in the block
/// cache and returns it. The cache owns the returned buffer.
///
/// The whole block must lie inside the image, and ComputeGradient() must have
/// been run for the current image. The spatial interpolation below is written
/// for blocks of 2x2 cells.
double* Hog::GetBlkFeature(int offsetY_againstImg, int offsetX_againstImg)
{
    assert(blkcell >= 2); // neighbouring-cell indices below need at least 2x2 cells

    const int featureLength = blkcell * blkcell * m_histBin;
    double* blkHOG = new double[featureLength];
    // Clear exactly what was allocated (the size used to be hard-coded to 36).
    std::memset(blkHOG, 0, featureLength * sizeof(double));

    const int halfCell = CellSize / 2;                  // centre of the first cell
    const int farEdge = CellSize * blkcell - halfCell;  // mirrored position at the far side
    const double regionSize = CellSize;
    const double binWidth = 180.0 / m_histBin;

    for (int cell_no_y = 0; cell_no_y < blkcell; cell_no_y++)
    {
        for (int cell_no_x = 0; cell_no_x < blkcell; cell_no_x++)
        {
            const int cell_idx = cell_no_y * blkcell + cell_no_x;
            double* const cellHist = blkHOG + m_histBin * cell_idx;
            const int cell_start_y = cell_no_y * CellSize;
            const int cell_start_x = cell_no_x * CellSize;

            for (int y = cell_start_y; y < cell_start_y + CellSize; y++)
            {
                for (int x = cell_start_x; x < cell_start_x + CellSize; x++)
                {
                    const int pixel = (offsetY_againstImg + y) * img_width + offsetX_againstImg + x;
                    const double magn = grad[pixel];
                    // A zero magnitude contributes nothing to any bin.
                    if (magn <= 0.0)
                        continue;

                    const double angle = this->theta[pixel];
                    int theta_idx = static_cast<int>(angle / binWidth);
                    if (theta_idx >= m_histBin) // an angle of 180 must not index past the last bin
                        theta_idx = m_histBin - 1;
                    const int next_idx = (theta_idx + 1) % m_histBin;

                    // Orientation weight relative to the centre of bin theta_idx.
                    const double tt = 1.0 - std::fabs(angle / binWidth - (theta_idx + 0.5));
                    // Spatial weights relative to the centre of the first cell.
                    const double fx0 = 1.0 - std::fabs(double(x - halfCell)) / regionSize;
                    const double fy0 = 1.0 - std::fabs(double(y - halfCell)) / regionSize;

                    const bool nearTopOrBottom = (y < halfCell || y > farEdge);
                    const bool nearLeftOrRight = (x < halfCell || x > farEdge);
                    const bool inCorner = (y <= halfCell && x <= halfCell) ||
                                          (y >= farEdge && x <= halfCell) ||
                                          (y >= farEdge && x >= farEdge) ||
                                          (x >= farEdge && y <= halfCell);

                    // The cases are mutually exclusive. They used to be
                    // independent `if`s with the final `else` attached only
                    // to the last one, so pixels handled by an earlier case
                    // were added a second time by the full interpolation.
                    if (inCorner)
                    {
                        // Block corners: orientation interpolation only.
                        AddOrientationVote(cellHist, theta_idx, next_idx, magn, tt, 1.0, 1.0);
                    }
                    else if (x > halfCell && x < CellSize && nearTopOrBottom)
                    {
                        // Top/bottom band, left half: share with the cell to the right.
                        AddOrientationVote(cellHist, theta_idx, next_idx, magn, tt, fx0, 1.0);
                        AddOrientationVote(cellHist + m_histBin, theta_idx, next_idx, magn, tt, 1.0 - fx0, 1.0);
                    }
                    else if (x > CellSize && x < farEdge && nearTopOrBottom)
                    {
                        // Top/bottom band, right half: share with the cell to the left.
                        AddOrientationVote(cellHist, theta_idx, next_idx, magn, tt, fx0, 1.0);
                        AddOrientationVote(cellHist - m_histBin, theta_idx, next_idx, magn, tt, 1.0 - fx0, 1.0);
                    }
                    else if (y > halfCell && y < CellSize && nearLeftOrRight)
                    {
                        // Left/right band, upper half: share with the cell below.
                        AddOrientationVote(cellHist, theta_idx, next_idx, magn, tt, fy0, 1.0);
                        AddOrientationVote(cellHist + m_histBin * blkcell, theta_idx, next_idx, magn, tt, 1.0 - fy0, 1.0);
                    }
                    else if (y > CellSize && y < farEdge && nearLeftOrRight)
                    {
                        // Left/right band, lower half: share with the cell above.
                        AddOrientationVote(cellHist, theta_idx, next_idx, magn, tt, fy0, 1.0);
                        AddOrientationVote(cellHist - m_histBin * blkcell, theta_idx, next_idx, magn, tt, 1.0 - fy0, 1.0);
                    }
                    else
                    {
                        // Everywhere else: trilinear interpolation across the four cells.
                        AddOrientationVote(blkHOG, theta_idx, next_idx, magn, tt, fx0, fy0);
                        AddOrientationVote(blkHOG + m_histBin, theta_idx, next_idx, magn, tt, 1.0 - fx0, fy0);
                        AddOrientationVote(blkHOG + m_histBin * 2, theta_idx, next_idx, magn, tt, fx0, 1.0 - fy0);
                        AddOrientationVote(blkHOG + m_histBin * 3, theta_idx, next_idx, magn, tt, 1.0 - fx0, 1.0 - fy0);
                    }
                } // for(x)
            } // for(y)
        } // for(cell_no_x)
    } // for(cell_no_y)

    // Normalise per block, then cache the result to avoid recomputing it for
    // overlapping windows.
    L2Normalize(blkHOG, featureLength);
    blockmanager.AddBlock(offsetY_againstImg, offsetX_againstImg, blkHOG);
    return blkHOG;
}

/// Kernel weight for (x, y) around the centre (cent_x, cent_y) with
/// bandwidths Hx and Hy; returns 0 outside the kernel's support.
double Hog::GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy)
{
    const int dx = x - cent_x;
    const int dy = y - cent_y;
    const double temp = 1 - ((double)(dx * dx) / (Hx * Hx) + (double)(dy * dy) / (Hy * Hy)) / 2;
    if (temp < 0)
        return 0.0;
    return 4.0 * temp / (2 * PI);
}

/// Collects the block features of the detection window whose top-left pixel
/// is (offsetY_againstImg, offsetX_againstImg) into windowHOGFeature, reusing
/// cached blocks where available.
void Hog::GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg)
{
    // The vector only borrows buffers owned by the block cache, so clearing
    // it does not leak.
    windowHOGFeature.clear();

    for (int i = 0; i < yblkSkipStepNum; i++)
    {
        for (int j = 0; j < xblkSkipStepNum; j++)
        {
            const int blockY = offsetY_againstImg + i * blockSkipStep;
            const int blockX = offsetX_againstImg + j * blockSkipStep;

            double* blkFea;
            if (blockmanager.find(blockY, blockX))
                blkFea = blockmanager.GetBlockData(blockY, blockX);
            else
                blkFea = GetBlkFeature(blockY, blockX);
            windowHOGFeature.push_back(blkFea);
        }
    }
}

/// Converts the interleaved 3-channel RGBdata into greydata using the weights
/// 0.299 / 0.587 / 0.114, then releases RGBdata.
void Hog::RGB2Grey()
{
    if (RGBdata == NULL)
        return; // nothing to convert (for example, a second call)

    if (greydata == NULL)
        greydata = new BYTE[img_width * img_height];

    for (int i = 0; i < img_height; i++)
    {
        for (int j = 0; j < img_width; j++)
        {
            const BYTE* px = RGBdata + (i * img_width + j) * 3;
            greydata[i * img_width + j] =
                static_cast<BYTE>(0.299 * px[0] + 0.587 * px[1] + 0.114 * px[2]);
        }
    }

    delete[] RGBdata;
    RGBdata = NULL; // prevents a second delete[] of the same buffer later
}

/// Replaces greydata with the next pyramid level, img / ratio in each
/// dimension, resampled with bilinear interpolation. Cached block features
/// are discarded; grad/theta still describe the previous level until
/// ComputeGradient() is called again.
void Hog::NextPyramid()
{
    assert(greydata != NULL);
    assert(ratio > 0.0);

    const int new_img_height = static_cast<int>(img_height / ratio);
    const int new_img_width = static_cast<int>(img_width / ratio);
    BYTE* new_greydata = new BYTE[new_img_height * new_img_width];

    for (int i = 0; i < new_img_height; i++)
    {
        // Source row as a real number. Storing it in an int discarded the
        // fractional part, so the interpolation weights were always 0 or 1.
        const double y0 = i * ratio;
        int y1 = static_cast<int>(y0);
        if (y1 > img_height - 1)
            y1 = img_height - 1;
        const int y2 = (y1 == img_height - 1) ? y1 : y1 + 1;
        const double fy = y0 - y1; // weight of row y2

        for (int j = 0; j < new_img_width; j++)
        {
            const double x0 = j * ratio;
            int x1 = static_cast<int>(x0);
            if (x1 > img_width - 1)
                x1 = img_width - 1;
            const int x2 = (x1 == img_width - 1) ? x1 : x1 + 1;
            const double fx = x0 - x1; // weight of column x2 (was computed from y)

            const double value =
                greydata[y1 * img_width + x1] * (1.0 - fx) * (1.0 - fy) +
                greydata[y1 * img_width + x2] * fx * (1.0 - fy) +
                greydata[y2 * img_width + x1] * (1.0 - fx) * fy +
                greydata[y2 * img_width + x2] * fx * fy;

            // Round once on the sum rather than truncating each term.
            new_greydata[i * new_img_width + j] = static_cast<BYTE>(value + 0.5);
        }
    }

    delete[] greydata;
    greydata = new_greydata;
    img_height = new_img_height;
    img_width = new_img_width;
    current_pyramid_height++;

    // The cached blocks belong to the previous level; drop the pointers that
    // referenced them before the cache frees the buffers.
    windowHOGFeature.clear();
    blockmanager.deleteAllBlocks();
    blockmanager.SetLevel(current_pyramid_height);
}

// Disabled in the original source and kept for reference. Hog.h declares
// SingleScaleDetect() and MultiScaleDetect(), but there is no active
// definition here.
// NOTE(review): ySkipStepNum below subtracts window_width from img_height;
// window_height looks intended - confirm before re-enabling.
/*void Hog::SingleScaleDetect()
{
    int xSkipStepNum = floor((img_width - window_width) / windowSkipStep + 1);
    int ySkipStepNum = floor((img_height - window_width) / windowSkipStep + 1);
    //double*imgHOGFeature = new double[blkcell*blkcell*m_histBin*xSkipStepNum*ySkipStepNum];
    for (int i = 0; i < ySkipStepNum; i++)
        for (int j = 0; j < xSkipStepNum; j++)
        {
            GetWindowFeature(i*windowSkipStep, j*windowSkipStep);
        }
}
void Hog::MultiScaleDetect()
{
    while (current_pyramid_height < max_pyramid_height)
    {
        SingleScaleDetect();
        NextPyramid();
    }
}*/