Hog行人检测


Hog行人检测_第1张图片





HOG特征向量归一化

对block块内的HOG特征向量进行归一化。对block块内特征向量的归一化主要是为了使特征向量空间对光照,阴影和边缘变化具有鲁棒性。还有归一化是针对每一个block进行的,一般采用的归一化函数有以下四种:

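Hog行人检测_第2张图片

设 $v$ 为未归一化的block特征向量,$\|v\|_k$ 为其 $k$ 范数,$\varepsilon$ 为一个很小的常数,则四种归一化方式分别为:

- L2-norm:$v \leftarrow v / \sqrt{\|v\|_2^2 + \varepsilon^2}$
- L2-Hys:先做L2-norm,再把 $v$ 的各分量截断到不超过0.2,然后重新做一次L2-norm
- L1-norm:$v \leftarrow v / (\|v\|_1 + \varepsilon)$
- L1-sqrt:$v \leftarrow \sqrt{v / (\|v\|_1 + \varepsilon)}$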

在人体检测系统中进行HOG计算时一般使用L2-norm,Dalal的文章也验证了对于人体检测系统使用L2-norm的时候效果最好。

关于计算直方图时用到的三线性插值

Hog行人检测_第3张图片
Hog行人检测_第4张图片
Hog行人检测_第5张图片
Hog行人检测_第6张图片
Hog行人检测_第7张图片







Hog行人检测_第8张图片

1.第一步的作用在于将图像规范化,通过两个方面GAMMA和COLOUR, GAMMA方面的话,其规范化后图像中的参量可以被直接提取出来,方便后面的操作,颜色的规范化则是去除图像中光强值同时保留颜色值,例如去除阴影或者光强变化的像素。
在低FPPW(false positives per window,每窗口误检率)的情况下,对每个颜色通道做平方根GAMMA压缩能够提高检测表现,而LOG压缩则因为压缩过强起到了反作用。
2.梯度的计算直接影响识别的表现,不同的梯度计算方法在FPPW上的表现不同,总体而言,较为简单的梯度算子(例如一维的[-1,0,1]模板)能够获得更好的效果。此外,对于彩色图像,则是对每一个颜色通道独立计算梯度,并取范数最大的那个通道的梯度作为该像素的梯度向量。求导不仅能够捕捉人物轮廓信息,也能进一步削弱光强差异。
3.这个模块的主要目的在于让每个像素按其梯度方向进行加权投票,并在局部空间区域(CELL)内把投票累加到方向直方图中,投票的权重反映该像素梯度幅值的大小。
4.针对图像中前景和背景之间的信息的不同,运用归一化使得信息得以统一,通过局部空间单元的信息组成向量(最关键部分),空间中的模块是重合的,这样每个单元可以包含多个单元的信息,使得向量能够反映更多的图像信息。这可以大幅度提高图像识别能力。
5.HOG检测窗口在人物四周各保留约16个像素的边距,这些背景上下文信息大大降低了识别的错误率;而这一步就是把检测窗口内所有block的HOG向量收集起来,串接成最终的特征向量。
6.将之前整理的向量送入SVM进行分类,来判断其是否是人物。





Hog.h

#pragma once
#include<vector>
#include<map>
// Approximation of pi used by this file for radian <-> degree conversion
// and in Hog::GaussianKernel.
#define PI 3.1416
// Unsigned 8-bit sample type used for the RGB and grey image buffers.
typedef unsigned char BYTE;
// Element type of Hog::grad, the buffer ComputeGradient fills with the
// gradient magnitude sqrt(dx*dx + dy*dy).
// NOTE(review): with 8-bit input the magnitude can reach ~360, which does
// not fit in a BYTE - confirm whether a wider type is wanted here.
#define GradType BYTE
// Element type of Hog::theta, the buffer ComputeGradient fills with the
// gradient angle in degrees. Despite the "Mag" in the name it is used for
// the angle, not the magnitude.
#define MagType BYTE
// Element type of the per-block feature arrays stored in
// Hog::windowHOGFeature.
#define FeaType double
// Cache of per-block HOG descriptors, keyed by the block's (y, x) offset.
//
// Ownership: every buffer stored through AddBlock() is released by the
// manager with delete[] (in deleteBlock, deleteAllBlocks and the
// destructor), so it must have been allocated with new[] and must not be
// freed by the caller afterwards.
class BlockManager
{
private:
	typedef std::pair<int, int> BlockKey;
	typedef std::map<BlockKey, double*> BlockCache;

	BlockCache cache;
	int level;

	// Copying would leave two managers calling delete[] on the same buffers,
	// so it is disabled (declared but intentionally never defined).
	BlockManager(const BlockManager&);
	BlockManager& operator=(const BlockManager&);

public:
	BlockManager() : level(0) {}

	// Returns true when a descriptor is cached for the block at (y, x).
	bool find(int y, int x)
	{
		return cache.find(BlockKey(y, x)) != cache.end();
	}
	// Returns the cached descriptor for the block at (y, x), or NULL when
	// there is none. A miss no longer creates an empty cache entry.
	double*GetBlockData(int y, int x)
	{
		BlockCache::iterator it = cache.find(BlockKey(y, x));
		if (it == cache.end())
			return NULL;
		return it->second;
	}
	// Stores `data` for the block at (y, x). If that block already has an
	// entry the existing one is kept and `data` is NOT stored, so callers
	// should check find() first (as Hog::GetWindowFeature does).
	void AddBlock(int y, int x, double*data)
	{
		cache.insert(BlockCache::value_type(BlockKey(y, x), data));
	}
	// Records the pyramid level the cached blocks belong to.
	void SetLevel(const int lev)
	{
		level = lev;
	}
	// Frees and forgets the descriptor of the block at (y, x); does nothing
	// when the block is not cached.
	void deleteBlock(int y, int x)
	{
		BlockCache::iterator it = cache.find(BlockKey(y, x));
		if (it == cache.end())
			return;
		delete[] it->second;
		cache.erase(it);
	}
	// Frees every cached descriptor and empties the cache.
	void deleteAllBlocks()
	{
		// Release all buffers first, then drop the entries in one go. Erasing
		// while the loop header also advances the iterator would skip entries.
		for (BlockCache::iterator it = cache.begin(); it != cache.end(); ++it)
			delete[] it->second;
		cache.clear();
	}
	~BlockManager()
	{
		deleteAllBlocks();
	}
};
class Hog
{
public:
	int img_width;//待检测图片的宽度
	int img_height;//待检测图片的高度
	int window_width;//检测窗口的宽度,64
	int window_height;//检测窗口的高度,128
	int CellSize;//cell的大小,设为8
	int blkcell;//block尺寸是cell的几倍,2*2
	int blocksize;//block的大小
	int blockSkipStep;//Block在检测窗口中上下移动尺寸为8,与blocksize=16相比,
	//即overlap=1/2,blockSkipStep减小到4使得overlap增加到3/4后,可使精度增加,但计算量增大
	int windowSkipStep;//滑动窗口在检测图片中滑动的尺寸为8
	int m_histBin;//180度分几个区间,设为9,即1个cell的梯度直方图化成9个bin
	int win_fea_dim;
	int xblkSkipStepNum;
	int yblkSkipStepNum;
private:

	int max_pyramid_height;//图像金字塔高度
	int current_pyramid_height;//当前图像金字塔高度
	double ratio;//缩放比例
	bool isGaussianWeight;//是否使用高斯权重
	BYTE*RGBdata;
	BYTE*greydata;
	GradType*grad;//梯度矩阵
	MagType*theta;//角度矩阵
	std::vector < FeaType* > windowHOGFeature;
	BlockManager blockmanager;

	bool GetImgData();

	void Gamma();
	void RGB2Grey();
	double* GetBlkFeature(int offsetX, int offsetY);
	double GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy);
	void NextPyramid();
public:
	GradType*get_grad(){ return grad; }
	MagType*get_mag(){ return theta; }
	void ComputeGradient();
	Hog(const int winW, const int winH, const int CellSize,
		const int blkcell, const int blockSkipStep, const int windowSkipStep,
		const int m_histBin, const double rat) :ratio(rat),
		window_width(winW), window_height(winH),
		CellSize(CellSize), blkcell(blkcell), blockSkipStep(blockSkipStep),
		windowSkipStep(windowSkipStep), m_histBin(m_histBin)
	{
		blocksize = CellSize*blkcell;
		//RGBdata = new BYTE[imgw*imgh * 3]; 
		max_pyramid_height = 0;
		current_pyramid_height = 1;
		int ww = img_width = 64;
		int hh = img_height = 128;
		while (ww >= window_width&&hh >= window_height)
		{
			ww = ww / 2;
			hh = hh / 2;
			max_pyramid_height++;
		}
		xblkSkipStepNum = floor((window_width - blkcell * CellSize) / blockSkipStep + 1);
		yblkSkipStepNum = floor((window_height - blkcell * CellSize) / blockSkipStep + 1);
		win_fea_dim = xblkSkipStepNum*yblkSkipStepNum*blkcell*blkcell*m_histBin;
		_ASSERTE(max_pyramid_height >= 1);
	};
	int getwindow_width(){ return window_width; };
	int getwindow_height(){ return window_height; };
	void GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg);
	void L2Normalize(double*vec, int length);
	void set_img_size(const int h, const int w){ img_height = h; img_width = w; }
	void SingleScaleDetect();
	void MultiScaleDetect();
	void setgreyData(BYTE*src){ this->greydata = src; }
	GradType*get_grad_data(){ return grad; }
	void writeHogFea2File();
	std::vector < FeaType* >getwindowHOGFeature(){ return windowHOGFeature; }
	~Hog()
	{
		if (RGBdata != NULL)
			delete[]RGBdata;
		if (greydata != NULL)
		{
			delete[]greydata;
		}
		for (int i = 0; i < windowHOGFeature.size(); i++)
			if (windowHOGFeature[i] != NULL)
				delete[]windowHOGFeature[i];
		delete[]grad;
		delete[]theta;
	};
};

Hog.cpp

#include "stdafx.h"
#include "Hog.h"
#include<cmath>
#include <fstream>


void Hog::writeHogFea2File()
{
	std::ofstream myfile;
	myfile.open("example.txt");
	myfile << "Writing HOG Feature to File.\n";
	_ASSERTE(windowHOGFeature.size() == 105);
	for (int z = 0; z < 105; ++z)
	{
		for (int i = 0; i < 36; i++)
			myfile << windowHOGFeature[z][i] << std::endl;
	}
	myfile.close();
}
// Fills `grad` (gradient magnitude) and `theta` (unsigned gradient angle in
// degrees, 0..180) for every pixel of `greydata`, using central differences.
// Border pixels copy the value of their nearest interior neighbour.
// Both buffers are reallocated to img_height*img_width elements.
// Does nothing when there is no grey image or the image is smaller than
// 3x3, because no pixel then has all four neighbours.
void Hog::ComputeGradient()
{
	if (greydata == NULL || img_width < 3 || img_height < 3)
		return;

	const int pixelCount = img_height*img_width;
	delete[]grad;      // delete[] on NULL is a no-op
	grad = new GradType[pixelCount];
	delete[]theta;
	theta = new MagType[pixelCount];

	// Largest magnitude that fits the storage type (GradType is BYTE in
	// this file). sqrt(dx*dx + dy*dy) can reach ~360 for 8-bit input, so the
	// value is saturated instead of being converted out of range.
	const double maxMagnitude = 255.0;

	// Interior pixels only: the central difference reads row i +/- 1 and
	// column j +/- 1, so the last row/column must be excluded here.
	for (int i = 1; i < img_height - 1; i++)
		for (int j = 1; j < img_width - 1; j++)
		{
			const int idx = i*img_width + j;
			const double dx = greydata[idx + 1] - greydata[idx - 1];
			const double dy = greydata[idx + img_width] - greydata[idx - img_width];

			const double magnitude = sqrt(dx*dx + dy*dy);
			grad[idx] = static_cast<GradType>(magnitude > maxMagnitude ? maxMagnitude : magnitude);

			// Fold the signed direction into the unsigned range [0, PI].
			double angle = atan2(dy, dx);
			if (angle < 0)
				angle = angle + PI;
			if (angle > PI)
				angle = angle - PI;
			this->theta[idx] = static_cast<MagType>(angle * 180 / PI);
		}

	// Border pixels take the value of their neighbour: first the top and
	// bottom rows, then the left and right columns (which also settles the
	// four corners).
	int i = 0;
	for (int j = 0; j < img_width; j++) {
		grad[i*img_width + j] = grad[(i + 1)*img_width + j];
		this->theta[i*img_width + j] = this->theta[(i + 1)*img_width + j];
	}
	i = img_height - 1;
	for (int j = 0; j < img_width; j++) {
		grad[i*img_width + j] = grad[(i - 1)*img_width + j];
		this->theta[i*img_width + j] = this->theta[(i - 1)*img_width + j];
	}
	int j = 0;
	for (i = 0; i < img_height; i++) {
		grad[i*img_width + j] = grad[i*img_width + j + 1];
		this->theta[i*img_width + j] = this->theta[i*img_width + j + 1];
	}
	j = img_width - 1;
	for (i = 0; i < img_height; i++) {
		grad[i*img_width + j] = grad[i*img_width + j - 1];
		this->theta[i*img_width + j] = this->theta[i*img_width + j - 1];
	}

}

// Scales `vec` (of `length` elements) in place so that its Euclidean norm
// becomes approximately 1. FLT_EPSILON is added under the square root so an
// all-zero vector does not cause a division by zero.
void Hog::L2Normalize(double*vec, int length)
{
	double squaredNorm = 0;
	for (int k = 0; k < length; ++k)
	{
		const double component = vec[k];
		squaredNorm += component * component;
	}

	const double scale = (double)1.0 / sqrt(squaredNorm + FLT_EPSILON);

	for (int k = 0; k < length; ++k)
		vec[k] *= scale;
}

// Splits one pixel coordinate inside a block between the two nearest cell
// centres along one axis (cell c is centred at c*cellSize + cellSize/2).
// Outputs the two cell indices and the share that goes to the upper one;
// the lower cell receives the rest. Pixels lying outside the outermost cell
// centres are assigned entirely to the outermost cell, so no vote is lost
// and no cell outside [0, cellCount) is ever addressed.
static void SplitAcrossCells(int pixel, int cellSize, int cellCount,
	int &lowerCell, int &upperCell, double &upperWeight)
{
	// Position in units of cells, measured from the centre of cell 0.
	const double cellPos = double(pixel - cellSize / 2) / double(cellSize);
	const int base = static_cast<int>(floor(cellPos));
	if (base < 0) {
		lowerCell = upperCell = 0;
		upperWeight = 0.0;
	}
	else if (base >= cellCount - 1) {
		lowerCell = upperCell = cellCount - 1;
		upperWeight = 0.0;
	}
	else {
		lowerCell = base;
		upperCell = base + 1;
		upperWeight = cellPos - base;
	}
}

// Computes the L2-normalised HOG descriptor of the block whose top-left
// pixel is (offsetY_againstImg, offsetX_againstImg) in the image.
//
// The result holds blkcell*blkcell histograms of m_histBin bins each, cell
// after cell in row-major order. Each pixel votes with its gradient
// magnitude, shared by linear interpolation between the two nearest
// orientation bins (wrapping around at 180 degrees) and between the nearest
// cell centres in x and y. Pixels in the block's corner regions therefore
// vote only into their own cell, pixels along an edge are shared in one
// direction, and interior pixels in both.
//
// The returned array is allocated with new[] and registered with
// `blockmanager`, which owns it; callers must not delete it.
// Assumes `grad`/`theta` are up to date for the current image and that the
// whole block lies inside the image - TODO confirm at the call sites.
// NOTE(review): the values differ from the previous implementation, which
// double-counted border pixels and used swapped/negative spatial weights;
// a classifier trained on the old features needs retraining.
double* Hog::GetBlkFeature(int offsetY_againstImg, int offsetX_againstImg)
{
	const int featureLen = blkcell*blkcell*m_histBin;
	// The trailing "()" value-initialises the whole array to 0.
	double *blkHOG = new double[featureLen]();
	const int blockSide = CellSize*blkcell;
	const double binWidth = 180.0 / double(m_histBin);

	for (int y = 0; y < blockSide; y++) {
		int rowLo, rowHi;
		double rowHiWeight;
		SplitAcrossCells(y, CellSize, blkcell, rowLo, rowHi, rowHiWeight);
		const int rowCells[2] = { rowLo, rowHi };
		const double rowWeights[2] = { 1.0 - rowHiWeight, rowHiWeight };

		for (int x = 0; x < blockSide; x++) {
			const int pixel = (offsetY_againstImg + y)*img_width + offsetX_againstImg + x;
			const double magn = grad[pixel];
			// A pixel without gradient contributes nothing.
			if (magn <= 0.0)
				continue;

			int colLo, colHi;
			double colHiWeight;
			SplitAcrossCells(x, CellSize, blkcell, colLo, colHi, colHiWeight);
			const int colCells[2] = { colLo, colHi };
			const double colWeights[2] = { 1.0 - colHiWeight, colHiWeight };

			// Orientation: position relative to the bin centres, so an angle
			// below the centre of its bin shares its vote with the previous
			// bin and one above the centre with the next bin.
			const double angle = this->theta[pixel];
			const double binPos = angle / binWidth - 0.5;
			const int binFloor = static_cast<int>(floor(binPos));
			const double binHiWeight = binPos - binFloor;
			// Wrap both indices: orientation is circular over 180 degrees.
			const int binLo = ((binFloor % m_histBin) + m_histBin) % m_histBin;
			const int binHi = (binLo + 1) % m_histBin;

			for (int r = 0; r < 2; r++) {
				for (int c = 0; c < 2; c++) {
					const double vote = magn*rowWeights[r] * colWeights[c];
					if (vote == 0.0)
						continue;
					double *cellHist = blkHOG + m_histBin*(rowCells[r] * blkcell + colCells[c]);
					cellHist[binLo] += vote*(1.0 - binHiWeight);
					cellHist[binHi] += vote*binHiWeight;
				}
			}
		}
	}

	L2Normalize(blkHOG, featureLen); // normalise per block
	// Cache the descriptor so overlapping windows do not recompute it.
	blockmanager.AddBlock(offsetY_againstImg, offsetX_againstImg, blkHOG);
	return blkHOG;
}
// Spatial weighting kernel centred at (cent_x, cent_y) with half-widths
// Hx and Hy. Returns 4*t/(2*PI) where
//   t = 1 - ((dx/Hx)^2 + (dy/Hy)^2) / 2,
// and 0 whenever t is not >= 0.
double Hog::GaussianKernel(int x, int y, int cent_x, int cent_y, int Hx, int Hy)
{
	const int offX = x - cent_x;
	const int offY = y - cent_y;

	const double scaledX = (double)(offX*offX) / (Hx*Hx);
	const double scaledY = (double)(offY*offY) / (Hy*Hy);
	const double profile = 1 - (scaledX + scaledY) / 2;

	// Written as a negated ">=" so that a NaN profile also yields 0.
	if (!(profile >= 0))
		return 0.0f;

	return (double)(4.0 * profile / (2 * PI));
}

// Collects the block descriptors of the detection window whose top-left
// corner is (offsetY_againstImg, offsetX_againstImg) into windowHOGFeature,
// row by row. A block already present in the cache is reused; otherwise it
// is computed (and cached) by GetBlkFeature.
void Hog::GetWindowFeature(const int offsetY_againstImg, const int offsetX_againstImg)
{
	windowHOGFeature.clear();

	for (int row = 0; row < yblkSkipStepNum; ++row)
	{
		const int blockY = offsetY_againstImg + row*blockSkipStep;
		for (int col = 0; col < xblkSkipStepNum; ++col)
		{
			const int blockX = offsetX_againstImg + col*blockSkipStep;
			double* const descriptor = blockmanager.find(blockY, blockX)
				? blockmanager.GetBlockData(blockY, blockX)
				: GetBlkFeature(blockY, blockX);
			windowHOGFeature.push_back(descriptor);
		}
	}
}

void Hog::RGB2Grey()
{
	if (greydata == NULL)
		greydata = new BYTE[img_width*img_height];
	for (int i = 0; i < img_height; i++)
		for (int j = 0; j < img_width; j++)
		{
			greydata[i*img_width + j] = 0.299*RGBdata[i*img_width * 3 + 3 * j] +
				0.587*RGBdata[i*img_width * 3 + 3 * j + 1] +
				0.114*RGBdata[i*img_width * 3 + 3 * j + 2];
		}
	delete[]RGBdata;
}

void Hog::NextPyramid()//双线性插值获得下一层图像
{
	int new_img_height = img_height / ratio;
	int new_img_width = img_width / ratio;

	BYTE*new_greydata = new BYTE[new_img_height*new_img_width];
	double fw = ratio;//double(nW) / W1;
	double fh = ratio;//double(nH) / H1;
	int y1, y2, x1, x2, x0, y0;
	double fx1, fx2, fy1, fy2;

	for (int i = 0; i < new_img_height; i++)
	{
		y0 = i*fh;
		y1 = int(y0);
		if (y1 == img_height - 1)    y2 = y1;
		else y2 = y1 + 1;
		fy1 = y1 - y0;
		fy2 = 1.0f - fy1;
		for (int j = 0; j < new_img_width; j++)
		{
			x0 = j*fw;
			x1 = int(x0);
			if (x1 == img_width - 1)    x2 = x1;
			else x2 = x1 + 1;
			fx1 = y1 - y0;
			fx2 = 1.0f - fx1;
			double s1 = fx1*fy1;
			double s2 = fx2*fy1;
			double s3 = fx2*fy2;
			double s4 = fx1*fy2;
			BYTE c1r, c2r, c3r, c4r;
			c1r = greydata[y1*img_width + x1];
			c2r = greydata[y1*img_width + x2];
			c3r = greydata[y2*img_width + x1];
			c4r = greydata[y2*img_width + x2];
			BYTE r;
			r = (BYTE)(c1r*s3) + (BYTE)(c2r*s4) + (BYTE)(c3r*s2) + (BYTE)(c4r*s1);
			new_greydata[i*new_img_width + j] = r;
		}
	}
	delete[]greydata;
	greydata = new_greydata;
	img_height = new_img_height;
	img_width = new_img_width;
	current_pyramid_height++;
	blockmanager.deleteAllBlocks();
	blockmanager.SetLevel(current_pyramid_height);
}
/*void Hog::SingleScaleDetect()
{
int xSkipStepNum = floor((img_width - window_width) / windowSkipStep + 1);
int ySkipStepNum = floor((img_height - window_width) / windowSkipStep + 1);
//double*imgHOGFeature = new double[blkcell*blkcell*m_histBin*xSkipStepNum*ySkipStepNum];
for (int i = 0; i < ySkipStepNum; i++)
for (int j = 0; j < xSkipStepNum; j++)
{
GetWindowFeature(i*windowSkipStep, j*windowSkipStep);
}
}


void Hog::MultiScaleDetect()
{
while (current_pyramid_height < max_pyramid_height)
{
SingleScaleDetect();
NextPyramid();
}
}*/




你可能感兴趣的:(计算机视觉)