申明,本文非笔者原创,原文转载自:http://blog.csdn.net/van_ruin/article/details/9166591
1.方向梯度直方图(Histogram of Oriented Gradient, HOG)特征是一种在计算机视觉和图像处理中用来进行物体检测的特征描述子。它通过计算和统计图像局部区域的梯度方向直方图来构成特征。基本知识可以参考博客:http://blog.csdn.net/zouxy09/article/details/7929348
2.Adaboost的基础知识可以参考书籍:统计学习方法,第八章-提升方法adaboost。
这里利用HOG来训练Adaboost行人检测。在Haar-Adaboost算法中,弱分类器仅对一维分类。但是在Hog特征中,特征是每个block的串联。如果仅对一维分类(一个cell的其中一个方向的权值),就不能有效利用block的归一化效果。所以我们使用logistic弱分类器对每个block进行分类(实验中,每个block包含4个cell,每个cell有9个bin,即36维特征)。
本实验需要注意的地方:
1. adaboost误差率需要计算权重
2. logistic回归需要使用带权重的logistic分类器
3. logistic分类可能与数据分布相反。需要计算两次。(相反的情况下,拟合没有意义,需要将数据反转(1->0,0->1))
发现总结与问题
1. 公理1:对于任何数据的二值分类,能够得到正确率大于等于0.5的线性分类器。
2. 推论:对于任何带权重数据的二值分类,能够得到正确率大于等于0.5的线性分类器。
3. 推论?对于任何带权重数据的n值分类,能够得到正确率大于等于1/n的线性分类器。
4. 对于与logistic函数分布相反的数据,应该如何处理?(本实验的处理方式如前面所述)。
实验结果后的猜想
• 猜想1:Adaboost弱分类器所选取的特征仍然要保持一定的颗粒度。像素级的特征是无效的。
实验结果与分析
训练集:500/500;测试集:19/22(200个弱分类器)。
测试数据较少,但是训练集的高正确率至少证明其能够由弱分类器(错误率普遍在0.25左右)提高样本数据集的精度。
17张图片中,有部分图片较模糊,行人影像较小,可能导致难以分辨。
下面给出代码,希望各位能够指正错误。说明:本代码全部由自己编写,所用函数未调用OpenCV实用库函数,及机器学习库函数(基本数据除外)。
-
-
-
-
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <ctime>

#include <opencv2\opencv.hpp>

using std::clock_t;
using std::clock;
using namespace cv;
-
-
// ---- Dataset layout ----------------------------------------------------------
const int NUM_NEGIMAGE = 1000;                       // negative training samples
const int NUM_POSIMAGE = 500;                        // positive training samples
// Derived from the two counts above so the total can never drift out of sync.
const int NUM_IMAGE = NUM_POSIMAGE + NUM_NEGIMAGE;
const int NUM_TESTIMAGE = 22;                        // images evaluated after training
const int MAX_DIMENSION = 3781;                      // not referenced by the code in this file

// ---- HOG geometry (all sizes in pixels unless stated otherwise) ---------------
const int IMAGE_ROWS = 128;
const int IMAGE_COLS = 64;
const int CELLSIZE = 8;                              // side of one square cell
const int BLOCKSIZE = 16;                            // side of one square block
const int MOVELENGTH = 8;                            // block stride
const int BINSIZE = 9;                               // orientation bins per cell
const double PI = 2*std::acos(0.0);
const double eps = 1e-8;                             // tolerance for floating-point comparisons

// Number of block positions along each axis, and the length of one block
// descriptor: (cells per block) * (bins per cell) plus one constant bias term.
const int NUM_BLOCK_ROWS = (IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH+1;
const int NUM_BLOCK_COLS = (IMAGE_COLS-BLOCKSIZE)/MOVELENGTH+1;
const int NUM_BLOCK_FEATURES = (BLOCKSIZE/CELLSIZE)*(BLOCKSIZE/CELLSIZE)*BINSIZE+1;

// ---- Training data --------------------------------------------------------------
// features[sample][blockRow][blockCol][k]: block descriptors of every training image.
double features[NUM_IMAGE][NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES];
// Label of each sample in {0, 1}; this is the regression target of the weak learner.
double type[NUM_IMAGE];
// Label of each sample in {-1, +1}; used by the AdaBoost weight update.
double y[NUM_IMAGE];

// Number of boosting rounds, i.e. weak classifiers in the final ensemble.
const int NUM_WEAKCLASSIFIER = 100;

// Current AdaBoost weight of each training sample.
double weight[NUM_IMAGE];
-
-
- double logistic(double theta[], double x[])
- {
- double ans = 0;
- for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)
- {
- ans += theta[i]*x[i];
- }
- return 1/(1+std::exp(-ans));
- }
-
- struct WeakClassifier
- {
- double _theta[NUM_BLOCK_FEATURES];
- int _index_row;
- int _index_col;
- int _isreverse;
- double _alpha;
- double _error;
- void clear()
- {
- memset(_theta, 0.0, NUM_BLOCK_FEATURES*sizeof(double));
- _alpha = 0.0;
- _error = 1;
- _index_row = -1;
- _index_col = -1;
- _isreverse = true;
- }
-
-
- int cal(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])
- {
- int ans = logistic(_theta, x[_index_row][_index_col]);
- if(ans > 0.5)
- {
- if(_isreverse)
- return -1;
- else
- return 1;
- }
- else
- {
- if(_isreverse)
- return 1;
- else
- return -1;
- }
- }
-
- void print()
- {
-
- for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)
- printf("%lf ", _theta[i]);
- printf("\n");
-
-
- printf("%d ",_index_row);
-
-
- printf("%d ",_index_col);
-
-
- printf("%d ",_isreverse);
-
-
- printf("%lf ",_alpha);
-
-
- printf("%lf \n",_error);
- }
- }weakClassifier[NUM_WEAKCLASSIFIER];
-
-
- double arc2angle(double arc)
- {
- return arc/PI*180.0;
- }
-
- double angle2arc(double angle)
- {
- return angle/180.0*PI;
- }
-
// Writes the relative path of the i-th positive training image into
// `filename`. The caller must supply a buffer large enough for the result.
void posfilename(int i, char* filename)
{
    const char* const pattern = "pos/pos (%d).png";
    std::sprintf(filename, pattern, i);
}
-
// Writes the relative path of the i-th negative training image into
// `filename`. The caller must supply a buffer large enough for the result.
void negfilename(int i, char* filename)
{
    const char* const pattern = "neg/neg (%d).png";
    std::sprintf(filename, pattern, i);
}
-
// Writes the relative path of the i-th test image into `filename`.
// The caller must supply a buffer large enough for the result.
void testfilename(int i, char* filename)
{
    const char* const pattern = "test_pos/test (%d).png";
    std::sprintf(filename, pattern, i);
}
-
-
- void normalizeImage(Mat& inputImage)
- {
-
- CV_Assert(inputImage.depth() != sizeof(uchar));
- int channels = inputImage.channels();
- int nRows = inputImage.rows ;
- int nCols = inputImage.cols* channels;
- if (inputImage.isContinuous())
- {
- nCols *= nRows;
- nRows = 1;
- }
- int i,j;
- uchar* p;
- for( i = 0; i < nRows; ++i)
- {
- p = inputImage.ptr<uchar>(i);
- for ( j = 0; j < nCols; ++j)
- {
- p[j] = int(sqrt(p[j]*1.0));
- }
- }
- return;
- }
-
-
- void calGredient(const Mat& inputImage, double xGradient[IMAGE_ROWS][IMAGE_COLS], double yGradient[IMAGE_ROWS][IMAGE_COLS])
- {
- uchar* dataptr = inputImage.data;
- int nrows = inputImage.rows;
- int ncols = inputImage.cols;
-
-
- for(int i = 1 ; i < nrows - 1; i++)
- {
- for(int j = 0 ; j < ncols; j++)
- {
- xGradient[i][j] = inputImage.at<uchar>(i+1,j) - inputImage.at<uchar>(i-1,j);
- }
- }
-
-
- for(int i = 0 ; i < ncols; i++)
- {
- xGradient[0][i] = (inputImage.at<uchar>(1,i) - inputImage.at<uchar>(0,i))*2;
- xGradient[nrows-1][i] = (inputImage.at<uchar>(nrows-1,i) - inputImage.at<uchar>(nrows-2,i))*2;
- }
-
-
- for(int i = 0 ; i < nrows ; i++)
- {
- for(int j = 1 ; j < ncols - 1; j++)
- {
- yGradient[i][j] = inputImage.at<uchar>(i,j+1) - inputImage.at<uchar>(i,j-1);
- }
- }
-
-
- for(int i = 0 ; i < nrows; i++)
- {
- xGradient[i][0] = (inputImage.at<uchar>(i,1) - inputImage.at<uchar>(i,0))*2;
- xGradient[i][ncols-1] = (inputImage.at<uchar>(i,ncols-1) - inputImage.at<uchar>(i,ncols-2))*2;
- }
- }
-
-
- void calHogFeatures(Mat& inputImage, double outputFeature[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])
- {
- int nrows = inputImage.rows;
- int ncols = inputImage.cols;
- int type = inputImage.type();
-
- if(nrows != IMAGE_ROWS || ncols != IMAGE_COLS)
- abort();
-
-
- double xGradient[IMAGE_ROWS][IMAGE_COLS];
- double yGradient[IMAGE_ROWS][IMAGE_COLS];
- calGredient(inputImage, xGradient, yGradient);
-
-
- double gradient[IMAGE_ROWS][IMAGE_COLS];
- double direction[IMAGE_ROWS][IMAGE_COLS];
-
- for(int i = 0 ; i < nrows; i++)
- {
- for(int j = 0 ; j < ncols; j++)
- {
- double gx = xGradient[i][j];
- double gy = yGradient[i][j];
- gradient[i][j] = sqrt(gx*gx + gy*gy);
- direction[i][j] = arc2angle(atan2(gy, gx));
- }
- }
-
-
- double cellinfo[IMAGE_ROWS/CELLSIZE][IMAGE_COLS/CELLSIZE][BINSIZE];
- memset(cellinfo, 0, sizeof(cellinfo));
-
- for(int i = 0; i < IMAGE_ROWS/CELLSIZE; i++)
- {
- for(int j = 0 ; j < IMAGE_COLS/CELLSIZE; j++)
- {
- double* cell = cellinfo[i][j];
-
-
- for(int ci = 0 ; ci < CELLSIZE; ci++)
- {
- for(int cj = 0; cj < CELLSIZE; cj++)
- {
-
- int px = i*CELLSIZE + ci;
- int py = j*CELLSIZE + cj;
-
- int binindex = int((direction[px][py]+180.0)/(360.0/BINSIZE));
-
- if(fabs(direction[px][py]-180) < eps)
- {
- binindex = BINSIZE-1;
- }
- if(fabs(direction[px][py]+180) < eps)
- {
- binindex = 0;
- }
- if(binindex < 0 || binindex >= BINSIZE)
- {
- printf("Wrong binindex: %d %lf %lf %lf", binindex, xGradient[px][py], yGradient[px][py], direction[px][py]);
- abort();
- }
-
- cell[binindex] += gradient[px][py];
- }
- }
- }
- }
-
-
-
- if(MOVELENGTH%CELLSIZE != 0)
- {
- printf("MOVELENGTH%CELLSIZE != 0");
- abort();
- }
-
-
- for(int i = 0 ; i < (IMAGE_ROWS-BLOCKSIZE)/MOVELENGTH + 1; i++)
- {
- for(int j = 0 ; j < (IMAGE_COLS-BLOCKSIZE)/MOVELENGTH + 1; j++)
- {
- int bfindex = 0; outputFeature[i][j][bfindex++] = 1;
-
-
- for(int c1 = 0; c1 < BLOCKSIZE/CELLSIZE; c1++)
- {
- for(int c2 = 0 ; c2 < BLOCKSIZE/CELLSIZE; c2++)
- {
-
- int cx = i*MOVELENGTH/CELLSIZE+c1;
- int cy = j*MOVELENGTH/CELLSIZE+c2;
-
- for(int binindex = 0 ; binindex < BINSIZE; binindex++)
- {
- outputFeature[i][j][bfindex++] = cellinfo[cx][cy][binindex];
- }
- }
- }
- }
- }
- return;
- }
-
-
- void trainLogisticRegression(int block_row,int block_col, double theta[], double& errorrate, int& isreverse)
- {
- double theta1[NUM_BLOCK_FEATURES], theta2[NUM_BLOCK_FEATURES];
- memset(theta1, 0, NUM_BLOCK_FEATURES*sizeof(double));
- memset(theta2, 0, NUM_BLOCK_FEATURES*sizeof(double));
- double errorrate1 = 0;
- double errorrate2 = 0;
- double rightnum1 = 0;
- double rightnum2 = 0;
- isreverse = 0;
-
-
- for(int k = 0 ; k < 100000; k++)
- {
- int i = rand()%NUM_IMAGE;
- int j = rand()%NUM_BLOCK_FEATURES;
- theta1[j] = theta1[j] + weight[i]*0.01*(type[i] - logistic(theta1, features[i][block_row][block_col]))*features[i][block_row][block_col][j];
- }
-
- for(int i = 0 ; i < NUM_IMAGE; i++)
- {
- double tmp = logistic(theta1, features[i][block_row][block_col]);
- if(tmp > 0.5 && fabs(type[i] - 1) < eps)
- rightnum1 += 1.0*weight[i];
- if(tmp < 0.5 && fabs(type[i] - 0) < eps)
- rightnum1 += 1.0*weight[i];
- }
- errorrate1 = 1 - rightnum1;
-
-
- for(int k = 0 ; k < 100000; k++)
- {
- int i = rand()%NUM_IMAGE;
- int j = rand()%NUM_BLOCK_FEATURES;
- theta2[j] = theta2[j] + weight[i]*0.01*(1- type[i] - logistic(theta2, features[i][block_row][block_col]))*features[i][block_row][block_col][j];
- }
-
- for(int i = 0 ; i < NUM_IMAGE; i++)
- {
- double tmp = logistic(theta2, features[i][block_row][block_col]);
- if(tmp > 0.5 && fabs(type[i] - 0) < eps)
- rightnum2 += 1.0*weight[i];
- if(tmp < 0.5 && fabs(type[i] - 1) < eps)
- rightnum2 += 1.0*weight[i];
- }
- errorrate2 = 1 - rightnum2;
-
- if(errorrate1 < errorrate2)
- {
- for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)
- {
- theta[i] = theta1[i];
- }
- isreverse = 0;
- errorrate = errorrate1 + eps;
- }
- else
- {
- for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++)
- {
- theta[i] = theta2[i];
- }
- isreverse = 1;
- errorrate = errorrate2 + eps;
- }
- return;
- }
-
- WeakClassifier trainClassifier()
- {
- WeakClassifier ansclassifier;
- double theta[NUM_BLOCK_FEATURES];
- double errorrate = 1;
- int isreverse = 0;
- double best_theta[NUM_BLOCK_FEATURES];
- double best_errorrate = 1;
- int best_row = -1;
- int best_col = -1;
- int best_isreverse = 0;
-
-
- for(int i = 0 ; i < NUM_BLOCK_ROWS; i++)
- {
- for(int j = 0 ; j < NUM_BLOCK_COLS; j++)
- {
- trainLogisticRegression(i,j,theta,errorrate, isreverse);
-
- if(errorrate < 0)
- {
- printf("Wrong errorrate < 0 : %lf", errorrate);
- abort();
- }
-
- if(errorrate < best_errorrate)
- {
- for(int tempi = 0 ; tempi < NUM_BLOCK_FEATURES; tempi++)
- {
- best_theta[tempi] = theta[tempi];
- }
- best_errorrate = errorrate;
- best_row = i;
- best_col = j;
- best_isreverse = isreverse;
- }
- }
- }
-
- if(best_errorrate > 0.5)
- {
- printf("The best_errorrate is greater than 0.5.\n");
- abort();
- }
-
-
- ansclassifier._alpha = 1.0/2*std::log((1-best_errorrate)/best_errorrate);
- ansclassifier._error = best_errorrate;
- ansclassifier._index_col = best_col;
- ansclassifier._index_row = best_row;
- ansclassifier._isreverse = best_isreverse;
- for(int i = 0 ; i < NUM_BLOCK_FEATURES; i++) ansclassifier._theta[i] = best_theta[i];
-
- return ansclassifier;
- }
-
- int calByStrongClassifier(double x[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES])
- {
- double ans = 0;
- for(int i = 0 ; i < NUM_WEAKCLASSIFIER; i++)
- {
- ans += weakClassifier[i]._alpha * weakClassifier[i].cal(x);
- }
- if(ans > 0)
- return 1;
- else
- return -1;
- }
-
-
-
-
-
-
-
-
-
-
-
-
-
- int main()
- {
- char filename[100];
- IplImage* inputImage = NULL;
- clock_t timecount = clock();
-
-
- for(int i = 0 ; i < NUM_POSIMAGE; i++)
- {
- posfilename(i+1 ,filename);
-
-
- inputImage = cvLoadImage(filename, 0);
-
-
-
- Mat inputMat(inputImage);
- calHogFeatures(inputMat, features[i]);
- type[i] = 1;
- y[i] = 1;
-
-
-
- inputMat.release();
- cvReleaseImage(&inputImage);
- inputImage = NULL;
- }
-
- printf("The feature process of pos-image have done in %d second.\n", (clock()-timecount)/1000);
- timecount = clock();
-
-
- for(int i = 0; i < NUM_NEGIMAGE; i++)
- {
- negfilename(i+1, filename);
-
-
- inputImage = cvLoadImage(filename, 0);
- type[NUM_POSIMAGE+i] = 0;
- y[NUM_POSIMAGE+i] = -1;
-
- Mat inputMat(inputImage);
- calHogFeatures(inputMat, features[NUM_POSIMAGE+i]);
-
-
- inputMat.release();
- cvReleaseImage(&inputImage);
- inputImage = NULL;
- }
-
- printf("The feature process of neg-image have done in %d second.\n", (clock()-timecount)/1000);
- timecount = clock();
-
-
- for(int i = 0 ; i < NUM_IMAGE; i++)
- {
- weight[i] = 1.0/NUM_IMAGE;
- }
-
-
- freopen("HOG_CLASSIFIER.txt", "w", stdout);
-
-
- printf("%d\n", NUM_WEAKCLASSIFIER);
-
-
- for(int classifierindex = 0 ; classifierindex < NUM_WEAKCLASSIFIER; classifierindex++)
- {
- weakClassifier[classifierindex] = trainClassifier();
-
- double error = weakClassifier[classifierindex]._error;
- double alpha = weakClassifier[classifierindex]._alpha;
-
-
-
-
- double identitysum = 0;
- for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++)
- {
- weight[sampleindex] *= std::exp(-alpha*y[sampleindex]*weakClassifier[classifierindex].cal(features[sampleindex]));
- identitysum += weight[sampleindex];
- }
-
-
- for(int sampleindex = 0 ; sampleindex < NUM_IMAGE; sampleindex++)
- {
- weight[sampleindex] /= identitysum;
- }
-
- weakClassifier[classifierindex].print();
- }
-
- freopen("CON", "w", stdout);
- int rightnum = 0;
- for(int testindex = 0 ;testindex < NUM_TESTIMAGE; testindex ++)
- {
-
- testfilename(testindex+1, filename);
- inputImage = cvLoadImage(filename, 0);
-
- double testfeatures[NUM_BLOCK_ROWS][NUM_BLOCK_COLS][NUM_BLOCK_FEATURES];
- memset(testfeatures, 0, sizeof(testfeatures));
-
- Mat inputMat(inputImage);
- calHogFeatures(inputMat, testfeatures);
-
- if(calByStrongClassifier(testfeatures) == 1)
- {
- rightnum++;
-
- }
- else
-
-
- inputMat.release();
- }
- printf("Accuracy: %d\n", rightnum);
- }
//测试数据是网上流行的128*64灰度行人图像数据。