具体原理网上例子太多了,这里只提供代码。
先来看一下不用CUDA时的效果:代码如下:
#include <iostream>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main()
{
VideoCapture cap("abc.avi");
if (!cap.isOpened())
{
cerr << "can not open camera or video file" << endl;
return -1;
}
while (1) {
Mat src;
cap.read(src);
if (src.empty())
break;
//Define HOG Object
HOGDescriptor hog;
hog.setSVMDetector(HOGDescriptor::getDefaultPeopleDetector());
//Detect the Pedestrians region on the test image
vector regions;
int64 start = getTickCount();
hog.detectMultiScale(src, regions, 0.3, Size(8, 8), Size(32, 32), 1.05, 1);
double fps = getTickFrequency() / (getTickCount() - start);
cout << "FPS : " << fps << endl;
// Display
for (size_t i = 0; i < regions.size(); i++)
rectangle(src, regions[i], Scalar(0, 0, 255), 2);
imshow("hog", src);
if (waitKey(30) == 'q')
break;
}
return 0;
}
opencv 提供了两种训练好的SVM分类器:
1)采用64*128 (像素为单位)的detect window。getDefaultPeopleDetector()
2)采用48*96(像素为单位)的detect window。getDaimlerPeopleDetector()
结果如下:
[ INFO:0] VIDEOIO: Enabled backends(5, sorted by priority): FFMPEG(1000); MSMF(990); DSHOW(980); CV_IMAGES(970); CV_MJPEG(960)
[ INFO:0] Initialize OpenCL runtime...
FPS : 0.680916
FPS : 0.679869
FPS : 0.67003
FPS : 0.680312
FPS : 0.642837
CUDA版本的HOG与常规版本的声明方法不太一样,OpenCV提供了一个静态方法来创建HOG:
static
Ptr<HOG>
create(
Size win_size = Size(64, 128),
Size block_size = Size(16, 16),
Size block_stride = Size(8, 8),
Size cell_size = Size(8, 8),
int nbins = 9
)
各个参数的意义如下:粗线标注的几个变量的值目前是固定不变的。
win_size | Detection window size. Align to block size and block stride. |
block_size | Block size in pixels. Align to cell size. Only (16,16) is supported for now. |
block_stride | Block stride. It must be a multiple of cell size. |
cell_size | Cell size. Only (8, 8) is supported for now. |
nbins | Number of bins. Only 9 bins per cell are supported for now. |
检测时一些设置:
// Detection-time configuration of the CUDA HOG object, followed by the
// detection call. The argument variables are not defined in this excerpt.
// Maximum number of detection window increases.
gpu_hog->setNumLevels(nlevels);
// Threshold for the distance between features and the SVM classifying plane.
gpu_hog->setHitThreshold(hit_threshold);
// Window stride; it must be a multiple of the block stride.
gpu_hog->setWinStride(win_stride);
// Coefficient of the detection window increase.
gpu_hog->setScaleFactor(scale);
// Regulates the similarity threshold for grouping overlapping rectangles;
// 0 means no grouping is performed.
gpu_hog->setGroupThreshold(gr_threshold);
// Run the detector on d_gray and store the results in regions.
gpu_hog->detectMultiScale(d_gray, regions);
void setNumLevels(int nlevels) = 0 :Maximum number of detection window increases.
void setHitThreshold(double hit_threshold) = 0 :Threshold for the distance between features and SVM classifying plane. Usually it is 0 and should be specified in the detector coefficients (as the last free coefficient). But if the free coefficient is omitted (which is allowed), you can specify it manually here.
void setWinStride(Size win_stride) = 0 :Window stride. It must be a multiple of block stride.
void setScaleFactor(double scale0) = 0:Coefficient of the detection window increase.
void setGroupThreshold(int group_threshold) = 0 :Coefficient to regulate the similarity threshold. When detected, some objects can be covered by many rectangles. 0 means not to perform grouping. See groupRectangles.
完整代码如下:
#include <iostream>
#include <opencv2/opencv.hpp>
using namespace std;
using namespace cv;
int main()
{
VideoCapture cap("abc.avi");
if (!cap.isOpened())
{
cerr << "can not open camera or video file" << endl;
return -1;
}
while (1) {
Mat src;
cap.read(src);
if (src.empty())
break;
cuda::GpuMat d_image, d_gray;
d_image.upload(src);
cuda::cvtColor(d_image, d_gray, COLOR_BGR2GRAY);
int64 start = getTickCount();
Ptr gpu_hog = cuda::HOG::create(Size(64, 128),
Size(16, 16),
Size(8, 8),
Size(8, 8),
9);
gpu_hog->setSVMDetector(gpu_hog->getDefaultPeopleDetector());
vector regions;
int nlevels = 13;
double hit_threshold = 0.3;
Size win_stride = Size(8, 8);
double scale = 1.05;
int gr_threshold = 1;
gpu_hog->setNumLevels(nlevels);
gpu_hog->setHitThreshold(hit_threshold);
gpu_hog->setWinStride(win_stride);
gpu_hog->setScaleFactor(scale);
gpu_hog->setGroupThreshold(gr_threshold);
gpu_hog->detectMultiScale(d_gray, regions);
double fps = getTickFrequency() / (getTickCount() - start);
cout << "FPS : " << fps << endl;
for (size_t i = 0; i < regions.size(); i++)
{
rectangle(src, regions[i], Scalar(0, 0, 255), 2);
}
imshow("hog", src);
if (waitKey(30) == 'q')
break;
}
return 0;
}
结果如下:
FPS : 28.0373
FPS : 21.7973
FPS : 23.3379
FPS : 23.446
FPS : 24.5155
FPS : 25.3211
FPS : 23.9942
FPS : 24.5319
FPS : 26.037
经过比较,不用CUDA加速时,FPS还不到1,CUDA加速后,FPS达到20多,基本达到了实时的效果。
到C盘下NVSMI文件夹运行nvidia-smi.exe程序,查看GPU使用情况,如下图,使用30%左右。