opencv4.3+cuda10.2 GPU完美编译版 SDK附件的说明
1.opencv4.4dnn模块封装yolo的SDK方法
2.darknet的工程实现yolo的调用.
opencv需要4.4的版本,下载源码编译.
勾选:
cuda
dnn相关选项
其他的看个人需求吧
我这里为了部署简单全部采用/MT编译的 不再依赖VS的CRT运行库.
编译特别慢:在VS2019下用P51(E3-1505 CPU,32G内存)挂机编译,大概4小时才完成.多多少少还是有些麻烦,多cmake几次你就习惯了.
依赖如下图:
看我的文件是有点怪,全网都没有人这么弄过.名字我修改了叫opencv_gpu.so(约931M很大的)
还有就是cuda10.2的依赖库我都改成了so结尾 主要是为了防止和别的版本的opencv库冲突.好多项目依赖opencv版本不一致SDK又是其他方提供 没有源码.所以才这样的 这些SO文件其实就是dll,所以不要见怪! 运行无影响的.
当你生成了opencv的GPU库后,可以用opencv源码samples目录中的GPU例子测试一下,看能不能正常运行.
首先准备yolo自己训练的模型和cfg (训练过程另篇)
我这里是一个挖掘机检测的cfg是3类(挖机身体,臂,斗) 模型都做了最好优化了(用darknet来练)
[net]
# Testing
batch=1
subdivisions=1
# Training
# 每batch个样本进行一次参数更新(此处训练配置为32)
#batch=32
# 将batch分割为subdivisions个子batch(此处为2),每次只送入batch/subdivisions个样本
# 显存不够时调大subdivisions,以降低对显存的占用
#subdivisions=2
# 图片宽和高
# 只设置成32的倍数(考虑precision)
#width=608
#height=608
width=416
height=416
# 输入图像的通道数
channels=3
# 动量
# 梯度下降到最优值的速度,建议配置为0.9
momentum=0.9
# 权重衰减正则项(防止过拟合)
# decay参数越大,对过拟合的抑制能力越强
decay=0.0005
# 通过旋转角度来生成更多训练样本
# 如果angle=10,就是生成新图片时随机旋转-10~10度
angle=0
# 调整饱和度
saturation = 1.5
# 调整曝光量
exposure = 1.5
# 调整色调
hue=.1
#初始学习率
learning_rate=0.001
# 在迭代次数小于burn_in时,其学习率的更新有一种方式
# 大于burn_in时,采用policy的更新方式
burn_in=1000
# 训练达到max_batches后,停止学习
max_batches = 120000
# 调整学习率的policy
# policy:CONSTANT, STEP, EXP, POLY, STEPS, SIG, RANDOM
policy=steps
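# 根据batch_num调整学习率: 迭代到steps中的次数时,学习率乘以对应的scales
# 注意: steps应小于max_batches(常取max_batches的80%和90%),否则学习率不会衰减;
# 这里steps大于max_batches=120000,训练过程中学习率实际不会下降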
steps=400000,450000
# 学习率变化的比例,累计相乘
scales=.1,.1
[convolutional]
# BN?
batch_normalize=1
# 输出特征图数
filters=16
# 卷积核的尺寸3X3
size=3
# 步长
stride=1
# pad=1,padding为size/2
pad=1
# 激励函数
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=1
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
###########
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
# 每一个[region/yolo]层前的最后一个卷积层filters数
# [yolo]层的计算公式为filters=(该层mask中anchor的个数)*(classes+5),不是num*(classes+5)
# 5:tx,ty,tw,th,to
# 修改filters = 3 * (类别数 + 5),本例为3 * (3 + 5) = 24
filters=24
activation=linear
[yolo]
mask = 3,4,5
# 修改anchors值
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
# 类别
classes=3
num=6
# 通过抖动增加噪声,抑制过拟合
jitter=.3
ignore_thresh = .7
truth_thresh = 1
random=1
[route]
# route层: 只写一个层时,直接引出该层的featuremap(这里是向前数第4层);
# 写两个层时(如下面的layers = -1, 8),才是将两层的featuremap做concat
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 8
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=24
activation=linear
[yolo]
# 当前属于第几个预选框
mask = 0,1,2
# 修改anchors值
# 若不设置,默认是0.5
anchors = 10,14, 23,27, 37,58, 81,82, 135,169, 344,319
# 类别,不加1
classes=3
# anchor的数量
num=6
# 通过抖动增加噪声,防止过拟合
jitter=.3
# 是否需要计算IOU误差的参数
# 预测框与真实框的IOU大于ignore_thresh时,该预测框的误差不计入cost function
ignore_thresh = .7
truth_thresh = 1
# random=1,启用Multi-Scale Training,随机使用多尺度图片进行训练
# random=0,训练图片大小与输入大小一致
random=1
Darknet.mk(用于编译yoloDetection.cpp)
# NMAKE makefile: compiles yoloDetection.cpp with cl.exe and links it into Darknet.exe.
# Usage (file name as given in the accompanying write-up): nmake /f Darknet.mk
#
# INCLUDES is the directory passed to cl.exe via /I. It must not end in a
# backslash: NMAKE treats a trailing "\" as a line continuation, which would
# merge the following line into the macro value.
# NOTE(review): "." assumes the OpenCV headers are reachable from the current
# directory -- adjust if they live elsewhere.
INCLUDES=.

# Link step. opencv.res is expected to already exist next to the sources.
# Object files are removed after a successful link.
Darknet: yoloDetection.obj
	cl.exe -EHsc opencv.res yoloDetection.obj -o Darknet
	del *.obj

# Compile step (/W0 silences all compiler warnings).
yoloDetection.obj: yoloDetection.cpp
	cl.exe -EHsc /W0 /I $(INCLUDES) -c yoloDetection.cpp

# Remove build outputs.
clean:
	del *.obj *.exe
yoloDetection.cpp
#include <fstream>
#include <sstream>
#include <string>
#include <vector>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
// Bring the OpenCV and OpenCV-DNN namespaces into file scope so that Mat, Net,
// readNet, blobFromImage, NMSBoxes, etc. can be used unqualified below.
using namespace cv;
using namespace dnn;
// MSVC-specific directive: asks the linker to link against opencv_gpu.lib.
#pragma comment(lib,"opencv_gpu.lib")
// Detection thresholds. Both are assigned in main(); confThreshold filters
// candidate detections in postprocess() and both are passed to NMSBoxes().
float confThreshold, nmsThreshold;
// Class names, one entry per line of the classes file read in main().
// drawPred() indexes this vector by class id to build the label text.
std::vector<std::string> classes;
// Decodes the network outputs into boxes, runs NMS and draws the survivors on frame.
void postprocess(Mat& frame, const std::vector<Mat>& out, Net& net);
// Draws one bounding box and its "<class>: <confidence>" label on frame.
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame);
// NOTE(review): declared here but no definition or call is visible in this file.
void callback(int pos, void* userdata);
int main(int argc, char** argv)
{
// 根据选择的检测模型文件进行配置
confThreshold = 0.5;
nmsThreshold = 0.4;
float scale = 0.00392;
Scalar mean = {
0, 0, 0 };
bool swapRB = true;
int inpWidth = 416;
int inpHeight = 416;
//String modelPath = "../../data/testdata/dnn/yolov3.weights";
//String configPath = "../../data/testdata/dnn/yolov3.cfg";
//String framework = "";
String modelPath = "yolov3.mo";
String configPath = "yolov3.cfg";
String framework = "";
//int backendId = cv::dnn::DNN_BACKEND_OPENCV;
//int targetId = cv::dnn::DNN_TARGET_CPU;
int backendId = cv::dnn::DNN_BACKEND_CUDA;
int targetId = cv::dnn::DNN_TARGET_CUDA;
//String classesFile = "object_detection_classes_yolov3.txt";
String classesFile = "coco.names";
//String classesFile = "../../data/dnn/object_detection_classes_yolov3.txt";
// Open file with classes names.
if (!classesFile.empty())
{
const std::string& file = classesFile;
std::ifstream ifs(file.c_str());
if (!ifs.is_open())
CV_Error(Error::StsError, "File " + file + " not found");
std::string line;
while (std::getline(ifs, line))
classes.push_back(line);
}
// Load a model.
Net net = readNet(modelPath, configPath, framework);
net.setPreferableBackend(backendId);
net.setPreferableTarget(targetId);
std::vector<String> outNames = net.getUnconnectedOutLayersNames();
// Create a window
static const std::string kWinName = "Deep learning object detection in OpenCV";
// Open a video file or an image file or a camera stream.
VideoCapture cap;
//cap.open(0);
cap.open("digger.mp4");
// Process frames.
Mat frame, blob;
while (waitKey(1) < 0)
{
cap >> frame;
if (frame.empty())
{
waitKey();
break;
}
// Create a 4D blob from a frame.
Size inpSize(inpWidth > 0 ? inpWidth : frame.cols,
inpHeight > 0 ? inpHeight : frame.rows);
blobFromImage(frame, blob, scale, inpSize, mean, swapRB, false);
// Run a model.
net.setInput(blob);
if (net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
resize(frame, frame, inpSize);
Mat imInfo = (Mat_<float>(1, 3) << inpSize.height, inpSize.width, 1.6f);
net.setInput(imInfo, "im_info");
}
std::vector<Mat> outs;
net.forward(outs, outNames);
postprocess(frame, outs, net);
// Put efficiency information.
std::vector<double> layersTimes;
double freq = getTickFrequency() / 1000;
double t = net.getPerfProfile(layersTimes) / freq;
std::string label = format("Inference time: %.2f ms", t);
putText(frame, label, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow(kWinName, frame);
}
return 0;
}
/**
 * Turns raw network outputs into drawn detections.
 *
 * Depending on the network, boxes are decoded from one of three layouts,
 * filtered by the global confThreshold, reduced with NMSBoxes() (using
 * confThreshold and nmsThreshold) and drawn onto frame via drawPred().
 *
 * @param frame image the boxes are drawn on; its size scales normalized boxes.
 * @param outs  output blobs returned by Net::forward().
 * @param net   network that produced outs; queried for its output layer type.
 *
 * The output layer type is looked up once and cached in function-local
 * statics, so later calls reuse the value obtained on the first call.
 * Raises (CV_Error) for an unrecognized output layer type.
 */
void postprocess(Mat& frame, const std::vector<Mat>& outs, Net& net)
{
    static std::vector<int> unconnectedIds = net.getUnconnectedOutLayers();
    static std::string finalLayerType = net.getLayer(unconnectedIds[0])->type;

    std::vector<int> keptClassIds;
    std::vector<float> keptScores;
    std::vector<Rect> keptBoxes;

    const bool hasImInfo = net.getLayer(0)->outputNameToIndex("im_info") != -1;

    if (hasImInfo || finalLayerType == "DetectionOutput")
    {
        // Single output blob of shape 1x1xNx7; every detection is
        // [batchId, classId, confidence, left, top, right, bottom].
        // Faster-RCNN / R-FCN (im_info input present) report pixel
        // coordinates; DetectionOutput reports coordinates that are
        // multiplied by the frame size here.
        CV_Assert(outs.size() == 1);
        const int xFactor = hasImInfo ? 1 : frame.cols;
        const int yFactor = hasImInfo ? 1 : frame.rows;
        const float* det = (const float*)outs[0].data;
        const size_t valueCount = outs[0].total();
        for (size_t k = 0; k < valueCount; k += 7)
        {
            const float score = det[k + 2];
            if (!(score > confThreshold))
                continue;
            const int x1 = (int)(det[k + 3] * xFactor);
            const int y1 = (int)(det[k + 4] * yFactor);
            const int x2 = (int)(det[k + 5] * xFactor);
            const int y2 = (int)(det[k + 6] * yFactor);
            // Class ids are shifted down by one to skip the 0th background class.
            keptClassIds.push_back((int)(det[k + 1]) - 1);
            keptBoxes.push_back(Rect(x1, y1, x2 - x1 + 1, y2 - y1 + 1));
            keptScores.push_back(score);
        }
    }
    else if (finalLayerType == "Region")
    {
        // Each output blob is a 2D matrix with one row per candidate box:
        // columns 0..3 are [center_x, center_y, width, height] relative to
        // the frame size, column 4 is not read here, and columns 5.. hold
        // the per-class scores.
        for (size_t b = 0; b < outs.size(); ++b)
        {
            const Mat& layerOut = outs[b];
            const float* row = (const float*)layerOut.data;
            for (int r = 0; r < layerOut.rows; ++r, row += layerOut.cols)
            {
                Mat classScores = layerOut.row(r).colRange(5, layerOut.cols);
                Point bestClass;
                double bestScore;
                minMaxLoc(classScores, 0, &bestScore, 0, &bestClass);
                if (!(bestScore > confThreshold))
                    continue;
                const int cx = (int)(row[0] * frame.cols);
                const int cy = (int)(row[1] * frame.rows);
                const int w = (int)(row[2] * frame.cols);
                const int h = (int)(row[3] * frame.rows);
                keptClassIds.push_back(bestClass.x);
                keptScores.push_back((float)bestScore);
                keptBoxes.push_back(Rect(cx - w / 2, cy - h / 2, w, h));
            }
        }
    }
    else
    {
        CV_Error(Error::StsNotImplemented, "Unknown output layer type: " + finalLayerType);
    }

    // Suppress overlapping boxes, then draw what is left.
    std::vector<int> survivors;
    NMSBoxes(keptBoxes, keptScores, confThreshold, nmsThreshold, survivors);
    for (size_t s = 0; s < survivors.size(); ++s)
    {
        const int pick = survivors[s];
        const Rect r = keptBoxes[pick];
        drawPred(keptClassIds[pick], keptScores[pick], r.x, r.y,
                 r.x + r.width, r.y + r.height, frame);
    }
}
void drawPred(int classId, float conf, int left, int top, int right, int bottom, Mat& frame)
{
rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0));
std::string label = format("%.2f", conf);
if (!classes.empty())
{
CV_Assert(classId < (int)classes.size());
label = classes[classId] + ": " + label;
}
int baseLine;
Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
top = max(top, labelSize.height);
rectangle(frame, Point(left, top - labelSize.height),
Point(left + labelSize.width, top + baseLine), Scalar::all(255), FILLED);
putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.5, Scalar());
}
opencv_dnn调用yolo模块官方的例子只要opencv_world的GPU版生成后可以直接复现 。非常容易. 封装的代码我就不贴出来了,参考yoloDetection.cpp即可.更换cfg和weight文件就是一个很好的识别框架了.
将opencv4.3+cuda10.2 GPU完美编译版 SDK 里面直接更换cfg和weight文件就可以直接跑起来了