主程序
#include
#include "Yolo.h"
using namespace cv;
void runningYoloV3();
void runningYoloV4();
int main(int argc, char** argv)
{
runningYoloV3();
//runningYoloV4();
return 0;
}
void runningYoloV4() {
String modelPath = "./cfg/yolov4_coco.weights";
String configPath = "./cfg/yolov4_coco.cfg";
String classesFile = "./data/coco.names";
Yolo yolov4 = Yolo(modelPath, configPath, classesFile, true);
yolov4.loadModel();
VideoCapture cap;
cap.open(0);
Mat frame;
while (waitKey(1) < 0) {
cap >> frame;
if (frame.empty()) {
waitKey();
break;
}
double start_time = (double)cv::getTickCount();
std::vector yoloRet;
yolov4.runningYolo(frame, yoloRet);
yolov4.drowBoxes(frame, yoloRet);
double end_time = (double)cv::getTickCount();
double fps = cv::getTickFrequency() / (end_time - start_time);
double spend_time = (end_time - start_time) / cv::getTickFrequency();
std::string FPS = "FPS:" + cv::format("%.2f", fps) + " spend time:" + cv::format("%.2f", spend_time * 1000) + "ms";
putText(frame, FPS, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow("YoloV4 detect results", frame);
//yolov4.saveVider(frame, yoloRet);
}
}
void runningYoloV3() {
String modelPath = "./cfg/yolov3_coco.weights";
String configPath = "./cfg/yolov3_coco.cfg";
String classesFile = "./data/coco.names";
Yolo yolov3 = Yolo(modelPath, configPath, classesFile, true);
yolov3.loadModel();
VideoCapture cap;
cap.open(0);
Mat frame;
while (waitKey(1) < 0) {
cap >> frame;
if (frame.empty()) {
waitKey();
break;
}
double start_time = (double)cv::getTickCount();
std::vector yoloRet;
yolov3.runningYolo(frame, yoloRet);
yolov3.drowBoxes(frame, yoloRet);
double end_time = (double)cv::getTickCount();
double fps = cv::getTickFrequency() / (end_time - start_time);
double spend_time = (end_time - start_time) / cv::getTickFrequency();
std::string FPS = "FPS:" + cv::format("%.2f", fps) + " spend time:" + cv::format("%.2f", spend_time * 1000) + "ms";
putText(frame, FPS, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
imshow("YoloV3 detect results", frame);
}
}
yolo.h
#pragma once
#include
#include "opencv2/dnn.hpp"
#include "opencv2/imgproc.hpp"
#include "opencv2/highgui/highgui.hpp"
typedef struct YoloDetSt {
std::string label;
float confidences;
cv::Rect rect;
}YoloDetSt;
class Yolo {
public:
Yolo(std::string& modelPath, std::string& configPath, std::string& classesFile, bool isGpu);
~Yolo();
int loadModel();
int runningYolo(cv::Mat& img, std::vector& yoloRet);
void drowBoxes(cv::Mat& img, std::vector& yoloRet);
void saveVider(cv::Mat img, std::vector& yoloRet);
private:
std::string m_modelPath;
std::string m_configPath;
std::string m_classesFile;
std::vector m_outNames;
bool m_isGpu = false;
std::vector m_classes;
cv::dnn::Net m_net;
// Yolo参数设置
float m_confThreshold = 0.5;
float m_nmsThreshold = 0.4;
float m_scale = 0.00392;
cv::Scalar m_mean = { 0,0,0 };
bool m_swapRB = true;
int m_inpWidth = 416;
int m_inpHeight = 416;
// 检测的图片保存成视频的参数
int m_saveH = 0;
int m_saveW = 0;
cv::VideoWriter m_viderWriter;
std::string m_viderName;
int m_frames = 0;
void postprocess(cv::Mat& frame, const std::vector& outs, cv::dnn::Net& net, std::vector& yoloRet);
std::string getLocNameTime(); // 返回格式化时间:20200426_150925
void setViderWriterPara(const cv::Mat& img);
};
yolo.cpp
#include
#include "Yolo.h"
Yolo::Yolo(std::string& modelPath, std::string& configPath, std::string& classesFile, bool isGpu) {
m_modelPath = modelPath;
m_configPath = configPath;
m_classesFile = classesFile;
m_isGpu = isGpu;
}
Yolo::~Yolo() {
m_viderWriter.release();
}
int Yolo::loadModel() {
int backendId;
int targetId;
// cpu or gpu
if (m_isGpu) {
backendId = cv::dnn::DNN_BACKEND_CUDA;
targetId = cv::dnn::DNN_TARGET_CUDA;
}
else {
backendId = cv::dnn::DNN_BACKEND_OPENCV;
targetId = cv::dnn::DNN_TARGET_CPU;
}
// Open file with classes names.
if (!m_classesFile.empty()) {
std::ifstream ifs(m_classesFile.c_str());
if (!ifs.is_open()) {
std::string error = "File " + m_classesFile + " not found";
std::cout << error << std::endl;
return -1;
}
std::string line;
while (std::getline(ifs, line)) {
m_classes.push_back(line);
}
}
// Load a model.
m_net = cv::dnn::readNet(m_modelPath, m_configPath);
m_net.setPreferableBackend(backendId);
m_net.setPreferableTarget(targetId);
m_outNames = m_net.getUnconnectedOutLayersNames();
return 0;
}
int Yolo::runningYolo(cv::Mat& img, std::vector& yoloRet) {
// Create a 4D blob from a frame.
cv::Mat blob;
cv::Mat frame;
cv::Size inpSize(m_inpWidth > 0 ? m_inpWidth : img.cols,
m_inpHeight > 0 ? m_inpHeight : img.rows);
cv::dnn::blobFromImage(img, blob, m_scale, inpSize, m_mean, m_swapRB, false);
// Run a model.
m_net.setInput(blob);
if (m_net.getLayer(0)->outputNameToIndex("im_info") != -1) // Faster-RCNN or R-FCN
{
cv::resize(img, img, inpSize);
cv::Mat imInfo = (cv::Mat_(1, 3) << inpSize.height, inpSize.width, 1.6f);
m_net.setInput(imInfo, "im_info");
}
std::vector outs;
m_net.forward(outs, m_outNames);
postprocess(img, outs, m_net, yoloRet);
return 0;
}
void Yolo::postprocess(cv::Mat& frame, const std::vector& outs, cv::dnn::Net& net, std::vector& yoloRet) {
static std::vector outLayers = net.getUnconnectedOutLayers();
static std::string outLayerType = net.getLayer(outLayers[0])->type;
std::vector classIds;
std::vector confidences;
std::vector boxes;
if (net.getLayer(0)->outputNameToIndex("im_info") != -1)
{
// Network produces output blob with a shape 1x1xNx7 where N is a number of
// detections and an every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
CV_Assert(outs.size() == 1);
float* data = (float*)outs[0].data;
for (size_t i = 0; i < outs[0].total(); i += 7) {
float confidence = data[i + 2];
if (confidence > m_confThreshold) {
int left = (int)data[i + 3];
int top = (int)data[i + 4];
int right = (int)data[i + 5];
int bottom = (int)data[i + 6];
int width = right - left + 1;
int height = bottom - top + 1;
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
boxes.push_back(cv::Rect(left, top, width, height));
confidences.push_back(confidence);
}
}
}
else if (outLayerType == "DetectionOutput") {
// Network produces output blob with a shape 1x1xNx7 where N is a number of
// detections and an every detection is a vector of values
// [batchId, classId, confidence, left, top, right, bottom]
CV_Assert(outs.size() == 1);
float* data = (float*)outs[0].data;
for (size_t i = 0; i < outs[0].total(); i += 7) {
float confidence = data[i + 2];
if (confidence > m_confThreshold) {
int left = (int)(data[i + 3] * frame.cols);
int top = (int)(data[i + 4] * frame.rows);
int right = (int)(data[i + 5] * frame.cols);
int bottom = (int)(data[i + 6] * frame.rows);
int width = right - left + 1;
int height = bottom - top + 1;
classIds.push_back((int)(data[i + 1]) - 1); // Skip 0th background class id.
boxes.push_back(cv::Rect(left, top, width, height));
confidences.push_back(confidence);
}
}
}
else if (outLayerType == "Region") {
for (size_t i = 0; i < outs.size(); ++i) {
// Network produces output blob with a shape NxC where N is a number of
// detected objects and C is a number of classes + 4 where the first 4
// numbers are [center_x, center_y, width, height]
float* data = (float*)outs[i].data;
for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols) {
cv::Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
cv::Point classIdPoint;
double confidence;
minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
if (confidence > m_confThreshold) {
int centerX = (int)(data[0] * frame.cols);
int centerY = (int)(data[1] * frame.rows);
int width = (int)(data[2] * frame.cols);
int height = (int)(data[3] * frame.rows);
int left = centerX - width / 2;
int top = centerY - height / 2;
classIds.push_back(classIdPoint.x);
confidences.push_back((float)confidence);
boxes.push_back(cv::Rect(left, top, width, height));
}
}
}
}
else {
std::cout << "Unknown output layer type: " + outLayerType << std::endl;
}
std::vector indices;
cv::dnn::NMSBoxes(boxes, confidences, m_confThreshold, m_nmsThreshold, indices);
for (size_t i = 0; i < indices.size(); ++i) {
int idx = indices[i];
std::string label;
if (!m_classes.empty()) {
CV_Assert(classIds[idx] < (int)m_classes.size());
}
yoloRet.push_back(YoloDetSt{ m_classes[classIds[idx]], confidences[idx], boxes[idx] });
}
}
void Yolo::drowBoxes(cv::Mat& img, std::vector& yoloRet) {
for (int i = 0; i < yoloRet.size(); i++) {
cv::rectangle(img, yoloRet[i].rect, cv::Scalar(0, 0, 255));
std::string label = cv::format("%.2f", yoloRet[i].confidences);
label = yoloRet[i].label + ": " + label;
int baseLine;
cv::Size labelSize = cv::getTextSize(label, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
int top = cv::max(yoloRet[i].rect.y, labelSize.height);
rectangle(img, cv::Point(yoloRet[i].rect.x, top - labelSize.height),
cv::Point(yoloRet[i].rect.x + labelSize.width, top + baseLine), cv::Scalar(255, 255, 255), cv::FILLED);
putText(img, label, cv::Point(yoloRet[i].rect.x, top), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar());
}
}
// 返回格式化时间:20200426_150925
std::string Yolo::getLocNameTime() {
struct tm t; //tm结构指针
time_t now; //声明time_t类型变量
time(&now); //获取系统日期和时间
localtime_s(&t, &now); //获取当地日期和时间
std::string time_name = cv::format("%d", t.tm_year + 1900) + cv::format("%.2d", t.tm_mon + 1) + cv::format("%.2d", t.tm_mday) + "_" +
cv::format("%.2d", t.tm_hour) + cv::format("%.2d", t.tm_min) + cv::format("%.2d", t.tm_sec);
return time_name;
}
void Yolo::setViderWriterPara(const cv::Mat& img) {
m_saveH = img.size().height;
m_saveW = img.size().width;
m_viderName = "./data/" + getLocNameTime() + ".avi";
m_frames = 0;
m_viderWriter = cv::VideoWriter(m_viderName, cv::VideoWriter::fourcc('M', 'J', 'P', 'G'), 25.0, cv::Size(m_saveW, m_saveH));
}
void Yolo::saveVider(cv::Mat img, std::vector& yoloRet) {
drowBoxes(img, yoloRet);
if ((m_saveH == 0) && (m_saveW == 0)) {
setViderWriterPara(img);
m_viderWriter << img;
}
else {
if ((m_saveH != img.size().height) || (m_saveW != img.size().width)) {
cv::resize(img, img, cv::Size(m_saveW, m_saveH));
m_viderWriter << img;
}
else {
m_viderWriter << img;
}
}
++m_frames;
if (m_frames == 25 * 60 * 10) { // 每十分钟从新录制新视频
m_saveH = 0;
m_saveW = 0;
m_viderWriter.release();
}
}