Object Tracking: KCF Tracking with YOLOv4 Re-detection

1. Runtime environment

Visual Studio 2019 on Windows
OpenCV 4.5.5 + contrib 4.5.5 (built and installed with CMake, VS2019 / VC16 toolchain)
The official YOLOv4 model and pretrained weights
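
For reference, a typical CMake configure step for such a build looks roughly like this (the generator name and all paths are illustrative, adjust them to where you unpacked the sources):

cmake -G "Visual Studio 16 2019" -A x64 ^
      -D OPENCV_EXTRA_MODULES_PATH=../opencv_contrib-4.5.5/modules ^
      -D BUILD_opencv_world=ON ^
      ../opencv-4.5.5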

2. Algorithm

Track with KCF; when tracking fails, detect with YOLOv4-tiny. Among all detection results, the box with the highest DIoU against the target box from the frame before the loss is taken as the tracking target, and the tracker is re-initialized with that detection.
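
For reference, DIoU is plain IoU with a center-distance penalty (this matches the DIoU() implementation further below):

DIoU = IoU − d² / c²

where d is the distance between the two box centers and c is the diagonal of the smallest box enclosing both. Unlike plain IoU, DIoU still ranks candidates sensibly when the boxes do not overlap at all, which is exactly the situation right after the tracker loses the target.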

3. Implementation

GitHub: https://github.com/sunny553/KCF-YOLOv4/tree/main
The main function:

#include "yolo.h"
#include "track.h"

using namespace cv;

int main(int argc, char* argv[])
{
	Net_config yolov4_tiny = { 0.3, 0.2, 320, 320, "./cfg/coco.names",
		"./cfg/yolov4-tiny.cfg",
		"./cfg/yolov4-tiny.weights", "yolov4-tiny" };
	YOLO model(yolov4_tiny);
	
	if (argc < 2) {
		cout << "usage: pass the video path as the first argument" << endl;
		return -1;
	}
	string imgpath = argv[1];
	VideoCapture cap(imgpath);
	Mat firframe, frame;
	cv::Rect roi, tracking_rect, last_rect;

	while (1) {
		cap >> firframe;
		if (!firframe.data)
		{
			return 0;
		}	
		roi = model.detect_roi(firframe);
		if (roi.width != 0) {
			break;
		}
	}
	
	Ptr<TrackerKCF> tracker = TrackerKCF::create();
	tracker->init(firframe, roi);
	// Workaround for problem 1 (see the notes below): call update() once
	// immediately after init(), otherwise the first real update leaves the box unmoved.
	bool isfound = tracker->update(firframe, roi);
	last_rect = roi;
	namedWindow("tracker", WINDOW_AUTOSIZE | WINDOW_KEEPRATIO);
	for (;;)
	{
		cap >> frame;
		if (!frame.data)
		{
			break;
		}

		bool isfound = tracker->update(frame, tracking_rect);
		if (!isfound)
		{
			cout << "Tracking lost, re-detecting the target with YOLO...\n";
			while (1) {				
				if (!frame.data)
				{
					return 0;
				}
				// pick the detection with the highest DIoU w.r.t. the last known box
				roi = model.detect_optimal_roi(frame, last_rect);
				rectangle(frame, roi, Scalar(255, 0, 0), 2, 1);
				imshow("tracker", frame);
				waitKey(1);
				if (roi.width != 0) {
					break;
				}
				cap >> frame;
			}
			// Problem 2 workaround: re-create the tracker instead of calling init()
			// twice on the same instance (a second init() makes update() abort).
			// Re-assigning the Ptr releases the old tracker, so no explicit destructor call is needed.
			tracker = TrackerKCF::create();
			tracker->init(frame, roi);
			isfound = tracker->update(frame, roi);
		}
		else{
			last_rect = tracking_rect;
			rectangle(frame, tracking_rect, Scalar(255, 0, 0), 2, 1);
			imshow("tracker", frame);
			waitKey(1);
		}
	}
	destroyAllWindows();
	return 0;
}
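
To run it (the executable name here is just my assumption, adjust to your build): KCF-YOLOv4.exe test.mp4. The video path arrives through argv[1], and the ./cfg directory with coco.names, yolov4-tiny.cfg and yolov4-tiny.weights must be present relative to the working directory.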

Note: I think the KCF that OpenCV ships still has problems.
Problem 1: on the first call to update(), roi is not updated. I do not know why, so I call update() once immediately after init(); only then does the tracker actually follow the target. Running the official sample code directly shows the same problem.
Problem 2: a TrackerKCF instance can only be init()-ed once. On a second init(), update() aborts abnormally. I solved this by discarding the old tracker and constructing a fresh one (re-assigning the Ptr releases the old instance).
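For problem 2, this is the shape of the workaround in a minimal sketch (frame, first_roi and new_roi are illustrative names):

// Sketch: never call init() twice on the same TrackerKCF instance.
Ptr<TrackerKCF> tracker = TrackerKCF::create();
tracker->init(frame, first_roi);      // first target
// ... later, tracking fails and YOLO supplies new_roi ...
tracker = TrackerKCF::create();       // the Ptr releases the old instance here
tracker->init(frame, new_roi);        // init() on a fresh object: no crash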
Illustration of problem 1:
Initial frame:
[Figure 1: initial frame with the tracking box]
[Figure 2: second frame, the box has not moved]
Look! The box in the second frame did not move at all. So much for the official documentation!

Header file yolo.h:

#pragma once
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/dnn.hpp>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>

using namespace cv;
using namespace dnn;
using namespace std;

struct Net_config
{
	float confThreshold; // class confidence threshold
	float nmsThreshold;  // non-maximum suppression threshold
	int inpWidth;  // Width of network's input image
	int inpHeight; // Height of network's input image
	string classesFile;
	string modelConfiguration;
	string modelWeights;
	string netname;
};

class YOLO
{
public:
	YOLO(Net_config config);
	void detect(Mat& frame, map<int, Rect>& flag);	
	cv::Rect detect_roi(cv::Mat& img);
	cv::Rect detect_optimal_roi(cv::Mat& img,cv::Rect last_roi);
	
private:
	float confThreshold;
	float nmsThreshold;
	int inpWidth;
	int inpHeight;
	char netname[20];
	vector<string> classes;
	Net net;
	void postprocess(Mat& frame, const vector<Mat>& outs, map<int, Rect>& flag);
	void drawPred(int classId, float conf,
		int left, int top, int right, int bottom, Mat& frame,int i);
	float DIoU(cv::Rect yolo_rect, cv::Rect last_roi);
};



//Net_config yolo_nets[4] = {
//	{0.5, 0.4, 416, 416,"coco.names",
//	"yolov3/yolov3.cfg", "yolov3/yolov3.weights", "yolov3"},
//
//	{0.5, 0.4, 608, 608,"coco.names",
//	"yolov4/yolov4.cfg", "yolov4/yolov4.weights", "yolov4"},
//
//	{0.5, 0.4, 320, 320,"coco.names",
//	"yolo-fastest/yolo-fastest-xl.cfg",
//	"yolo-fastest/yolo-fastest-xl.weights", "yolo-fastest"},
//
//	{0.5, 0.4, 320, 320,"coco.names",
//	"yolobile/csdarknet53s-panet-spp.cfg",
//	"yolobile/yolobile.weights", "yolobile"}
//};

Header file track.h:

#include <opencv2/core/utility.hpp>
#include <opencv2/tracking.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/highgui.hpp>
#include <iostream>
#include <cstring>

yolo.cpp:

#include "yolo.h"

YOLO::YOLO(Net_config config)
{
	cout << "Net use " << config.netname << endl;
	this->confThreshold = config.confThreshold;
	this->nmsThreshold = config.nmsThreshold;
	this->inpWidth = config.inpWidth;
	this->inpHeight = config.inpHeight;
	strcpy_s(this->netname, config.netname.c_str());

	ifstream ifs(config.classesFile.c_str());
	string line;
	while (getline(ifs, line)) this->classes.push_back(line);

	this->net = readNetFromDarknet(config.modelConfiguration, config.modelWeights);
	this->net.setPreferableBackend(DNN_BACKEND_OPENCV);
	this->net.setPreferableTarget(DNN_TARGET_CPU);
}

void YOLO::postprocess(Mat& frame, const vector<Mat>& outs,map<int, Rect> &flag)
// Remove the bounding boxes with low confidence using non-maxima suppression
{
	vector<int> classIds;
	vector<float> confidences;
	vector<Rect> boxes;
	//Different models lay out their outputs differently. YOLO's outs is [[[x,y,w,h,...],[],...[]]];
	//the extra dimension exists because the network input is 4-D, the first axis being the number of frames (1 when inferring a single image)
	for (size_t i = 0; i < outs.size(); ++i)
	{
		// Scan through all the bounding boxes output from the network and keep only the
		// ones with high confidence scores. Assign the box's class label as the class
		// with the highest score for the box.
		//data is a pointer that jumps from one box's record to the next
		float* data = (float*)outs[i].data;
		for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols)
		{
			Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
			Point classIdPoint;
			double confidence;
			// Get the value and location of the maximum score;
			// its index maps directly onto the 80 class ids
			minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
			if (confidence > this->confThreshold)
			{
				int centerX = (int)(data[0] * frame.cols);
				int centerY = (int)(data[1] * frame.rows);
				int width = (int)(data[2] * frame.cols);
				int height = (int)(data[3] * frame.rows);
				int left = centerX - width / 2;
				int top = centerY - height / 2;

				classIds.push_back(classIdPoint.x);
				confidences.push_back((float)confidence);
				boxes.push_back(Rect(left, top, width, height));
			}
		}
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		this->drawPred(classIds[idx], confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame,i);
		flag[i]=box;
	}
}

void YOLO::drawPred(int classId, float conf,
	int left, int top, int right, int bottom, Mat& frame,int i)
	// Draw the predicted bounding box
{
	//Draw a rectangle displaying the bounding box
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 3);

	//Get the label for the class name and its confidence
	string label = format("%.2f", conf);
	if (!this->classes.empty())
	{
		CV_Assert(classId < (int)this->classes.size());
		label = this->classes[classId] + ":" + label;
	}
	label = to_string(i+1) + ":" + label;
	//Display the label at the top of the bounding box
	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.75, 1, &baseLine);
	top = max(top, labelSize.height);
	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75,
		Scalar(0, 255, 0), 1);
}

void YOLO::detect(Mat& frame, map<int, Rect>& flag)
{
	Mat blob;
	blobFromImage(frame, blob, 1 / 255.0,
		Size(this->inpWidth, this->inpHeight),
		Scalar(0, 0, 0), true, false);

	this->net.setInput(blob);
	vector<Mat> outs;
	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());
	this->postprocess(frame, outs,flag);

	vector<double> layersTimes;
	double freq = getTickFrequency() / 1000;
	double t = net.getPerfProfile(layersTimes) / freq;
	string label = format("%s Inference time : %.2f ms", this->netname, t);
	putText(frame, label, Point(0, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 2);
	//imwrite(format("%s_out.jpg", this->netname), frame);
}

cv::Rect YOLO::detect_roi(cv::Mat& img) {
	map<int, Rect> flag;
	//cv::Mat copy_img;
	//img.copyTo(copy_img);
	detect(img,flag);
	static const string kWinName = "All Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_AUTOSIZE | WINDOW_KEEPRATIO);
	imshow(kWinName, img);
	waitKey(0);
	destroyWindow(kWinName);
	if (flag.size() == 0) {
		cout << "No target detected" << endl;
		return cv::Rect(0, 0, 0, 0);
	}
	cout << "Enter the number of the target to track" << endl;
	int target_flag;
	cin >> target_flag;
	/*rectangle(copy_img, flag[target_flag-1], cv::Scalar(255, 0, 0));
	namedWindow("KCF Initial ROI", WINDOW_AUTOSIZE | WINDOW_KEEPRATIO);
	imshow("KCF Initial ROI", copy_img);
	waitKey(0);	*/
	destroyAllWindows();

	return flag[target_flag-1];
}

cv::Rect YOLO::detect_optimal_roi(cv::Mat& img, cv::Rect last_roi) {
	map<int, Rect> flag;
	cv::Mat copy_img;
	img.copyTo(copy_img);   // keep a clean copy: detect() draws boxes on img
	cv::Rect best_fit_rect;
	detect(img, flag);
	if (flag.size() == 0) {
		return best_fit_rect;
	}
	float iou, best_iou;
	best_iou = -1;
	for (int i = 0; i < (int)flag.size(); i++) {
		iou = DIoU(flag[i], last_roi);
		if (iou > best_iou) {
			best_fit_rect = flag[i];
			best_iou = iou;
		}
	}
	copy_img.copyTo(img);   // restore the frame without YOLO's drawings
	return best_fit_rect;
}

float YOLO::DIoU(cv::Rect rect1, cv::Rect rect2) {
	cv::Rect rect_minimum_bounding_box = rect1 | rect2;   // smallest box enclosing both
	cv::Rect rect_intersection = rect1 & rect2;
	float iou = float(rect_intersection.area()) /
		(float(rect1.area()) + float(rect2.area()) - float(rect_intersection.area()));   // plain IoU
	cv::Point2f center1, center2;
	center1.x = rect1.x + rect1.width / 2.0;
	center1.y = rect1.y + rect1.height / 2.0;
	center2.x = rect2.x + rect2.width / 2.0;
	center2.y = rect2.y + rect2.height / 2.0;
	// squared center distance over the squared diagonal of the enclosing box
	float d = (center1.x - center2.x) * (center1.x - center2.x)
		+ (center1.y - center2.y) * (center1.y - center2.y);
	float c = rect_minimum_bounding_box.width * rect_minimum_bounding_box.width
		+ rect_minimum_bounding_box.height * rect_minimum_bounding_box.height;
	return iou - d / c;
}
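
A quick sanity check of the arithmetic with two boxes I made up: rect1 = (0, 0, 100, 100) and rect2 = (50, 50, 100, 100). The intersection is 50×50 = 2500 and the union is 10000 + 10000 − 2500 = 17500, so IoU ≈ 0.143. The centers are (50, 50) and (100, 100), giving d = 50² + 50² = 5000; the enclosing box is 150×150, giving c = 150² + 150² = 45000; so DIoU ≈ 0.143 − 0.111 ≈ 0.032. Two disjoint boxes would score negative, but less negative the closer their centers are, which is exactly what lets detect_optimal_roi pick a sensible candidate after a full occlusion.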

4. Results

[Figures 3-5: tracking result screenshots]
