yolov7-opencv

Reference

https://github.com/WongKinYiu/yolov7/issues/49

Python OpenCV inference example

YOLOv7 repository: https://github.com/WongKinYiu/yolov7

ONNX export

# Uses the export.py script from the main branch directly

# Export the official pretrained .pt
python export.py --weights ./weights/yolov7.pt --grid  --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640

# Export a self-trained .pt
python export.py --weights ./runs/train/exp/weights/best.pt  --grid  --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640
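
Before building a full inference script, it is worth checking that OpenCV's DNN module can load the exported file and that the output is already decoded. A minimal sketch, assuming the export wrote weights/yolov7.onnx:

import cv2
import numpy as np

# Load the exported model and run a dummy forward pass to inspect the output.
net = cv2.dnn.readNetFromONNX('weights/yolov7.onnx')
blob = cv2.dnn.blobFromImage(np.zeros((640, 640, 3), np.uint8), 1 / 255.0, (640, 640), swapRB=True)
net.setInput(blob)
out = net.forward(net.getUnconnectedOutLayersNames())[0]
# With --grid the model decodes boxes itself, so for a 640x640 input this
# should print something like (1, 25200, num_classes + 5).
print(out.shape)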


Python OpenCV version

>>> cv2.__version__
'4.7.0'
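
If cv2 reports an older build, upgrading the pip package is usually enough; the demo only relies on the dnn module:

pip install -U opencv-python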

Python inference code

import cv2
import argparse
import numpy as np

class YOLOv7:
    def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5):
        # Load the class names from file; they can also be hard-coded by hand
        with open('coco.names', 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        # self.classes=['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
        # 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        # 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
        # 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        # 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
        # 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
        # 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
        # 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
        # 'hair drier', 'toothbrush']
        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
        num_classes = len(self.classes)
        # Anchor/grid parameters; only needed when the ONNX output is raw
        # feature maps, so they go unused with a --grid export (kept for reference)
        self.anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(self.anchors)
        self.na = len(self.anchors[0]) // 2
        self.no = num_classes + 5
        self.stride = np.array([8., 16., 32.])
        self.inpWidth = 640
        self.inpHeight = 640
        self.net = cv2.dnn.readNetFromONNX(yolo_type + '.onnx')
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
    
    def _make_grid(self, nx=20, ny=20):
        # Grid helper for decoding raw feature-map outputs; unused here because
        # the --grid export already decodes boxes inside the model.
        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def letterbox(self, im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
        # Resize and pad image while meeting stride-multiple constraints
        shape = im.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)
        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        # if not scaleup:  # only scale down, do not scale up (for better val mAP)
        #     r = min(r, 1.0)
        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        # if auto:  # minimum rectangle
        #     dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
        dw /= 2  # divide padding into 2 sides
        dh /= 2
        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
       
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return im, ratio, (dw, dh)
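    # Worked example: for a 1280x720 input and new_shape=(640, 640):
    #   r = min(640/720, 640/1280) = 0.5, so new_unpad = (640, 360)
    #   dw, dh = 0, 280, halved to 0 and 140, i.e. 140 px of grey padding on
    #   the top and bottom; detect() uses ratio and (dw, dh) to map boxes
    #   back onto the original image.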


    def xywh2xyxy(self, x):
        # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
       # y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
        y = np.copy(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
        y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
        y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
        y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
        return y

    def non_max_suppression(self, prediction, conf_thres=0.25, agnostic=False):
        xc = prediction[..., 4] > conf_thres  # candidates
        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
       
        output = [np.zeros((0, 6))] * prediction.shape[0]

        for xi, x in enumerate(prediction):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            if not x.shape[0]:
                continue
            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = self.xywh2xyxy(x[:, :4])
            # Detections matrix nx6 (xyxy, conf, cls)
            conf = np.max(x[:, 5:], axis=1)
            j = np.argmax(x[:, 5:],axis=1)
            # Equivalent of torch: x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
            re = conf.reshape(-1) > conf_thres
            # Reshape to column vectors
            conf = conf.reshape(-1, 1)
            j = j.reshape(-1, 1)
            # Concatenate with numpy
            x = np.concatenate((box, conf, j), axis=1)[re]
            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence (numpy argsort is ascending, so reverse)
            # Batched NMS
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # class offset so boxes of different classes never overlap
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            # OpenCV's built-in NMSBoxes expects (x, y, w, h) rects, so convert from xyxy first
            boxes_xywh = boxes.copy()
            boxes_xywh[:, 2:4] -= boxes_xywh[:, 0:2]
            i = cv2.dnn.NMSBoxes(boxes_xywh.tolist(), scores.tolist(), self.confThreshold, self.nmsThreshold)
            # i = torchvision.ops.nms(boxes, scores, iou_thres)  # torch equivalent
            output[xi] = x[i]
        return output

    def detect(self, srcimg):
        im, ratio, wh = self.letterbox(srcimg, self.inpWidth, stride=self.stride, auto=False)
        # Set the input to the network
        blob = cv2.dnn.blobFromImage(im, 1 / 255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False)
        self.net.setInput(blob)
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())[0]
        # NMS
        pred = self.non_max_suppression(outs, self.confThreshold, agnostic=False)
        # Draw boxes, undoing the letterbox padding and scaling
        for i in pred[0]:
            left = int((i[0] - wh[0]) / ratio[0])
            top = int((i[1] - wh[1]) / ratio[1])
            right = int((i[2] - wh[0]) / ratio[0])
            bottom = int((i[3] - wh[1]) / ratio[1])
            conf = i[4]
            classId = i[5]
            cv2.rectangle(srcimg, (left, top), (right, bottom), (0, 0, 255), thickness=2)
            print("Found %s left %d top %d right %d bottom %d" % (self.classes[int(classId)], left, top, right, bottom))
            label = '%.2f' % conf
            label = '%s:%s' % (self.classes[int(classId)], label)
            # Display the label at the top of the bounding box
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            top = max(top, labelSize[1])
            cv2.putText(srcimg, label, (left - 20, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), thickness=2)
        cv2.imshow('show', srcimg)
        cv2.waitKey(0)  

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--imgpath", type=str, default='data/20230124_110919.jpg', help="image path")
    parser.add_argument('--net',type=str,default='weights/bestv7',help='model name')
    parser.add_argument('--confThreshold', default=0.3, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    args = parser.parse_args()

    model = YOLOv7(args.net, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold)
    srcimg = cv2.imread(args.imgpath)
    model.detect(srcimg)
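
Assuming the script above is saved as yolov7_opencv.py (the filename is arbitrary), it can be run like this; note that --net takes the model path without the .onnx suffix, since the class appends it:

python yolov7_opencv.py --imgpath data/20230124_110919.jpg --net weights/bestv7 --confThreshold 0.3 --nmsThreshold 0.5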
    

Here, both the official .pt and a .pt trained on custom data export to ONNX files that load normally and detect correctly.

C++ OpenCV inference example

OpenCV download page: https://opencv.org/releases/

The OpenCV version downloaded here is 4.5.5.

ONNX export

# Uses the export.py script from the u5 branch: https://github.com/WongKinYiu/yolov7/tree/u5

python export.py  --weights ./weights/yolov7.pt --include onnx
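
The C++ demo below derives the network input size from a filename of the form name_HxW.onnx, so it helps to confirm what input shape the exported graph actually carries. A small sketch using the onnx package (an extra dependency, not needed by the demo itself):

import onnx

# Print the static input shape recorded in the exported graph.
model = onnx.load('weights/yolov7.onnx')
dims = model.graph.input[0].type.tensor_type.shape.dim
print([d.dim_value for d in dims])  # e.g. [1, 3, 640, 640] -> name the file yolov7_640x640.onnx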

C++ inference code

#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>

using namespace cv;
using namespace dnn;
using namespace std;

struct Net_config
{
	float confThreshold; // Confidence threshold
	float nmsThreshold;  // Non-maximum suppression threshold
	string modelpath;
};

class YOLOV7
{
public:
	YOLOV7(Net_config config);
	void detect(Mat& frame);
private:
	int inpWidth;
	int inpHeight;
	vector<string> class_names;
	int num_class;

	float confThreshold;
	float nmsThreshold;
	Net net;
	void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);
};

YOLOV7::YOLOV7(Net_config config)
{
	this->confThreshold = config.confThreshold;
	this->nmsThreshold = config.nmsThreshold;

	this->net = readNet(config.modelpath);
	/*
	ifstream ifs("coco.names");
	string line;
	while (getline(ifs, line)) this->class_names.push_back(line);*/
	
	this->class_names = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light","fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow","elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee","skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard","tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple","sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch","potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone","microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear","hair drier", "toothbrush" };
	this->num_class = class_names.size();

	// Derive the network input size from a model filename of the form
	// name_HxW.onnx (e.g. yolov7_640x640.onnx): everything between the first
	// '_' and the ".onnx" suffix is parsed as "HxW".
	size_t pos = config.modelpath.find("_");
	int len = config.modelpath.length() - 6 - pos;   // 6 = the '_' plus ".onnx"
	string hxw = config.modelpath.substr(pos + 1, len);
	pos = hxw.find("x");
	string h = hxw.substr(0, pos);
	len = hxw.length() - pos;
	string w = hxw.substr(pos + 1, len);
	this->inpHeight = stoi(h);
	this->inpWidth = stoi(w);
}

void YOLOV7::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid)   // Draw the predicted bounding box
{
	//Draw a rectangle displaying the bounding box
	rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);

	//Get the label for the class name and its confidence
	string label = format("%.2f", conf);
	label = this->class_names[classid] + ":" + label;

	//Display the label at the top of the bounding box
	int baseLine;
	Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
	top = max(top, labelSize.height);
	//rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
	putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}

void YOLOV7::detect(Mat& frame)
{
	Mat blob = blobFromImage(frame, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
	this->net.setInput(blob);
	vector<Mat> outs;
	this->net.forward(outs, this->net.getUnconnectedOutLayersNames());

	int num_proposal = outs[0].size[0];
	int nout = outs[0].size[1];
	if (outs[0].dims > 2)
	{
		num_proposal = outs[0].size[1];
		nout = outs[0].size[2];
		outs[0] = outs[0].reshape(0, num_proposal);
	}
	// Generate proposals
	vector<float> confidences;
	vector<Rect> boxes;
	vector<int> classIds;
	float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth;
	int n = 0, row_ind = 0; /// each row: cx, cy, w, h, box_score, class_score
	float* pdata = (float*)outs[0].data;
	for (n = 0; n < num_proposal; n++)   // proposals across all feature-map scales
	{
		float box_score = pdata[4];
		if (box_score > this->confThreshold)
		{
			Mat scores = outs[0].row(row_ind).colRange(5, nout);
			Point classIdPoint;
			double max_class_score;
			// Get the value and location of the maximum class score
			minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
			max_class_score *= box_score;
			if (max_class_score > this->confThreshold)
			{
				const int class_idx = classIdPoint.x;
				float cx = pdata[0] * ratiow;  // center x
				float cy = pdata[1] * ratioh;  // center y
				float w = pdata[2] * ratiow;   // width
				float h = pdata[3] * ratioh;   // height

				int left = int(cx - 0.5 * w);
				int top = int(cy - 0.5 * h);

				confidences.push_back((float)max_class_score);
				boxes.push_back(Rect(left, top, (int)(w), (int)(h)));
				classIds.push_back(class_idx);
			}
		}
		row_ind++;
		pdata += nout;
	}

	// Perform non maximum suppression to eliminate redundant overlapping boxes with
	// lower confidences
	vector<int> indices;
	dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
	for (size_t i = 0; i < indices.size(); ++i)
	{
		int idx = indices[i];
		Rect box = boxes[idx];
		this->drawPred(confidences[idx], box.x, box.y,
			box.x + box.width, box.y + box.height, frame, classIds[idx]);
	}
}

int main()
{
	// The ONNX exported from Python above used --img-size 640 640, so the file is named yolov7_640x640.onnx to match
	Net_config YOLOV7_nets = { 0.3, 0.5, "D:/test2/test2/models/yolov7_640x640.onnx" };
	// Other filenames following the same name_HxW.onnx convention: models/yolov7_736x1280.onnx,
	// models/yolov7-tiny_384x640.onnx, models/yolov7_480x640.onnx, models/yolov7_384x640.onnx,
	// models/yolov7-tiny_256x480.onnx, models/yolov7-tiny_256x320.onnx, models/yolov7_256x320.onnx,
	// models/yolov7-tiny_256x640.onnx, models/yolov7_256x640.onnx, models/yolov7-tiny_480x640.onnx,
	// models/yolov7-tiny_736x1280.onnx, models/yolov7_256x480.onnx
	YOLOV7 net(YOLOV7_nets);
	string imgpath = "D:/test2/test2/images/bus.jpg";
	Mat srcimg = imread(imgpath);
	net.detect(srcimg);

	static const string kWinName = "Deep learning object detection in OpenCV";
	namedWindow(kWinName, WINDOW_NORMAL);
	imshow(kWinName, srcimg);
	waitKey(0);
	destroyAllWindows();
}
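
On Windows the project is built in Visual Studio with the OpenCV 4.5.5 include and library directories configured. On Linux, assuming the source is saved as main.cpp (a placeholder name), a minimal build line would be:

g++ main.cpp -o yolov7_demo `pkg-config --cflags --libs opencv4`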

Note: in this test, the ONNX converted from the official yolov7.pt loads and runs correctly, but the ONNX converted from a self-trained .pt fails to load with the following error

Error: Unspecified error (> Node [Expand@ai.onnx]:(/model.105/Expand_1_output_0) parse error: OpenCV(4.5.5) C:\build\master_winpack-build-win64-vc15\opencv\modules\dnn\src\onnx\onnx_importer.cpp:2389: error: (-213:The function/feature is not implemented) Expand op doesn't support multiple axes for constant input in function 'cv::dnn::dnn4_v20211220::ONNXImporter::parseExpand'
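
The failure is in OpenCV 4.5.5's ONNX importer, which does not implement this form of the Expand op. Two things worth trying, though neither is verified against this exact model: upgrade OpenCV (the Python demo above ran on 4.7.0 without issue), or pass the file through onnx-simplifier, which often constant-folds Expand nodes away (paths illustrative):

pip install onnxsim
onnxsim best.onnx best-sim.onnx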
