Reference: https://github.com/WongKinYiu/yolov7/issues/49
yolov7 repo: https://github.com/WongKinYiu/yolov7
# The export.py script from the main branch is used here
# Export the official pt
python export.py --weights ./weights/yolov7.pt --grid --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640
# Export a self-trained pt
python export.py --weights ./runs/train/exp/weights/best.pt --grid --simplify --topk-all 100 --iou-thres 0.65 --conf-thres 0.35 --img-size 640 640
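Before moving on, it can be worth sanity-checking the exported file. A minimal sketch, assuming the onnx Python package is installed and the export landed at ./weights/yolov7.onnx:

import onnx

# Load the exported model and run the structural checker;
# this catches truncated or malformed exports early.
model = onnx.load('./weights/yolov7.onnx')
onnx.checker.check_model(model)

# Print the graph's input/output names for reference.
for t in model.graph.input:
    print('input :', t.name)
for t in model.graph.output:
    print('output:', t.name)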
The Python-side OpenCV version used here:
>>> cv2.__version__
'4.7.0'
import cv2
import argparse
import numpy as np


class yolov5():
    def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5):
        # Class names are loaded from a file; they can also be defined by hand
        with open('coco.names', 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        # self.classes = ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
        #                 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
        #                 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
        #                 'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
        #                 'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
        #                 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
        #                 'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
        #                 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
        #                 'hair drier', 'toothbrush']
        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
        num_classes = len(self.classes)
        self.anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(self.anchors)          # number of detection layers
        self.na = len(self.anchors[0]) // 2  # anchors per layer
        self.no = num_classes + 5            # outputs per anchor: box(4) + obj(1) + classes
        self.stride = np.array([8., 16., 32.])
        self.inpWidth = 640
        self.inpHeight = 640
        self.net = cv2.dnn.readNetFromONNX(yolo_type + '.onnx')
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold

    def _make_grid(self, nx=20, ny=20):
        xv, yv = np.meshgrid(np.arange(ny), np.arange(nx))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)
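Since the export used --grid, the ONNX already decodes and concatenates the three detection scales, so forward() should return a single tensor of shape (1, N, num_classes + 5). A quick sketch of where N comes from, using the strides and anchor count defined above:

import numpy as np

img_size = 640
strides = np.array([8., 16., 32.])  # matches self.stride above
na = 3                              # anchors per scale (self.na)

# Each scale contributes (img/stride)^2 grid cells, times 3 anchors.
n = int(sum((img_size / s) ** 2 * na for s in strides))
print(n)  # 80*80*3 + 40*40*3 + 20*20*3 = 25200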
    def letterbox(self, im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleup=True, stride=32):
        # Resize and pad image while meeting stride-multiple constraints
        shape = im.shape[:2]  # current shape [height, width]
        if isinstance(new_shape, int):
            new_shape = (new_shape, new_shape)

        # Scale ratio (new / old)
        r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
        # if not scaleup:  # only scale down, do not scale up (for better val mAP)
        #     r = min(r, 1.0)

        # Compute padding
        ratio = r, r  # width, height ratios
        new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
        dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
        # if auto:  # minimum rectangle
        #     dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
        dw /= 2  # divide padding into 2 sides
        dh /= 2

        if shape[::-1] != new_unpad:  # resize
            im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
        top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
        left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
        im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
        return im, ratio, (dw, dh)
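A quick check of the padding math, using a hypothetical 1280x720 input (letterbox does not touch self, so it can be called unbound):

import numpy as np

# r = min(640/720, 640/1280) = 0.5, so the image resizes to 360x640
# and gets 140 px of gray padding on top and bottom.
img = np.zeros((720, 1280, 3), dtype=np.uint8)
out, ratio, (dw, dh) = yolov5.letterbox(None, img, (640, 640), auto=False)
print(out.shape, ratio, (dw, dh))  # (640, 640, 3) (0.5, 0.5) (0.0, 140.0)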
    def xywh2xyxy(self, x):
        # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
        # y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
        y = np.copy(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
        y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
        y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
        y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
        return y
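A one-box sanity check of the conversion (again callable unbound, since self is unused):

import numpy as np

# Center (50, 40), width 20, height 10  ->  corners (40, 35) .. (60, 45)
box = np.array([[50., 40., 20., 10.]])
print(yolov5.xywh2xyxy(None, box))  # [[40. 35. 60. 45.]]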
    def non_max_suppression(self, prediction, conf_thres=0.25, agnostic=False):
        xc = prediction[..., 4] > conf_thres  # candidates

        # Settings
        min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
        max_nms = 30000  # maximum number of boxes fed into NMS

        output = [np.zeros((0, 6))] * prediction.shape[0]
        for xi, x in enumerate(prediction):  # image index, image inference
            # Apply constraints
            # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
            x = x[xc[xi]]  # confidence
            if not x.shape[0]:
                continue

            # Compute conf
            x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf

            # Box (center x, center y, width, height) to (x1, y1, x2, y2)
            box = self.xywh2xyxy(x[:, :4])

            # Detections matrix nx6 (xyxy, conf, cls)
            conf = np.max(x[:, 5:], axis=1)
            j = np.argmax(x[:, 5:], axis=1)
            # numpy equivalent of: x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
            re = np.array(conf.reshape(-1) > conf_thres)
            # reshape to column vectors
            conf = conf.reshape(-1, 1)
            j = j.reshape(-1, 1)
            # concatenate with numpy
            x = np.concatenate((box, conf, j), axis=1)[re]

            # Check shape
            n = x.shape[0]  # number of boxes
            if not n:  # no boxes
                continue
            elif n > max_nms:  # excess boxes
                # numpy argsort has no descending flag, so sort ascending and reverse
                x = x[x[:, 4].argsort()[::-1][:max_nms]]  # sort by confidence

            # Batched NMS: offset boxes by class so different classes never suppress each other
            c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
            boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
            # cv2.dnn.NMSBoxes expects (x, y, w, h), so convert from (x1, y1, x2, y2)
            boxes_wh = boxes.copy()
            boxes_wh[:, 2:4] -= boxes_wh[:, 0:2]
            # use OpenCV's built-in NMS instead of torchvision.ops.nms(boxes, scores, iou_thres)
            i = cv2.dnn.NMSBoxes(boxes_wh.tolist(), scores.tolist(), self.confThreshold, self.nmsThreshold)
            i = np.array(i).reshape(-1).astype(int)  # an empty result comes back as (); normalize it
            output[xi] = x[i]
        return output
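Note that cv2.dnn.NMSBoxes takes boxes as (x, y, w, h) rather than corner pairs, which is what the conversion above accounts for. A minimal standalone example of the call:

import cv2

# Two heavily overlapping boxes and one separate box, as (x, y, w, h).
boxes = [[10, 10, 100, 100], [12, 12, 100, 100], [300, 300, 50, 50]]
scores = [0.9, 0.8, 0.7]

# Keeps the best of the overlapping pair plus the separate box.
keep = cv2.dnn.NMSBoxes(boxes, scores, 0.5, 0.5)
print(keep)  # e.g. [0 2]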
    def detect(self, srcimg):
        im = srcimg.copy()
        im, ratio, wh = self.letterbox(im, self.inpWidth, stride=self.stride, auto=False)
        # Set the input to the network
        blob = cv2.dnn.blobFromImage(im, 1 / 255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False)
        self.net.setInput(blob)
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())[0]
        # NMS
        pred = self.non_max_suppression(outs, self.confThreshold, agnostic=False)
        # draw boxes: undo the letterbox padding and scaling to map back to the source image
        for det in pred[0]:
            left = int((det[0] - wh[0]) / ratio[0])
            top = int((det[1] - wh[1]) / ratio[1])
            right = int((det[2] - wh[0]) / ratio[0])
            bottom = int((det[3] - wh[1]) / ratio[1])
            conf = det[4]
            classId = det[5]
            cv2.rectangle(srcimg, (left, top), (right, bottom), (0, 0, 255), thickness=2)
            print("found %s left %d top %d right %d bottom %d" % (self.classes[int(classId)], left, top, right, bottom))
            label = '%.2f' % conf
            label = '%s:%s' % (self.classes[int(classId)], label)
            # Display the label at the top of the bounding box
            labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
            top = max(top, labelSize[1])
            cv2.putText(srcimg, label, (int(left - 20), int(top - 10)), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 0), thickness=2)
        cv2.imshow('show', srcimg)
        cv2.waitKey(0)
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--imgpath", type=str, default='data/20230124_110919.jpg', help="image path")
    parser.add_argument('--net', type=str, default='weights/bestv7', help='model name')
    parser.add_argument('--confThreshold', default=0.3, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    args = parser.parse_args()

    model = yolov5(args.net, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold)
    srcimg = cv2.imread(args.imgpath)
    model.detect(srcimg)
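Assuming the script above is saved as yolov7_opencv.py (the name is arbitrary), running it against the official export looks like:

python yolov7_opencv.py --net weights/yolov7 --imgpath data/20230124_110919.jpg

Note that --net takes the model path without the .onnx suffix, since __init__ appends it.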
With this script, both the ONNX exported from the official pt and the one exported from a pt trained on custom data load correctly and produce correct detections.

For the C++ side, download OpenCV from https://opencv.org/releases/ — the version used here is 4.5.5.
# Here the export.py script from the https://github.com/WongKinYiu/yolov7/tree/u5 branch is used
python export.py --weights ./weights/yolov7.pt --include onnx
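Since the C++ code below infers the input size from the file name, it helps to first confirm the ONNX loads and what shape it returns. A quick Python-side check, a sketch using the same cv2.dnn calls the C++ port relies on (the expected shape assumes a 640x640 export):

import cv2
import numpy as np

net = cv2.dnn.readNet('yolov7.onnx')
blob = cv2.dnn.blobFromImage(np.zeros((640, 640, 3), np.uint8), 1 / 255.0, (640, 640), (0, 0, 0), swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(net.getUnconnectedOutLayersNames())
print([o.shape for o in outs])  # expect something like [(1, 25200, 85)]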
#include <fstream>
#include <sstream>
#include <iostream>
#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/dnn.hpp>

using namespace cv;
using namespace dnn;
using namespace std;
struct Net_config
{
    float confThreshold; // Confidence threshold
    float nmsThreshold;  // Non-maximum suppression threshold
    string modelpath;
};

class YOLOV7
{
public:
    YOLOV7(Net_config config);
    void detect(Mat& frame);
private:
    int inpWidth;
    int inpHeight;
    vector<string> class_names;
    int num_class;
    float confThreshold;
    float nmsThreshold;
    Net net;
    void drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid);
};
YOLOV7::YOLOV7(Net_config config)
{
    this->confThreshold = config.confThreshold;
    this->nmsThreshold = config.nmsThreshold;
    this->net = readNet(config.modelpath);
    /*
    ifstream ifs("coco.names");
    string line;
    while (getline(ifs, line)) this->class_names.push_back(line);*/
    this->class_names = { "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
        "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow",
        "elephant", "bear", "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee",
        "skis", "snowboard", "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
        "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", "apple",
        "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", "chair", "couch",
        "potted plant", "bed", "dining table", "toilet", "tv", "laptop", "mouse", "remote", "keyboard", "cell phone",
        "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors", "teddy bear",
        "hair drier", "toothbrush" };
    this->num_class = class_names.size();

    // Recover the input resolution from the file name, e.g. "yolov7_640x640.onnx" -> 640 x 640
    size_t pos = config.modelpath.find("_");
    int len = config.modelpath.length() - 6 - pos;
    string hxw = config.modelpath.substr(pos + 1, len);
    pos = hxw.find("x");
    string h = hxw.substr(0, pos);
    len = hxw.length() - pos;
    string w = hxw.substr(pos + 1, len);
    this->inpHeight = stoi(h);
    this->inpWidth = stoi(w);
}
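The constructor recovers the input resolution purely from the file name: everything between the first "_" and the ".onnx" suffix, split on "x". The same logic, sketched in Python for clarity (note this implies the path must not contain an earlier "_"):

path = "D:/test2/test2/models/yolov7_640x640.onnx"
hxw = path[path.find("_") + 1 : -len(".onnx")]  # "640x640"
h, w = (int(v) for v in hxw.split("x"))
print(h, w)  # 640 640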
void YOLOV7::drawPred(float conf, int left, int top, int right, int bottom, Mat& frame, int classid) // Draw the predicted bounding box
{
    // Draw a rectangle displaying the bounding box
    rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 0, 255), 2);
    // Get the label for the class name and its confidence
    string label = format("%.2f", conf);
    label = this->class_names[classid] + ":" + label;
    // Display the label at the top of the bounding box
    int baseLine;
    Size labelSize = getTextSize(label, FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
    top = max(top, labelSize.height);
    //rectangle(frame, Point(left, top - int(1.5 * labelSize.height)), Point(left + int(1.5 * labelSize.width), top + baseLine), Scalar(0, 255, 0), FILLED);
    putText(frame, label, Point(left, top), FONT_HERSHEY_SIMPLEX, 0.75, Scalar(0, 255, 0), 1);
}
void YOLOV7::detect(Mat& frame)
{
    Mat blob = blobFromImage(frame, 1 / 255.0, Size(this->inpWidth, this->inpHeight), Scalar(0, 0, 0), true, false);
    this->net.setInput(blob);
    vector<Mat> outs;
    this->net.forward(outs, this->net.getUnconnectedOutLayersNames());

    int num_proposal = outs[0].size[0];
    int nout = outs[0].size[1];
    if (outs[0].dims > 2) // (1, num_proposal, nout) -> (num_proposal, nout)
    {
        num_proposal = outs[0].size[1];
        nout = outs[0].size[2];
        outs[0] = outs[0].reshape(0, num_proposal);
    }

    // generate proposals
    vector<float> confidences;
    vector<Rect> boxes;
    vector<int> classIds;
    // No letterbox here: the blob is a plain resize, so boxes scale straight back to the frame
    float ratioh = (float)frame.rows / this->inpHeight, ratiow = (float)frame.cols / this->inpWidth;
    int n = 0, row_ind = 0; /// each row: cx, cy, w, h, box_score, class scores...
    float* pdata = (float*)outs[0].data;
    for (n = 0; n < num_proposal; n++)
    {
        float box_score = pdata[4];
        if (box_score > this->confThreshold)
        {
            Mat scores = outs[0].row(row_ind).colRange(5, nout);
            Point classIdPoint;
            double max_class_score;
            // Get the value and location of the maximum score
            minMaxLoc(scores, 0, &max_class_score, 0, &classIdPoint);
            max_class_score *= box_score;
            if (max_class_score > this->confThreshold)
            {
                const int class_idx = classIdPoint.x;
                float cx = pdata[0] * ratiow; /// cx
                float cy = pdata[1] * ratioh; /// cy
                float w = pdata[2] * ratiow;  /// w
                float h = pdata[3] * ratioh;  /// h
                int left = int(cx - 0.5 * w);
                int top = int(cy - 0.5 * h);
                confidences.push_back((float)max_class_score);
                boxes.push_back(Rect(left, top, (int)(w), (int)(h)));
                classIds.push_back(class_idx);
            }
        }
        row_ind++;
        pdata += nout;
    }

    // Perform non maximum suppression to eliminate redundant overlapping boxes with
    // lower confidences
    vector<int> indices;
    dnn::NMSBoxes(boxes, confidences, this->confThreshold, this->nmsThreshold, indices);
    for (size_t i = 0; i < indices.size(); ++i)
    {
        int idx = indices[i];
        Rect box = boxes[idx];
        this->drawPred(confidences[idx], box.x, box.y,
            box.x + box.width, box.y + box.height, frame, classIds[idx]);
    }
}
int main()
{
    // The ONNX was exported from Python with --img-size 640 640, hence the 640x640 in the file name.
    // Other published models follow the same naming convention, e.g.:
    // "models/yolov7_736x1280.onnx", "models/yolov7-tiny_384x640.onnx", "models/yolov7_480x640.onnx",
    // "models/yolov7_384x640.onnx", "models/yolov7-tiny_256x480.onnx", "models/yolov7-tiny_256x320.onnx",
    // "models/yolov7_256x320.onnx", "models/yolov7-tiny_256x640.onnx", "models/yolov7_256x640.onnx",
    // "models/yolov7-tiny_480x640.onnx", "models/yolov7-tiny_736x1280.onnx", "models/yolov7_256x480.onnx"
    Net_config YOLOV7_nets = { 0.3, 0.5, "D:/test2/test2/models/yolov7_640x640.onnx" };
    YOLOV7 net(YOLOV7_nets);
    string imgpath = "D:/test2/test2/images/bus.jpg";
    Mat srcimg = imread(imgpath);
    net.detect(srcimg);

    static const string kWinName = "Deep learning object detection in OpenCV";
    namedWindow(kWinName, WINDOW_NORMAL);
    imshow(kWinName, srcimg);
    waitKey(0);
    destroyAllWindows();
}
Note: the ONNX converted from the official yolov7.pt loads and runs correctly here, but the ONNX converted from a self-trained pt fails to load with the following error:

Error: Unspecified error (> Node [Expand@ai.onnx]:(/model.105/Expand_1_output_0) parse error: OpenCV(4.5.5) C:\build\master_winpack-build-win64-vc15\opencv\modules\dnn\src\onnx\onnx_importer.cpp:2389: error: (-213:The function/feature is not implemented) Expand op doesn't support multiple axes for constant input in function 'cv::dnn::dnn4_v20211220::ONNXImporter::parseExpand'

The error comes from the 4.5.5 ONNX importer's Expand parser. Since the Python section loaded a custom-trained export fine under OpenCV 4.7.0, trying a newer OpenCV build on the C++ side is the obvious next step.