1、修改YOLOv5的代码
将
class Focus(nn.Module):
    """Focus width-height information into channel space (slicing version).

    ``forward`` takes every second element along the last two dimensions at
    the four possible (0/1, 0/1) offsets, concatenates the four slices along
    dimension 1 and feeds the result to ``self.conv``, which is therefore
    built with ``c1 * 4`` input channels.

    This is the variant the guide asks you to replace; the commented-out
    lines are the ``Contract``-based alternative.
    """

    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        # self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        # return self.conv(self.contract(x))
修改为:
class Focus(nn.Module):
    """Focus width-height information into channel space (Contract version).

    ``forward`` passes the input through ``self.contract`` and then through
    ``self.conv``, which is built with ``c1 * 4`` input channels.

    NOTE(review): ``Contract`` is defined elsewhere; it presumably performs
    the 2x2 space-to-depth rearrangement that the commented-out slicing line
    did by hand -- confirm, including whether the resulting channel order is
    the same, before reusing weights trained with the slicing version.
    """

    # Focus wh information into c-space
    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
        super(Focus, self).__init__()
        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
        self.contract = Contract(gain=2)

    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
        # return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
        return self.conv(self.contract(x))
2、训练模型,得到best.pt文件,将其重命名为yolov5s.pt(第3步的脚本按这个文件名加载权重)
3、下载https://github.com/ultralytics/yolov5 的源码到本地,把上一步得到的yolov5s.pt放到yolov5-master主目录,并在该目录里新建一个pth.py文件,把下面的代码复制到这个pth.py文件里。
import torch
from collections import OrderedDict
import pickle
import os
# Load on the GPU when one is available, otherwise on the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

if __name__ == '__main__':
    # Export the weights of a YOLOv5 checkpoint in two forms:
    #   <stem>_param.pth       -- the state_dict, saved with torch.save
    #   <stem>_numpy_param.pkl -- the same entries as numpy arrays, pickled
    choices = ['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x']
    modelfile = choices[0] + '.pt'
    stem = os.path.splitext(modelfile)[0]

    # NOTE(review): the checkpoint is indexed with ['model'] and then
    # `.model`, i.e. it holds a pickled model object rather than a bare
    # state_dict. Recent PyTorch releases may refuse to unpickle such objects
    # by default (weights_only) -- confirm against the installed version.
    utl_model = torch.load(modelfile, map_location=device)
    utl_param = utl_model['model'].model

    # Build the state_dict once and reuse it for both output files.
    own_state = utl_param.state_dict()
    torch.save(own_state, stem + '_param.pth')
    print(len(own_state))

    numpy_param = OrderedDict(
        (name, tensor.detach().cpu().numpy()) for name, tensor in own_state.items()
    )
    print(len(numpy_param))

    with open(stem + '_numpy_param.pkl', 'wb') as fw:
        pickle.dump(numpy_param, fw)
运行后生成yolov5s_param.pth和yolov5s_numpy_param.pkl两个文件。
4、把生成的yolov5s_param.pth粘贴到yolov5-dnn-cpp-python-main\convert-onnx文件夹,运行该文件夹里的转换脚本,将其转换成onnx文件即可。 项目地址:https://github.com/hpc203/yolov5-dnn-cpp-python
5、把转换得到的yolov5s.onnx和coco.names放在运行目录下,运行以下代码调用摄像头进行检测(按q键退出):
import cv2
import argparse
import numpy as np
class yolov5():
    """YOLOv5 detector that runs an ONNX model through ``cv2.dnn``.

    ``detect`` runs the network on one image and decodes the raw output into
    boxes in network-input pixels; ``postprocess`` thresholds those rows,
    applies non-maximum suppression and draws the surviving boxes.
    """

    def __init__(self, yolo_type, confThreshold=0.5, nmsThreshold=0.5, objThreshold=0.5, namesFile='coco.names'):
        """Load the class names and the ``<yolo_type>.onnx`` network.

        Args:
            yolo_type: model file stem; ``yolo_type + '.onnx'`` is loaded.
            confThreshold: minimum best-class score for a row to be kept.
            nmsThreshold: NMS IoU threshold passed to ``cv2.dnn.NMSBoxes``.
            objThreshold: minimum objectness score for a row to be kept.
            namesFile: text file with one class name per line.
        """
        # The default names file targets a model trained on COCO; for a model
        # trained on your own dataset, point namesFile at its class list so
        # that self.classes matches the model.
        with open(namesFile, 'rt') as f:
            self.classes = f.read().rstrip('\n').split('\n')
        self.colors = [np.random.randint(0, 255, size=3).tolist() for _ in range(len(self.classes))]
        num_classes = len(self.classes)
        # One flat list of (w, h) anchor pairs per detection level.
        anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
        self.nl = len(anchors)  # number of detection levels
        self.na = len(anchors[0]) // 2  # anchors per level
        self.no = num_classes + 5  # values per row: x, y, w, h, objectness, class scores
        self.grid = [np.zeros(1)] * self.nl  # per-level cell offsets, filled lazily
        self._grid_hw = [None] * self.nl  # (h, w) each cached grid was built for
        self.stride = np.array([8., 16., 32.])
        self.anchor_grid = np.asarray(anchors, dtype=np.float32).reshape(self.nl, -1, 2)
        self.inpWidth = 640
        self.inpHeight = 640
        self.net = cv2.dnn.readNet(yolo_type + '.onnx')
        self.confThreshold = confThreshold
        self.nmsThreshold = nmsThreshold
        self.objThreshold = objThreshold

    def _make_grid(self, nx=20, ny=20):
        """Return the (x, y) cell offsets of a grid ``nx`` cells wide and ``ny`` cells high.

        The result is a float32 array of shape ``(ny * nx, 2)`` in row-major
        order (y outer, x inner).
        """
        # meshgrid's default 'xy' indexing wants the x axis first; passing the
        # axes the other way round is only harmless for square grids.
        xv, yv = np.meshgrid(np.arange(nx), np.arange(ny))
        return np.stack((xv, yv), 2).reshape((-1, 2)).astype(np.float32)

    def postprocess(self, frame, outs):
        """Threshold the decoded rows, run NMS and draw the kept boxes on ``frame``.

        Args:
            frame: image array; ``shape[0]`` / ``shape[1]`` are read as height / width.
            outs: decoded predictions as returned by ``detect``.

        Returns:
            The frame with the detections drawn on it.
        """
        frameHeight = frame.shape[0]
        frameWidth = frame.shape[1]
        # Factors that map network-input pixels back to frame pixels.
        ratioh, ratiow = frameHeight / self.inpHeight, frameWidth / self.inpWidth

        classIds = []
        confidences = []
        boxes = []
        outs = np.asarray(outs)
        if outs.size:
            # Vectorised threshold test: only rows whose best class score and
            # objectness both pass are visited by the Python loop below.
            scores = outs[:, 5:]
            bestIds = scores.argmax(axis=1)
            bestScores = scores[np.arange(scores.shape[0]), bestIds]
            keep = np.flatnonzero((bestScores > self.confThreshold) & (outs[:, 4] > self.objThreshold))
            for row in keep:
                detection = outs[row]
                center_x = int(detection[0] * ratiow)
                center_y = int(detection[1] * ratioh)
                width = int(detection[2] * ratiow)
                height = int(detection[3] * ratioh)
                left = int(center_x - width / 2)
                top = int(center_y - height / 2)
                classIds.append(int(bestIds[row]))
                confidences.append(float(bestScores[row]))
                boxes.append([left, top, width, height])

        if not boxes:
            return frame

        # Non-maximum suppression removes redundant overlapping boxes with
        # lower confidences.
        indices = cv2.dnn.NMSBoxes(boxes, confidences, self.confThreshold, self.nmsThreshold)
        # Depending on the OpenCV build the indices come back as an (N, 1)
        # array, a flat array or an empty tuple; flattening covers all three.
        for i in np.asarray(indices, dtype=int).reshape(-1):
            i = int(i)
            left, top, width, height = boxes[i]
            frame = self.drawPred(frame, classIds[i], confidences[i], left, top, left + width, top + height)
        return frame

    def drawPred(self, frame, classId, conf, left, top, right, bottom):
        """Draw one bounding box and its ``class:score`` label; return ``frame``."""
        # Draw a bounding box.
        cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), thickness=4)

        label = '%.2f' % conf
        label = '%s:%s' % (self.classes[classId], label)

        # Display the label at the top of the bounding box, keeping the
        # anchor point at least one label-height below the top edge.
        labelSize, baseLine = cv2.getTextSize(label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
        top = max(top, labelSize[1])
        cv2.putText(frame, label, (left, top - 10), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), thickness=2)
        return frame

    def _decode(self, outs):
        """Convert sigmoid-activated predictions to pixel boxes, in place.

        For each detection level the matching block of rows gets its columns
        0:2 turned into box centres and 2:4 into box sizes, both in
        network-input pixels. Other columns are left untouched.

        NOTE(review): assumes ``outs`` is a 2-D array whose rows are ordered
        level by level and, within a level, anchor by anchor and then cell by
        cell -- confirm against the ONNX export.
        """
        row_ind = 0
        for i in range(self.nl):
            h, w = int(self.inpHeight / self.stride[i]), int(self.inpWidth / self.stride[i])
            length = int(self.na * h * w)
            # Rebuild the cell offsets only when the grid size changed.
            if self._grid_hw[i] != (h, w):
                self.grid[i] = self._make_grid(w, h)
                self._grid_hw[i] = (h, w)

            rows = slice(row_ind, row_ind + length)
            outs[rows, 0:2] = (outs[rows, 0:2] * 2. - 0.5 + np.tile(self.grid[i], (self.na, 1))) * int(self.stride[i])
            outs[rows, 2:4] = (outs[rows, 2:4] * 2) ** 2 * np.repeat(self.anchor_grid[i], h * w, axis=0)
            row_ind += length
        return outs

    def detect(self, srcimg):
        """Run the network on ``srcimg`` and return the decoded predictions.

        The image is scaled by 1/255, resized to the network input size with
        the R and B channels swapped, and forwarded; the first output is passed
        through a sigmoid and decoded by ``_decode``.
        """
        blob = cv2.dnn.blobFromImage(srcimg, 1 / 255.0, (self.inpWidth, self.inpHeight), [0, 0, 0], swapRB=True, crop=False)
        # Sets the input to the network
        self.net.setInput(blob)

        # Runs the forward pass and keeps the first of the output layers
        outs = self.net.forward(self.net.getUnconnectedOutLayersNames())[0]

        # inference output
        outs = 1 / (1 + np.exp(-outs))  # sigmoid
        return self._decode(outs)
if __name__ == "__main__":
    # Webcam demo: grab frames from camera 0, run detection on each one and
    # show the annotated result until 'q' is pressed or the camera stops
    # delivering frames.
    parser = argparse.ArgumentParser()
    # NOTE: --imgpath is accepted for command-line compatibility but is not
    # used by this camera loop.
    parser.add_argument("--imgpath", type=str, default='bus.jpg', help="image path")
    parser.add_argument('--net_type', default='yolov5s', choices=['yolov5s', 'yolov5l', 'yolov5m', 'yolov5x'])
    parser.add_argument('--confThreshold', default=0.5, type=float, help='class confidence')
    parser.add_argument('--nmsThreshold', default=0.5, type=float, help='nms iou thresh')
    parser.add_argument('--objThreshold', default=0.5, type=float, help='object confidence')
    args = parser.parse_args()

    yolonet = yolov5(args.net_type, confThreshold=args.confThreshold, nmsThreshold=args.nmsThreshold, objThreshold=args.objThreshold)

    vid = cv2.VideoCapture(0)
    try:
        while True:
            ok, frame = vid.read()
            # read() reports failure (camera missing, unplugged, stream
            # ended) through its first return value; stop instead of feeding
            # an invalid frame to the detector.
            if not ok or frame is None:
                break
            dets = yolonet.detect(frame)
            srcimg = yolonet.postprocess(frame, dets)
            cv2.imshow("result", srcimg)
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Release the camera and close the window even if detection raised.
        vid.release()
        cv2.destroyAllWindows()