# YOLO-V3 real-time detection (OpenCV + Python implementation) -- from Keep_Trying_Go's CSDN blog ("opencv yolov3").
import os
import cv2
import numpy as np
import matplotlib.pyplot as plt
import time
# Load the network from its Darknet config file and trained weights.
net = cv2.dnn.readNet(model='./weights/yolov3-tiny.weights',
                      config='./config/yolov3-tiny.cfg')

# Names of all layers in the loaded network.
layerName = net.getLayerNames()

# Names of the unconnected output layers; these are passed to net.forward()
# during inference. getUnconnectedOutLayers() yields 1-based layer ids and,
# depending on the OpenCV version, returns either a flat array or an Nx1
# array -- flattening and casting to int handles both shapes.
# NOTE(review): the variable name says "three" outputs, but the actual count
# comes from the loaded cfg (yolov3-tiny normally has two YOLO heads) -- confirm.
ThreeOutput_layers_name = [
    layerName[int(layer_id) - 1]
    for layer_id in np.asarray(net.getUnconnectedOutLayers()).flatten()
]

# Class names, one per line (expected to be the 80 COCO classes); the line
# index is the class id predicted by the network.
with open('./data/coco.names', 'r', encoding='utf-8') as fp:
    classes = fp.read().splitlines()

# Score threshold: candidates scoring at or below this are dropped by NMS.
Confidence_thresh = 0.2
# Overlap (IoU) threshold used by non-maximum suppression to prune boxes.
Nms_thresh = 0.35
# Detection pipeline: forward pass, box decoding, NMS and drawing.
def _decode_detections(predict, width, height):
    """Turn raw network output rows into pixel boxes, scores and class ids.

    Every row is read as ``[cx, cy, w, h, objectness, class scores...]``; the
    first four values are scaled by the frame size to map them back onto the
    original image.

    Args:
        predict: Iterable of 2-D arrays, one per output layer.
        width: Frame width in pixels.
        height: Frame height in pixels.

    Returns:
        A tuple ``(boxes, scores, class_ids)`` of equally long lists, where
        each box is ``[left_x, top_y, w, h]`` in integer pixels and each score
        is ``objectness * best class probability``.
    """
    boxes, scores, class_ids = [], [], []
    for scale in predict:
        scale = np.asarray(scale)

        # argmax is relative to the class-score slice (columns 5+), so the
        # winning probability must be read from that slice as well -- not
        # from the full row, whose first columns hold the box geometry.
        class_scores = scale[:, 5:]
        best_class = np.argmax(class_scores, axis=1)
        best_prob = class_scores[np.arange(len(best_class)), best_class]
        confid = best_prob.astype(float) * scale[:, 4].astype(float)

        # Map the box centre and size back to frame pixels.
        center_x = (scale[:, 0] * width).astype(int)
        center_y = (scale[:, 1] * height).astype(int)
        w = (scale[:, 2] * width).astype(int)
        h = (scale[:, 3] * height).astype(int)
        # Convert centre coordinates to the top-left corner.
        left_x = (center_x - w / 2).astype(int)
        left_y = (center_y - h / 2).astype(int)

        boxes.extend(np.stack([left_x, left_y, w, h], axis=1).tolist())
        scores.extend(confid.tolist())
        class_ids.extend(best_class.tolist())
    return boxes, scores, class_ids


def Forward_Predict(frame, input_size=(64, 64)):
    """Run the detector on ``frame`` and draw the surviving boxes onto it.

    Args:
        frame: Image array; only its first two dimensions (height, width)
            are used for mapping boxes back, so grayscale input does not
            break the shape unpacking.
        input_size: ``(width, height)`` the image is resized to when building
            the network input blob. Presumably must be a size the loaded
            network accepts (YOLO configs usually need multiples of 32).

    Returns:
        The same ``frame`` object, annotated in place with rectangles and
        ``"<class name> <score>"`` labels.
    """
    # Arguments: image, scale factor, target size, mean to subtract,
    # swap R and B channels, no cropping.
    blob = cv2.dnn.blobFromImage(frame, 1 / 255, input_size, (0, 0, 0),
                                 swapRB=True, crop=False)
    height, width = frame.shape[:2]

    net.setInput(blob)
    # Forward pass restricted to the network's output layers.
    predict = net.forward(ThreeOutput_layers_name)

    boxes, scores, class_ids = _decode_detections(predict, width, height)
    if not boxes:
        return frame

    # Non-maximum suppression; returns the indices of the boxes to keep.
    all_index = cv2.dnn.NMSBoxes(boxes, scores, Confidence_thresh, Nms_thresh)

    # NMSBoxes gives an empty tuple when nothing survives and, depending on
    # the OpenCV version, a flat or Nx1 array otherwise; normalise all three.
    for i in np.asarray(all_index, dtype=int).reshape(-1):
        x, y, w, h = boxes[i]
        # Score rounded to two decimals for the label.
        confidence = str(round(scores[i], 2))
        cv2.rectangle(img=frame, pt1=(x, y), pt2=(x + w, y + h),
                      color=(0, 255, 0), thickness=2)
        text = classes[class_ids[i]] + ' ' + confidence
        cv2.putText(img=frame, text=text, org=(x, y - 10),
                    fontFace=cv2.FONT_HERSHEY_SIMPLEX,
                    fontScale=1.0, color=(0, 0, 255), thickness=2)
    return frame
# Real-time detection on a live video stream.
def detect_time(source=0):
    """Read frames from a video source, run detection and display the result.

    The loop ends when the source stops delivering frames or ESC is pressed.
    The capture device and the display window are always cleaned up, even if
    an error or Ctrl+C interrupts the loop.

    Args:
        source: Value handed to ``cv2.VideoCapture``; ``0`` is the default
            camera.
    """
    cap = cv2.VideoCapture(source)
    try:
        # Create the resizable window once instead of on every frame.
        cv2.namedWindow("detect", cv2.WINDOW_NORMAL)
        while cap.isOpened():
            OK, frame = cap.read()
            if not OK:
                break
            # Mirror the frame horizontally so the preview behaves like a mirror.
            frame = cv2.flip(src=frame, flipCode=2)
            # Downscale the frame; detections are drawn on this 64x64 image.
            frame = cv2.resize(src=frame, dsize=(64, 64))

            started = time.perf_counter()
            try:
                dst = Forward_Predict(frame)
            except Exception:
                # Best effort: if detection fails for this frame, still show
                # the frame so the preview keeps running. Unlike a bare
                # ``except``, this lets KeyboardInterrupt/SystemExit through.
                cv2.imshow('detect', frame)
            else:
                cv2.imshow('detect', dst)
                elapsed_ms = 1E3 * (time.perf_counter() - started)
                print(f'Done. ({elapsed_ms:.1f}ms) Inference')

            # Mask to the low byte: some backends set higher bits in the code.
            key = cv2.waitKey(1) & 0xFF
            if key == 27:  # ESC
                break
    finally:
        cap.release()
        cv2.destroyAllWindows()
# Detection on a single image file.
def signa_Picture(image_path='images/smile.jpg'):
    """Run detection on one image file and show the annotated result.

    The image is resized to 416x416 before detection. The window stays open
    until a key is pressed; pressing ESC additionally terminates the program.

    Args:
        image_path: Path of the image to load.

    Raises:
        FileNotFoundError: If the image cannot be read (``cv2.imread``
            returned ``None``).
        SystemExit: If ESC is pressed while the result is shown.
    """
    img = cv2.imread(image_path)
    if img is None:
        # imread signals failure by returning None rather than raising.
        raise FileNotFoundError(f'Cannot read image: {image_path}')

    img = cv2.resize(src=img, dsize=(416, 416))
    dst = Forward_Predict(img)
    cv2.imshow('detect', dst)
    try:
        # Mask to the low byte: some backends set higher bits in the code.
        if cv2.waitKey(0) & 0xFF == 27:  # ESC
            raise SystemExit
    finally:
        # Close the window on every path, including the ESC exit.
        cv2.destroyAllWindows()
def main():
    """Script entry point: print the start-up banner and run live detection."""
    print('Pycharm')
    # Call signa_Picture() here instead to run on a single image.
    detect_time()


if __name__ == '__main__':
    main()