本文使用的 YOLOv3 权重文件是作者用自建的小型数据集训练得到的模型,可识别垃圾桶、落地花坛、电动栅栏、木箱共四类目标。由于数据集图片太少,模型性能较差,仅能初步满足检测需求;并且采用 OpenCV 调用摄像头的方式延迟较高、实时性差,有待后续优化。
学习路长啊,一点一点摸索吧,小白开始耍了,哈哈哈!
配置好环境后,即可直接运行 realtime_detection.py,默认直接调用摄像头并开始检测,摄像头画面显示如下图所示。
如需检测本地视频文件,先将 capture = cv2.VideoCapture(0) 注释掉(在该行代码前添加 # 即可),修改成如下所示。
# 读入待检测的图像
# 0是代表摄像头编号,只有一个的话默认为0
# capture = cv2.VideoCapture(0)
再将 capture = cv2.VideoCapture('./test_video/0031.mp4') 取消注释,即删除行首的 #(注意 # 后面的空格也需一并删除)。
将地址./test_video/0031.mp4替换为需要检测的视频地址即可。
修改成如下所示。
# 读取录制好的视频
capture = cv2.VideoCapture('./test_video/0031.mp4')
注释掉以下两句代码
capture = cv2.VideoCapture(0)
capture = cv2.VideoCapture('./test_video/0031.mp4')
解注释以下两行代码,即可使用IP摄像头
ip_camera_url = 'http://admin:密码@IP地址:8097'
capture = cv2.VideoCapture(ip_camera_url)
使用opencv调用IP摄像头的具体方法参考链接:
https://blog.csdn.net/urnotY/article/details/108454247
detection_save.py 文件仅增加了将检测结果保存为视频的功能,其他功能与 realtime_detection.py 相同,可根据是否需要保存检测结果的视频文件自行选择。保存的文件位于 output 文件夹下。
realtime_detection.py
完整代码如下:
import numpy as np
import cv2
import os
import time
def video_demo():
    """Run YOLOv3 detection on a video stream and show the annotated frames.

    Loads a Darknet model through OpenCV's dnn module, opens the default
    camera (index 0) and, for every frame, runs a forward pass, keeps the
    detections whose best class score exceeds 0.5, applies non-maximum
    suppression and draws the surviving boxes and labels. Frames are shown
    in a window named "Image".

    The loop ends when ESC is pressed or when no further frame can be read
    (camera failure or end of a video file). The capture device and the
    display window are always released on exit.

    Raises:
        RuntimeError: If the video source cannot be opened.
    """
    # Paths of the trained model; absolute or relative paths both work.
    weightsPath = "./yolov3/object_18900.weights"
    configPath = "./yolov3/object.cfg"
    labelsPath = "./yolov3/object.names"
    # Paths of the official pre-trained model.
    # weightsPath = "./yolov3/yolov3.weights"
    # configPath = "./yolov3/yolov3.cfg"
    # labelsPath = "./yolov3/coco.names"

    # Class names, one per line; the context manager closes the file.
    with open(labelsPath) as labels_file:
        LABELS = labels_file.read().strip().split("\n")
    # One random colour per class, used for its boxes and captions.
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")
    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

    # Names of the output layers YOLO needs. They do not change between
    # frames, so they are resolved once here. getUnconnectedOutLayers()
    # returns an Nx1 array in older OpenCV releases and a flat array in
    # newer ones; flattening handles both.
    layer_names = net.getLayerNames()
    out_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    ln = [layer_names[i - 1] for i in out_layer_ids]

    # Open the input stream.
    # 0 is the camera index; with a single camera it defaults to 0.
    capture = cv2.VideoCapture(0)
    # Read a pre-recorded video instead.
    # capture = cv2.VideoCapture('./test_video/0031.mp4')
    # Use an IP camera app as the video source.
    # URL format: http://username:password@ip-address:port/
    # ip_camera_url = 'http://admin:password@ip-address:8081'
    # Create a VideoCapture for the IP camera.
    # capture = cv2.VideoCapture(ip_camera_url)

    if not capture.isOpened():
        capture.release()
        raise RuntimeError("Could not open the video source")

    try:
        while True:
            boxes = []
            confidences = []
            classIDs = []

            ref, image = capture.read()
            # read() reports failure when the stream ends or the device
            # drops out; stop instead of crashing on a missing frame.
            if not ref or image is None:
                break
            (H, W) = image.shape[:2]

            # Build a blob from the frame and run it through the network to
            # obtain bounding boxes and their associated probabilities.
            blob = cv2.dnn.blobFromImage(
                image, 1 / 255.0, (416, 416), swapRB=True, crop=False
            )
            net.setInput(blob)
            layerOutputs = net.forward(ln)

            # Loop over every output layer ...
            for output in layerOutputs:
                # ... and over every detection it produced.
                for detection in output:
                    # Per-class scores start at index 5 of a detection row.
                    scores = detection[5:]
                    classID = np.argmax(scores)
                    confidence = scores[classID]
                    # Discard low-confidence detections.
                    if confidence > 0.5:
                        # The first four values are the box centre, width
                        # and height relative to the frame; scale to pixels.
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")
                        # Top-left corner of the box.
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)

            # Non-maximum suppression. The result is an empty tuple when
            # nothing survives and an array otherwise (Nx1 or flat depending
            # on the OpenCV version); flattening covers every case.
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
            for i in np.array(idxs, dtype=int).flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                # Draw the box and its class caption on the frame.
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
                text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            cv2.imshow("Image", image)
            # Show the frame for 30 ms; leave the loop if ESC is pressed.
            c = cv2.waitKey(30) & 0xff
            if c == 27:
                break
    finally:
        # Release the device and close the window however the loop ended.
        capture.release()
        cv2.destroyAllWindows()
video_demo()
detection_save.py
完整代码如下:
import numpy as np
import cv2
import os
import time
def video_demo():
    """Run YOLOv3 detection on a video stream, display it and save it to disk.

    Loads a Darknet model through OpenCV's dnn module, opens the default
    camera (index 0) and, for every frame, runs a forward pass, keeps the
    detections whose best class score exceeds 0.5, applies non-maximum
    suppression and draws the surviving boxes and labels. Each annotated
    frame is shown in a window named "Image" and appended to
    ``output/MySaveVideo-<timestamp>.avi``.

    The loop ends when ESC is pressed or when no further frame can be read
    (camera failure or end of a video file). The capture device, the video
    writer and the display window are always released on exit so that the
    saved file is finalised.

    Raises:
        RuntimeError: If the video source cannot be opened.
    """
    # Paths of the trained model; absolute or relative paths both work.
    weightsPath = "./yolov3/object_18900.weights"
    configPath = "./yolov3/object.cfg"
    labelsPath = "./yolov3/object.names"
    # Paths of the official pre-trained model.
    # weightsPath = "./yolov3/yolov3.weights"
    # configPath = "./yolov3/yolov3.cfg"
    # labelsPath = "./yolov3/coco.names"

    # Class names, one per line; the context manager closes the file.
    with open(labelsPath) as labels_file:
        LABELS = labels_file.read().strip().split("\n")
    # One random colour per class, used for its boxes and captions.
    COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")
    net = cv2.dnn.readNetFromDarknet(configPath, weightsPath)

    # Names of the output layers YOLO needs. They do not change between
    # frames, so they are resolved once here. getUnconnectedOutLayers()
    # returns an Nx1 array in older OpenCV releases and a flat array in
    # newer ones; flattening handles both.
    layer_names = net.getLayerNames()
    out_layer_ids = np.array(net.getUnconnectedOutLayers()).flatten()
    ln = [layer_names[i - 1] for i in out_layer_ids]

    # Open the input stream.
    # 0 is the camera index; with a single camera it defaults to 0.
    capture = cv2.VideoCapture(0)
    # Read a pre-recorded video instead.
    # capture = cv2.VideoCapture('./test_video/0031.mp4')
    # Use an IP camera app as the video source.
    # URL format: http://username:password@ip-address:port/
    # ip_camera_url = 'http://admin:password@ip-address:8097'
    # Create a VideoCapture for the IP camera.
    # capture = cv2.VideoCapture(ip_camera_url)

    if not capture.isOpened():
        capture.release()
        raise RuntimeError("Could not open the video source")

    # Frame rate of the saved video.
    fps = 30
    # Frame size as reported by the capture device.
    size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
    video_time = time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime())
    # The writer cannot create missing directories, so make sure the output
    # folder exists before opening the file.
    os.makedirs('output', exist_ok=True)
    videowrite = cv2.VideoWriter('output/MySaveVideo-' + video_time + '.avi', cv2.VideoWriter_fourcc('I', '4', '2', '0'), fps, size)
    if not videowrite.isOpened():
        # Keep the live preview usable, but make the failure visible.
        print("Warning: could not open the output video file; frames will not be saved")

    try:
        while True:
            boxes = []
            confidences = []
            classIDs = []

            ref, image = capture.read()
            # read() reports failure when the stream ends or the device
            # drops out; stop instead of crashing on a missing frame.
            if not ref or image is None:
                break
            (H, W) = image.shape[:2]

            # Build a blob from the frame and run it through the network to
            # obtain bounding boxes and their associated probabilities.
            blob = cv2.dnn.blobFromImage(
                image, 1 / 255.0, (416, 416), swapRB=True, crop=False
            )
            net.setInput(blob)
            layerOutputs = net.forward(ln)

            # Loop over every output layer ...
            for output in layerOutputs:
                # ... and over every detection it produced.
                for detection in output:
                    # Per-class scores start at index 5 of a detection row.
                    scores = detection[5:]
                    classID = np.argmax(scores)
                    confidence = scores[classID]
                    # Discard low-confidence detections.
                    if confidence > 0.5:
                        # The first four values are the box centre, width
                        # and height relative to the frame; scale to pixels.
                        box = detection[0:4] * np.array([W, H, W, H])
                        (centerX, centerY, width, height) = box.astype("int")
                        # Top-left corner of the box.
                        x = int(centerX - (width / 2))
                        y = int(centerY - (height / 2))
                        boxes.append([x, y, int(width), int(height)])
                        confidences.append(float(confidence))
                        classIDs.append(classID)

            # Non-maximum suppression. The result is an empty tuple when
            # nothing survives and an array otherwise (Nx1 or flat depending
            # on the OpenCV version); flattening covers every case.
            idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.2, 0.3)
            for i in np.array(idxs, dtype=int).flatten():
                (x, y) = (boxes[i][0], boxes[i][1])
                (w, h) = (boxes[i][2], boxes[i][3])
                # Draw the box and its class caption on the frame.
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv2.rectangle(image, (x, y), (x + w, y + h), color, 2)
                text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

            # Save the annotated frame, then display it.
            videowrite.write(image)
            cv2.imshow("Image", image)
            # Show the frame for 30 ms; leave the loop if ESC is pressed.
            c = cv2.waitKey(30) & 0xff
            if c == 27:
                break
    finally:
        # Releasing the writer finalises the saved file; the device and the
        # window are released however the loop ended.
        videowrite.release()
        capture.release()
        cv2.destroyAllWindows()
video_demo()
注:本笔记仅为记录调试学习过程,防止遗忘,部分代码参考网络资源。如有侵权,立即删除!