如果答案是“想”,那么请往下看
前文讲过,如何使用PaddleDetection训练自己的数据集
解决思路:将视频按帧抽取存为图片,输入自己训练好的目标检测模型进行预测,将得到的物体预测框画在图片上,保存图片,最后将图片合成视频。
本文主要参考了博客https://blog.csdn.net/yzl819819/article/details/104743106
以及视频人脸检测项目https://aistudio.baidu.com/aistudio/projectdetail/757390
1.首先导出训练好的模型及训练参数,使用PaddleDetection的export_model.py
# 导出模型
!python PaddleDetection/tools/export_model.py -c PaddleDetection/configs/yolov4/yolov4_cspdarknet_voc.yml -o weights=output/yolov4_cspdarknet_voc/best_model.pdparams
得到三个文件,如图
2.将PaddleDetection中的infer.py封装为一个类inference(),用于预测单张图片
import numpy as np
import time
import cv2
import paddle.fluid as fluid
from PIL import Image
from PIL import ImageDraw
# 画图展示目标物体边界框
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.image import imread
import math
# # 定义画矩形框的程序
# def draw_rectangle(currentAxis,bbox, edgecolor = 'red', facecolor = 'yellow', fill=False, linestyle='-'):
# # currentAxis,坐标轴,通过plt.gca()获取
# # bbox,边界框,包含四个数值的list, [x1, y1, x2, y2]
# # edgecolor,边框线条颜色
# # facecolor,填充颜色
# # fill, 是否填充
# # linestyle,边框线型
# # patches.Rectangle需要传入左上角坐标、矩形区域的宽度、高度等参数
# rect=patches.Rectangle((bbox[0], bbox[1]), bbox[2]-bbox[0]+1, bbox[3]-bbox[1]+1, linewidth=2,
# edgecolor=edgecolor,facecolor=facecolor,fill=fill, linestyle=linestyle)
# currentAxis.add_patch(rect)
# Static configuration for the detector used in this walkthrough.
train_parameters = {
    # Class id -> label text drawn next to each detection box.
    "label_dict": {
        0: "HS",
        1: "DS",
        2: "B",
        3: "SMac",
        4: "TelP",
        5: "HSN",
        6: "DSN",
        7: "Number of TelP",
        8: "Unclear",
    },
    "use_gpu": True,
    # NOTE(review): anchors / anchor_mask are copied into module variables
    # below but are not referenced by the inference code visible in this file.
    "anchors": [
        [12, 16], [19, 36], [40, 28],
        [36, 75], [76, 55], [72, 146],
        [142, 110], [192, 243], [459, 401],
    ],
    "anchor_mask": [[0, 1, 2], [3, 4, 5], [6, 7, 8]],
    # [channels, height, width]. The value in effect is the original 608
    # edge length; an earlier note mentioned shrinking it to 448 for speed,
    # but that is not what is configured here.
    "input_size": [3, 608, 608],
}

# Module-level shortcuts read by the ``inference`` class.
target_size = train_parameters['input_size']
anchors = train_parameters['anchors']
anchor_mask = train_parameters['anchor_mask']
label_dict = train_parameters['label_dict']
print(label_dict[1])

# Select the execution device and build the executor.
if train_parameters['use_gpu']:
    place = fluid.CUDAPlace(0)
else:
    place = fluid.CPUPlace()
exe = fluid.Executor(place)

# Directory holding the model exported in step 1.
path = "output/yolov4_cspdarknet_voc"
inference_program, feed_target_names, fetch_targets = fluid.io.load_inference_model(
    dirname=path,
    executor=exe,
    model_filename='__model__',
    params_filename='__params__',
)
class inference():
    """Run the exported detection model on single images and save annotated copies.

    The class relies on module-level state prepared before its definition:
    ``exe``, ``inference_program``, ``feed_target_names``, ``fetch_targets``,
    ``target_size`` and ``label_dict``.
    """

    def __init__(self):
        print("8888888888")

    def draw_bbox_image(self, img, boxes, labels, scores, save_name):
        """Draw the confident detections on ``img`` and save it.

        Only detections whose score is above 0.7 are drawn. The image is
        saved even when nothing passes the threshold.

        :param img: PIL image to draw on (modified in place)
        :param boxes: iterable of ``[xmin, ymin, xmax, ymax]`` boxes
        :param labels: iterable of class ids, parallel to ``boxes``
        :param scores: iterable of confidence scores, parallel to ``boxes``
        :param save_name: path the annotated image is written to
        :return: None
        """
        print(save_name)
        draw = ImageDraw.Draw(img)
        # One colour per class id in ``label_dict``.
        colors = ['red', 'green', 'blue', 'violet', 'yellow', 'darkblue', 'purple', 'orange', 'brown']
        for box, label, score in zip(boxes, labels, scores):
            print(box, label, score)
            if score > 0.7:
                # Plain int so list/dict lookups do not depend on the numpy scalar type.
                label = int(label)
                xmin, ymin, xmax, ymax = box[0], box[1], box[2], box[3]
                draw.rectangle((xmin, ymin, xmax, ymax), fill=None, outline=colors[label], width=2)
                draw.text((xmin, ymin), label_dict[label], colors[label])
        img.save(save_name)

    def resize_img(self, img, target_size):
        """Resize ``img`` to the model input size with bilinear filtering.

        The aspect ratio is NOT preserved: the image is stretched to
        ``target_size[1:]``.

        :param img: PIL image
        :param target_size: ``[channels, d1, d2]``; ``target_size[1:]`` is
            passed to ``Image.resize`` as the new size
        :return: the resized PIL image
        """
        img = img.resize(target_size[1:], Image.BILINEAR)
        return img

    def read_image(self, img_path):
        """Load an image and build the network input tensor.

        :param img_path: path of the image file
        :return: ``(origin, tensor, resized)`` where ``origin`` is the
            untouched PIL image, ``tensor`` is a float32 array of shape
            ``(1, 3, H, W)`` scaled to ``[0, 1]``, and ``resized`` is a copy
            of the resized PIL image (taken before any RGB conversion)
        """
        origin = Image.open(img_path)
        img = self.resize_img(origin, target_size)
        resized_img = img.copy()
        if img.mode != 'RGB':
            img = img.convert('RGB')
        # HWC -> CHW, then scale to [0, 1].
        img = np.array(img).astype('float32').transpose((2, 0, 1))
        img = img / 255.0
        # Per-channel mean 0 / std 1 normalisation is the identity, so no
        # further arithmetic is needed here.
        img = img[np.newaxis, :]
        return origin, img, resized_img

    def infer(self, image_path, idx, out_dir='video2img/output/'):
        """Run detection on one image and save the result.

        The output file is ``out_dir + idx`` (plain string concatenation).
        When the model reports no detections the original image is saved
        unchanged, so that every input frame has a matching output frame.

        :param image_path: path of the image to run detection on
        :param idx: file-name part of the output path, e.g. ``'/12.jpg'``
        :param out_dir: directory prefix of the output path
        :return: None
        """
        origin, tensor_img, _ = self.read_image(image_path)
        input_w, input_h = origin.size[0], origin.size[1]
        # Second feed target: original image size as [height, width].
        # NOTE(review): presumably the model's im_size input - confirm
        # against the exported model.
        image_shape = np.array([input_h, input_w], dtype='int32')
        out_path = out_dir + idx

        t1 = time.time()
        batch_outputs = exe.run(inference_program,
                                feed={feed_target_names[0]: tensor_img,
                                      feed_target_names[1]: image_shape[np.newaxis, :]},
                                fetch_list=fetch_targets,
                                return_numpy=False)
        period = time.time() - t1
        print("predict cost time:{0}".format("%2.2f sec" % period))

        bboxes = np.array(batch_outputs[0])
        # Each detection row is [label, score, xmin, ymin, xmax, ymax];
        # any other layout is treated as "nothing detected".
        if bboxes.ndim != 2 or bboxes.shape[1] != 6:
            print("No object found in {}".format(image_path))
            # Keep the frame sequence complete for later video assembly.
            origin.save(out_path)
            return
        labels = bboxes[:, 0].astype('int32')
        scores = bboxes[:, 1].astype('float32')
        boxes = bboxes[:, 2:].astype('float32')
        self.draw_bbox_image(origin, boxes, labels, scores, out_path)
# if __name__ == '__main__':
# image_path= "work/VOC2012/JPEGImages/4370.jpg"
# a=inference()
# a.infer(image_path, '/4370.jpg')
3.将视频抽帧为图像,画预测框,将输出图像合成为视频
import cv2
import os
def CutVideo2Image(video_path, img_path):
    """Split a video into numbered JPEG frames.

    Frames are written as ``<img_path>/0.jpg``, ``<img_path>/1.jpg``, ... in
    reading order, until the capture stops returning frames.

    :param video_path: path of the input video file
    :param img_path: folder that receives the frame images; it is created
        when missing
    :return: None
    """
    # Make sure the target folder exists; otherwise the frame writes below
    # have nowhere to go.
    if img_path:
        os.makedirs(img_path, exist_ok=True)

    cap = cv2.VideoCapture(video_path)
    try:
        index = 0
        while True:
            ret, frame = cap.read()
            if not ret:
                break
            cv2.imwrite(img_path + '/%d.jpg' % index, frame)
            index += 1
    finally:
        # Release the capture even if reading or writing a frame raises.
        cap.release()
def GetObj(in_path, out_path):
    """Run object detection on every extracted frame in a folder.

    Frames are expected to be named ``<index>.jpg``; they are processed in
    ascending index order. Other files in the folder are ignored.

    :param in_path: folder holding the input frames
    :param out_path: intended output folder. NOTE: this value is not
        forwarded to ``inference.infer``; the detector writes to its own
        default location (``'video2img/output/'``).
    :return: None
    """
    # Only consider files whose name is exactly "<non-negative int>.jpg".
    frame_ids = sorted(
        int(stem)
        for stem, ext in (os.path.splitext(name) for name in os.listdir(in_path))
        if ext == '.jpg' and stem.isdecimal() and str(int(stem)) == stem
    )

    detector = inference()
    for i in frame_ids:
        frame_name = '/%d.jpg' % i
        detector.infer(in_path + frame_name, frame_name)
def CombVideo(in_path, out_path, size, fps=30.0):
    """Assemble numbered JPEG frames into an mp4v-encoded video.

    Frames named ``<index>.jpg`` are appended in ascending index order.
    Unreadable frames are skipped, and frames whose dimensions differ from
    ``size`` are resized so that every frame matches the writer's frame size.

    :param in_path: folder holding the input frames
    :param out_path: path of the video file to write
    :param size: output frame size as ``(width, height)``
    :param fps: frame rate of the output video (default 30.0)
    :return: None
    """
    size = tuple(size)
    # Only consider files whose name is exactly "<non-negative int>.jpg".
    frame_ids = sorted(
        int(stem)
        for stem, ext in (os.path.splitext(name) for name in os.listdir(in_path))
        if ext == '.jpg' and stem.isdecimal() and str(int(stem)) == stem
    )

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    out = cv2.VideoWriter(out_path, fourcc, fps, size)
    try:
        for i in frame_ids:
            img = cv2.imread(in_path + '/%d.jpg' % i)
            if img is None:
                # imread signals an unreadable file by returning None.
                continue
            # Image arrays are (height, width, channels); the writer expects
            # frames of exactly the size it was opened with.
            if (img.shape[1], img.shape[0]) != size:
                img = cv2.resize(img, size)
            out.write(img)
    finally:
        # Finalise the file even if a frame fails midway.
        out.release()
# Input video and the final, annotated video.
video_initial = '4.mp4'
video_finish = 'video4_finish.mp4'
# Working folders: raw extracted frames, and frames with boxes drawn.
images_initial = 'video2img/input'
images_final = 'video2img/output'

if __name__ == '__main__':
    # Pipeline: video -> frames -> detection on each frame -> video.
    CutVideo2Image(video_path=video_initial, img_path=images_initial)
    GetObj(in_path=images_initial, out_path=images_final)
    CombVideo(in_path=images_final, out_path=video_finish, size=(1920, 1080))
以上代码运行目录为/home/aistudio
%cd ~/