(工程1WiderPerson)首先下载行人数据集:这里我使用了WiderPerson数据集,下载数据集。
把数据集整理出图片和xml文件:把下面的py文件运行3遍,第一遍运行train.txt文件,第二遍运行val.txt文件,并且注释掉make_voc_dir函数的调用,自己可以根据自己的场景需要进行数据集的筛选,第三遍运行test.txt文件,并注释掉with open(label_path) as file和with open(xml_path, ‘wb’) as f里面的内容。
import os
import numpy as np
import scipy.io as sio
import shutil
from lxml.etree import Element, SubElement, tostring
from xml.dom.minidom import parseString
import cv2
def make_voc_dir():
# labels 目录若不存在,创建labels目录。若存在,则清空目录
if not os.path.exists('../VOC2007/Annotations'):
os.makedirs('../VOC2007/Annotations')
if not os.path.exists('../VOC2007/ImageSets'):
os.makedirs('../VOC2007/ImageSets')
os.makedirs('../VOC2007/ImageSets/Main')
if not os.path.exists('../VOC2007/JPEGImages'):
os.makedirs('../VOC2007/JPEGImages')
if __name__ == '__main__':
# < class_label =1: pedestrians > 行人
# < class_label =2: riders > 骑车的
# < class_label =3: partially-visible persons > 遮挡的部分行人
# < class_label =4: ignore regions > 一些假人,比如图画上的人
# < class_label =5: crowd > 拥挤人群,直接大框覆盖了
classes = {'1': 'pedestrians',
'2': 'riders',
'3': 'partially',
'4':'ignore',
'5':'crowd'
}#这里如果自己只要人,可以把1-5全标记为people,也可以根据自己场景需要筛选
VOCRoot = '../VOC2007'
widerDir = 'C:/Users/邓卓/Desktop/WiderPerson' # 数据集所在的路径
wider_path = 'C:/Users/邓卓/Desktop/WiderPerson/train.txt'#这里第一次train,第二次test
#这个函数第一次用注释掉,后面就要加注释了
#make_voc_dir()
with open(wider_path, 'r') as f:
imgIds = [x for x in f.read().splitlines()]
for imgId in imgIds:
objCount = 0 # 一个标志位,用来判断该img是否包含我们需要的标注
filename = imgId + '.jpg'
img_path = '../WiderPerson/images/' + filename
print('Img :%s' % img_path)
img = cv2.imread(img_path)
width = img.shape[1] # 获取图片尺寸
height = img.shape[0] # 获取图片尺寸 360
node_root = Element('annotation')
node_folder = SubElement(node_root, 'folder')
node_folder.text = 'JPEGImages'
node_filename = SubElement(node_root, 'filename')
node_filename.text = 'VOC2007/JPEGImages/%s' % filename
node_size = SubElement(node_root, 'size')
node_width = SubElement(node_size, 'width')
node_width.text = '%s' % width
node_height = SubElement(node_size, 'height')
node_height.text = '%s' % height
node_depth = SubElement(node_size, 'depth')
node_depth.text = '3'
label_path = img_path.replace('images', 'Annotations') + '.txt'
with open(label_path) as file:
line = file.readline()
count = int(line.split('\n')[0]) # 里面行人个数
line = file.readline()
while line:
cls_id = line.split(' ')[0]
xmin = int(line.split(' ')[1]) + 1
ymin = int(line.split(' ')[2]) + 1
xmax = int(line.split(' ')[3]) + 1
ymax = int(line.split(' ')[4].split('\n')[0]) + 1
line = file.readline()
cls_name = classes[cls_id]
obj_width = xmax - xmin
obj_height = ymax - ymin
difficult = 0
if obj_height <= 6 or obj_width <= 6:
difficult = 1
node_object = SubElement(node_root, 'object')
node_name = SubElement(node_object, 'name')
node_name.text = cls_name
node_difficult = SubElement(node_object, 'difficult')
node_difficult.text = '%s' % difficult
node_bndbox = SubElement(node_object, 'bndbox')
node_xmin = SubElement(node_bndbox, 'xmin')
node_xmin.text = '%s' % xmin
node_ymin = SubElement(node_bndbox, 'ymin')
node_ymin.text = '%s' % ymin
node_xmax = SubElement(node_bndbox, 'xmax')
node_xmax.text = '%s' % xmax
node_ymax = SubElement(node_bndbox, 'ymax')
node_ymax.text = '%s' % ymax
node_name = SubElement(node_object, 'pose')
node_name.text = 'Unspecified'
node_name = SubElement(node_object, 'truncated')
node_name.text = '0'
image_path = VOCRoot + '/JPEGImages/' + filename
xml = tostring(node_root, pretty_print=True) # 'annotation'
dom = parseString(xml)
xml_name = filename.replace('.jpg', '.xml')
xml_path = VOCRoot + '/Annotations/' + xml_name
with open(xml_path, 'wb') as f:
f.write(xml)
# widerDir = '../WiderPerson' # 数据集所在的路径
shutil.copy(img_path, '../VOC2007/JPEGImages/' + filename)
运行3遍之后会在自己的同级目录生成VOC2007文件夹,里面就包括train,val和test的图像和以及train和val的xml文件。
(工程2yolov5)下载yolov5官方文件,在工程下创建一个people_data文件夹(名字可以自定义),将VOC2007文件夹里面的三个文件复制粘贴进去(这里有个坑:要把JPEGImages文件名改为images,后面有关的做相应改变)。划分数据集,创建split_train_val.py文件,更改自己的xml和txt文件夹目录。
import random
import os
import argparse
# annotations_path and save_txt_path
def get_opt():
parser = argparse.ArgumentParser()
parser.add_argument('--xml_path', default='C:/Users/邓卓/Desktop/yolov5-master/people_data/Annotations/',
type=str, help='input xml file ')
parser.add_argument('--txt_path', default="C:/Users/邓卓/Desktop/yolov5-master/people_data/ImageSets/Main/",
type=str, help='output txt file')
opt = parser.parse_args()
return opt
opt = get_opt()
# xml_path
xml_file = opt.xml_path
# save_txt_path
save_txt_file = opt.txt_path
# 若save_txt_path不存在,则手动创建
if not os.path.exists(save_txt_file):
os.makedirs(save_txt_file)
# 迭代xml_path路径下所有的文件返回包含该目录下所有文件的list(无序)
total_xml = os.listdir(xml_file)
# 获取包含所有数据list的长度
num = len(total_xml)
# list的范围,后续用于迭代向txt文件中写入数据(image)
list_index = range(num)
# 采集的数据集中训练数据和验证数据的总占比
train_val_percent = 1
# 训练数据的占比
train_percent = 0.99
# 采集的数据集中训练数据和验证数据的数量
tv = int(num * train_val_percent)
# 训练数据的数量,int()向下取整
tr = int(tv * train_percent)
# 从总数据中随机抽取训练集和验证集数据
train_val = random.sample(list_index, tv)
# 从训练集和验证集中随机抽取训练集数据
train = random.sample(train_val, tr)
# 创建train_val.txt,train.txt,test.txt,val.txt
file_train_vale = open(save_txt_file + 'train_val.txt', 'w')
file_train = open(save_txt_file + "train.txt", 'w')
file_test = open(save_txt_file + "test.txt", 'w')
file_val = open(save_txt_file + "val.txt", 'w')
# train_val.txt将训练集和验证集数据写入
# train.txt将训练集数据写入
# test.txt将测试集数据写入
# val.txt将验证集数据写入
for i in list_index:
# [:-4]将图片格式去掉,比如.jpg
data_name = total_xml[i][:-4] + '\n'
# 若该index存在于train_val中,则写入
if i in train_val:
file_train_vale.write(data_name)
if i in train:
file_train.write(data_name)
else:
file_val.write(data_name)
else:
file_test.write(data_name)
# 文件流关闭
file_train_vale.close()
file_train.close()
file_test.close()
file_val.close()
在此目录环境下运行文件
python split_train_val.py
创建voc_label.py文件,将文件生成label标签并生成路径文件txt
# -*- coding: utf-8 -*-
import xml.etree.ElementTree as ET
import os
from os import getcwd
sets = ['train', 'val', 'test']
classes = ["pedestrians", "riders",'partially','ignore','crowd'] # 改成自己的类别
abs_path = os.getcwd()
print(abs_path)
def convert(size, box):
dw = 1. / (size[0])
dh = 1. / (size[1])
x = (box[0] + box[1]) / 2.0 - 1
y = (box[2] + box[3]) / 2.0 - 1
w = box[1] - box[0]
h = box[3] - box[2]
x = x * dw
w = w * dw
y = y * dh
h = h * dh
return x, y, w, h
def convert_annotation(image_id):
in_file = open('C:/Users/邓卓/Desktop/yolov5-master/people_data/Annotations/%s.xml' % (image_id), encoding='UTF-8')
out_file = open('C:/Users/邓卓/Desktop/yolov5-master/people_data/labels/%s.txt' % (image_id), 'w')
tree = ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
for obj in root.iter('object'):
# difficult = obj.find('difficult').text
difficult = obj.find('difficult').text
cls = obj.find('name').text
if cls not in classes or int(difficult) == 1:
continue
cls_id = classes.index(cls)
xmlbox = obj.find('bndbox')
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text),
float(xmlbox.find('ymax').text))
b1, b2, b3, b4 = b
# 标注越界修正
if b2 > w:
b2 = w
if b4 > h:
b4 = h
b = (b1, b2, b3, b4)
bb = convert((w, h), b)
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = getcwd()
for image_set in sets:
if not os.path.exists('C:/Users/邓卓/Desktop/yolov5-master/people_data/labels/'):
os.makedirs('C:/Users/邓卓/Desktop/yolov5-master/people_data/labels/')
image_ids = open('C:/Users/邓卓/Desktop/yolov5-master/people_data/ImageSets/Main/%s.txt' % (image_set)).read().strip().split()
list_file = open('people_data/%s.txt' % (image_set), 'w')
for image_id in image_ids:
list_file.write( 'C:/Users/邓卓/Desktop/yolov5-master/people_data/JPEGImages/%s.jpg\n' % (image_id))
convert_annotation(image_id)
list_file.close()
配置文件:在工程的data文件夹下面创建一个yaml文件,并如下写,冒号后面空一格。
kmeans找出9个最好的anchors
# -*- coding: utf-8 -*-
# 根据标签文件求先验框
import os
import numpy as np
import xml.etree.cElementTree as et
#from kmeans import kmeans, avg_iou
#import numpy as np
def iou(box, clusters):
"""
Calculates the Intersection over Union (IoU) between a box and k clusters.
:param box: tuple or array, shifted to the origin (i. e. width and height)
:param clusters: numpy array of shape (k, 2) where k is the number of clusters
:return: numpy array of shape (k, 0) where k is the number of clusters
"""
x = np.minimum(clusters[:, 0], box[0])
y = np.minimum(clusters[:, 1], box[1])
if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
#raise ValueError("Box has no area") # 如果报这个错,可以把这行改成pass即可
pass
intersection = x * y
box_area = box[0] * box[1]
cluster_area = clusters[:, 0] * clusters[:, 1]
iou_ = intersection / (box_area + cluster_area - intersection)
return iou_
def avg_iou(boxes, clusters):
"""
Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.
:param boxes: numpy array of shape (r, 2), where r is the number of rows
:param clusters: numpy array of shape (k, 2) where k is the number of clusters
:return: average IoU as a single float
"""
return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])
def translate_boxes(boxes):
"""
Translates all the boxes to the origin.
:param boxes: numpy array of shape (r, 4)
:return: numpy array of shape (r, 2)
"""
new_boxes = boxes.copy()
for row in range(new_boxes.shape[0]):
new_boxes[row][2] = np.abs(new_boxes[row][2] - new_boxes[row][0])
new_boxes[row][3] = np.abs(new_boxes[row][3] - new_boxes[row][1])
return np.delete(new_boxes, [0, 1], axis=1)
def kmeans(boxes, k, dist=np.median):
"""
Calculates k-means clustering with the Intersection over Union (IoU) metric.
:param boxes: numpy array of shape (r, 2), where r is the number of rows
:param k: number of clusters
:param dist: distance function
:return: numpy array of shape (k, 2)
"""
rows = boxes.shape[0]
distances = np.empty((rows, k))
last_clusters = np.zeros((rows,))
np.random.seed()
# the Forgy method will fail if the whole array contains the same rows
clusters = boxes[np.random.choice(rows, k, replace=False)]
while True:
for row in range(rows):
distances[row] = 1 - iou(boxes[row], clusters)
nearest_clusters = np.argmin(distances, axis=1)
if (last_clusters == nearest_clusters).all():
break
for cluster in range(k):
clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)
last_clusters = nearest_clusters
return clusters
#if __name__ == '__main__':
a = np.array([[1, 2, 3, 4], [5, 7, 6, 8]])
print(translate_boxes(a))
FILE_ROOT = "C:/Users/邓卓/Desktop/yolov5-master/people_data/" # 根路径
ANNOTATION_ROOT = "Annotations" # 数据集标签文件夹路径
ANNOTATION_PATH = FILE_ROOT + ANNOTATION_ROOT
ANCHORS_TXT_PATH = "C:/Users/邓卓/Desktop/yolov5-master/data/anchors.txt"
CLUSTERS = 9#kmeans方法分9类
CLASS_NAMES = ["pedestrians", "riders",'partially','ignore','crowd']
def load_data(anno_dir, class_names):
xml_names = os.listdir(anno_dir)
boxes = []
for xml_name in xml_names:
xml_pth = os.path.join(anno_dir, xml_name)
tree = et.parse(xml_pth)
width = float(tree.findtext("./size/width"))
height = float(tree.findtext("./size/height"))
for obj in tree.findall("./object"):
cls_name = obj.findtext("name")
if cls_name in class_names:
xmin = float(obj.findtext("bndbox/xmin")) / width
ymin = float(obj.findtext("bndbox/ymin")) / height
xmax = float(obj.findtext("bndbox/xmax")) / width
ymax = float(obj.findtext("bndbox/ymax")) / height
box = [xmax - xmin, ymax - ymin]
boxes.append(box)
else:
continue
return np.array(boxes)
if __name__ == '__main__':
anchors_txt = open(ANCHORS_TXT_PATH, "w")
train_boxes = load_data(ANNOTATION_PATH, CLASS_NAMES)
count = 1
best_accuracy = 0
best_anchors = []
best_ratios = []
for i in range(10):
print(i) ##### 可以修改,不要太大,否则时间很长
anchors_tmp = []
clusters = kmeans(train_boxes, k=CLUSTERS)
idx = clusters[:, 0].argsort()
clusters = clusters[idx]
# print(clusters)
for j in range(CLUSTERS):
anchor = [round(clusters[j][0] * 640, 2), round(clusters[j][1] * 640, 2)]
anchors_tmp.append(anchor)
print(f"Anchors:{anchor}")
temp_accuracy = avg_iou(train_boxes, clusters) * 100
print("Train_Accuracy:{:.2f}%".format(temp_accuracy))
ratios = np.around(clusters[:, 0] / clusters[:, 1], decimals=2).tolist()
ratios.sort()
print("Ratios:{}".format(ratios))
print(20 * "*" + " {} ".format(count) + 20 * "*")
count += 1
if temp_accuracy > best_accuracy:
best_accuracy = temp_accuracy
best_anchors = anchors_tmp
best_ratios = ratios
anchors_txt.write("Best Accuracy = " + str(round(best_accuracy, 2)) + '%' + "\r\n")
anchors_txt.write("Best Anchors = " + str(best_anchors) + "\r\n")
anchors_txt.write("Best Ratios = " + str(best_ratios))
anchors_txt.close()
在工程的data文件夹下选择yaml文件,修改nc和anchors即可
在yolo官网下载对应模型的pt文件,在工程下新建weights文件夹,将pt文件放入,修改train.py文件
开始训练
python train.py --device '0'
这里我出现问题了,路径不能有中文,所以我需要更改用户,具体可以参考博文https://blog.csdn.net/weixin_43267344/article/details/109582664。
训练生成pt文件(在runs/train/exp),我是直接使用摄像头来进行检测,这里我用了网络摄像头easyn,具体设置可以参考博文。
使用摄像头时修改1:修改dataset.py文件如下图
2.修改detect文件里的参数,主要是一下以下两个。
1.Unable to find a valid cuDNN algorithm to run convolution
调小batchsize
2.视频测试时出现灰屏,模型改小就没问题了。
3.train特别慢:原因是:yolo5源码默认开启CPU多线程加载图片,所以很慢,需要在源码中修改,–workers = 0
4.detect时视频卡顿的优化方法:
修改datasets里面的workers和num_threads使得线程数变大;修改detect里面的strides使得帧率稍稍变小。
[1]https://blog.csdn.net/qq_36756866/article/details/109111065
[2]https://blog.csdn.net/qq_42495740/article/details/118577143