YOLO is one of the most widely used deep learning algorithms in machine vision, offering real-time detection with high accuracy. This article describes how to set up a YOLO environment from scratch and run object detection on a custom dataset.
1. Hardware Environment Setup
If you have no GPU, this step can be skipped.
OpenCV is optional (it can later load the YOLO weight model, providing image detection and video output interfaces).
For setting up the GPU environment under Linux, refer to Chapters 2 and 3.
On Windows, download and install the matching driver from the NVIDIA driver website.
Once you know which CUDA version your driver supports, download and install it from the CUDA website.
Run cmd and enter nvcc --version to print the version number and confirm that the installation succeeded.
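If you prefer, the same check can be scripted. A minimal Python sketch, which only assumes that nvidia-smi and nvcc are on the PATH after the driver and CUDA installs:

import shutil
import subprocess

for tool, args in (('nvidia-smi', ['-L']), ('nvcc', ['--version'])):
    path = shutil.which(tool)
    print(tool, '->', path if path else 'not found')
    if path:
        # -L lists the detected GPUs; --version prints the CUDA compiler version
        subprocess.run([tool] + args, check=False)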
2. Darknet Environment Setup
Darknet, YOLO's native framework, is lightweight, transparent in its code, and easy to extend, which makes it well suited for learning and for embedded deployment.
$ git clone https://github.com/pjreddie/darknet.git
$ cd darknet
$ vim Makefile   # edit the first few lines of the Makefile in the darknet directory to match your environment:
GPU=1      # set to 1 if the GPU driver is installed
CUDNN=1    # set to 1 if CUDA and cuDNN are installed
OPENCV=1   # set to 1 if OpenCV is installed
...
ifeq ($(GPU), 1)   # adjust the CUDA paths below if necessary
COMMON+= -DGPU -I/usr/local/cuda/include/   # default CUDA install location
CFLAGS+= -DGPU
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
...
$ make
After the build completes, you can test it with the official pretrained weights:
$ wget https://pjreddie.com/media/files/yolo.weights   # download the weight model
$ ./darknet detect cfg/yolo.cfg yolo.weights data/dog.jpg   # run a test detection
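As noted in Section 1, recent OpenCV builds can also load Darknet cfg/weight files through the cv2.dnn module, which is convenient for image detection and video pipelines. A minimal sketch, assuming the files downloaded above and an OpenCV build that includes the dnn module; it only prints the raw output shapes instead of drawing boxes:

import cv2

# load the Darknet network (paths as in the test command above)
net = cv2.dnn.readNetFromDarknet('cfg/yolo.cfg', 'yolo.weights')
img = cv2.imread('data/dog.jpg')
# scale pixels to [0,1], resize to the network input, swap BGR -> RGB
blob = cv2.dnn.blobFromImage(img, 1 / 255.0, (416, 416), swapRB=True, crop=False)
net.setInput(blob)
outs = net.forward(net.getUnconnectedOutLayersNames())
# each output row is [cx, cy, w, h, objectness, class scores...]
print([o.shape for o in outs])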
3. Building a Custom VOC Dataset
If you run into problems, see the companion article on building the sea cucumber image dataset for details.
1 Batch-rename the JPG image files
# -*- coding:utf8 -*-
import os

class BatchRename():
    '''
    Batch-rename the image files in a folder.
    '''
    def __init__(self):
        # path to the image folder
        self.path = '/home/lbf/BIM-VOC/VOCdevkit/VOC2019/JPEGImages'

    def rename(self):
        filelist = os.listdir(self.path)
        total_num = len(filelist)
        i = 1   # starting index
        n = 6   # total number of digits, e.g. 000001.jpg has 6
        for item in filelist:
            if item.endswith('.jpg'):
                n = 6 - len(str(i))
                src = os.path.join(os.path.abspath(self.path), item)
                dst = os.path.join(os.path.abspath(self.path), str(0) * n + str(i) + '.jpg')
                try:
                    os.rename(src, dst)
                    print('converting %s to %s ...' % (src, dst))
                    i = i + 1
                except:
                    continue
        print('total %d to rename & converted %d jpgs' % (total_num, i - 1))

if __name__ == '__main__':
    demo = BatchRename()
    demo.rename()
2 Create the following directory tree and move the image files into the JPEGImages directory (a short script for creating the tree is sketched after the listing):
--VOC2019            (the year can be customized)
    --Annotations    (XML files)
    --ImageSets      (txt files)
        --Main
    --JPEGImages     (images)
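A minimal Python sketch for creating the tree (the base path 'VOCdevkit/VOC2019' is an assumption; adjust it to your layout):

import os

base = 'VOCdevkit/VOC2019'
for sub in ('Annotations', 'ImageSets/Main', 'JPEGImages'):
    # exist_ok avoids an error if part of the tree already exists
    os.makedirs(os.path.join(base, sub), exist_ok=True)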
3 Use the labelImg annotation tool to mark the target regions in the images
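labelImg writes one PASCAL VOC style XML file per image into the Annotations folder. An abridged example of roughly what such a file contains (the file name, image size, class name, and box coordinates are illustrative):

<annotation>
    <folder>JPEGImages</folder>
    <filename>000001.jpg</filename>
    <path>/home/lbf/BIM-VOC/VOCdevkit/VOC2019/JPEGImages/000001.jpg</path>
    <size>
        <width>640</width>
        <height>480</height>
        <depth>3</depth>
    </size>
    <object>
        <name>dog</name>
        <difficult>0</difficult>
        <bndbox>
            <xmin>48</xmin>
            <ymin>240</ymin>
            <xmax>195</xmax>
            <ymax>371</ymax>
        </bndbox>
    </object>
</annotation>

The scripts in step 5 and in the P.S. below read the size, object, filename, and path elements of these files.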
4 Generate the TXT split files (run the following script from inside the VOC2019 directory):
import os
import random

trainval_percent = 0.66   # fraction of all samples used for trainval (the rest go to test)
train_percent = 0.95      # fraction of trainval used for train (the rest go to val)
xmlfilepath = 'Annotations'
txtsavepath = 'ImageSets/Main'
total_xml = os.listdir(xmlfilepath)

num = len(total_xml)
list = range(num)
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(list, tv)
train = random.sample(trainval, tr)

ftrainval = open('ImageSets/Main/trainval.txt', 'w')
ftest = open('ImageSets/Main/test.txt', 'w')
ftrain = open('ImageSets/Main/train.txt', 'w')
fval = open('ImageSets/Main/val.txt', 'w')

for i in list:
    name = total_xml[i][:-4] + '\n'
    if i in trainval:
        ftrainval.write(name)
        if i in train:
            ftrain.write(name)
        else:
            fval.write(name)
    else:
        ftest.write(name)

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
5 Modify voc_label.py according to your dataset:
import xml.etree.ElementTree as ET
import pickle
import os
from os import listdir, getcwd
from os.path import join

# the year here must match the folder name chosen earlier
sets = [('2019', 'train'), ('2019', 'val'), ('2019', 'test')]
# replace this list with the classes you want to detect
classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

def convert(size, box):
    # convert a VOC box (xmin, xmax, ymin, ymax) to a normalized YOLO box (x, y, w, h)
    dw = 1. / (size[0])
    dh = 1. / (size[1])
    x = (box[0] + box[1]) / 2.0 - 1
    y = (box[2] + box[3]) / 2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x * dw
    w = w * dw
    y = y * dh
    h = h * dh
    return (x, y, w, h)

def convert_annotation(year, image_id):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
    out_file = open('VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id), 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

wd = getcwd()
for year, image_set in sets:
    if not os.path.exists('VOCdevkit/VOC%s/labels/' % (year)):
        os.makedirs('VOCdevkit/VOC%s/labels/' % (year))
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
    list_file = open('%s_%s.txt' % (year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
        convert_annotation(year, image_id)
    list_file.close()

# os.system("cat 2019_train.txt 2019_val.txt 2012_train.txt 2012_val.txt > train.txt")
# os.system("cat 2019_train.txt 2019_val.txt 2019_test.txt 2012_train.txt 2012_val.txt > train.all.txt")
6 Modify the voc.names file in the data folder and the voc.data file in the cfg folder to match your own dataset (the // annotations below are explanatory and should not be copied into the actual files).
voc.names
person   // label name of a detection class
dog
voc.data
classes= 2   // number of classes
train = /home/ben/Extra/darknet/scripts/2019_train.txt   // file listing the absolute paths of the training images
valid = /home/ben/Extra/darknet/scripts/2019_test.txt   // file listing the absolute paths of the test images (used mainly when measuring mAP)
names = /home/ben/Extra/darknet/data/voc.names   // the voc.names file edited above, i.e. the detection class names
backup = /home/ben/Extra/darknet/backup   // directory where the weights produced during training are saved
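A small Python sketch that writes both files from a class list (the class names and the darknet checkout path are placeholders; adjust them to your setup):

classes = ['person', 'dog']
base = '/home/ben/Extra/darknet'

# voc.names: one class name per line
with open(base + '/data/voc.names', 'w') as f:
    f.write('\n'.join(classes) + '\n')

# voc.data: class count plus the paths shown above
with open(base + '/cfg/voc.data', 'w') as f:
    f.write('classes= %d\n' % len(classes))
    f.write('train = %s/scripts/2019_train.txt\n' % base)
    f.write('valid = %s/scripts/2019_test.txt\n' % base)
    f.write('names = %s/data/voc.names\n' % base)
    f.write('backup = %s/backup\n' % base)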
P.S. If you move the dataset later, the path field in the XML files needs to be updated; the following code can be used as a reference.
# coding=utf-8
import os
import os.path
import xml.dom.minidom
import re

# directory containing the XML files to update
file_path = "/home/ben/Extra/darknet/scripts/VOCdeckit/VOC2019/labelImg-master/NEU-DET/ANNOTATIONS"
# new image directory to write into the XML files
picture_path = "/home/ben/Extra/darknet/scripts/VOCdeckit/VOC2019/labelImg-master/NEU-DET/IMAGES/"

class ChangePath():
    def __init__(self):
        self.path = './ANNOTATIONS/'   # directory of the original XML files

    def chpath(self):
        LINES = []
        total_num = 0
        filelist = os.listdir(self.path)
        for item in filelist:
            if item.endswith('.xml'):
                with open((self.path + item), 'r') as fp:
                    findpath = False
                    ### This version handles XML files that contain no <path> element.
                    ### To overwrite an existing <path> element instead, change the if branch to:
                    ###   if line.strip().startswith('<path>') and findpath == False:
                    ###       LINES.append('\t<path>' + picture_path + os.path.splitext(item)[0] + '.jpg</path>\n')
                    ###       findpath = True
                    for line in fp:
                        if line.strip().startswith('<filename>') and findpath == False:   # locate the <filename> line
                            LINES.append(line)   # keep the original <filename> line
                            LINES.append('\t<path>' + picture_path + os.path.splitext(item)[0] + '.jpg</path>\n')   # add a <path> line after it
                            findpath = True
                        else:
                            LINES.append(line)
                with open((self.path + item), 'w') as fp:
                    fp.writelines(LINES)
                while len(LINES) > 0:
                    LINES.pop(0)
                print('%s updated' % item)
                total_num = total_num + 1
        print('%d files processed in total' % total_num)

if __name__ == '__main__':
    change = ChangePath()
    change.chpath()
4. Network Parameter Modification
For a detailed explanation of each parameter, refer to a YOLOv3 parameter guide.
The main changes are shown below (they appear in three places in the YOLOv3 network and in two places in YOLO-Tiny):
...
[convolutional]
size=1
stride=1
pad=1
filters=45   // number of filters in the last convolutional layer before each [yolo] layer;
             // for YOLOv3 the formula is filters = mask_count x (classes + coords + 1) = 3 x (classes + 4 + 1),
             // where the 4 coords are tx, ty, tw, th and the 1 is the objectness score to from the paper
activation=linear
[yolo]
mask = 6,7,8
anchors = XXXX
classes=10   // number of classes to detect; e.g. set this to 1 if you only detect pedestrians
num=9        // total number of anchors listed under "anchors="; each [yolo] layer uses the 3 selected by its mask
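For example, a single-class detector (classes=1) needs filters = 3 x (1 + 4 + 1) = 18 in each of these convolutional layers, while the 10-class example above needs filters = 3 x (10 + 5) = 45.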
5. Visualizing Training Progress
The main things to watch are how the IOU and loss values evolve.
Visualizing these intermediate values requires the log file saved during training; the training command below both starts training and tees its output to a log file (adjust the paths to your own setup):
./darknet detector train pds/fish/cfg/fish.data pds/fish/cfg/yolov3-fish.cfg darknet53.conv.74 2>&1 | tee visualization/train_yolov3.log
For more detail, see the article on visualizing YOLOv3 training parameters and plotting curves for loss, IOU, avg recall, and so on.
The reference code is given below.
extract_log.py
# coding=utf-8
# Extract lines from the training log: remove unparsable lines and write a formatted
# log file for the plotting scripts below.
import inspect
import os
import random
import sys

def extract_log(log_file, new_log_file, key_word):
    with open(log_file, 'r') as f:
        with open(new_log_file, 'w') as train_log:
            for line in f:
                # skip the multi-GPU synchronization lines
                if 'Syncing' in line:
                    continue
                # skip lines containing nan (division-by-zero results)
                if 'nan' in line:
                    continue
                if key_word in line:
                    train_log.write(line)

extract_log('train_yolov3.log', 'train_log_loss.txt', 'images')
extract_log('train_yolov3.log', 'train_log_iou.txt', 'IOU')
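For reference, the extracted log lines look roughly like the following (the values are illustrative and the exact wording can vary between darknet versions); this is why the scripts filter on the keywords 'images' and 'IOU' and then split each field on ': ' or on spaces:
Region Avg IOU: 0.794182, Class: 0.998968, Obj: 0.777714, No Obj: 0.018090, Avg Recall: 1.000000, count: 8
9798: 0.370096, 0.451929 avg, 0.001000 rate, 3.300000 seconds, 627072 images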
IOU plotting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline

lines = 319815   # set this to the number of lines in train_log_iou.txt
# result = pd.read_csv('train_log_iou.txt', skiprows=[x for x in range(lines) if (x % 10 == 0 or x % 10 == 9)], error_bad_lines=False, names=['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count'])
result = pd.read_csv('train_log_iou.txt', skiprows=[x for x in range(lines) if (x % 100 != 99)], error_bad_lines=False, names=['Region Avg IOU', 'Class', 'Obj', 'No Obj', 'Avg Recall', 'count'])
# note: newer pandas versions replace error_bad_lines=False with on_bad_lines='skip'
result.head()

result['Region Avg IOU'] = result['Region Avg IOU'].str.split(': ').str.get(1)
result['Class'] = result['Class'].str.split(': ').str.get(1)
result['Obj'] = result['Obj'].str.split(': ').str.get(1)
result['No Obj'] = result['No Obj'].str.split(': ').str.get(1)
result['Avg Recall'] = result['Avg Recall'].str.split(': ').str.get(1)
result['count'] = result['count'].str.split(': ').str.get(1)
result.head()
result.tail()

# print(result.head())
# print(result.tail())
# print(result.dtypes)
print(result['Region Avg IOU'])

result['Region Avg IOU'] = pd.to_numeric(result['Region Avg IOU'])
result['Class'] = pd.to_numeric(result['Class'])
result['Obj'] = pd.to_numeric(result['Obj'])
result['No Obj'] = pd.to_numeric(result['No Obj'])
result['Avg Recall'] = pd.to_numeric(result['Avg Recall'])
result['count'] = pd.to_numeric(result['count'])
result.dtypes

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(result['Region Avg IOU'].values, label='Region Avg IOU')
# ax.plot(result['Class'].values, label='Class')
# ax.plot(result['Obj'].values, label='Obj')
# ax.plot(result['No Obj'].values, label='No Obj')
# ax.plot(result['Avg Recall'].values, label='Avg Recall')
# ax.plot(result['count'].values, label='count')
ax.legend(loc='best')
ax.set_title('Region Avg IOU curve')
ax.set_xlabel('Batches')
ax.set_ylabel('IOU')
# fig.savefig('Avg IOU')
fig.savefig('Region Avg IOU', dpi=256)
Loss plotting
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# %matplotlib inline

lines = 20000   # set this to the number of lines in your train_log_loss.txt
result = pd.read_csv('train_log_loss.txt', skiprows=[x for x in range(lines) if ((x % 10 != 9) or (x < 1000))], error_bad_lines=False, names=['loss', 'avg', 'rate', 'seconds', 'images'])
# result = pd.read_csv('train_log_loss.txt', skiprows=[x for x in range(lines) if ((x % 10 != 9 and x % 10 != 5) or x < 1000)], error_bad_lines=False, names=['loss', 'avg', 'rate', 'seconds', 'images'])
result.head()

result['loss'] = result['loss'].str.split(' ').str.get(1)
result['avg'] = result['avg'].str.split(' ').str.get(1)
result['rate'] = result['rate'].str.split(' ').str.get(1)
result['seconds'] = result['seconds'].str.split(' ').str.get(1)
result['images'] = result['images'].str.split(' ').str.get(1)
result.head()
result.tail()

# print(result.head())
# print(result.tail())
# print(result.dtypes)
print(result['loss'])
print(result['avg'])
print(result['rate'])
print(result['seconds'])
print(result['images'])

result['loss'] = pd.to_numeric(result['loss'])
result['avg'] = pd.to_numeric(result['avg'])
result['rate'] = pd.to_numeric(result['rate'])
result['seconds'] = pd.to_numeric(result['seconds'])
result['images'] = pd.to_numeric(result['images'])
result.dtypes

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.plot(result['avg'].values, label='avg loss')
# ax.plot(result['loss'].values, label='loss')
ax.legend(loc='best')   # place the legend automatically
ax.set_title('Loss curve')
ax.set_xlabel('Batches')
ax.set_ylabel('Loss')
fig.savefig('avg_loss', dpi=256)
# fig.savefig('loss')
6. Miscellaneous
Neural network background notes
Learning rate tuning tips
Anchor boxes
How to use k-means: change the number of clusters in k-means-yolo.py, compare how the average IOU changes, and pick the k at the elbow of the curve.
k-means-yolo.py
import glob
import os
import sys
import xml.etree.ElementTree as ET
import numpy as np
from kmeans import kmeans, avg_iou

# dataset root folder
ROOT_PATH = '/root/darknet/scripts/VOCdevkit/VOC2007/'
# number of clusters: try different values, compare the resulting average IOU,
# and choose the value at the elbow of the curve as k
CLUSTERS = 6
# network input size (width and height are assumed equal)
SIZE = 640

# load the YOLO-format annotations
def load_dataset(path):
    jpegimages = os.path.join(path, 'JPEGImages')
    if not os.path.exists(jpegimages):
        print('no JPEGImages folders, program abort')
        sys.exit(0)
    labels_txt = os.path.join(path, 'labels')
    if not os.path.exists(labels_txt):
        print('no labels folders, program abort')
        sys.exit(0)

    label_file = os.listdir(labels_txt)
    print('label count: {}'.format(len(label_file)))
    dataset = []
    for label in label_file:
        with open(os.path.join(labels_txt, label), 'r') as f:
            txt_content = f.readlines()
        for line in txt_content:
            line_split = line.split(' ')
            roi_with = float(line_split[len(line_split) - 2])
            roi_height = float(line_split[len(line_split) - 1])
            if roi_with == 0 or roi_height == 0:
                continue
            dataset.append([roi_with, roi_height])
            # print([roi_with, roi_height])
    return np.array(dataset)

data = load_dataset(ROOT_PATH)
out = kmeans(data, k=CLUSTERS)
print(out)
print("Accuracy: {:.2f}%".format(avg_iou(data, out) * 100))
print("Boxes:\n {}-{}".format(out[:, 0] * SIZE, out[:, 1] * SIZE))
ratios = np.around(out[:, 0] / out[:, 1], decimals=2).tolist()
print("Ratios:\n {}".format(sorted(ratios)))
kmeans.py (saved under this name so that the "from kmeans import kmeans, avg_iou" line above works)
import numpy as np

def iou(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.
    :param box: tuple or array, shifted to the origin (i.e. width and height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k, 0) where k is the number of clusters
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    iou_ = intersection / (box_area + cluster_area - intersection)
    return iou_

def avg_iou(boxes, clusters):
    """
    Calculates the average Intersection over Union (IoU) between a numpy array of boxes and k clusters.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: average IoU as a single float
    """
    return np.mean([np.max(iou(boxes[i], clusters)) for i in range(boxes.shape[0])])

def translate_boxes(boxes):
    """
    Translates all the boxes to the origin.
    :param boxes: numpy array of shape (r, 4)
    :return: numpy array of shape (r, 2)
    """
    new_boxes = boxes.copy()
    for row in range(new_boxes.shape[0]):
        new_boxes[row][2] = np.abs(new_boxes[row][2] - new_boxes[row][0])
        new_boxes[row][3] = np.abs(new_boxes[row][3] - new_boxes[row][1])
    return np.delete(new_boxes, [0, 1], axis=1)

def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: distance function
    :return: numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    last_clusters = np.zeros((rows,))

    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]

    while True:
        for row in range(rows):
            distances[row] = 1 - iou(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)

        last_clusters = nearest_clusters

    return clusters