1.1 Download the source code
git clone https://github.com/pjreddie/darknet
cd darknet
1.2 Edit the Makefile
Change the following parameters:
GPU=1
CUDNN=1
OPENCV=1
NVCC=/usr/local/cuda/bin/nvcc
1.3 Run a test command
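Note: this command assumes the pretrained yolov3.weights file is already in the darknet directory; if it is not, download it first from the official site with wget https://pjreddie.com/media/files/yolov3.weights.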
./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg
If it fails with:
./darknet: error while loading shared libraries: libcudart.so.9.0: cannot open shared object file: No such file or directory
it can be fixed by registering the CUDA library directory with the dynamic linker:
sudo ldconfig /usr/local/cuda/lib64
2.1 Prepare the VOC training dataset
Arrange the training data under the folder structure below, then generate the four txt list files (trainval/train/val/test) used for training, validation, and testing:
VOCdevkit
+VOC2007
++Annotations
++ImageSets
+++Main
++JPEGImages
Annotations holds all the XML annotation files.
JPEGImages holds all the training images.
Main holds two txt files, train.txt and val.txt (image names listed without file extensions), generated with "generate-train-txt.py":
#coding=utf-8
# generate-train-txt.py -- run from the VOCdevkit/VOC2007 directory.
# Assumes JPEGImages and Annotations contain matching per-class subfolders.
import os
import random

root_path = "/media/wyq/719ffd70-9553-244f-ac13-2d8a0a86e395/数据集/训练数据集+模型/for-rubbish/yolov3/VOCdevkit/VOC2007/JPEGImages"
folderlist = os.listdir(root_path)
folderlist.sort(reverse=False)

trainval_percent = 1   # fraction of images used for train+val (the rest goes to test)
train_percent = 0.8    # fraction of trainval used for training (the rest goes to val)

txtsavepath = 'ImageSets/Main'
ftrainval = open(txtsavepath + '/trainval.txt', 'w')
ftest = open(txtsavepath + '/test.txt', 'w')
ftrain = open(txtsavepath + '/train.txt', 'w')
fval = open(txtsavepath + '/val.txt', 'w')

for folder in folderlist:
    xmlfilepath = 'Annotations/' + folder
    total_xml = os.listdir(xmlfilepath)
    total_xml.sort(reverse=False)
    num = len(total_xml)
    indices = range(num)
    tv = int(num * trainval_percent)
    tr = int(tv * train_percent)
    trainval = random.sample(indices, tv)   # indices assigned to train+val
    train = random.sample(trainval, tr)     # subset of trainval assigned to train
    for j in indices:
        name = folder + '/' + total_xml[j][:-4] + '\n'   # file name minus the .xml extension
        if j in trainval:
            ftrainval.write(name)
            if j in train:
                ftrain.write(name)
            else:
                fval.write(name)
        else:
            ftest.write(name)

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
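As a quick sanity check after running the script, a few lines of Python can confirm that every name written to train.txt and val.txt has a matching image and annotation. This is a sketch (not part of darknet) that assumes the VOC2007 layout above and is run from the VOC2007 directory:

# check_lists.py -- hypothetical sanity-check helper; assumes the
# VOCdevkit/VOC2007 layout above and is run from the VOC2007 directory
import os

for split in ('train', 'val'):
    with open('ImageSets/Main/%s.txt' % split) as f:
        names = [line.strip() for line in f if line.strip()]
    missing = 0
    for name in names:
        if not (os.path.isfile('JPEGImages/%s.jpg' % name)
                and os.path.isfile('Annotations/%s.xml' % name)):
            print('missing files for', name)
            missing += 1
    print('%s: %d entries, %d missing' % (split, len(names), missing))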
2.2 Modify and run voc_label.py
# voc_label.py
# Generates 2007_train.txt and 2007_val.txt, which list the paths of the
# training and validation images respectively, and writes a YOLO-format
# label file under VOCdevkit/VOC2007/labels/ for every image.
import xml.etree.ElementTree as ET
import os
from os import getcwd

sets = [('2007', 'train'), ('2007', 'val')]  # [('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
classes = ["pbag", "butt", "box", "bottle", "cup"]  # ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

def convert(size, box):
    # convert a (xmin, xmax, ymin, ymax) pixel box into YOLO's normalized
    # (center x, center y, width, height) format
    dw = 1./(size[0])
    dh = 1./(size[1])
    x = (box[0] + box[1])/2.0 - 1
    y = (box[2] + box[3])/2.0 - 1
    w = box[1] - box[0]
    h = box[3] - box[2]
    x = x*dw
    w = w*dw
    y = y*dh
    h = h*dh
    return (x, y, w, h)

def convert_annotation(year, image_id):
    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id))
    out_path = 'VOCdevkit/VOC%s/labels/%s.txt' % (year, image_id)
    os.makedirs(os.path.dirname(out_path), exist_ok=True)  # image_id may carry a subfolder prefix
    out_file = open(out_path, 'w')
    tree = ET.parse(in_file)
    root = tree.getroot()
    size = root.find('size')
    w = int(size.find('width').text)
    h = int(size.find('height').text)
    for obj in root.iter('object'):
        difficult = obj.find('difficult').text
        cls = obj.find('name').text
        if cls not in classes or int(difficult) == 1:
            continue
        cls_id = classes.index(cls)
        xmlbox = obj.find('bndbox')
        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text),
             float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
        bb = convert((w, h), b)
        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')

wd = getcwd()
for year, image_set in sets:
    if not os.path.exists('VOCdevkit/VOC%s/labels/' % year):
        os.makedirs('VOCdevkit/VOC%s/labels/' % year)
    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)).read().strip().split()
    list_file = open('%s_%s.txt' % (year, image_set), 'w')
    for image_id in image_ids:
        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n' % (wd, year, image_id))
        convert_annotation(year, image_id)
    list_file.close()

#os.system("cat 2007_train.txt 2007_val.txt > train.txt")
#os.system("cat 2007_train.txt 2007_val.txt 2007_test.txt > train.all.txt")
2.3 Modify the configuration files
① Download the pretrained model:
wget https://pjreddie.com/media/files/darknet53.conv.74
② Edit cfg/voc.data:
classes= 5
train = /media/wyq/719ffd70-9553-244f-ac13-2d8a0a86e395/数据集/训练数据集+模型/for-rubbish/yolov3/2007_train.txt
valid = /media/wyq/719ffd70-9553-244f-ac13-2d8a0a86e395/数据集/训练数据集+模型/for-rubbish/yolov3/2007_val.txt
names = data/voc.names
backup = backup
③ Edit data/voc.names:
pbag
butt
box
bottle
cup
④ Edit cfg/yolov3-voc.cfg:
[net]
# Testing
# batch=1
# subdivisions=1
# Training        # training mode; images per forward pass = batch/subdivisions
batch=64         # number of training samples per batch; weights are updated once per batch
subdivisions=16  # batch/subdivisions is the number of images fed to the trainer at a time;
                 # if memory is short, the batch is split into `subdivisions` sub-batches.
                 # On machines with little memory, reduce batch (larger batches generally
                 # train better); larger subdivisions eases GPU memory pressure.
width=608   #640 #416
height=608  #480 #416
channels=3  # these three parameters describe the input image; width and height set the
            # network's input resolution, which affects precision, and must be multiples of 32
momentum=0.9  # momentum term of the optimizer; affects how quickly gradient descent
              # approaches the optimum
decay=0.0005  # weight-decay regularization term; guards against overfitting
angle=0           # augment training samples by random rotation
saturation = 1.5  # augment training samples by adjusting saturation
exposure = 1.5    # augment training samples by adjusting exposure
hue=.1            # augment training samples by adjusting hue
learning_rate=0.001  # the learning rate controls how fast weights are updated: too large and
                     # the result overshoots the optimum, too small and convergence is slow.
                     # It is usually scheduled over training: start around 0.01~0.001, decay
                     # after a number of iterations, and decay by 100x or more near the end.
                     # For tuning advice see https://blog.csdn.net/qq_33485434/article/details/80452941
burn_in=1000       # while the iteration count is below burn_in, the learning rate ramps up
                   # under a separate rule; only after burn_in does the policy below take over
max_batches = 50200  # total number of iterations; training stops once max_batches is reached
policy=steps       # learning-rate policy; options include constant, steps, exp, poly, sig, random
steps=40000,45000  # steps and scales define the schedule: at iteration 40000 the learning rate
scales=.1,.1       # decays by 10x, and at iteration 45000 it decays by another 10x
[convolutional]
batch_normalize=1  # use batch normalization
filters=32         # number of output feature maps
size=3             # convolution kernel size
stride=1           # convolution stride
pad=1              # if pad=0, padding is taken from the padding parameter;
                   # if pad=1, padding is size/2 pixels added around the input edges
activation=leaky   # activation function
# Downsample
... ...
[convolutional]
size=1
stride=1
pad=1
filters=30  # set to 3*(classes+5), i.e. 3*(5+5)=30.
            # In the last convolutional layer before each [region]/[yolo] layer,
            # filters = (number of anchors assigned to that layer, i.e. the mask count)*(classes+5);
            # the 5 covers the coordinates tx, ty, tw, th and the objectness to from the paper
activation=linear
# in YOLOv2 the [yolo] layer was called the [region] layer
[yolo]
mask = 6,7,8
anchors = 20, 23, 32, 50, 57, 41, 54, 80, 96, 94, 73,147, 140,162, 178,250, 298,332
# 22, 19, 36, 39, 70, 37, 57, 65, 75,113, 111, 81, 141,145, 224,186, 307,284
# 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
# anchors are precomputed prior boxes (they can be calculated ahead of time from the command
# line); they depend on the training images, width, height, and the number of clusters
# (which should match num below, i.e. how many anchors are used). They can be hand-picked
# or learned from the training boxes with k-means.
classes=5  # number of classes
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1  # if GPU memory is small, set random=0 to disable multi-scale training;
          # random=1 increases detection precision
[route]
layers = -4
... ...
[convolutional]
size=1
stride=1
pad=1
filters=30  # was 75; set to 3*(classes+5), i.e. 3*(5+5)=30
activation=linear
[yolo]
mask = 3,4,5
anchors = 20, 23, 32, 50, 57, 41, 54, 80, 96, 94, 73,147, 140,162, 178,250, 298,332
# 22, 19, 36, 39, 70, 37, 57, 65, 75,113, 111, 81, 141,145, 224,186, 307,284
# 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=5  # number of classes
num=9      # number of boxes predicted per grid cell; must match the number of anchors.
           # Increase num to use more anchors; if Obj then tends toward 0 during training,
           # try increasing object_scale
jitter=.3  # data jitter to generate more training data; YOLOv2 used crop and flip (plus the
           # net-level angle). jitter is the crop parameter: jitter=.3 crops randomly within 0~0.3
ignore_thresh = .5  # controls whether the IoU error is counted: predictions with IoU above
                    # this threshold do not have their IoU error added to the cost function
truth_thresh = 1
random=1   # if GPU memory is small, set random=0 to disable multi-scale training;
           # with random=1 the input size is resized randomly between 320 and 608 in steps
           # of 32 during training; with random=0 it stays at the configured input size
[route]
layers = -4
... ...
[convolutional]
size=1
stride=1
pad=1
filters=30  # was 75; set to 3*(classes+5), i.e. 3*(5+5)=30
activation=linear
[yolo]
mask = 0,1,2
anchors = 20, 23, 32, 50, 57, 41, 54, 80, 96, 94, 73,147, 140,162, 178,250, 298,332
# 22, 19, 36, 39, 70, 37, 57, 65, 75,113, 111, 81, 141,145, 224,186, 307,284
# 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=5           # number of classes
num=9
jitter=.3           # jitter augmentation
ignore_thresh = .5  # threshold 1 from the paper
truth_thresh = 1    # threshold 2 from the paper
random=1            # if GPU memory is small, set random=0 to disable multi-scale training
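The two arithmetic rules in the cfg above (filters = masks*(classes+5) and the steps learning-rate schedule) are easy to get wrong when changing classes, so here is a small Python sketch of both. The power-4 burn-in ramp matches darknet's default as far as I know, but treat that exponent as an assumption:

# sketch of the cfg arithmetic above; the power-4 burn-in exponent is assumed
def yolo_filters(classes, masks_per_layer=3):
    # last conv layer before each [yolo] layer:
    # masks * (classes + 4 box coordinates + 1 objectness)
    return masks_per_layer * (classes + 5)

def steps_lr(iteration, base_lr=0.001, burn_in=1000,
             steps=(40000, 45000), scales=(0.1, 0.1), power=4):
    if iteration < burn_in:                 # ramp up during burn-in
        return base_lr * (iteration / burn_in) ** power
    lr = base_lr
    for step, scale in zip(steps, scales):  # apply each decay step already passed
        if iteration >= step:
            lr *= scale
    return lr

print(yolo_filters(5))   # -> 30, matching filters=30 above
print(steps_lr(500))     # mid burn-in: much smaller than base_lr
print(steps_lr(20000))   # -> 0.001
print(steps_lr(41000))   # -> 0.0001 (first 10x decay)
print(steps_lr(46000))   # -> 1e-05 (second 10x decay)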
# Key points from https://github.com/AlexeyAB/darknet (the AlexeyAB fork of yolov3):
#
# Training images can be collected yourself or taken from public datasets; the COCO and
# VOC datasets may already contain usable samples.
#
# Note: if during training you see nan values in the avg (loss) field, training has gone
# wrong; nan in some other fields is normal and training is going well.
#
# When should I stop training:
# when you see that the average loss (the "0.xxxxxx avg" field) no longer decreases over
# many iterations, you should stop training. Once training is stopped, take some of the
# last .weights files from darknet\build\darknet\x64\backup and choose the best of them.
#
# Overfitting is the case where you can detect objects on images from the training dataset
# but not on any other images; in that case take the weights from the early-stopping point.
#
# IoU (intersection over union): the average intersection over union between objects and
# detections for a certain threshold (e.g. 0.24).
#
# How to improve object detection:
# Before training:
# set the flag random=1 in your .cfg file: it increases precision by training Yolo at
# different resolutions.
# increase the network resolution in your .cfg file (height=608, width=608, or any value
# that is a multiple of 32): it will increase precision.
# recalculate the anchors for your dataset for the width and height in the cfg file:
# darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416
# then set the same 9 anchors in each of the 3 [yolo] layers in your cfg file.
#
# it is desirable that your training dataset include images with objects at different
# scales, rotations, lightings, from different sides, and on different backgrounds:
# make the samples as diverse as possible.
#
# it is desirable that your training dataset include images with non-labeled objects that
# you do not want to detect: negative samples without bounding boxes (empty .txt files).
#
# for training with a large number of objects in each image, add the parameter max=200 or
# a higher value in the last [region] layer in your cfg file.
#
# to speed up training (with some loss of detection accuracy), do fine-tuning instead of
# transfer learning: set stopbackward=1 in one of the penultimate convolutional layers
# before the 1st [yolo] layer, for example here:
# https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L598
#
# After training, for detection:
# increase the network resolution in your .cfg file (height=608 and width=608, or
# height=832 and width=832, or any value that is a multiple of 32): this increases the
# precision and makes it possible to detect small objects. You do not need to train the
# network again: just use the .weights file already trained at 416x416 resolution.
#
# if an Out of memory error occurs, increase subdivisions in the .cfg file (16, 32, or 64).
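For the negative-samples tip above, the empty label files can be generated in a few lines. This is a sketch; 'negatives' is a placeholder for wherever the background images live:

# sketch: create empty .txt label files for background (negative) images;
# 'negatives' is a placeholder directory name
import os

neg_dir = 'negatives'
for fname in os.listdir(neg_dir):
    if fname.lower().endswith(('.jpg', '.jpeg', '.png')):
        txt = os.path.splitext(os.path.join(neg_dir, fname))[0] + '.txt'
        open(txt, 'w').close()   # empty file: the image contributes only background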
【Changing the input image size and recomputing the anchors】
This requires the AlexeyAB fork of yolov3, which ships with the anchor-calculation tool:
https://github.com/AlexeyAB/darknet
Compile it the same way as above, then run:
./darknet detector calc_anchors cfg/voc.data -num_of_clusters 9 -width 640 -height 480
and replace the anchors in the .cfg file with the generated values.
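If you only want a rough idea of what calc_anchors produces, the clustering can be sketched in plain Python. Note this uses ordinary Euclidean k-means on (width, height) pairs rather than the IoU-based distance that calc_anchors uses, so the results are approximate:

# approximate anchor clustering sketch: plain k-means on box (w, h);
# calc_anchors uses an IoU-based distance, so real results will differ
import random

def kmeans_wh(boxes, k=9, iters=100):
    centers = random.sample(boxes, k)
    for _ in range(iters):
        clusters = [[] for _ in range(k)]
        for w, h in boxes:   # assign each box to its nearest center
            i = min(range(k), key=lambda c: (w - centers[c][0])**2 + (h - centers[c][1])**2)
            clusters[i].append((w, h))
        centers = [          # recompute each center as the cluster mean
            (sum(w for w, _ in cl) / len(cl), sum(h for _, h in cl) / len(cl)) if cl else centers[i]
            for i, cl in enumerate(clusters)
        ]
    return sorted(centers, key=lambda wh: wh[0] * wh[1])

# boxes: (width, height) pairs already scaled to the network input (e.g. 640x480);
# random placeholder data stands in for your real ground-truth boxes here
boxes = [(random.uniform(10, 300), random.uniform(10, 300)) for _ in range(500)]
print(kmeans_wh(boxes))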
2.4 Start training
./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg darknet53.conv.74 -gpus 0
Terminal output:
Region 106 Avg IOU: 0.061216, Class: 0.626917, Obj: 0.302514, No Obj: 0.458097, .5R: 0.000000, .75R: 0.000000, count: 4
Region 82 Avg IOU: -nan, Class: -nan, Obj: -nan, No Obj: 0.492722, .5R: -nan, .75R: -nan, count: 0
Region 94 Avg IOU: 0.143258, Class: 0.711037, Obj: 0.514405, No Obj: 0.513470, .5R: 0.000000, .75R: 0.000000, count: 2
Region 106 Avg IOU: 0.263153, Class: 0.434489, Obj: 0.432080, No Obj: 0.457346, .5R: 0.000000, .75R: 0.000000, count: 2
4: 557.440735, 558.787842 avg, 0.000000 rate, 1.717108 seconds, 256 images
Loaded: 1.211514 seconds
...
Region 106 Avg IOU: 0.104959, Class: 0.519788, Obj: 0.193895, No Obj: 0.326036, .5R: 0.000000, .75R: 0.000000, count: 3
Region 82 Avg IOU: 0.143481, Class: 0.373649, Obj: 0.561790, No Obj: 0.452239, .5R: 0.000000, .75R: 0.000000, count: 2
Region 94 Avg IOU: 0.359399, Class: 0.912672, Obj: 0.276435, No Obj: 0.434352, .5R: 0.000000, .75R: 0.000000, count: 1
Region 106 Avg IOU: 0.116004, Class: 0.368650, Obj: 0.245646, No Obj: 0.324594, .5R: 0.000000, .75R: 0.000000, count: 2
Region 82 Avg IOU: 0.397991, Class: 0.361905, Obj: 0.791880, No Obj: 0.451690, .5R: 0.000000, .75R: 0.000000, count: 1
Region 94 Avg IOU: 0.388091, Class: 0.398659, Obj: 0.498814, No Obj: 0.434391, .5R: 0.333333, .75R: 0.000000, count: 3
Region 106 Avg IOU: 0.084905, Class: 0.563230, Obj: 0.281137, No Obj: 0.325844, .5R: 0.000000, .75R: 0.000000, count: 3
Region 82 Avg IOU: -nan, Class: -nan, Obj: -nan, No Obj: 0.453356, .5R: -nan, .75R: -nan, count: 0
Region 94 Avg IOU: 0.069556, Class: 0.379951, Obj: 0.705520, No Obj: 0.433259, .5R: 0.000000, .75R: 0.000000, count: 2
Region 106 Avg IOU: 0.017326, Class: 0.713215, Obj: 0.136531, No Obj: 0.325978, .5R: 0.000000, .75R: 0.000000, count: 3
100: 338.473236, 562.332581 avg, 0.000000 rate, 1.687635 seconds, 6400 images
Saving weights to backup/yolov3-voc.backup
Saving weights to backup/yolov3-voc_100.weights
Resizing
416
Loaded: 0.878086 seconds
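The line to watch in this output has the form "100: 338.473236, 562.332581 avg, ...": the second number is the running-average loss that should trend downward. Assuming the log was saved to a file (for example by piping the training command through tee train.log), a short script can extract it for plotting:

# sketch: pull (iteration, avg loss) pairs out of a saved darknet training log;
# 'train.log' is a placeholder file name
import re

pattern = re.compile(r'^\s*(\d+):\s*([\d.]+),\s*([\d.]+)\s+avg')
points = []
with open('train.log') as f:
    for line in f:
        m = pattern.match(line)
        if m:
            points.append((int(m.group(1)), float(m.group(3))))  # iteration, avg loss

for it, avg in points[-5:]:
    print(it, avg)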
4.1 Command for testing an image
./darknet detector test cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_10000.weights /home/wyq/Documents/myCode/build-ssd-Desktop-Debug/635.png
4.2 Command for testing and saving a video
./darknet detector demo cfg/my-voc.data cfg/my-yolov3-voc.cfg backup/my-yolov3-voc_10000.weights -thresh 0.4 -ext_output /media/wyq/719ffd70-9553-244f-ac13-2d8a0a86e395/数据集/原始数据集/rubbish/rubbish-dataset-20190304-nianhui/video0/006.mp4 -out_filename /...output-video.mp4 (or .avi)
Here, -ext_output prints the bounding boxes to the terminal; -thresh controls which boxes are drawn based on detection score (here only detections with score > 0.4 are shown); adding -out_filename saves the detection results to a video file.
4.3 Other commands
1) Batch testing:
./darknet detector test cfg/voc.data cfg/yolo-voc.cfg ./svt/backup/yolo-voc_final.weights
/* reports no evaluation metrics; you enter image paths and it only shows the annotated image with class and confidence */
At the Enter Image Path: prompt, enter the path of a txt file listing the paths of all your test images (you can copy the path that follows valid in voc.data).
2) Generate prediction results:
./darknet detector valid cfg/voc.data cfg/yolo-voc.cfg backup/yolo-voc_final.weights
/* only the elapsed time is printed to the terminal; results are saved to ./results/comp4_det_test_[class_name].txt */
3) Compute recall:
./darknet detector recall cfg/voc.data cfg/yolo-voc.cfg backup/yolo-voc_final.weights (this command requires modifying detector.c)
/* output fields: ID: image index, starting at 0; correct: cumulative number of correctly detected boxes; total: cumulative number of ground-truth boxes; RPs/Img: cumulative proposals / images processed so far; IOU; Recall: correct / total; proposals: cumulative number of predicted boxes; Precision: correct / proposals */
*** Modifying detector.c (in the examples folder)
Change the definition and call of validate_detector_recall to:
void validate_detector_recall(char *datacfg, char *cfgfile, char *weightfile)
validate_detector_recall(datacfg, cfg, weights);
Inside validate_detector_recall, change the initialization of plist and paths from:
list *plist = get_paths("data/voc.2007.test");
char **paths = (char **)list_to_array(plist);
to:
list *options = read_data_cfg(datacfg);
char *valid_images = option_find_str(options, "valid", "data/train.list");
list *plist = get_paths(valid_images);
char **paths = (char **)list_to_array(plist);
After these changes, be sure to run make again in the darknet directory; the recall command can then be used.