This article assumes you already have a working TensorFlow-GPU environment. If you have not set one up yet, please first read:
One illustrated guide to installing Anaconda + GPU Driver + CUDA + cuDNN + TensorFlow-GPU + PyTorch on Ubuntu (with cloud-drive links)
import os
import random

trainval_percent = 0.1  # fraction of the data held out as trainval (split into test + val below); adjustable
train_percent = 0.9     # fraction of the held-out set written to test.txt; the rest goes to val.txt; adjustable

xmlfilepath = 'Annotations'     # path to the annotation files
txtsavepath = 'ImageSets/Main'  # output path for the split lists

total_xml = os.listdir(xmlfilepath)
num = len(total_xml)
indices = range(num)  # avoid shadowing the built-in `list`
tv = int(num * trainval_percent)
tr = int(tv * train_percent)
trainval = random.sample(indices, tv)
train = random.sample(trainval, tr)

# With the values above: 90% of the images go to train.txt; of the held-out 10%,
# 90% go to test.txt and 10% to val.txt.
ftrainval = open('ImageSets/Main/trainval.txt', 'w')
ftest = open('ImageSets/Main/test.txt', 'w')
ftrain = open('ImageSets/Main/train.txt', 'w')
fval = open('ImageSets/Main/val.txt', 'w')

for i in indices:
    name = total_xml[i][:-4] + '\n'  # strip the .xml extension
    if i in trainval:
        ftrainval.write(name)
        if i in train:
            ftest.write(name)
        else:
            fval.write(name)
    else:
        ftrain.write(name)

ftrainval.close()
ftrain.close()
fval.close()
ftest.close()
$ python split.py
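As a quick sanity check, you can count how many image IDs ended up in each list; with the ratios above, train.txt should hold roughly 90% of the samples:
$ wc -l ImageSets/Main/*.txt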
$ sudo apt install libopencv-dev
$ git clone https://github.com/pjreddie/darknet
$ cd darknet
$ make
$ wget https://pjreddie.com/media/files/yolov3.weights
$ ./darknet detect cfg/yolov3.cfg yolov3.weights data/dog.jpg
To make the dataset easier to maintain, we do not place it inside the darknet folder; instead, we create a symbolic link:
$ cd darknet
$ mkdir VOCdevkit
$ ln -s /path/to/your/dataset/fold/VOC2007/ /path/to/your/darknet/fold/VOCdevkit/VOC2007
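You can verify the link before moving on; VOC2007 should list Annotations, ImageSets and JPEGImages:
$ ls -l VOCdevkit/
$ ls VOCdevkit/VOC2007/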
Edit scripts/voc_label.py, changing sets and classes to match your dataset:
sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
classes = ["car","person"]
Running this script generates three txt files in the darknet folder: 2007_val.txt (paths of the validation samples), 2007_train.txt and 2007_test.txt. It also creates a labels folder inside the dataset folder, containing one label txt file per image in YOLO format: class index, normalized box-center x, normalized box-center y, normalized width, normalized height.
$ python voc_label.py
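For reference, the coordinate conversion voc_label.py applies to each VOC box boils down to the following (a minimal sketch; the function name is mine, and box is (xmin, xmax, ymin, ymax) in pixels):

def voc_box_to_yolo(size, box):
    # size: (image_width, image_height); box: (xmin, xmax, ymin, ymax) in pixels
    dw, dh = 1.0 / size[0], 1.0 / size[1]
    x = (box[0] + box[1]) / 2.0 * dw  # normalized box-center x
    y = (box[2] + box[3]) / 2.0 * dh  # normalized box-center y
    w = (box[1] - box[0]) * dw        # normalized width
    h = (box[3] - box[2]) * dh        # normalized height
    return x, y, w, h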
Create data/voc.names, with one class name per line:
car
person
Then modify cfg/voc.data:
classes= 2  # total number of classes in the training set
train = /darknet/2007_train.txt  # path to the training list generated in the previous step
valid = /darknet/2007_val.txt  # path to the validation list generated in the previous step
names = data/voc.names  # path to the voc.names file
backup = backup
Modify the [net] section of cfg/yolov3-voc.cfg:
[net]
# Testing (commented out for training)
# batch=1
# subdivisions=1
# Training
batch=64
subdivisions=16  # can be lowered to 8 if you have enough GPU memory
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001  # initial learning rate, usually 0.01~0.001; adjust it from the loss curve during training (loss oscillating -> decrease lr; loss flat and not converging -> increase lr)
burn_in=1000  # after 1000 iterations, switch to the schedule given by `policy`
max_batches = 4000  # maximum number of iterations: 2000 x number of classes, but at least 4000
policy=steps  # learning-rate schedule
steps=3200,3600  # 80% and 90% of max_batches
scales=.1,.1  # used together with steps: multiply the learning rate by these factors
......
[convolutional]
size=1
stride=1
pad=1
filters=21 #--------------- change to 3*(5+classes), i.e. 3*(5+2)=21 here
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=2 #--------------- change to the number of label classes, 2 here
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
......
[convolutional]
size=1
stride=1
pad=1
filters=21 #--------------- same change as above
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=2 #--------------- same change as above
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
......
[convolutional]
size=1
stride=1
pad=1
filters=21 #--------------- same change as above
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=2 #--------------- same change as above
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
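To double-check the two edits above, here is a small illustration (my own sketch, not part of the darknet sources) of the filters formula and of how burn_in plus the steps policy shape the learning rate; darknet's warm-up ramp uses a power of 4 by default:

def yolo_filters(num_classes, boxes_per_mask=3):
    # each scale predicts boxes_per_mask boxes, each with (x, y, w, h, objectness) + class scores
    return boxes_per_mask * (5 + num_classes)

def lr_at(iteration, base_lr=0.001, burn_in=1000, steps=(3200, 3600), scales=(0.1, 0.1)):
    if iteration < burn_in:                  # warm-up ramp
        return base_lr * (iteration / burn_in) ** 4
    lr = base_lr
    for s, sc in zip(steps, scales):         # steps policy
        if iteration >= s:
            lr *= sc
    return lr

print(yolo_filters(2))  # 21, the filters value set above
print(lr_at(3700))      # 1e-05 after both decay steps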
$ wget https://pjreddie.com/media/files/darknet53.conv.74
Then run the following command; training starts from the pretrained darknet53 extractor, and the log is saved for later plotting:
$ mkdir -p visualization
$ ./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg darknet53.conv.74 2>&1 | tee visualization/train_yolov3.log # you can swap in your own intermediate weights later; keep the log for visualization
During training, darknet prints output like the following. In the batch summary line, the fields are: iteration number, total loss, average loss, current learning rate, time spent on this batch, and total number of images seen so far.
Region 106 Avg IOU: 0.794182, Class: 0.999382, Obj: 0.966953, No Obj: 0.000240, .5R: 1.000000, .75R: 0.750000, count: 4
299: 18.237528, 28.675016 avg, 0.000008 rate, 0.065416 seconds, 299 images
Note: every so many iterations (or epochs), pause training and validate: plot the loss curve of the current model and adjust lr accordingly, and compute the current model's AP/mAP on the validation set. Once the requirements are met, you can stop training.
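To resume after pausing, pass any checkpoint from backup/ in place of the pretrained extractor, for example (tee -a appends to the existing log):
$ ./darknet detector train cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_1000.weights 2>&1 | tee -a visualization/train_yolov3.log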
$ python extract_log.py # code below
def extract_log(log_file, new_log_file, key_word):
    with open(log_file, 'r') as f, open(new_log_file, 'w') as train_log:
        for line in f:
            # drop the multi-GPU sync lines
            if 'Syncing' in line:
                continue
            # drop lines containing nan (divide-by-zero errors)
            if 'nan' in line:
                continue
            if key_word in line:
                train_log.write(line)

extract_log('train_yolov3.log', 'train_log_loss.txt', 'images')  # change to your own log name
extract_log('train_yolov3.log', 'train_log_iou.txt', 'IOU')      # change to your own log name
$ python train_loss_visualization.py # code below
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

lines = 4500      # change to the number of lines in your train_log_loss.txt
start_ite = 6000  # smallest iteration number appearing in train_log_loss.txt
end_ite = 15000   # largest iteration number appearing in train_log_loss.txt
step = 10         # keep every `step`-th line; controls how dense the plot is
ignore = 0        # if the initial loss is very large, skip the first `ignore` iterations

data_path = './train_log_loss.txt'  # path to train_log_loss.txt
result_path = './avg_loss'          # path where the figure is saved

names = ['loss', 'avg', 'rate', 'seconds', 'images']
# for pandas >= 1.3, replace error_bad_lines=False with on_bad_lines='skip'
result = pd.read_csv(data_path,
                     skiprows=[x for x in range(lines)
                               if (x < lines * 1.0 / ((end_ite - start_ite) * 1.0) * ignore or x % step != 9)],
                     error_bad_lines=False, names=names)
result.head()
for name in names:
    result[name] = result[name].str.split(' ').str.get(1)
result.head()
result.tail()
for name in names:
    result[name] = pd.to_numeric(result[name])
result.dtypes
# print(result['avg'].values)

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
# build the x axis (iteration numbers)
x_num = len(result['avg'].values)
tmp = (end_ite - start_ite - ignore) / (x_num * 1.0)
x = []
for i in range(x_num):
    x.append(i * tmp + start_ite + ignore)
# print('total = %d\n' % x_num)
# print('start = %d, end = %d\n' % (x[0], x[-1]))
ax.plot(x, result['avg'].values, label='avg_loss')
# ax.plot(result['loss'].values, label='loss')
plt.grid()
ax.legend(loc='best')
ax.set_title('The loss curves')
ax.set_xlabel('batches')
fig.savefig(result_path)
# fig.savefig('loss')
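The train_log_iou.txt extracted earlier can be plotted the same way; a minimal sketch (file and field names as in the log above):

import matplotlib.pyplot as plt

ious = []
with open('./train_log_iou.txt') as f:
    for line in f:
        # each line looks like: "Region 106 Avg IOU: 0.794182, Class: ..., count: 4"
        first_field = line.split(',')[0]
        try:
            ious.append(float(first_field.split(':')[1]))
        except (IndexError, ValueError):
            continue  # skip malformed lines

fig, ax = plt.subplots()
ax.plot(ious, label='Region Avg IOU')
ax.set_xlabel('log line')
ax.legend(loc='best')
plt.grid()
fig.savefig('avg_iou')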
Before validating, modify cfg/voc.data:
classes= 2
train = /darknet/2007_train.txt
valid = /darknet/2007_val.txt
names = data/voc.names
backup = /darknet/results  # change the output folder so the validation results go to results/
and switch cfg/yolov3-voc.cfg to testing mode:
# Testing
batch=1
subdivisions=1
# Training (commented out)
# batch=64
# subdivisions=16
width=416
height=416
......
$ ./darknet detector valid cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_1000.weights # remember to change the weights file each time
This generates results/comp4_det_test_car.txt and results/comp4_det_test_person.txt; rename them to comp4_det_val_car.txt and comp4_det_val_person.txt.
Note: delete the cached annotation file annots.pkl before each new evaluation.
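Both steps can be scripted; a small sketch using bash substitution (adjust the paths to your layout):
$ cd results && for f in comp4_det_test_*.txt; do mv "$f" "${f/_test_/_val_}"; done && cd ..
$ rm -f VOCdevkit/annotations_cache/annots.pkl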
Create voc_eval.py:
import xml.etree.ElementTree as ET
import os
import pickle
import numpy as np

def parse_rec(filename):
    """ Parse a PASCAL VOC xml file """
    tree = ET.parse(filename)
    objects = []
    for obj in tree.findall('object'):
        obj_struct = {}
        obj_struct['name'] = obj.find('name').text
        obj_struct['pose'] = obj.find('pose').text
        obj_struct['truncated'] = int(obj.find('truncated').text)
        obj_struct['difficult'] = int(obj.find('difficult').text)
        bbox = obj.find('bndbox')
        obj_struct['bbox'] = [int(bbox.find('xmin').text),
                              int(bbox.find('ymin').text),
                              int(bbox.find('xmax').text),
                              int(bbox.find('ymax').text)]
        objects.append(obj_struct)
    return objects

def voc_ap(rec, prec, use_07_metric=False):
    """ ap = voc_ap(rec, prec, [use_07_metric])
    Compute VOC AP given precision and recall.
    If use_07_metric is true, uses the
    VOC 07 11 point method (default:False).
    """
    if use_07_metric:
        # 11 point metric
        ap = 0.
        for t in np.arange(0., 1.1, 0.1):
            if np.sum(rec >= t) == 0:
                p = 0
            else:
                p = np.max(prec[rec >= t])
            ap = ap + p / 11.
    else:
        # correct AP calculation
        # first append sentinel values at the end
        mrec = np.concatenate(([0.], rec, [1.]))
        mpre = np.concatenate(([0.], prec, [0.]))
        # compute the precision envelope
        for i in range(mpre.size - 1, 0, -1):
            mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])
        # to calculate area under PR curve, look for points
        # where X axis (recall) changes value
        i = np.where(mrec[1:] != mrec[:-1])[0]
        # and sum (\Delta recall) * prec
        ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
    return ap

def voc_eval(detpath,
             annopath,
             imagesetfile,
             classname,
             cachedir,
             ovthresh=0.5,
             use_07_metric=False):
    """rec, prec, ap = voc_eval(detpath,
                                annopath,
                                imagesetfile,
                                classname,
                                [ovthresh],
                                [use_07_metric])
    Top level function that does the PASCAL VOC evaluation.
    detpath: Path to detections
        detpath.format(classname) should produce the detection results file.
    annopath: Path to annotations
        annopath.format(imagename) should be the xml annotations file.
    imagesetfile: Text file containing the list of images, one image per line.
    classname: Category name (duh)
    cachedir: Directory for caching the annotations
    [ovthresh]: Overlap threshold (default = 0.5)
    [use_07_metric]: Whether to use VOC07's 11 point AP computation
        (default False)
    """
    # assumes detections are in detpath.format(classname)
    # assumes annotations are in annopath.format(imagename)
    # assumes imagesetfile is a text file with each line an image name
    # cachedir caches the annotations in a pickle file

    # first load gt
    if not os.path.isdir(cachedir):
        os.mkdir(cachedir)
    cachefile = os.path.join(cachedir, 'annots.pkl')
    # read list of images
    with open(imagesetfile, 'r') as f:
        lines = f.readlines()
    imagenames = [x.strip() for x in lines]

    if not os.path.isfile(cachefile):
        # load annots
        recs = {}
        for i, imagename in enumerate(imagenames):
            recs[imagename] = parse_rec(annopath.format(imagename))
            if i % 100 == 0:
                print('Reading annotation for {:d}/{:d}'.format(
                    i + 1, len(imagenames)))
        # save
        print('Saving cached annotations to {:s}'.format(cachefile))
        with open(cachefile, 'wb') as f:
            pickle.dump(recs, f)
    else:
        # load
        with open(cachefile, 'rb') as f:
            recs = pickle.load(f)

    # extract gt objects for this class
    class_recs = {}
    npos = 0
    for imagename in imagenames:
        R = [obj for obj in recs[imagename] if obj['name'] == classname]
        bbox = np.array([x['bbox'] for x in R])
        difficult = np.array([x['difficult'] for x in R]).astype(bool)
        det = [False] * len(R)
        npos = npos + sum(~difficult)
        class_recs[imagename] = {'bbox': bbox,
                                 'difficult': difficult,
                                 'det': det}

    # read dets
    detfile = detpath.format(classname)
    with open(detfile, 'r') as f:
        lines = f.readlines()

    splitlines = [x.strip().split(' ') for x in lines]
    image_ids = [x[0] for x in splitlines]
    confidence = np.array([float(x[1]) for x in splitlines])
    BB = np.array([[float(z) for z in x[2:]] for x in splitlines])

    # sort by confidence
    sorted_ind = np.argsort(-confidence)
    sorted_scores = np.sort(-confidence)
    BB = BB[sorted_ind, :]
    image_ids = [image_ids[x] for x in sorted_ind]

    # go down dets and mark TPs and FPs
    nd = len(image_ids)
    tp = np.zeros(nd)
    fp = np.zeros(nd)
    for d in range(nd):
        R = class_recs[image_ids[d]]
        bb = BB[d, :].astype(float)
        ovmax = -np.inf
        BBGT = R['bbox'].astype(float)

        if BBGT.size > 0:
            # compute overlaps
            # intersection
            ixmin = np.maximum(BBGT[:, 0], bb[0])
            iymin = np.maximum(BBGT[:, 1], bb[1])
            ixmax = np.minimum(BBGT[:, 2], bb[2])
            iymax = np.minimum(BBGT[:, 3], bb[3])
            iw = np.maximum(ixmax - ixmin + 1., 0.)
            ih = np.maximum(iymax - iymin + 1., 0.)
            inters = iw * ih

            # union
            uni = ((bb[2] - bb[0] + 1.) * (bb[3] - bb[1] + 1.) +
                   (BBGT[:, 2] - BBGT[:, 0] + 1.) *
                   (BBGT[:, 3] - BBGT[:, 1] + 1.) - inters)

            overlaps = inters / uni
            ovmax = np.max(overlaps)
            jmax = np.argmax(overlaps)

        if ovmax > ovthresh:
            if not R['difficult'][jmax]:
                if not R['det'][jmax]:
                    tp[d] = 1.
                    R['det'][jmax] = 1
                else:
                    fp[d] = 1.
        else:
            fp[d] = 1.

    # compute precision recall
    fp = np.cumsum(fp)
    tp = np.cumsum(tp)
    rec = tp / float(npos)
    # avoid divide by zero in case the first detection matches a difficult
    # ground truth
    prec = tp / np.maximum(tp + fp, np.finfo(np.float64).eps)
    ap = voc_ap(rec, prec, use_07_metric)

    return rec, prec, ap
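voc_eval can also be called directly for a quick single-class check; a sketch using the paths from this tutorial (note the {:s} placeholders the function expects):

from voc_eval import voc_eval

rec, prec, ap = voc_eval(
    'results/comp4_det_val_{:s}.txt',            # detection-results template
    'VOCdevkit/VOC2007/Annotations/{:s}.xml',    # annotation template
    'VOCdevkit/VOC2007/ImageSets/Main/val.txt',  # image list
    'car',                                       # class name
    'VOCdevkit/annotations_cache',               # annotation cache directory
    ovthresh=0.5)
print('AP for car = {:.4f}'.format(ap))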
Create reval_voc.py:
import os, sys, argparse
import numpy as np
import pickle
from voc_eval import voc_eval

def parse_args():
    """
    Parse input arguments
    """
    parser = argparse.ArgumentParser(description='Re-evaluate results')
    parser.add_argument('output_dir', nargs=1, help='results directory', type=str)
    parser.add_argument('--voc_dir', dest='voc_dir', default='data/VOCdevkit', type=str)
    parser.add_argument('--year', dest='year', default='2007', type=str)
    parser.add_argument('--image_set', dest='image_set', default='test', type=str)
    parser.add_argument('--classes', dest='class_file', default='data/voc.names', type=str)
    if len(sys.argv) == 1:
        parser.print_help()
        sys.exit(1)
    args = parser.parse_args()
    return args

def get_voc_results_file_template(image_set, out_dir='results'):
    filename = 'comp4_det_' + image_set + '_{:s}.txt'
    path = os.path.join(out_dir, filename)
    return path

def do_python_eval(devkit_path, year, image_set, classes, output_dir='results'):
    annopath = os.path.join(
        devkit_path,
        'VOC' + year,
        'Annotations',
        '{:s}.xml')
    imagesetfile = os.path.join(
        devkit_path,
        'VOC' + year,
        'ImageSets',
        'Main',
        image_set + '.txt')
    cachedir = os.path.join(devkit_path, 'annotations_cache')
    aps = []
    # The PASCAL VOC metric changed in 2010
    # use_07_metric = True if int(year) < 2010 else False
    use_07_metric = False
    print('VOC07 metric? ' + ('Yes' if use_07_metric else 'No'))
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    for i, cls in enumerate(classes):
        if cls == '__background__':
            continue
        filename = get_voc_results_file_template(image_set).format(cls)
        rec, prec, ap = voc_eval(
            filename, annopath, imagesetfile, cls, cachedir, ovthresh=0.5,
            use_07_metric=use_07_metric)
        aps += [ap]
        print('AP for {} = {:.4f}'.format(cls, ap))
        with open(os.path.join(output_dir, cls + '_pr.pkl'), 'wb') as f:
            pickle.dump({'rec': rec, 'prec': prec, 'ap': ap}, f)
    print('mAP = {:.4f}'.format(np.mean(aps)))
    print('~~~~~~~~')
    print('Results:')
    for ap in aps:
        print('{:.3f}'.format(ap))
    print('{:.3f}'.format(np.mean(aps)))
    print('~~~~~~~~')

if __name__ == '__main__':
    args = parse_args()
    output_dir = os.path.abspath(args.output_dir[0])
    with open(args.class_file, 'r') as f:
        lines = f.readlines()
    classes = [t.strip('\n') for t in lines]
    print('Evaluating detections')
    do_python_eval(args.voc_dir, args.year, args.image_set, classes, output_dir)
$ python reval_voc.py --voc_dir ./VOCdevkit --year 2007 --image_set val --classes ./data/voc.names valformodel # dataset folder path; VOC year; the image-set list to evaluate (txt name, no extension); class-file path; output folder
This prints the AP of each class as well as the mAP.
Note: adjust lr and the other hyperparameters promptly according to the loss and mAP, and continue training from the existing weights by going back to step 4 of this article. Once the validation loss and mAP meet your requirements, move on to step 7.
Modify the test_detector function in examples/detector.c so that it can batch-test every image listed in a txt file and save the annotated results:
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
{
    list *options = read_data_cfg(datacfg);
    char *name_list = option_find_str(options, "names", "data/names.list");
    char **names = get_labels(name_list);

    image **alphabet = load_alphabet();
    network *net = load_network(cfgfile, weightfile, 0);
    set_batch_network(net, 1);
    srand(2222222);
    double time;
    char buff[256];
    char *input = buff;
    float nms=.45;
    int i=0;
    while(1){
        if(filename){
            strncpy(input, filename, 256);
            image im = load_image_color(input,0,0);
            image sized = letterbox_image(im, net->w, net->h);
            //image sized = resize_image(im, net->w, net->h);
            //image sized2 = resize_max(im, net->w);
            //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
            //resize_network(net, sized.w, sized.h);
            layer l = net->layers[net->n-1];

            float *X = sized.data;
            time=what_time_is_it_now();
            network_predict(net, X);
            printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
            int nboxes = 0;
            detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
            //printf("%d\n", nboxes);
            //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
            if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
            draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
            free_detections(dets, nboxes);
            if(outfile)
            {
                save_image(im, outfile);
            }
            else{
                save_image(im, "predictions");
#ifdef OPENCV
                //make_window("predictions", 512, 512, 0);
                //show_image(im, "predictions", 0);
#endif
            }
            free_image(im);
            free_image(sized);
            if (filename) break;
        }
        else {
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            strtok(input, "\n");

            list *plist = get_paths(input);
            char **paths = (char **)list_to_array(plist);
            printf("Start Testing!\n");
            int m = plist->size;
            if(access("/root/project/Accident_Detection20200618/darknet/out",0)==-1)  // change to your own output path
            {
                if (mkdir("/root/project/Accident_Detection20200618/darknet/out",0777))  // change to your own output path
                {
                    printf("failed to create the output folder!\n");
                }
            }
            for(i = 0; i < m; ++i){
                char *path = paths[i];
                image im = load_image_color(path,0,0);
                image sized = letterbox_image(im, net->w, net->h);
                //image sized = resize_image(im, net->w, net->h);
                //image sized2 = resize_max(im, net->w);
                //image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
                //resize_network(net, sized.w, sized.h);
                layer l = net->layers[net->n-1];

                float *X = sized.data;
                time=what_time_is_it_now();
                network_predict(net, X);
                printf("Try Very Hard:");
                printf("%s: Predicted in %f seconds.\n", path, what_time_is_it_now()-time);
                int nboxes = 0;
                detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
                //printf("%d\n", nboxes);
                //if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
                if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
                draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
                free_detections(dets, nboxes);
                if(outfile){
                    save_image(im, outfile);
                }
                else{
                    char b[2048];
                    sprintf(b,"/root/project/Accident_Detection20200618/darknet/out/%s",GetFilename(path));  // change to your own output path
                    save_image(im, b);
                    printf("save %s successfully!\n",GetFilename(path));
#ifdef OPENCV
                    // make_window("predictions", 512, 512, 0);
                    // show_image(im, "predictions", 0);
#endif
                }
                free_image(im);
                free_image(sized);
                if (filename) break;
            }
        }
    }
}
#include "darknet.h"
#include
#include
#include
#include
static int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};
char *GetFilename(char *p)
{
    static char name[20]={""};
    char *q = strrchr(p,'/') + 1;
    strncpy(name, q, 6);  // change 6 to the length of your test images' file names (without the extension)
    name[6] = '\0';       // terminate the string; keep this index in sync with the length above
    return name;
}
$ make clean
$ make
Then modify cfg/voc.data for testing:
classes= 2
train = /darknet/2007_train.txt
test = /darknet/2007_test.txt  # path to the test-set txt file
names = data/voc.names
backup = backup
$ ./darknet detector test cfg/voc.data cfg/yolov3-voc.cfg backup/yolov3-voc_final.weights
layer filters size input output
0 conv 32 3 x 3 / 1 416 x 416 x 3 -> 416 x 416 x 32 0.299 BFLOPs
1 conv 64 3 x 3 / 2 416 x 416 x 32 -> 208 x 208 x 64 1.595 BFLOPs
.......
104 conv 256 3 x 3 / 1 52 x 52 x 128 -> 52 x 52 x 256 1.595 BFLOPs
105 conv 255 1 x 1 / 1 52 x 52 x 256 -> 52 x 52 x 255 0.353 BFLOPs
106 detection
Loading weights from yolov3.weights...Done!
Enter Image Path: at the prompt, enter the path of the test-set txt file from above (e.g. /darknet/2007_test.txt)
All the annotated images will be saved in the out folder specified in the code above.