目录
模型训练概览
网络结构文件加载
预训练权重文件加载
数据加载
数据增强
网络训练
前段时间在darknet框架上训练了tiny-yolov2,对darknet这个C语言框架有了些自己的认识,遂记录于此。darknet在github上的地址:https://github.com/pjreddie/darknet ,darknet官网:https://pjreddie.com/darknet/ 。官网上有darknet的编译方法以及yolo系列所有模型的详细介绍。
为叙述方便,后面我把darknet根目录用root代替。darknet项目从github下载并编译成功后,会在root目录生成darknet可执行文件。root/examples/darknet.c的main函数是程序的执行入口,它将传入的参数argv[1]与字符串比较后进入run_detector函数。run_detector主要负责解析我们传入的各项参数,诸如gpu、datacfg(数据配置文件)、cfg(网络结构文件)、weights(预训练权重文件)等,再根据我们传入的"train"参数进入train_detector函数:
// Fragment of the command dispatcher: positional arguments are picked out of
// argv and the sub-command in argv[2] selects which detector routine runs.
// argv[3] = data config file, argv[4] = network cfg file.
char *datacfg = argv[3];
char *cfg = argv[4];
// argv[5] (weights) and argv[6] (input file name) are optional; 0 when absent.
char *weights = (argc > 5) ? argv[5] : 0;
char *filename = (argc > 6) ? argv[6]: 0;
// Exactly one branch runs; an unrecognized sub-command falls through silently.
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) {
// The demo needs the class count and label names, both looked up from the
// data config (defaults: 20 classes, "data/names.list").
list *options = read_data_cfg(datacfg);
int classes = option_find_int(options, "classes", 20);
char *name_list = option_find_str(options, "names", "data/names.list");
char **names = get_labels(name_list);
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
}
模型训练概览
train_detector里包含了模型训练的所有步骤,包括:解析cfg网络结构文件、加载预训练模型、多线程加载数据、模型训练(前向计算、反向传播、参数更新)、模型保存。
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
list *options = read_data_cfg(datacfg);//解析数据配置文件
char *train_images = option_find_str(options, "train", "data/train.list");//获取训练数据目录
char *backup_directory = option_find_str(options, "backup", "/backup/");//获取模型保存目录
srand(time(0));
char *base = basecfg(cfgfile);
printf("%s\n", base);
float avg_loss = -1;
network **nets = calloc(ngpus, sizeof(network));
srand(time(0));
int seed = rand();
int i;
//多gpu训练,这个我没用过,我用单gpu训练,因此只迭代一次
for(i = 0; i < ngpus; ++i){
srand(seed);
#ifdef GPU
cuda_set_device(gpus[i]);//设置训练时选用那块gpu
#endif
//解析cfg文件创建网络结构,如果weightfile不为NULL,就用这个预训练权重文件初始化网络模型参数
nets[i] = load_network(cfgfile, weightfile, clear);
nets[i]->learning_rate *= ngpus;
}
srand(time(0));
network *net = nets[0];
//这里有个概念需要弄清楚,训练的batchsize和参数更新的batchsize不等同,考虑到gpu显存不够,
//比如每次只加载32张图片进行前向计算得到loss,重复这个过程8次并把8次的loss相加进行一次反向
//传播并更新模型权重。这个过程等同于一次加载256张图片进行训练。这里的net->batch就是举例的32
//net->subdivisions相当于举例的8,我的gpu只用了1块,所以ngpus=1
int imgs = net->batch * net->subdivisions * ngpus;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);
data train, buffer;
layer l = net->layers[net->n - 1];//取出最后一层(检测层)
int classes = l.classes;//目标分类的类别数
float jitter = l.jitter;//后续数据增强会用到
list *plist = get_paths(train_images);
//int N = plist->size;
char **paths = (char **)list_to_array(plist);
load_args args = get_base_args(net);
args.coords = l.coords;//坐标点个数(xmin,ymin,xmax,ymax)
args.paths = paths;//训练及所有图片路径
args.n = imgs;//batchsize
args.m = plist->size;//训练集大小
args.classes = classes;
args.jitter = jitter;
args.num_boxes = l.max_boxes;//feature map每个cell预测的候选框个数
args.d = &buffer;
args.type = DETECTION_DATA;
//args.type = INSTANCE_DATA;
args.threads = 64;//默认开多线程读取数据
pthread_t load_thread = load_data(args);
double time;
int count = 0;
//while(i*imgs < N*120){
while(get_current_batch(net) < net->max_batches){
if(l.random && count++%10 == 0){
printf("Resizing\n");
int dim = (rand() % 10 + 10) * 32;
if (get_current_batch(net)+200 > net->max_batches) dim = 608;
//int dim = (rand() % 4 + 16) * 32;
printf("%d\n", dim);
args.w = dim;
args.h = dim;
pthread_join(load_thread, 0);
train = buffer;
free_data(train);
load_thread = load_data(args);
#pragma omp parallel for
for(i = 0; i < ngpus; ++i){
resize_network(nets[i], dim, dim);
}
net = nets[0];
}
time=what_time_is_it_now();
pthread_join(load_thread, 0);//等待多线程退出
train = buffer;
load_thread = load_data(args);
/*
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
if(!b.x) break;
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
}
*/
/*
int zz;
for(zz = 0; zz < train.X.cols; ++zz){
image im = float_to_image(net->w, net->h, 3, train.X.vals[zz]);
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[zz] + k*5, 1);
printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
draw_bbox(im, b, 1, 1,0,0);
}
show_image(im, "truth11");
cvWaitKey(0);
save_image(im, "truth11");
}
*/
printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
time=what_time_is_it_now();
float loss = 0;
//train_networks:网络训练函数
#ifdef GPU
if(ngpus == 1){
loss = train_network(net, train);
} else {
loss = train_networks(nets, ngpus, train, 4);
}
#else
loss = train_network(net, train);
#endif
//动量法计算损失值
if (avg_loss < 0) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1;
//获取当前批次号,打印训练结果
i = get_current_batch(net);
printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs);
if(i%100==0){
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s.backup", backup_directory, base);
save_weights(net, buff);
}
if(i%10000==0 || (i < 1000 && i%100 == 0)){
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
}
free_data(train);
}
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
//保存模型
save_weights(net, buff);
}
网络结构文件加载
load_network函数包括两部分:一部分是parse_network_cfg函数,它主要负责解析cfg文件,根据cfg文件构建深度学习网络,并根据cfg文件的键值对初始化一些变量,指定各网络层的前向计算函数、后向计算函数、权值更新函数;另一部分是load_weights函数,稍后再说。
/*
 * Build a network from a cfg file and optionally initialize it from a
 * weights file.
 *
 * cfg     - path to the network structure file
 * weights - path to a pretrained weights file; skipped when null or empty
 * clear   - when non-zero, the counter behind net.seen is reset to 0
 */
network load_network(char *cfg, char *weights, int clear)
{
    network net = parse_network_cfg(cfg);

    /* Only load weights when a non-empty file name was supplied. */
    int has_weights = weights && weights[0] != 0;
    if (has_weights) {
        load_weights(&net, weights);
    }

    if (clear) {
        *net.seen = 0;
    }
    return net;
}
进入到parse_network_cfg函数里面,会根据层类别不一样做不同的初始化工作,需要注意的是不同层里面一般都有个make_***_layer函数,在这里对一些变量进行了随机初始化,如卷积层初始化了卷积核W和偏置B,还指定了他们的前向、后向计算函数,权值更新函数。
// Build a network from the cfg file `filename`.
// The first section must be [net]/[network] and supplies network-wide
// settings; every following section is parsed into one layer. After all
// layers are parsed, the network's input/truth buffers and the shared
// workspace are allocated. Returns the populated network by value.
network parse_network_cfg(char *filename)
{
// read_cfg yields a list of sections; each section in turn holds a list of
// its options (two-level list: sections, then per-section key/values).
list *sections = read_cfg(filename);
node *n = sections->front;
if(!n) error("Config file has no sections");
// The first section is the [net] header, not a layer, hence size - 1.
network net = make_network(sections->size - 1);
net.gpu_index = gpu_index;
size_params params;
section *s = (section *)n->val;
list *options = s->options;
if(!is_network(s)) error("First section must be [net] or [network]");
// Fill network-wide settings from the first section; the values read from
// net just below (h, w, c, inputs, batch, time_steps) seed the first layer.
parse_net_options(options, &net);
params.h = net.h;
params.w = net.w;
params.c = net.c;
params.inputs = net.inputs;
params.batch = net.batch;
params.time_steps = net.time_steps;// NOTE(review): the original note says this field only matters for "cnn" networks (it likely meant recurrent ones) and is ignored here -- confirm
params.net = net;
size_t workspace_size = 0;
// From the next section on, each section describes an actual layer.
n = n->next;
int count = 0;
free_section(s);
fprintf(stderr, "layer filters size input output\n");
// Dispatch on the section type and build the matching layer.
while(n){
params.index = count;
fprintf(stderr, "%5d ", count);
s = (section *)n->val;
options = s->options;
layer l = {0};
LAYER_TYPE lt = string_to_layer_type(s->type);
if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params);
}else if(lt == DECONVOLUTIONAL){
l = parse_deconvolutional(options, params);
}else if(lt == LOCAL){
l = parse_local(options, params);
}else if(lt == ACTIVE){
l = parse_activation(options, params);
}else if(lt == RNN){
l = parse_rnn(options, params);
}else if(lt == GRU){
l = parse_gru(options, params);
}else if (lt == LSTM) {
l = parse_lstm(options, params);
}else if(lt == CRNN){
l = parse_crnn(options, params);
}else if(lt == CONNECTED){
l = parse_connected(options, params);
}else if(lt == CROP){
l = parse_crop(options, params);
}else if(lt == COST){
l = parse_cost(options, params);
}else if(lt == REGION){
l = parse_region(options, params);
}else if(lt == DETECTION){
l = parse_detection(options, params);
}else if(lt == SOFTMAX){
l = parse_softmax(options, params);
net.hierarchy = l.softmax_tree;
}else if(lt == NORMALIZATION){
l = parse_normalization(options, params);
}else if(lt == BATCHNORM){
l = parse_batchnorm(options, params);
}else if(lt == MAXPOOL){
l = parse_maxpool(options, params);
}else if(lt == REORG){
l = parse_reorg(options, params);
}else if(lt == AVGPOOL){
l = parse_avgpool(options, params);
}else if(lt == ROUTE){
l = parse_route(options, params, net);
}else if(lt == SHORTCUT){
l = parse_shortcut(options, params, net);
}else if(lt == DROPOUT){
l = parse_dropout(options, params);
// Dropout shares the previous layer's output and delta buffers.
// NOTE(review): with count == 0 this indexes net.layers[-1]; a cfg whose
// first layer is dropout would read out of bounds -- confirm it is rejected elsewhere.
l.output = net.layers[count-1].output;
l.delta = net.layers[count-1].delta;
#ifdef GPU
l.output_gpu = net.layers[count-1].output_gpu;
l.delta_gpu = net.layers[count-1].delta_gpu;
#endif
}else{
// NOTE(review): only a message is printed; the zero-initialized layer is
// still stored below and parsing continues.
fprintf(stderr, "Type not recognized: %s\n", s->type);
}
// Options that any layer type may carry; each falls back to the default given here.
l.truth = option_find_int_quiet(options, "truth", 0);
l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0);
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
l.smooth = option_find_float_quiet(options, "smooth", 0);
option_unused(options);
net.layers[count] = l;
// Track the largest workspace any layer asks for; one buffer of that size is allocated at the end.
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
free_section(s);
n = n->next;
++count;
if(n){
// The layer just parsed feeds the next one: its output dimensions become the next layer's input dimensions.
params.h = l.out_h;
params.w = l.out_w;
params.c = l.out_c;
params.inputs = l.outputs;
}
}
free_list(sections);
// Network-level output and truth sizes come from the output layer; the last
// layer's own `truths` value, when non-zero, overrides the truth size.
layer out = get_network_output_layer(net);
net.outputs = out.outputs;
net.truths = out.outputs;
if(net.layers[net.n-1].truths) net.truths = net.layers[net.n-1].truths;
net.output = out.output;
// Zeroed host buffers for one batch of inputs and one batch of truth values.
net.input = calloc(net.inputs*net.batch, sizeof(float));
net.truth = calloc(net.truths*net.batch, sizeof(float));
#ifdef GPU
net.output_gpu = out.output_gpu;
net.input_gpu = cuda_make_array(net.input, net.inputs*net.batch);
net.truth_gpu = cuda_make_array(net.truth, net.truths*net.batch);
#endif
if(workspace_size){
//printf("%ld\n", workspace_size);
#ifdef GPU
if(gpu_index >= 0){
// workspace_size is in bytes; convert to a float count, rounding up.
net.workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
}else {
net.workspace = calloc(1, workspace_size);
}
#else
net.workspace = calloc(1, workspace_size);
#endif
}
return net;
}
预训练权重文件加载
/*
 * Load weights from `filename` into `net`.
 * Thin wrapper over load_weights_upto() with start 0 and cutoff net->n.
 */
void load_weights(network *net, char *filename)
{
    /* Per the original note, the 4th argument restricts loading to layers
     * [0, net->n); load_weights_upto() then reads the file layer by layer
     * according to each layer's type. */
    load_weights_upto(net, filename, 0, net->n);
}
数据加载
在数据加载函数里开了个线程用于加载数据
/*
 * Start a background thread running load_threads() on a copy of `args`
 * and return its handle; the caller is expected to pthread_join() it.
 */
pthread_t load_data(load_args args)
{
    /* Heap copy of the arguments; load_threads() frees it after copying. */
    struct load_args *heap_args = calloc(1, sizeof(struct load_args));
    *heap_args = args;

    pthread_t tid;
    int rc = pthread_create(&tid, 0, load_threads, heap_args);
    if (rc) error("Thread creation failed");
    return tid;
}
然后根据传入的args.threads创建多个线程来加载数据
/*
 * Thread entry point: split one load request across args.threads worker
 * threads, wait for all of them, and merge their results into *args.d.
 *
 * ptr - heap-allocated load_args; copied and freed here.
 * Always returns 0.
 */
void *load_threads(void *ptr)
{
    /* Take a private copy of the arguments, then release the heap copy. */
    load_args args = *(load_args *)ptr;
    free(ptr);

    if (args.threads == 0) args.threads = 1;

    /* Remember the caller's destination and total; args.d / args.n are
     * overwritten per worker below. */
    data *dest = args.d;
    int total = args.n;

    data *parts = calloc(args.threads, sizeof(data));
    pthread_t *workers = calloc(args.threads, sizeof(pthread_t));

    int k;
    for (k = 0; k < args.threads; ++k) {
        args.d = &parts[k];
        /* Integer split of `total` into args.threads shares that sum back to `total`. */
        args.n = (k+1) * total/args.threads - k * total/args.threads;
        workers[k] = load_data_in_thread(args);
    }

    /* Every worker must finish before the pieces are merged. */
    for (k = 0; k < args.threads; ++k) {
        pthread_join(workers[k], 0);
    }

    /* Merge the per-worker chunks (data and labels) into the destination. */
    *dest = concat_datas(parts, args.threads);
    dest->shallow = 0;

    /* Mark each part shallow before freeing it. */
    for (k = 0; k < args.threads; ++k) {
        parts[k].shallow = 1;
        free_data(parts[k]);
    }

    free(workers);
    free(parts);
    return 0;
}
每个子线程里又开一个线程,我还没明白为什么要这么绕。。。
/*
 * Start one worker thread running load_thread() on a copy of `args`
 * and return its handle.
 */
pthread_t load_data_in_thread(load_args args)
{
    /* Heap copy of the arguments; load_thread() frees it when done. */
    struct load_args *heap_args = calloc(1, sizeof(struct load_args));
    *heap_args = args;

    pthread_t tid;
    int rc = pthread_create(&tid, 0, load_thread, heap_args);
    if (rc) error("Thread creation failed");
    return tid;
}
绕来绕去终于见到庐山真面目了,下面又是根据关键字匹配数据加载函数,我这里的type是DETECTION_DATA,跟进去看看
/*
 * Worker thread entry point: run the loader selected by a.type and store
 * its result through a.d (or a.im / a.resized for the single-image types).
 *
 * ptr - heap-allocated load_args; freed before returning.
 * Always returns 0. An a.type matching none of the cases loads nothing.
 */
void *load_thread(void *ptr)
{
    load_args a = *(struct load_args*)ptr;

    /* Zero exposure / saturation / aspect values are replaced by 1. */
    if(a.exposure == 0) a.exposure = 1;
    if(a.saturation == 0) a.saturation = 1;
    if(a.aspect == 0) a.aspect = 1;

    switch (a.type) {
    case OLD_CLASSIFICATION_DATA:
        *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
        break;
    case REGRESSION_DATA:
        *a.d = load_data_regression(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
        break;
    case CLASSIFICATION_DATA:
        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.center);
        break;
    case SUPER_DATA:
        *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
        break;
    case WRITING_DATA:
        *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
        break;
    case ISEG_DATA:
        *a.d = load_data_iseg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.scale, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
        break;
    case INSTANCE_DATA:
        *a.d = load_data_mask(a.n, a.paths, a.m, a.w, a.h, a.classes, a.num_boxes, a.coords, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
        break;
    case SEGMENTATION_DATA:
        *a.d = load_data_seg(a.n, a.paths, a.m, a.w, a.h, a.classes, a.min, a.max, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.scale);
        break;
    case REGION_DATA:
        *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
        break;
    case DETECTION_DATA:
        /* n: images to load, paths: all image paths, m: number of paths,
         * w/h: image width/height, classes: class count; the remaining
         * arguments drive data augmentation.
         * NOTE(review): the original note describes num_boxes as "boxes to
         * predict per feature-map cell" -- confirm against load_data_detection. */
        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
        break;
    case SWAG_DATA:
        *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
        break;
    case COMPARE_DATA:
        *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h);
        break;
    case IMAGE_DATA:
        *(a.im) = load_image_color(a.path, 0, 0);
        *(a.resized) = resize_image(*(a.im), a.w, a.h);
        break;
    case LETTERBOX_DATA:
        *(a.im) = load_image_color(a.path, 0, 0);
        *(a.resized) = letterbox_image(*(a.im), a.w, a.h);
        break;
    case TAG_DATA:
        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
        break;
    default:
        break;
    }

    free(ptr);
    return 0;
}
待续。。。