1、首先从darknet.c中的主函数开始运行。darknet.c中包含多种功能函数,包括目标检测、语义分割等,这里主要讲解与YOLOv1相关的代码,也就是目标检测代码,如下所示:
/*
 * Print a diagnostic and return 1 when fewer than `needed` command-line
 * entries were supplied for sub-command `option`; return 0 otherwise.
 * Used to keep atoi() from being handed a missing (NULL or out-of-range)
 * argv entry.
 */
static int missing_args(int argc, int needed, const char *option)
{
    if(argc >= needed) return 0;
    fprintf(stderr, "Not enough arguments for option: %s\n", option);
    return 1;
}

/*
 * Program entry point: picks the GPU to use, then dispatches on argv[1]
 * to one of the sub-command functions. Returns 0 in every case, including
 * usage errors and unknown sub-commands.
 */
int main(int argc, char **argv)
{
    // Without a sub-command there is nothing to dispatch on: print the
    // usage line (argv[0] is the program name as invoked) and stop.
    if(argc < 2){
        fprintf(stderr, "usage: %s \n" , argv[0]);
        return 0;
    }

    // "-i <n>" selects the GPU index (default 0); "-nogpu" forces -1.
    gpu_index = find_int_arg(argc, argv, "-i", 0);
    if(find_arg(argc, argv, "-nogpu")) {
        gpu_index = -1;
    }

    // Built without GPU support: always -1. Built with GPU support: bind
    // the selected device unless the GPU was disabled above.
#ifndef GPU
    gpu_index = -1;
#else
    if(gpu_index >= 0){
        cuda_set_device(gpu_index);
    }
#endif

    // Dispatch on the sub-command name. The object-detection path
    // discussed in these notes is "detector" -> run_detector().
    if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "super")){
        run_super(argc, argv);
    } else if (0 == strcmp(argv[1], "lsd")){
        run_lsd(argc, argv);
    } else if (0 == strcmp(argv[1], "detector")){
        run_detector(argc, argv); // entry into run_detector
    } else if (0 == strcmp(argv[1], "detect")){
        // Shortcut for running the detector test with the COCO data file.
        float thresh = find_float_arg(argc, argv, "-thresh", .5);
        char *filename = (argc > 4) ? argv[4]: 0;
        char *outfile = find_char_arg(argc, argv, "-out", 0);
        int fullscreen = find_arg(argc, argv, "-fullscreen");
        test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
    } else if (0 == strcmp(argv[1], "cifar")){
        run_cifar(argc, argv);
    } else if (0 == strcmp(argv[1], "go")){
        run_go(argc, argv);
    } else if (0 == strcmp(argv[1], "rnn")){
        run_char_rnn(argc, argv);
    } else if (0 == strcmp(argv[1], "coco")){
        run_coco(argc, argv);
    } else if (0 == strcmp(argv[1], "classify")){
        predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
    } else if (0 == strcmp(argv[1], "classifier")){
        run_classifier(argc, argv);
    } else if (0 == strcmp(argv[1], "regressor")){
        run_regressor(argc, argv);
    } else if (0 == strcmp(argv[1], "isegmenter")){
        run_isegmenter(argc, argv);
    } else if (0 == strcmp(argv[1], "segmenter")){
        run_segmenter(argc, argv);
    } else if (0 == strcmp(argv[1], "art")){
        run_art(argc, argv);
    } else if (0 == strcmp(argv[1], "tag")){
        run_tag(argc, argv);
    } else if (0 == strcmp(argv[1], "3d")){
        composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
    } else if (0 == strcmp(argv[1], "test")){
        test_resize(argv[2]);
    } else if (0 == strcmp(argv[1], "nightmare")){
        run_nightmare(argc, argv);
    } else if (0 == strcmp(argv[1], "rgbgr")){
        rgbgr_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "reset")){
        reset_normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "denormalize")){
        denormalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "statistics")){
        statistics_net(argv[2], argv[3]);
    } else if (0 == strcmp(argv[1], "normalize")){
        normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "rescale")){
        rescale_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "ops")){
        operations(argv[2]);
    } else if (0 == strcmp(argv[1], "speed")){
        speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
    } else if (0 == strcmp(argv[1], "oneoff")){
        oneoff(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "oneoff2")){
        // atoi(argv[5]) needs argv[5] to exist.
        if(!missing_args(argc, 6, argv[1])){
            oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
        }
    } else if (0 == strcmp(argv[1], "print")){
        // atoi(argv[4]) needs argv[4] to exist.
        if(!missing_args(argc, 5, argv[1])){
            print_weights(argv[2], argv[3], atoi(argv[4]));
        }
    } else if (0 == strcmp(argv[1], "partial")){
        // atoi(argv[5]) needs argv[5] to exist.
        if(!missing_args(argc, 6, argv[1])){
            partial(argv[2], argv[3], argv[4], atoi(argv[5]));
        }
    } else if (0 == strcmp(argv[1], "visualize")){
        visualize(argv[2], (argc > 3) ? argv[3] : 0);
    } else if (0 == strcmp(argv[1], "mkimg")){
        // atoi() is applied to argv[4..6]; argv[7] is passed through as-is.
        if(!missing_args(argc, 7, argv[1])){
            mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
        }
    } else if (0 == strcmp(argv[1], "imtest")){
        test_resize(argv[2]);
    } else if (0 == strcmp(argv[1], "mseg")) {
        run_mseg(argc, argv);
    } else {
        fprintf(stderr, "Not an option: %s\n", argv[1]);
    }
    return 0;
}
2、从主函数跳转到run_detector函数
/*
 * Entry point of the "detector" sub-command.
 *
 * Reads the optional flags, then the positional arguments
 *   argv[2] = action (test/train/valid/valid2/recall/demo)
 *   argv[3] = data cfg path
 *   argv[4] = network cfg path
 *   argv[5] = weights path (optional)
 *   argv[6] = input file name (optional)
 * and calls the function matching the action. An unrecognised action is
 * silently ignored.
 */
void run_detector(int argc, char **argv)
{
    // Optional "-prefix" string; 0 when absent.
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    // Output threshold; the default used here is .5.
    float thresh = find_float_arg(argc, argv, "-thresh", .5);
    // "-hier" threshold, default .5.
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    // Camera index ("-c"), default 0; forwarded to demo().
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    // Frame skip ("-s"), default 0; forwarded to demo().
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    // "-avg", default 3; forwarded to demo().
    int avg = find_int_arg(argc, argv, "-avg", 3);

    // argv[3] (data cfg) and argv[4] (network cfg) are both read below, so
    // at least five entries are required.
    if(argc < 5){
        fprintf(stderr, "usage: %s %s [train/test/valid] [data] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    // "-gpus 0,1,2" selects several GPUs as a comma-separated list.
    char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
    // Optional output file name.
    char *outfile = find_char_arg(argc, argv, "-out", 0);

    int *gpus = 0;
    int *gpus_heap = 0;   // non-NULL only when the list below was calloc'ed
    int gpu = 0;
    int ngpus = 0;
    if(gpu_list){
        printf("%s\n", gpu_list);
        int len = strlen(gpu_list);
        int i;
        // One entry more than the number of commas.
        ngpus = 1;
        for(i = 0; i < len; ++i){
            if (gpu_list[i] == ',') ++ngpus;
        }
        gpus_heap = calloc(ngpus, sizeof *gpus_heap);
        if(!gpus_heap){
            fprintf(stderr, "run_detector: out of memory\n");
            return;
        }
        gpus = gpus_heap;
        for(i = 0; i < ngpus; ++i){
            gpus[i] = atoi(gpu_list);
            // The last entry has no trailing comma; stop there instead of
            // doing arithmetic on a NULL pointer.
            char *comma = strchr(gpu_list, ',');
            if(!comma) break;
            gpu_list = comma + 1;
        }
    } else {
        // No list given: use the single globally selected GPU index.
        gpu = gpu_index;
        gpus = &gpu;
        ngpus = 1;
    }

    int clear = find_arg(argc, argv, "-clear");
    int fullscreen = find_arg(argc, argv, "-fullscreen");
    int width = find_int_arg(argc, argv, "-w", 0);
    int height = find_int_arg(argc, argv, "-h", 0);
    // Frame rate ("-fps"), default 0; forwarded to demo().
    int fps = find_int_arg(argc, argv, "-fps", 0);

    char *datacfg = argv[3];
    char *cfg = argv[4];
    char *weights = (argc > 5) ? argv[5] : 0;
    char *filename = (argc > 6) ? argv[6]: 0;

    // Dispatch on the action name in argv[2].
    if(0==strcmp(argv[2], "test")) {
        test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
    } else if(0==strcmp(argv[2], "train")) {
        train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
    } else if(0==strcmp(argv[2], "valid")) {
        validate_detector(datacfg, cfg, weights, outfile);
    } else if(0==strcmp(argv[2], "valid2")) {
        validate_detector_flip(datacfg, cfg, weights, outfile);
    } else if(0==strcmp(argv[2], "recall")) {
        validate_detector_recall(cfg, weights);
    } else if(0==strcmp(argv[2], "demo")) {
        // The demo needs the class count and label names from the data cfg.
        list *options = read_data_cfg(datacfg);
        int classes = option_find_int(options, "classes", 20);
        char *name_list = option_find_str(options, "names", "data/names.list");
        char **names = get_labels(name_list);
        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
    }

    // Release the GPU list if it was heap-allocated (free(NULL) is a no-op).
    free(gpus_heap);
}
3、这里先选择训练train_detector
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int ngpus, int clear)
{
list *options = read_data_cfg(datacfg);//读取datacfg文件,具体可以查看博客(https://blog.csdn.net/m0_37799466/article/details/105956850)
//获取训练图像文件和模型保存地址
char *train_images = option_find_str(options, "train", "data/train.list");
char *backup_directory = option_find_str(options, "backup", "/backup/");
srand(time(0));//用当前时间来设定rand函数所用的随机数产生演算法的种子值
char *base = basecfg(cfgfile);//获取cfg文件名
printf("%s\n", base);
float avg_loss = -1;
network **nets = calloc(ngpus, sizeof(network));//根据GPU数量申请内存空间,申请后,对空间逐一进行初始化,并设置值为0;
srand(time(0));
int seed = rand();
int i;
for(i = 0; i < ngpus; ++i)
{
srand(seed);
#ifdef GPU
cuda_set_device(gpus[i]);//根据GPU数量,对每个GPU进行cuda编程设置
#endif
nets[i] = load_network(cfgfile, weightfile, clear);//为每一个GPU加载网络模型,具体参考[博客](https://blog.csdn.net/m0_37799466/article/details/105979739)
nets[i]->learning_rate *= ngpus;//学习率需要根据GPU数量进行调节,依次在每个GPU上增大GPU数量相同的倍数;
}
srand(time(0));
network *net = nets[0];//令network指针net指向第一个GPU中的网络,如果只有一个GPU则只有nets[0];
int imgs = net->batch * net->subdivisions * ngpus;//一个batch的图像数量,即所有的GPU上的图像,没有GPU上有subdivisions个min_batch
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net->learning_rate, net->momentum, net->decay);//输出学习率和衰减率
data train, buffer;//声明两个data结构体train和buffer,用来存放训练数据
/*
typedef struct{
int w, h;
matrix X;
matrix y;
int shallow;
int *num_boxes;
box **boxes;
} data;
*/
layer l = net->layers[net->n - 1];//定义一个layer结构体并使用网络最后一层进行初始化
int classes = l.classes;//类别数
float jitter = l.jitter;//通过抖动增加噪声来抑制过拟合
list *plist = get_paths(train_images);//定义list指针plist并令它指向保存训练图像的地址单元
/*在定义char指针时,其输出的指针地址就是char的字符,而其指针指向单元的变量则是其ASCII表对应的十进制值
且当指向数组是会将数组后面的字符串都输出打印出来
list *get_paths(char *filename)
{
char *path;
FILE *file = fopen(filename, "r");
if(!file) file_error(filename);
list *lines = make_list();
while((path=fgetl(file))){
list_insert(lines, path);
}
fclose(file);
return lines;
}
*/
//int N = plist->size;
char **paths = (char **)list_to_array(plist);//定义了一个指向char指针的指针,其char指针指向第一个训练图像地址
/*返回一个指向指针的指针
void **list_to_array(list *l)
{
void **a = calloc(l->size, sizeof(void*));
int count = 0;
node *n = l->front;
while(n){
a[count++] = n->val;
n = n->next;
}
return a;
}
*/
load_args args = get_base_args(net);//定义一个load_args结构体args用来保存训练参数
/*
load_args get_base_args(network *net)
{
load_args args = {0};定义一个load_args结构体args并初始化为0
args.w = net->w;设置网络输入图像宽高
args.h = net->h;
args.size = net->w;//设置图像尺寸
args.min = net->min_crop;设置裁剪
args.max = net->max_crop;
args.angle = net->angle;设置旋转角度
args.aspect = net->aspect;设置层
args.exposure = net->exposure;设置曝光度
args.center = net->center;设置中心
args.saturation = net->saturation;设置饱和度
args.hue = net->hue;设置色调
return args;
}
//load_args结构体
typedef struct load_args{
int threads;
char **paths;
char *path;
int n;
int m;
char **labels;
int h;
int w;
int out_w;
int out_h;
int nh;
int nw;
int num_boxes;
int min, max, size;
int classes;
int background;
int scale;
int center;
int coords;
float jitter;
float angle;
float aspect;
float saturation;
float exposure;
float hue;
data *d;
image *im;
image *resized;
data_type type;
tree *hierarchy;
} load_args;
*/
args.coords = l.coords;//设置定位损失项前项因子
args.paths = paths;//设置训练图像地址
args.n = imgs;//设置多少图像更新一次参数
args.m = plist->size;//所有的训练图像数量
args.classes = classes;//设置类别
args.jitter = jitter;//设置噪声
args.num_boxes = l.max_boxes;//表示一张图像中允许的最大的检测框的数量,yolo1时需要设置为7
args.d = &buffer;//令指针指向buffer
args.type = DETECTION_DATA;//设置加载数据方式,yolo1时需要设置为REGION_DATA
//args.type = INSTANCE_DATA;
args.threads = 64;//设置线程
pthread_t load_thread = load_data(args);//加载训练数据,具体参考[博客](https://blog.csdn.net/m0_37799466/article/details/106102059)
double time;//声明时间
int count = 0;//定义计数并初始化为0
//while(i*imgs < N*120){
while(get_current_batch(net) < net->max_batches){
if(l.random && count++%10 == 0){//yolo1不会执行,yolo3时会使用
printf("Resizing\n");
int dim = (rand() % 10 + 10) * 32;
if (get_current_batch(net)+200 > net->max_batches) dim = 608;
//int dim = (rand() % 4 + 16) * 32;
printf("%d\n", dim);
args.w = dim;
args.h = dim;
pthread_join(load_thread, 0);
train = buffer;
free_data(train);
load_thread = load_data(args);
#pragma omp parallel for
for(i = 0; i < ngpus; ++i){
resize_network(nets[i], dim, dim);
}
net = nets[0];
}
time=what_time_is_it_now();//当前时间
pthread_join(load_thread, 0);//等待一个线程的结束,线程间同步的操作
train = buffer;//将buffer保存的训练数据赋给train,所有线程加载的数据,即一个大的batch
load_thread = load_data(args);//继续加载数据
printf("Loaded: %lf seconds\n", what_time_is_it_now()-time);
time=what_time_is_it_now();
float loss = 0;//定义损失值并初始化为0
#ifdef GPU
if(ngpus == 1){//根据GPU数量选择训练函数
loss = train_network(net, train);//核心函数,训练网络函数,单GPU使用,这里着重讲解这个[博客](https://blog.csdn.net/m0_37799466/article/details/106192940)
} else {
loss = train_networks(nets, ngpus, train, 4);//多GPU训练
}
#else
loss = train_network(net, train);//使用CPU训练时运行
#endif
if (avg_loss < 0) avg_loss = loss;//如果平均损失小于0,则将损失值赋给它
avg_loss = avg_loss*.9 + loss*.1;//平均损失=上批训练数据平均损失值*0.9+这批训练数据损失值*0.1;avg_loss初始值为-1
i = get_current_batch(net);// 获取当前的迭代次数
printf("%ld: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), loss, avg_loss, get_current_rate(net), what_time_is_it_now()-time, i*imgs);
if(i%100==0){//每100次保存一次权重
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s.backup", backup_directory, base);
save_weights(net, buff);//保存权重文件
}
if(i%10000==0 || (i < 1000 && i%100 == 0)){
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
save_weights(net, buff);
}
free_data(train);
}
#ifdef GPU
if(ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, "%s/%s_final.weights", backup_directory, base);
save_weights(net, buff);
}