首先下载源码,源码下载地址
由于本人是边看代码边记录,可能会有理解不对的地方,我会在看完整个源码之后,对博客内容进行修正。
Darknet代码详解(以darknet目录为相对目录):
入口:examples/darknet.c
int main(int argc, char **argv)
{
//test_resize("data/bad.jpg");
//test_box();
//test_convolutional_layer();
if(argc < 2){
fprintf(stderr, "usage: %s \n" , argv[0]);
return 0;
}
gpu_index = find_int_arg(argc, argv, "-i", 0);
if(find_arg(argc, argv, "-nogpu")) {
gpu_index = -1;
}
#ifndef GPU
gpu_index = -1;
#else
if(gpu_index >= 0){
cuda_set_device(gpu_index);
}
#endif
if (0 == strcmp(argv[1], "average")){
average(argc, argv);
} else if (0 == strcmp(argv[1], "yolo")){
run_yolo(argc, argv);
} else if (0 == strcmp(argv[1], "super")){
run_super(argc, argv);
} else if (0 == strcmp(argv[1], "lsd")){
run_lsd(argc, argv);
} else if (0 == strcmp(argv[1], "detector")){
run_detector(argc, argv);
} else if (0 == strcmp(argv[1], "detect")){
//获取threshold值
float thresh = find_float_arg(argc, argv, "-thresh", .24);
//获取输入文件名
char *filename = (argc > 4) ? argv[4]: 0;
//获取输出文件名
char *outfile = find_char_arg(argc, argv, "-out", 0);
//是否全屏展示
int fullscreen = find_arg(argc, argv, "-fullscreen");
test_detector("cfg/coco.data", argv[2], argv[3], filename, thresh, .5, outfile, fullscreen);
} else if (0 == strcmp(argv[1], "cifar")){
run_cifar(argc, argv);
} else if (0 == strcmp(argv[1], "go")){
run_go(argc, argv);
} else if (0 == strcmp(argv[1], "rnn")){
run_char_rnn(argc, argv);
} else if (0 == strcmp(argv[1], "coco")){
run_coco(argc, argv);
} else if (0 == strcmp(argv[1], "classify")){
predict_classifier("cfg/imagenet1k.data", argv[2], argv[3], argv[4], 5);
} else if (0 == strcmp(argv[1], "classifier")){
run_classifier(argc, argv);
} else if (0 == strcmp(argv[1], "attention")){
run_attention(argc, argv);
} else if (0 == strcmp(argv[1], "regressor")){
run_regressor(argc, argv);
} else if (0 == strcmp(argv[1], "segmenter")){
run_segmenter(argc, argv);
} else if (0 == strcmp(argv[1], "art")){
run_art(argc, argv);
} else if (0 == strcmp(argv[1], "tag")){
run_tag(argc, argv);
} else if (0 == strcmp(argv[1], "3d")){
composite_3d(argv[2], argv[3], argv[4], (argc > 5) ? atof(argv[5]) : 0);
} else if (0 == strcmp(argv[1], "test")){
test_resize(argv[2]);
} else if (0 == strcmp(argv[1], "captcha")){
run_captcha(argc, argv);
} else if (0 == strcmp(argv[1], "nightmare")){
run_nightmare(argc, argv);
} else if (0 == strcmp(argv[1], "rgbgr")){
rgbgr_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "reset")){
reset_normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "denormalize")){
denormalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "statistics")){
statistics_net(argv[2], argv[3]);
} else if (0 == strcmp(argv[1], "normalize")){
normalize_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "rescale")){
rescale_net(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "ops")){
operations(argv[2]);
} else if (0 == strcmp(argv[1], "speed")){
speed(argv[2], (argc > 3 && argv[3]) ? atoi(argv[3]) : 0);
} else if (0 == strcmp(argv[1], "oneoff")){
oneoff(argv[2], argv[3], argv[4]);
} else if (0 == strcmp(argv[1], "oneoff2")){
oneoff2(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "partial")){
partial(argv[2], argv[3], argv[4], atoi(argv[5]));
} else if (0 == strcmp(argv[1], "average")){
average(argc, argv);
} else if (0 == strcmp(argv[1], "visualize")){
visualize(argv[2], (argc > 3) ? argv[3] : 0);
} else if (0 == strcmp(argv[1], "mkimg")){
mkimg(argv[2], argv[3], atoi(argv[4]), atoi(argv[5]), atoi(argv[6]), argv[7]);
} else if (0 == strcmp(argv[1], "imtest")){
test_resize(argv[2]);
} else {
fprintf(stderr, "Not an option: %s\n", argv[1]);
}
return 0;
}
从main()函数可以看出,darknet支持很多参数,这篇主要讲述detect函数的用法
detect是使用模型进行预测的函数,比如用模型测试某张具体图片,命令如下:
./darknet detect cfg/yolo.cfg yolo.weights data/horses.jpg
下面就开始讲解test_detector()函数(写的可能比较乱,大家有不理解的可以留言)
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
{
//读取cfg文件(类似ini文件key=value),存储于双向链表
list *options = read_data_cfg(datacfg);
//从option中查找names对应的value,如没有,使用默认值“data/names.list”
char *name_list = option_find_str(options, "names", "data/names.list");
//获取所有类别名称,对应xml中定义的分类
char **names = get_labels(name_list);
//从data/labels/下加载ASCII码32-127的8种尺寸的图片,后边显示标签用。
image **alphabet = load_alphabet();
//加载之前训练的超参(权重等),这个函数之后详解
network *net = load_network(cfgfile, weightfile, 0);
//设置每层batch为1
set_batch_network(net, 1);
//srand函数是随机数发生器的初始化函数
srand(2222222);
double time;
char buff[256];
char *input = buff;
int j;
float nms=.3;
while(1){
if(filename){
strncpy(input, filename, 256);
} else {
printf("Enter Image Path: ");
fflush(stdout);
input = fgets(input, 256, stdin);
if(!input) return;
strtok(input, "\n");
}
//加载图片,默认当做彩色处理
image im = load_image_color(input,0,0);
//调整图片尺寸
image sized = letterbox_image(im, net->w, net->h);
//image sized = resize_image(im, net->w, net->h);
//image sized2 = resize_max(im, net->w);
//image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
//resize_network(net, sized.w, sized.h);
layer l = net->layers[net->n-1];
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float *));
float **masks = 0;
if (l.coords > 4){
masks = calloc(l.w*l.h*l.n, sizeof(float*));
for(j = 0; j < l.w*l.h*l.n; ++j) masks[j] = calloc(l.coords-4, sizeof(float *));
}
float *X = sized.data;
time=what_time_is_it_now();
//预测
network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
get_region_boxes(l, im.w, im.h, net->w, net->h, thresh, probs, boxes, masks, 0, 0, hier_thresh, 1);
//if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
//画预测结果
draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, masks, names, alphabet, l.classes);
//保存标记了预测标签的图片
if(outfile){
save_image(im, outfile);
}
else{
save_image(im, "predictions");
#ifdef OPENCV
cvNamedWindow("predictions", CV_WINDOW_NORMAL);
if(fullscreen){
cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
}
show_image(im, "predictions");
cvWaitKey(0);
cvDestroyAllWindows();
#endif
}
free_image(im);
free_image(sized);
free(boxes);
free_ptrs((void **)probs, l.w*l.h*l.n);
if (filename) break;
}
}
下面详细讲解load_network
network *load_network(char *cfg, char *weights, int clear)
{
//解析yolo.cfg文件(主要是解析网络模型的结构,包括网络的层数,每层网络的参数类型,参数)
network *net = parse_network_cfg(cfg);
if(weights && weights[0] != 0){
//加载预训练的权重。
load_weights(net, weights);
}
if(clear) (*net->seen) = 0;
return net;
}
下面详细讲解parse_network_cfg
network *parse_network_cfg(char *filename)
{
//读取网络模型的结构,存于双向链表中
list *sections = read_cfg(filename);
node *n = sections->front;
if(!n) error("Config file has no sections");
//根据配置文件中参数创建网络模型
network *net = make_network(sections->size - 1);
net->gpu_index = gpu_index;
size_params params;
section *s = (section *)n->val;
list *options = s->options;
if(!is_network(s)) error("First section must be [net] or [network]");
parse_net_options(options, net);
params.h = net->h;
params.w = net->w;
params.c = net->c;
params.inputs = net->inputs;
params.batch = net->batch;
params.time_steps = net->time_steps;
params.net = net;
size_t workspace_size = 0;
n = n->next;
int count = 0;
free_section(s);
fprintf(stderr, "layer filters size input output\n");
while(n){
params.index = count;
fprintf(stderr, "%5d ", count);
s = (section *)n->val;
options = s->options;
layer l = {0};
LAYER_TYPE lt = string_to_layer_type(s->type);
if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params);
}else if(lt == DECONVOLUTIONAL){
l = parse_deconvolutional(options, params);
}else if(lt == LOCAL){
l = parse_local(options, params);
}else if(lt == ACTIVE){
l = parse_activation(options, params);
}else if(lt == RNN){
l = parse_rnn(options, params);
}else if(lt == GRU){
l = parse_gru(options, params);
}else if (lt == LSTM) {
l = parse_lstm(options, params);
}else if(lt == CRNN){
l = parse_crnn(options, params);
}else if(lt == CONNECTED){
l = parse_connected(options, params);
}else if(lt == CROP){
l = parse_crop(options, params);
}else if(lt == COST){
l = parse_cost(options, params);
}else if(lt == REGION){
l = parse_region(options, params);
}else if(lt == DETECTION){
l = parse_detection(options, params);
}else if(lt == SOFTMAX){
l = parse_softmax(options, params);
net->hierarchy = l.softmax_tree;
}else if(lt == NORMALIZATION){
l = parse_normalization(options, params);
}else if(lt == BATCHNORM){
l = parse_batchnorm(options, params);
}else if(lt == MAXPOOL){
l = parse_maxpool(options, params);
}else if(lt == REORG){
l = parse_reorg(options, params);
}else if(lt == AVGPOOL){
l = parse_avgpool(options, params);
}else if(lt == ROUTE){
l = parse_route(options, params, net);
}else if(lt == SHORTCUT){
l = parse_shortcut(options, params, net);
}else if(lt == DROPOUT){
l = parse_dropout(options, params);
l.output = net->layers[count-1].output;
l.delta = net->layers[count-1].delta;
#ifdef GPU
l.output_gpu = net->layers[count-1].output_gpu;
l.delta_gpu = net->layers[count-1].delta_gpu;
#endif
}else{
fprintf(stderr, "Type not recognized: %s\n", s->type);
}
l.truth = option_find_int_quiet(options, "truth", 0);
l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0);
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
l.smooth = option_find_float_quiet(options, "smooth", 0);
option_unused(options);
net->layers[count] = l;
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
free_section(s);
n = n->next;
++count;
if(n){
params.h = l.out_h;
params.w = l.out_w;
params.c = l.out_c;
params.inputs = l.outputs;
}
}
free_list(sections);
layer out = get_network_output_layer(net);
net->outputs = out.outputs;
net->truths = out.outputs;
if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths;
net->output = out.output;
net->input = calloc(net->inputs*net->batch, sizeof(float));
net->truth = calloc(net->truths*net->batch, sizeof(float));
#ifdef GPU
net->output_gpu = out.output_gpu;
net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch);
net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch);
#endif
if(workspace_size){
//printf("%ld\n", workspace_size);
#ifdef GPU
if(gpu_index >= 0){
net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
}else {
net->workspace = calloc(1, workspace_size);
}
#else
net->workspace = calloc(1, workspace_size);
#endif
}
return net;
}