[TOC]
## 配置文件
#### coco.data
```
classes= 80
train = /home/pjreddie/data/coco/trainvalno5k.txt
valid = coco_testdev
#valid = data/coco_val_5k.list
names = data/coco.names
backup = /home/pjreddie/backup/
eval=coco
```
#### coco.names
```
person
bicycle
car
motorbike
aeroplane
bus
train
truck
boat
traffic light
fire hydrant
stop sign
parking meter
bench
bird
cat
dog
horse
sheep
cow
elephant
bear
zebra
giraffe
backpack
umbrella
handbag
tie
suitcase
frisbee
skis
snowboard
sports ball
kite
baseball bat
baseball glove
skateboard
surfboard
tennis racket
bottle
wine glass
cup
fork
knife
spoon
bowl
banana
apple
sandwich
orange
broccoli
carrot
hot dog
pizza
donut
cake
chair
sofa
pottedplant
bed
diningtable
toilet
tvmonitor
laptop
mouse
remote
keyboard
cell phone
microwave
oven
toaster
sink
refrigerator
book
clock
vase
scissors
teddy bear
hair drier
toothbrush
```
#### yolov2.cfg
```c
[net]
# Testing
batch=1
subdivisions=1
# Training
# batch=64
# subdivisions=8
width=608
height=608
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
[maxpool]
size=2
stride=2
...
[region]
anchors = 0.57273, 0.677385, 1.87446, 2.06253, 3.33843, 5.47434, 7.88282, 3.52778, 9.77052, 9.16828
bias_match=1
classes=80
coords=4
num=5
softmax=1
jitter=.3
rescore=1
object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1
absolute=1
thresh = .6
random=1
```
## test printscreen
liaoyu@liaoyu:~/darknet$ ./darknet detector test cfg/coco.data cfg/yolov2.cfg yolov2.weights data/person.jpg
```
layer filters size input output
0 conv 32 3 x 3 / 1 608 x 608 x 3 -> 608 x 608 x 32 0.639 BFLOPs
1 max 2 x 2 / 2 608 x 608 x 32 -> 304 x 304 x 32
2 conv 64 3 x 3 / 1 304 x 304 x 32 -> 304 x 304 x 64 3.407 BFLOPs
3 max 2 x 2 / 2 304 x 304 x 64 -> 152 x 152 x 64
4 conv 128 3 x 3 / 1 152 x 152 x 64 -> 152 x 152 x 128 3.407 BFLOPs
5 conv 64 1 x 1 / 1 152 x 152 x 128 -> 152 x 152 x 64 0.379 BFLOPs
6 conv 128 3 x 3 / 1 152 x 152 x 64 -> 152 x 152 x 128 3.407 BFLOPs
7 max 2 x 2 / 2 152 x 152 x 128 -> 76 x 76 x 128
8 conv 256 3 x 3 / 1 76 x 76 x 128 -> 76 x 76 x 256 3.407 BFLOPs
9 conv 128 1 x 1 / 1 76 x 76 x 256 -> 76 x 76 x 128 0.379 BFLOPs
10 conv 256 3 x 3 / 1 76 x 76 x 128 -> 76 x 76 x 256 3.407 BFLOPs
11 max 2 x 2 / 2 76 x 76 x 256 -> 38 x 38 x 256
12 conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512 3.407 BFLOPs
13 conv 256 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 256 0.379 BFLOPs
14 conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512 3.407 BFLOPs
15 conv 256 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 256 0.379 BFLOPs
16 conv 512 3 x 3 / 1 38 x 38 x 256 -> 38 x 38 x 512 3.407 BFLOPs
17 max 2 x 2 / 2 38 x 38 x 512 -> 19 x 19 x 512
18 conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024 3.407 BFLOPs
19 conv 512 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 512 0.379 BFLOPs
20 conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024 3.407 BFLOPs
21 conv 512 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 512 0.379 BFLOPs
22 conv 1024 3 x 3 / 1 19 x 19 x 512 -> 19 x 19 x1024 3.407 BFLOPs
23 conv 1024 3 x 3 / 1 19 x 19 x1024 -> 19 x 19 x1024 6.814 BFLOPs
24 conv 1024 3 x 3 / 1 19 x 19 x1024 -> 19 x 19 x1024 6.814 BFLOPs
25 route 16
26 conv 64 1 x 1 / 1 38 x 38 x 512 -> 38 x 38 x 64 0.095 BFLOPs
27 reorg / 2 38 x 38 x 64 -> 19 x 19 x 256
28 route 27 24
29 conv 1024 3 x 3 / 1 19 x 19 x1280 -> 19 x 19 x1024 8.517 BFLOPs
30 conv 425 1 x 1 / 1 19 x 19 x1024 -> 19 x 19 x 425 0.314 BFLOPs
31 detection
mask_scale: Using default '1.000000'
Loading weights from yolov2.weights...Done!
data/person.jpg: Predicted in 0.026165 seconds.
horse: 82%
dog: 86%
person: 86%
```
![Yolov2_1](./img/Yolov2_1.png)
## debug line by line
源码解析笔记:/media/liaoyu/mix/jiangwenxiang/note/darknet_notes_Jiang/
#### argc argv
```c
(gdb) set args detector test cfg/coco.data cfg/yolov2.cfg yolov2.weights data/person.jpg
>>./examples/darknet.c:405
// main()
gpu_index=0;
417if(gpu_index>=0)
{
cuda_set_device(0);
}
>>./src/cuda.c
19 int cuda_get_device()
20 {
21 int n = 0;
22 cudaError_t status = cudaGetDevice(&n);
// 这里的status=cudaSuccess
// cudaGetDevice(0)
__host__ __device__ cudaError_t cudaGetDevice ( int* device )
Returns which device is currently being used.
Parameters: device
Returns the device on which the active host thread executes the device code.
Returns cudaSuccess, cudaErrorInvalidValue
23 check_error(status);
24 return n;
25 }
>>run_detector(argc,argv)
>>./examples/detector.c
void run_detector(int argc, char **argv)
{
char *prefix = find_char_arg(argc, argv, "-prefix", 0);
float thresh = find_float_arg(argc, argv, "-thresh", .5);
float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
int cam_index = find_int_arg(argc, argv, "-c", 0);
int frame_skip = find_int_arg(argc, argv, "-s", 0);
int avg = find_int_arg(argc, argv, "-avg", 3);
if(argc < 4){
fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
return;
}
char *gpu_list = find_char_arg(argc, argv, "-gpus", 0);
char *outfile = find_char_arg(argc, argv, "-out", 0);
int *gpus = 0;
int gpu = 0;
int ngpus = 0;
if(gpu_list){
printf("%s\n", gpu_list);
int len = strlen(gpu_list);
ngpus = 1;
int i;
for(i = 0; i < len; ++i){
if (gpu_list[i] == ',') ++ngpus;
}
gpus = calloc(ngpus, sizeof(int));
for(i = 0; i < ngpus; ++i){
gpus[i] = atoi(gpu_list);
gpu_list = strchr(gpu_list, ',')+1;
}
} else {
gpu = gpu_index;
gpus = &gpu;
ngpus = 1;
}
int clear = find_arg(argc, argv, "-clear");
int fullscreen = find_arg(argc, argv, "-fullscreen");
int width = find_int_arg(argc, argv, "-w", 0);
int height = find_int_arg(argc, argv, "-h", 0);
int fps = find_int_arg(argc, argv, "-fps", 0);
//int class = find_int_arg(argc, argv, "-class", 0);
char *datacfg = argv[3];
char *cfg = argv[4]; //.name cfg weight filename要按照顺序写,这里不是写的find_char_arg
char *weights = (argc > 5) ? argv[5] : 0;
char *filename = (argc > 6) ? argv[6]: 0;
if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear);
else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "valid2")) validate_detector_flip(datacfg, cfg, weights, outfile);
else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
else if(0==strcmp(argv[2], "demo")) {
list *options = read_data_cfg(datacfg);
int classes = option_find_int(options, "classes", 20);
char *name_list = option_find_str(options, "names", "data/names.list");
char **names = get_labels(name_list);
demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, avg, hier_thresh, width, height, fps, fullscreen);
}
//else if(0==strcmp(argv[2], "extract")) extract_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
//else if(0==strcmp(argv[2], "censor")) censor_detector(datacfg, cfg, weights, cam_index, filename, class, thresh, frame_skip);
```
#### coco.data解析
这里的list *read_data_cfg(char *filename);解析参数
![](./img/darknet_1.jpg)
```c
>>
test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, outfile, fullscreen);
test_detector (datacfg=0x7fffffffe0ea "cfg/coco.data", cfgfile=0x7fffffffe0f8 "cfg/yolov2.cfg",
weightfile=0x7fffffffe107 "yolov2.weights", filename=filename@entry=0x7fffffffe116 "data/person.jpg",
thresh=thresh@entry=0.5, hier_thresh=hier_thresh@entry=0.5, outfile=outfile@entry=0x0,
fullscreen=fullscreen@entry=0) at ./examples/detector.c:563
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, char *outfile, int fullscreen)
{
list *read_data_cfg(char *filename);//读取cfg/voc.data cfg/coco.data
/*
这里是根据 list->node->kvp的链表存储的coco.data的数据
>>option_list.c
list *read_data_cfg(char *filename)
{
FILE *file = fopen(filename, "r");
if(file == 0) file_error(filename); //如果文件打开失败则返回一个空指针
char *line;
int nu = 0;
list *options = make_list();
while((line=fgetl(file)) != 0){
++ nu;
strip(line); //去掉coco.data 每一行中的空格键 \t \n
(gdb) p line
$15 = 0x8ed570 "classes= 80"
(gdb) p line
$16 = 0x8ed570 "classes=80"
switch(line[0]){
case '\0':
case '#':
case ';':
free(line);
break;
default:
if(!read_option(line, options)) //读取每一行的key和value
{
fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line);
free(line);
}
break;
}
}
fclose(file);
return options;
}
>>option_list.c
>>read_option(line, options)
int read_option(char *s, list *options)
{
size_t i;
size_t len = strlen(s);
char *val = 0;
for(i = 0; i < len; ++i){
if(s[i] == '='){
s[i] = '\0';
val = s+i+1;
break;
}
}
if(i == len-1) return 0;
char *key = s;
option_insert(options, key, val); //key=classes val=80
return 1;
}
>>option_list.c
>>option_insert(options, key, val)
void option_insert(list *l, char *key, char *val)
{
kvp *p = malloc(sizeof(kvp));
p->key = key;
p->val = val;
p->used = 0;
list_insert(l, p);
}
这里的
typedef struct{
char *key;
char *val;
int used;
} kvp;
>> list.c
void list_insert(list *l, void *val)
{
node *new = malloc(sizeof(node));
new->val = val;
new->next = 0;
if(!l->back){
l->front = new;
new->prev = 0;
}else{
l->back->next = new;
new->prev = l->back;
}
l->back = new;
++l->size;
}
*/
char *name_list = option_find_str(options, "names", "data/names.list");
char **names = get_labels(name_list);
/*
565 char *name_list = option_find_str(options, "names", "data/names.list");
(gdb) n
566 char **names = get_labels(name_list);
(gdb) p name_list
$30 = 0x8eec36 "data/coco.names"
(gdb) p name
$34 = '\000'
(gdb) p names
$35 = (char **) 0x8ef320
(gdb) p names[1]
$36 = 0x8ed990 "bicycle"
(gdb) p names[2]
$37 = 0x8edbc0 "car"
*/
image **alphabet = load_alphabet();
/*
load 字母表从ASIC 码的32的空格一直到126的波浪号 每个符号八个大小
符号在./data/labels/里面
*/
network *net = load_network(cfgfile, weightfile, 0);
```
#### network结构体
```c
network结构体
等下打出来看一下参数都是哪些
typedef struct network{
int n;
int batch;
size_t *seen;
int *t;
float epoch;
int subdivisions;
layer *layers; //layer
float *output;
learning_rate_policy policy;
float learning_rate;
float momentum;
float decay;
float gamma;
float scale;
float power;
int time_steps;
int step;
int max_batches;
float *scales;
int *steps;
int num_steps;
int burn_in;
int adam;
float B1;
float B2;
float eps;
int inputs;
int outputs;
int truths;
int notruth;
int h, w, c;
int max_crop;
int min_crop;
float max_ratio;
float min_ratio;
int center;
float angle;
float aspect;
float exposure;
float saturation;
float hue;
int random;
int gpu_index;
tree *hierarchy;
float *input;
float *truth;
float *delta;
float *workspace;
int train;
int index;
float *cost;
float clip;
#ifdef GPU
float *input_gpu;
float *truth_gpu;
float *delta_gpu;
float *output_gpu;
#endif
} network;
```
![](./img/yolov2_cfg.png)
```c
>>>(gdb) ./src/network.c:54
load_network (cfg=cfg@entry=0x7fffffffe0f8 "cfg/yolov2.cfg", weights=0x7fffffffe107 "yolov2.weights",
clear=clear@entry=0) at ./src/network.c:54
network * load_network(cfgfile, weightfile, 0);
{
55 network *net = parse_network_cfg(cfg);
56 if(weights && weights[0] != 0){
57 load_weights(net, weights);
58 }
(gdb)
59 if(clear) (*net->seen) = 0; //不在屏幕上显示
60 return net;
}
先看下
55 network *net = parse_network_cfg(cfg);
>>>(gdb) ./src/parser.c:743
parse_network_cfg (filename=filename@entry=0x7fffffffe0f8 "cfg/yolov2.cfg") at ./src/parser.c:743
network *parse_network_cfg(char *filename)
{
list *sections = read_cfg(filename);
//***********************************************//
// 看上面的图就知道了
node *n = sections->front;
if(!n) error("Config file has no sections");
network *net = make_network(sections->size - 1);
net->gpu_index = gpu_index;
size_params params;
section *s = (section *)n->val;
list *options = s->options;
if(!is_network(s)) error("First section must be [net] or [network]");
parse_net_options(options, net);
params.h = net->h;
params.w = net->w;
params.c = net->c;
params.inputs = net->inputs;
params.batch = net->batch;
params.time_steps = net->time_steps;
params.net = net;
size_t workspace_size = 0;
n = n->next;
int count = 0;
free_section(s);
fprintf(stderr, "layer filters size input output\n");
while(n){
params.index = count;
fprintf(stderr, "%5d ", count);
s = (section *)n->val;
options = s->options;
layer l = {0};
LAYER_TYPE lt = string_to_layer_type(s->type);
if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params);
}else if(lt == DECONVOLUTIONAL){
l = parse_deconvolutional(options, params);
}else if(lt == LOCAL){
l = parse_local(options, params);
}else if(lt == ACTIVE){
l = parse_activation(options, params);
}else if(lt == LOGXENT){
l = parse_logistic(options, params);
}else if(lt == L2NORM){
l = parse_l2norm(options, params);
}else if(lt == RNN){
l = parse_rnn(options, params);
}else if(lt == GRU){
l = parse_gru(options, params);
}else if (lt == LSTM) {
l = parse_lstm(options, params);
}else if(lt == CRNN){
l = parse_crnn(options, params);
}else if(lt == CONNECTED){
l = parse_connected(options, params);
}else if(lt == CROP){
l = parse_crop(options, params);
}else if(lt == COST){
l = parse_cost(options, params);
}else if(lt == REGION){
l = parse_region(options, params);
}else if(lt == YOLO){
l = parse_yolo(options, params);
}else if(lt == ISEG){
l = parse_iseg(options, params);
}else if(lt == DETECTION){
l = parse_detection(options, params);
}else if(lt == SOFTMAX){
l = parse_softmax(options, params);
net->hierarchy = l.softmax_tree;
}else if(lt == NORMALIZATION){
l = parse_normalization(options, params);
}else if(lt == BATCHNORM){
l = parse_batchnorm(options, params);
}else if(lt == MAXPOOL){
l = parse_maxpool(options, params);
}else if(lt == REORG){
l = parse_reorg(options, params);
}else if(lt == AVGPOOL){
l = parse_avgpool(options, params);
}else if(lt == ROUTE){
l = parse_route(options, params, net);
}else if(lt == UPSAMPLE){
l = parse_upsample(options, params, net);
}else if(lt == SHORTCUT){
l = parse_shortcut(options, params, net);
}else if(lt == DROPOUT){
l = parse_dropout(options, params);
l.output = net->layers[count-1].output;
l.delta = net->layers[count-1].delta;
#ifdef GPU
l.output_gpu = net->layers[count-1].output_gpu;
l.delta_gpu = net->layers[count-1].delta_gpu;
#endif
}else{
fprintf(stderr, "Type not recognized: %s\n", s->type);
}
l.clip = net->clip;
l.truth = option_find_int_quiet(options, "truth", 0);
l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontsave = option_find_int_quiet(options, "dontsave", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0);
l.numload = option_find_int_quiet(options, "numload", 0);
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
l.learning_rate_scale = option_find_float_quiet(options, "learning_rate", 1);
l.smooth = option_find_float_quiet(options, "smooth", 0);
option_unused(options);
net->layers[count] = l;
if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
free_section(s);
n = n->next;
++count;
if(n){
params.h = l.out_h;
params.w = l.out_w;
params.c = l.out_c;
params.inputs = l.outputs;
}
}
free_list(sections);
layer out = get_network_output_layer(net);
net->outputs = out.outputs;
net->truths = out.outputs;
if(net->layers[net->n-1].truths) net->truths = net->layers[net->n-1].truths;
net->output = out.output;
net->input = calloc(net->inputs*net->batch, sizeof(float));
net->truth = calloc(net->truths*net->batch, sizeof(float));
#ifdef GPU
net->output_gpu = out.output_gpu;
net->input_gpu = cuda_make_array(net->input, net->inputs*net->batch);
net->truth_gpu = cuda_make_array(net->truth, net->truths*net->batch);
#endif
if(workspace_size){
//printf("%ld\n", workspace_size);
#ifdef GPU
if(gpu_index >= 0){
net->workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
}else {
net->workspace = calloc(1, workspace_size);
}
#else
net->workspace = calloc(1, workspace_size);
#endif
}
return net;
}
```
```c
list *read_cfg(char *filename)
{
FILE *file = fopen(filename, "r");
if(file == 0) file_error(filename);
char *line;
int nu = 0;
list *options = make_list();
section *current = 0;
while((line=fgetl(file)) != 0){
++ nu;
strip(line);
switch(line[0]){
case '[':
current = malloc(sizeof(section));
list_insert(options, current);
current->options = make_list();
current->type = line;
break;
case '\0':
case '#':
case ';':
free(line);
break;
default:
if(!read_option(line, current->options)){
fprintf(stderr, "Config file error line %d, could parse: %s\n", nu, line);
free(line);
}
break;
}
}
fclose(file);
return options;
}
```
#### 读图
```c
set_batch_network(net, 1);
srand(2222222);
double time;
char buff[256];
char *input = buff;
float nms=.45;
while(1){
if(filename){
strncpy(input, filename, 256);
} else {
printf("Enter Image Path: ");
fflush(stdout);
input = fgets(input, 256, stdin);
if(!input) return;
strtok(input, "\n");
}
image im = load_image_color(input,0,0);
image sized = letterbox_image(im, net->w, net->h);
//image sized = resize_image(im, net->w, net->h);
//image sized2 = resize_max(im, net->w);
//image sized = crop_image(sized2, -((net->w - sized2.w)/2), -((net->h - sized2.h)/2), net->w, net->h);
//resize_network(net, sized.w, sized.h);
layer l = net->layers[net->n-1];
float *X = sized.data;
time=what_time_is_it_now();
network_predict(net, X);
printf("%s: Predicted in %f seconds.\n", input, what_time_is_it_now()-time);
int nboxes = 0;
detection *dets = get_network_boxes(net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes);
//printf("%d\n", nboxes);
//if (nms) do_nms_obj(boxes, probs, l.w*l.h*l.n, l.classes, nms);
if (nms) do_nms_sort(dets, nboxes, l.classes, nms);
draw_detections(im, dets, nboxes, thresh, names, alphabet, l.classes);
free_detections(dets, nboxes);
if(outfile){
save_image(im, outfile);
}
else{
save_image(im, "predictions");
#ifdef OPENCV
cvNamedWindow("predictions", CV_WINDOW_NORMAL);
if(fullscreen){
cvSetWindowProperty("predictions", CV_WND_PROP_FULLSCREEN, CV_WINDOW_FULLSCREEN);
}
show_image(im, "predictions", 0);
#endif
}
free_image(im);
free_image(sized);
if (filename) break;
}
}
```