I'm writing this up because some readers ran into all sorts of problems when training VGG; it is offered here for reference.
Network structure
VGG16 is used as the example, trained on my own cell dataset.
solver.prototxt:
net: "/media/dl/source/Experiment/cell/test/vgg/vgg16.prototxt" test_iter: 42 test_interval: 1000 base_lr: 0.0001 lr_policy: "step" gamma: 0.1 stepsize: 100000 display: 200 max_iter: 200000 momentum: 0.9 weight_decay: 0.0005 snapshot: 100000 snapshot_prefix: "/media/dl/source/Experiment/cell/test/vgg/vgg" solver_mode: GPU
vgg16.prototxt:
Note that the data layer here uses the "ImageData" type, i.e. the images are read directly from a list file without converting them to LMDB first; I did it this way for convenience since I train on a server. For better throughput, use the LMDB form instead. The LMDB version of the data layer is given after this prototxt as a supplement.
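For reference, the list file given in source is plain text with one image per line: an image path (absolute, or relative to the optional root_folder parameter of the ImageData layer) followed by an integer class label, separated by a space. A minimal sketch with hypothetical file names and labels:

/media/dl/source/Experiment/cell/data/trainnew2_resize/img_0001.png 0
/media/dl/source/Experiment/cell/data/trainnew2_resize/img_0002.png 3
/media/dl/source/Experiment/cell/data/trainnew2_resize/img_0003.png 6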
Also, remember to change num_output of the last fully connected layer to your own number of classes, and to rename that layer (I renamed it to "cellfc8"): Caffe copies pretrained weights by matching layer names, so giving this layer a new name makes it train from scratch during finetuning instead of loading the pretrained fc8 parameters.
1 name: "VGG16" 2 layer { 3 name: "data" 4 type: "ImageData" 5 top: "data" 6 top: "label" 7 include { 8 phase: TRAIN 9 } 10 # transform_param { 11 # mirror: true 12 # crop_size: 224 13 # mean_file: "data/ilsvrc12_shrt_256/imagenet_mean.binaryproto" 14 # } 15 16 image_data_param { 17 source: "/media/dl/source/Experiment/cell/data/trainnew2_resize/trainnew.txt" 18 batch_size: 20 19 shuffle:true 20 #is_color: false 21 new_height: 224 22 new_width: 224 23 } 24 } 25 layer { 26 name: "data" 27 type: "ImageData" 28 top: "data" 29 top: "label" 30 include { 31 phase: TEST 32 } 33 # transform_param { 34 # mirror: false 35 # crop_size: 224 36 # mean_file: "data/ilsvrc12_shrt_256/imagenet_mean.binaryproto" 37 # } 38 39 image_data_param { 40 source: "/media/dl/source/Experiment/cell/data/val2_resize/valnew.txt" 41 batch_size: 50 42 #is_color: false 43 new_height: 224 44 new_width: 224 45 } 46 } 47 layer { 48 bottom: "data" 49 top: "conv1_1" 50 name: "conv1_1" 51 type: "Convolution" 52 param { 53 lr_mult: 1 54 decay_mult: 1 55 } 56 param { 57 lr_mult: 2 58 decay_mult: 0 59 } 60 convolution_param { 61 num_output: 64 62 pad: 1 63 kernel_size: 3 64 weight_filler { 65 type: "gaussian" 66 std: 0.01 67 } 68 bias_filler { 69 type: "constant" 70 value: 0 71 } 72 } 73 } 74 layer { 75 bottom: "conv1_1" 76 top: "conv1_1" 77 name: "relu1_1" 78 type: "ReLU" 79 } 80 layer { 81 bottom: "conv1_1" 82 top: "conv1_2" 83 name: "conv1_2" 84 type: "Convolution" 85 param { 86 lr_mult: 1 87 decay_mult: 1 88 } 89 param { 90 lr_mult: 2 91 decay_mult: 0 92 } 93 convolution_param { 94 num_output: 64 95 pad: 1 96 kernel_size: 3 97 weight_filler { 98 type: "gaussian" 99 std: 0.01 100 } 101 bias_filler { 102 type: "constant" 103 value: 0 104 } 105 } 106 } 107 layer { 108 bottom: "conv1_2" 109 top: "conv1_2" 110 name: "relu1_2" 111 type: "ReLU" 112 } 113 layer { 114 bottom: "conv1_2" 115 top: "pool1" 116 name: "pool1" 117 type: "Pooling" 118 pooling_param { 119 pool: MAX 120 kernel_size: 2 121 stride: 2 122 } 123 } 124 layer { 125 bottom: "pool1" 126 top: "conv2_1" 127 name: "conv2_1" 128 type: "Convolution" 129 param { 130 lr_mult: 1 131 decay_mult: 1 132 } 133 param { 134 lr_mult: 2 135 decay_mult: 0 136 } 137 convolution_param { 138 num_output: 128 139 pad: 1 140 kernel_size: 3 141 weight_filler { 142 type: "gaussian" 143 std: 0.01 144 } 145 bias_filler { 146 type: "constant" 147 value: 0 148 } 149 } 150 } 151 layer { 152 bottom: "conv2_1" 153 top: "conv2_1" 154 name: "relu2_1" 155 type: "ReLU" 156 } 157 layer { 158 bottom: "conv2_1" 159 top: "conv2_2" 160 name: "conv2_2" 161 type: "Convolution" 162 param { 163 lr_mult: 1 164 decay_mult: 1 165 } 166 param { 167 lr_mult: 2 168 decay_mult: 0 169 } 170 convolution_param { 171 num_output: 128 172 pad: 1 173 kernel_size: 3 174 weight_filler { 175 type: "gaussian" 176 std: 0.01 177 } 178 bias_filler { 179 type: "constant" 180 value: 0 181 } 182 } 183 } 184 layer { 185 bottom: "conv2_2" 186 top: "conv2_2" 187 name: "relu2_2" 188 type: "ReLU" 189 } 190 layer { 191 bottom: "conv2_2" 192 top: "pool2" 193 name: "pool2" 194 type: "Pooling" 195 pooling_param { 196 pool: MAX 197 kernel_size: 2 198 stride: 2 199 } 200 } 201 layer { 202 bottom: "pool2" 203 top: "conv3_1" 204 name: "conv3_1" 205 type: "Convolution" 206 param { 207 lr_mult: 1 208 decay_mult: 1 209 } 210 param { 211 lr_mult: 2 212 decay_mult: 0 213 } 214 convolution_param { 215 num_output: 256 216 pad: 1 217 kernel_size: 3 218 weight_filler { 219 type: "gaussian" 220 std: 0.01 221 } 222 bias_filler { 223 type: "constant" 224 value: 0 
225 } 226 } 227 } 228 layer { 229 bottom: "conv3_1" 230 top: "conv3_1" 231 name: "relu3_1" 232 type: "ReLU" 233 } 234 layer { 235 bottom: "conv3_1" 236 top: "conv3_2" 237 name: "conv3_2" 238 type: "Convolution" 239 param { 240 lr_mult: 1 241 decay_mult: 1 242 } 243 param { 244 lr_mult: 2 245 decay_mult: 0 246 } 247 convolution_param { 248 num_output: 256 249 pad: 1 250 kernel_size: 3 251 weight_filler { 252 type: "gaussian" 253 std: 0.01 254 } 255 bias_filler { 256 type: "constant" 257 value: 0 258 } 259 } 260 } 261 layer { 262 bottom: "conv3_2" 263 top: "conv3_2" 264 name: "relu3_2" 265 type: "ReLU" 266 } 267 layer { 268 bottom: "conv3_2" 269 top: "conv3_3" 270 name: "conv3_3" 271 type: "Convolution" 272 param { 273 lr_mult: 1 274 decay_mult: 1 275 } 276 param { 277 lr_mult: 2 278 decay_mult: 0 279 } 280 convolution_param { 281 num_output: 256 282 pad: 1 283 kernel_size: 3 284 weight_filler { 285 type: "gaussian" 286 std: 0.01 287 } 288 bias_filler { 289 type: "constant" 290 value: 0 291 } 292 } 293 } 294 layer { 295 bottom: "conv3_3" 296 top: "conv3_3" 297 name: "relu3_3" 298 type: "ReLU" 299 } 300 layer { 301 bottom: "conv3_3" 302 top: "pool3" 303 name: "pool3" 304 type: "Pooling" 305 pooling_param { 306 pool: MAX 307 kernel_size: 2 308 stride: 2 309 } 310 } 311 layer { 312 bottom: "pool3" 313 top: "conv4_1" 314 name: "conv4_1" 315 type: "Convolution" 316 param { 317 lr_mult: 1 318 decay_mult: 1 319 } 320 param { 321 lr_mult: 2 322 decay_mult: 0 323 } 324 convolution_param { 325 num_output: 512 326 pad: 1 327 kernel_size: 3 328 weight_filler { 329 type: "gaussian" 330 std: 0.01 331 } 332 bias_filler { 333 type: "constant" 334 value: 0 335 } 336 } 337 } 338 layer { 339 bottom: "conv4_1" 340 top: "conv4_1" 341 name: "relu4_1" 342 type: "ReLU" 343 } 344 layer { 345 bottom: "conv4_1" 346 top: "conv4_2" 347 name: "conv4_2" 348 type: "Convolution" 349 param { 350 lr_mult: 1 351 decay_mult: 1 352 } 353 param { 354 lr_mult: 2 355 decay_mult: 0 356 } 357 convolution_param { 358 num_output: 512 359 pad: 1 360 kernel_size: 3 361 weight_filler { 362 type: "gaussian" 363 std: 0.01 364 } 365 bias_filler { 366 type: "constant" 367 value: 0 368 } 369 } 370 } 371 layer { 372 bottom: "conv4_2" 373 top: "conv4_2" 374 name: "relu4_2" 375 type: "ReLU" 376 } 377 layer { 378 bottom: "conv4_2" 379 top: "conv4_3" 380 name: "conv4_3" 381 type: "Convolution" 382 param { 383 lr_mult: 1 384 decay_mult: 1 385 } 386 param { 387 lr_mult: 2 388 decay_mult: 0 389 } 390 convolution_param { 391 num_output: 512 392 pad: 1 393 kernel_size: 3 394 weight_filler { 395 type: "gaussian" 396 std: 0.01 397 } 398 bias_filler { 399 type: "constant" 400 value: 0 401 } 402 } 403 } 404 layer { 405 bottom: "conv4_3" 406 top: "conv4_3" 407 name: "relu4_3" 408 type: "ReLU" 409 } 410 layer { 411 bottom: "conv4_3" 412 top: "pool4" 413 name: "pool4" 414 type: "Pooling" 415 pooling_param { 416 pool: MAX 417 kernel_size: 2 418 stride: 2 419 } 420 } 421 layer { 422 bottom: "pool4" 423 top: "conv5_1" 424 name: "conv5_1" 425 type: "Convolution" 426 param { 427 lr_mult: 1 428 decay_mult: 1 429 } 430 param { 431 lr_mult: 2 432 decay_mult: 0 433 } 434 convolution_param { 435 num_output: 512 436 pad: 1 437 kernel_size: 3 438 weight_filler { 439 type: "gaussian" 440 std: 0.01 441 } 442 bias_filler { 443 type: "constant" 444 value: 0 445 } 446 } 447 } 448 layer { 449 bottom: "conv5_1" 450 top: "conv5_1" 451 name: "relu5_1" 452 type: "ReLU" 453 } 454 layer { 455 bottom: "conv5_1" 456 top: "conv5_2" 457 name: "conv5_2" 458 type: "Convolution" 459 param { 
460 lr_mult: 1 461 decay_mult: 1 462 } 463 param { 464 lr_mult: 2 465 decay_mult: 0 466 } 467 convolution_param { 468 num_output: 512 469 pad: 1 470 kernel_size: 3 471 weight_filler { 472 type: "gaussian" 473 std: 0.01 474 } 475 bias_filler { 476 type: "constant" 477 value: 0 478 } 479 } 480 } 481 layer { 482 bottom: "conv5_2" 483 top: "conv5_2" 484 name: "relu5_2" 485 type: "ReLU" 486 } 487 layer { 488 bottom: "conv5_2" 489 top: "conv5_3" 490 name: "conv5_3" 491 type: "Convolution" 492 param { 493 lr_mult: 1 494 decay_mult: 1 495 } 496 param { 497 lr_mult: 2 498 decay_mult: 0 499 } 500 convolution_param { 501 num_output: 512 502 pad: 1 503 kernel_size: 3 504 weight_filler { 505 type: "gaussian" 506 std: 0.01 507 } 508 bias_filler { 509 type: "constant" 510 value: 0 511 } 512 } 513 } 514 layer { 515 bottom: "conv5_3" 516 top: "conv5_3" 517 name: "relu5_3" 518 type: "ReLU" 519 } 520 layer { 521 bottom: "conv5_3" 522 top: "pool5" 523 name: "pool5" 524 type: "Pooling" 525 pooling_param { 526 pool: MAX 527 kernel_size: 2 528 stride: 2 529 } 530 } 531 layer { 532 bottom: "pool5" 533 top: "fc6" 534 name: "fc6" 535 type: "InnerProduct" 536 param { 537 lr_mult: 1 538 decay_mult: 1 539 } 540 param { 541 lr_mult: 2 542 decay_mult: 0 543 } 544 inner_product_param { 545 num_output: 4096 546 weight_filler { 547 type: "gaussian" 548 std: 0.005 549 } 550 bias_filler { 551 type: "constant" 552 value: 0.1 553 } 554 } 555 } 556 layer { 557 bottom: "fc6" 558 top: "fc6" 559 name: "relu6" 560 type: "ReLU" 561 } 562 layer { 563 bottom: "fc6" 564 top: "fc6" 565 name: "drop6" 566 type: "Dropout" 567 dropout_param { 568 dropout_ratio: 0.5 569 } 570 } 571 layer { 572 bottom: "fc6" 573 top: "fc7" 574 name: "fc7" 575 type: "InnerProduct" 576 param { 577 lr_mult: 1 578 decay_mult: 1 579 } 580 param { 581 lr_mult: 2 582 decay_mult: 0 583 } 584 inner_product_param { 585 num_output: 4096 586 weight_filler { 587 type: "gaussian" 588 std: 0.005 589 } 590 bias_filler { 591 type: "constant" 592 value: 0.1 593 } 594 } 595 } 596 layer { 597 bottom: "fc7" 598 top: "fc7" 599 name: "relu7" 600 type: "ReLU" 601 } 602 layer { 603 bottom: "fc7" 604 top: "fc7" 605 name: "drop7" 606 type: "Dropout" 607 dropout_param { 608 dropout_ratio: 0.5 609 } 610 } 611 layer { 612 bottom: "fc7" 613 top: "fc8" 614 name: "cellfc8" 615 type: "InnerProduct" 616 param { 617 lr_mult: 1 618 decay_mult: 1 619 } 620 param { 621 lr_mult: 2 622 decay_mult: 0 623 } 624 inner_product_param { 625 num_output: 7 #改为自己的类别数 626 weight_filler { 627 type: "gaussian" 628 std: 0.005 629 } 630 bias_filler { 631 type: "constant" 632 value: 0.1 633 } 634 } 635 } 636 layer { 637 name: "accuracy_at_1" 638 type: "Accuracy" 639 bottom: "fc8" 640 bottom: "label" 641 top: "accuracy_at_1" 642 accuracy_param { 643 top_k: 1 644 } 645 include { 646 phase: TEST 647 } 648 } 649 layer { 650 name: "accuracy_at_5" 651 type: "Accuracy" 652 bottom: "fc8" 653 bottom: "label" 654 top: "accuracy_at_5" 655 accuracy_param { 656 top_k: 5 657 } 658 include { 659 phase: TEST 660 } 661 } 662 layer { 663 bottom: "fc8" 664 bottom: "label" 665 top: "loss" 666 name: "loss" 667 type: "SoftmaxWithLoss" 668 }
If you use the LMDB form, replace the data layers above with:
1 name: "vgg" 2 layer { 3 name: "data" 4 type: "Data" 5 top: "data" 6 top: "label" 7 include { 8 phase: TRAIN 9 } 10 transform_param { 11 mirror: true 12 crop_size: 224 13 #如果图片大于224,则使用crop的方式,小于则使用下面的new_height和new_width 14 # new_height: 224 15 #new_width: 224 16 mean_file: "vggface/face_mean.binaryproto" 17 } 18 data_param { 19 source: "vggface/face_train_lmdb" 20 batch_size: 20 21 backend: LMDB 22 } 23 } 24 layer { 25 name: "data" 26 type: "Data" 27 top: "data" 28 top: "label" 29 include { 30 phase: TEST 31 } 32 transform_param { 33 mirror: false 34 crop_size: 224 35 #如果图片大于224,则使用crop的方式,小于则使用下面的new_height和new_width 36 # new_height: 224 37 #new_width: 224 38 mean_file: "vggface/face_mean.binaryproto" 39 } 40 data_param { 41 source: "vggface/face_val_lmdb" 42 batch_size: 20 43 backend: LMDB 44 } 45 }
Training
Here is a shell script that launches training:
#!/usr/bin/env sh
TOOLS=/home/dl/caffe-jonlong/build/tools

$TOOLS/caffe train \
    -solver=/media/dl/source/Experiment/cell/test/vgg/solver.prototxt \
    -weights=/media/dl/source/Experiment/cell/test/vgg/VGG_ILSVRC_16_layers.caffemodel \
    -gpu=all
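Two related invocations that often come in handy: resuming an interrupted run from the .solverstate that Caffe writes at every snapshot, and evaluating a trained snapshot on the validation set with caffe test. A sketch with hypothetical iteration numbers, assuming TOOLS is set as in the script above (the file prefix comes from snapshot_prefix in the solver):

# Resume training from a saved solver state.
$TOOLS/caffe train \
    -solver=/media/dl/source/Experiment/cell/test/vgg/solver.prototxt \
    -snapshot=/media/dl/source/Experiment/cell/test/vgg/vgg_iter_100000.solverstate \
    -gpu=all

# Evaluate a trained model; -iterations matches test_iter in the solver,
# so 42 iterations x test batch_size 50 passes over the validation list.
$TOOLS/caffe test \
    -model=/media/dl/source/Experiment/cell/test/vgg/vgg16.prototxt \
    -weights=/media/dl/source/Experiment/cell/test/vgg/vgg_iter_200000.caffemodel \
    -iterations=42 \
    -gpu=0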
The pretrained model VGG_ILSVRC_16_layers.caffemodel can be downloaded from:
http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel
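For example, it can be fetched directly into the directory used by the training script above with wget:

wget -P /media/dl/source/Experiment/cell/test/vgg/ \
    http://www.robots.ox.ac.uk/~vgg/software/very_deep/caffe/VGG_ILSVRC_16_layers.caffemodel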