



2.1 使用 create_imagenet.sh 脚本将 ImageNet 数据转换为 lmdb,脚本内容如下:
#!/usr/bin/env sh
# Create the imagenet lmdb inputs
# N.B. set the path to the imagenet train + val data dirs
set -e



# Set RESIZE=true to resize the images to 256x256. Leave as false if images have
# already been resized using another tool.
if $RESIZE; then

if [ ! -d "$TRAIN_DATA_ROOT" ]; then
  echo "Error: TRAIN_DATA_ROOT is not a path to a directory: $TRAIN_DATA_ROOT"
  echo "Set the TRAIN_DATA_ROOT variable in create_imagenet.sh to the path" \
       "where the ImageNet training data is stored."
  exit 1

echo "Creating train lmdb..."

GLOG_logtostderr=1 $TOOLS/convert_imageset \
    --resize_height=$RESIZE_HEIGHT \
    --resize_width=$RESIZE_WIDTH \
    --shuffle \
    $DATA/train.txt \

echo "Done."
000001.jpg 0
000002.jpg 1
000003.jpg 2
000004.jpg 3
000005.jpg 4
000006.jpg 5
000007.jpg 6
000008.jpg 7
000009.jpg 8
000010.jpg 9
以上为 train.txt 的内容示例:每行前面是 TRAIN_DATA_ROOT 目录下的图片文件名,后面的数字是该图片的标签(label)。


2.2 编写solver和prototxt
name: "ResNet-50"

layer {
  name: "imagenet"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  data_param {
    source: "models/resnet/resnet_train_lmdb"         //刚才产生的train的lmdb
    batch_size: 8
    backend: LMDB
layer {
  name: "imagenet"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  data_param {
    source: "models/resnet/resnet_test_lmdb"          //同理可以产生的test的lmdb
    batch_size: 1
    backend: LMDB


layer {
	bottom: "data"
	top: "conv1"
	name: "conv1"
	type: "Convolution"
	convolution_param {
		num_output: 64
		kernel_size: 7
		pad: 3
		stride: 2

layer {
	bottom: "conv1"
	top: "conv1"
	name: "bn_conv1"
	type: "BatchNorm"
	batch_norm_param {
		use_global_stats: true

layer {
	bottom: "conv1"
	top: "conv1"
	name: "scale_conv1"
	type: "Scale"
	scale_param {
		bias_term: true

layer {
	bottom: "conv1"
	top: "conv1"
	name: "conv1_relu"
	type: "ReLU"

layer {
	bottom: "conv1"
	top: "pool1"
	name: "pool1"
	type: "Pooling"
	pooling_param {
		kernel_size: 3
		stride: 2
		pool: MAX

layer {
	bottom: "pool1"
	top: "res2a_branch1"
	name: "res2a_branch1"
	type: "Convolution"
	convolution_param {
		num_output: 256
		kernel_size: 1
		pad: 0
		stride: 1
		bias_term: false

layer {
	bottom: "res2a_branch1"
	top: "res2a_branch1"
	name: "bn2a_branch1"
	type: "BatchNorm"
	batch_norm_param {
		use_global_stats: true


layer {
	bottom: "res5c_branch2a"
	top: "res5c_branch2a"
	name: "bn5c_branch2a"
	type: "BatchNorm"
	batch_norm_param {
		use_global_stats: true

layer {
	bottom: "res5c_branch2a"
	top: "res5c_branch2a"
	name: "scale5c_branch2a"
	type: "Scale"
	scale_param {
		bias_term: true

layer {
	bottom: "res5c_branch2a"
	top: "res5c_branch2a"
	name: "res5c_branch2a_relu"
	type: "ReLU"

layer {
	bottom: "res5c_branch2a"
	top: "res5c_branch2b"
	name: "res5c_branch2b"
	type: "Convolution"
	convolution_param {
		num_output: 512
		kernel_size: 3
		pad: 1
		stride: 1
		bias_term: false

layer {
	bottom: "res5c_branch2b"
	top: "res5c_branch2b"
	name: "bn5c_branch2b"
	type: "BatchNorm"
	batch_norm_param {
		use_global_stats: true

layer {
	bottom: "res5c_branch2b"
	top: "res5c_branch2b"
	name: "scale5c_branch2b"
	type: "Scale"
	scale_param {
		bias_term: true

layer {
	bottom: "res5c_branch2b"
	top: "res5c_branch2b"
	name: "res5c_branch2b_relu"
	type: "ReLU"

layer {
	bottom: "res5c_branch2b"
	top: "res5c_branch2c"
	name: "res5c_branch2c"
	type: "Convolution"
	convolution_param {
		num_output: 2048
		kernel_size: 1
		pad: 0
		stride: 1
		bias_term: false

layer {
	bottom: "res5c_branch2c"
	top: "res5c_branch2c"
	name: "bn5c_branch2c"
	type: "BatchNorm"
	batch_norm_param {
		use_global_stats: true

layer {
	bottom: "res5c_branch2c"
	top: "res5c_branch2c"
	name: "scale5c_branch2c"
	type: "Scale"
	scale_param {
		bias_term: true

layer {
	bottom: "res5b"
	bottom: "res5c_branch2c"
	top: "res5c"
	name: "res5c"
	type: "Eltwise"

layer {
	bottom: "res5c"
	top: "res5c"
	name: "res5c_relu"
	type: "ReLU"

layer {
	bottom: "res5c"
	top: "pool5"
	name: "pool5"
	type: "Pooling"
	pooling_param {
		kernel_size: 7
		stride: 1
		pool: AVE

layer {
	bottom: "pool5"
	top: "fc1000"
	name: "fc1000"
	type: "InnerProduct"
	inner_product_param {
		num_output: 1000

//loss function

layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc1000"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc1000"
  bottom: "label"
  top: "loss"

net: "models/resnet/res_pretrain.prototxt"             //上一步中写的网络层次结构
test_iter: 10
test_interval: 10
base_lr: 0.01                                          //基础学习率 learning-rate
lr_policy: "step"                                      //学习策略
gamma: 0.1
stepsize: 100000
display: 20
max_iter: 450000                                       //迭代次数
momentum: 0.9                                          //动量系数(学习率的衰减系数是上面的 gamma)
weight_decay: 0.0005                                   //权重衰减系数,防止过拟合
snapshot: 1000                                         //每1000次迭代保存一次参数中间结果
snapshot_prefix: "models/resnet/resnet_train"
solver_mode: CPU

2.3 进行pretrain训练
 ./build/tools/caffe train --solver=models/resnet/res_pretrain_solver.prototxt




3.1 产生lmdb
     此时 train.txt 中每行不再是"图片文件名 + 类别标签"的形式;由于存在 bounding box 标注,每张图片对应一个 xml 标注文件,如下:
VOC2007/JPEGImages/000001.jpg VOC2007/Annotations/000001.xml
VOC2007/JPEGImages/000002.jpg VOC2007/Annotations/000002.xml
VOC2007/JPEGImages/000003.jpg VOC2007/Annotations/000003.xml
VOC2007/JPEGImages/000004.jpg VOC2007/Annotations/000004.xml
VOC2007/JPEGImages/000006.jpg VOC2007/Annotations/000006.xml
VOC2007/JPEGImages/000008.jpg VOC2007/Annotations/000008.xml
VOC2007/JPEGImages/000010.jpg VOC2007/Annotations/000010.xml
VOC2007/JPEGImages/000011.jpg VOC2007/Annotations/000011.xml
VOC2007/JPEGImages/000013.jpg VOC2007/Annotations/000013.xml
VOC2007/JPEGImages/000014.jpg VOC2007/Annotations/000014.xml
cd $root_dir


extra_cmd="--encode-type=jpg --encoded"
if [ $redo ]
  extra_cmd="$extra_cmd --redo"
for subset in test trainval
  python $root_dir/scripts/create_annoset.py --anno-type=$anno_type --label-map-file=$mapfile --min-dim=$min_dim --max-dim=$max_dim 
--resize-width=$width --resize-height=$height --check-label $extra_cmd $data_root_dir $root_dir/data/$dataset_name/$subset.txt 
$data_root_dir/$dataset_name/$db/$dataset_name"_"$subset"_"$db examples/$dataset_name

3.2 编写solver和prototxt

layer {
  name: "data"
  type: "AnnotatedData"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  transform_param {
    mirror: true
    mean_value: 104
    mean_value: 117
    mean_value: 123
    resize_param {
      prob: 1
      resize_mode: WARP
      height: 300
      width: 300
      interp_mode: LINEAR
      interp_mode: AREA
      interp_mode: NEAREST
      interp_mode: CUBIC
      interp_mode: LANCZOS4
    emit_constraint {
      emit_type: CENTER
  data_param {
    source: "models/resnet/ssd_train_lmdb"               //刚才生成的新的lmdb
    batch_size: 32
    backend: LMDB
  annotated_data_param {
    batch_sampler {
      max_sample: 1
      max_trials: 1
    batch_sampler {
      sampler {
        min_scale: 0.3
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      sample_constraint {
        min_jaccard_overlap: 0.1
      max_sample: 1
      max_trials: 50
    batch_sampler {
      sampler {
        min_scale: 0.3
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      sample_constraint {
        min_jaccard_overlap: 0.3
      max_sample: 1
      max_trials: 50
    batch_sampler {
      sampler {
        min_scale: 0.3
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      sample_constraint {
        min_jaccard_overlap: 0.5
      max_sample: 1
      max_trials: 50
    batch_sampler {
      sampler {
        min_scale: 0.3
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      sample_constraint {
        min_jaccard_overlap: 0.7
      max_sample: 1
      max_trials: 50
    batch_sampler {
      sampler {
        min_scale: 0.3
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      sample_constraint {
        min_jaccard_overlap: 0.9
      max_sample: 1
      max_trials: 50
    batch_sampler {
      sampler {
        min_scale: 0.3
        max_scale: 1.0
        min_aspect_ratio: 0.5
        max_aspect_ratio: 2.0
      sample_constraint {
        max_jaccard_overlap: 1.0
      max_sample: 1
      max_trials: 50
    label_map_file: "data/VOC0712/labelmap_voc.prototxt"


layer {
bottom: "data"
top: "conv1"
name: "conv1"
type: "Convolution"
convolution_param {
num_output: 64
kernel_size: 7
pad: 3
stride: 2

layer {
bottom: "conv1"
top: "conv1"
name: "bn_conv1"
type: "BatchNorm"
batch_norm_param {
use_global_stats: true
layer {
	bottom: "data"
	top: "conv1"
	name: "conv1"
	type: "Convolution"
	convolution_param {
		num_output: 64
		kernel_size: 7
		pad: 3
		stride: 2


layer {
  bottom: "res5c"
  top: "res5c"
  name: "res5c_relu"
  type: "ReLU"

layer {
  bottom: "res5c"
  top: "pool5"
  name: "pool5"
  type: "Pooling"
  pooling_param {
    kernel_size: 7
    stride: 1
    pool: AVE



layer {
  name: "pool5_mbox_loc"
  type: "Convolution"
  bottom: "pool5"                               //选取pool5作为bottom,产生mbox_loc
  top: "pool5_mbox_loc"
  param {
    lr_mult: 1
    decay_mult: 1
  param {
    lr_mult: 2
    decay_mult: 0
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    bias_filler {
      type: "constant"
      value: 0
layer {
  name: "pool5_mbox_loc_perm"                     //将上一层产生的mbox_loc重新排序
  type: "Permute"
  bottom: "pool5_mbox_loc"
  top: "pool5_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
layer {
  name: "pool5_mbox_loc_flat"                    //将上一层展平(例如7*7的展平成1*49,方便之后的拼接)
  type: "Flatten"
  bottom: "pool5_mbox_loc_perm"
  top: "pool5_mbox_loc_flat"
  flatten_param {
    axis: 1
layer {
  name: "pool5_mbox_conf"
  type: "Convolution"
  bottom: "pool5"                               //选取pool5作为bottom,产生mbox_conf(之后的排序展平同理)
  top: "pool5_mbox_conf"
 param {
    lr_mult: 1
    decay_mult: 1
  param {
    lr_mult: 2
    decay_mult: 0
  convolution_param {
    num_output: 126
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    bias_filler {
      type: "constant"
      value: 0
layer {
  name: "pool5_mbox_conf_perm"
  type: "Permute"
  bottom: "pool5_mbox_conf"
  top: "pool5_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
layer {
  name: "pool5_mbox_conf_flat"
  type: "Flatten"
  bottom: "pool5_mbox_conf_perm"
  top: "pool5_mbox_conf_flat"
  flatten_param {
    axis: 1
layer {
  name: "pool5_mbox_priorbox"
  type: "PriorBox"
  bottom: "pool5"                                //选取pool5作为bottom,产生mbox_priorbox(之后排序展平)
  bottom: "data"
  top: "pool5_mbox_priorbox"
  prior_box_param {
    min_size: 276.0
    max_size: 330.0
    aspect_ratio: 2
    aspect_ratio: 3
    flip: true
    clip: true
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2


layer {
  name: "res5c_mbox_loc"
  type: "Convolution"
  bottom: "res5c"
  top: "res5c_mbox_loc"                                         
  param {
    lr_mult: 1
    decay_mult: 1
  param {
    lr_mult: 2
    decay_mult: 0
  convolution_param {
    num_output: 24
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    bias_filler {
      type: "constant"
      value: 0
layer {
  name: "res5c_mbox_loc_perm"
  type: "Permute"
  bottom: "res5c_mbox_loc"
  top: "res5c_mbox_loc_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
layer {
  name: "res5c_mbox_loc_flat"
  type: "Flatten"
  bottom: "res5c_mbox_loc_perm"
  top: "res5c_mbox_loc_flat"
  flatten_param {
    axis: 1
layer {
  name: "res5c_mbox_conf"
  type: "Convolution"
  bottom: "res5c"
  top: "res5c_mbox_conf"
  param {
    lr_mult: 1
    decay_mult: 1
  param {
    lr_mult: 2
    decay_mult: 0
  convolution_param {
    num_output: 126
    pad: 1
    kernel_size: 3
    stride: 1
    weight_filler {
      type: "xavier"
    bias_filler {
      type: "constant"
      value: 0
layer {
  name: "res5c_mbox_conf_perm"
  type: "Permute"
  bottom: "res5c_mbox_conf"
  top: "res5c_mbox_conf_perm"
  permute_param {
    order: 0
    order: 2
    order: 3
    order: 1
layer {
  name: "res5c_mbox_conf_flat"
  type: "Flatten"
  bottom: "res5c_mbox_conf_perm"
  top: "res5c_mbox_conf_flat"
  flatten_param {
    axis: 1
layer {
  name: "res5c_mbox_priorbox"
  type: "PriorBox"
  bottom: "res5c"
  bottom: "data"
  top: "res5c_mbox_priorbox"
  prior_box_param {
    min_size: 276.0
    max_size: 330.0
    aspect_ratio: 2
    aspect_ratio: 3
    flip: true
    clip: true
    variance: 0.1
    variance: 0.1
    variance: 0.2
    variance: 0.2


layer {
  name: "mbox_loc"
  type: "Concat"
  bottom: "res5c_mbox_loc_flat"
  bottom: "pool5_mbox_loc_flat"
  top: "mbox_loc"
  concat_param {
    axis: 1
layer {
  name: "mbox_conf"
  type: "Concat"
  bottom: "res5c_mbox_conf_flat"
  bottom: "pool5_mbox_conf_flat"
  top: "mbox_conf"
  concat_param {
    axis: 1
layer {
  name: "mbox_priorbox"
  type: "Concat"
  bottom: "res5c_mbox_priorbox"
  bottom: "pool5_mbox_priorbox"
  top: "mbox_priorbox"
  concat_param {
    axis: 2


layer {
  name: "mbox_loss"
  type: "MultiBoxLoss"
  bottom: "mbox_loc"
  bottom: "mbox_conf"
  bottom: "mbox_priorbox"
  bottom: "label"
  top: "mbox_loss"
  include {
    phase: TRAIN
  propagate_down: true
  propagate_down: true
  propagate_down: false
  propagate_down: false
  loss_param {
    normalization: VALID
  multibox_loss_param {
    loc_loss_type: SMOOTH_L1
    conf_loss_type: SOFTMAX
    loc_weight: 1.0
    num_classes: 21
    share_location: true
    match_type: PER_PREDICTION
    overlap_threshold: 0.5
    use_prior_for_matching: true
    background_label_id: 0
    use_difficult_gt: true
    do_neg_mining: true
    neg_pos_ratio: 3.0
    neg_overlap: 0.5
    code_type: CENTER_SIZE

net: "models/resnet/ssd_finetuning.prototxt"
base_lr: 0.01
lr_policy: "step"
gamma: 0.1
stepsize: 100000
display: 20
max_iter: 450000
momentum: 0.9
weight_decay: 0.0005
snapshot: 10000
snapshot_prefix: "models/resnet/resnet_train"
solver_mode: CPU

3.3 训练网络
 ./build/tools/caffe train --solver=models/resnet/ssd_finetuning_solver.prototxt -weights models/resnet/res_pretrain.caffemodel
    --solver= 之后填写 solver 文件的路径;-weights 之后填写预训练(pretrain)阶段得到的权重文件 res_pretrain.caffemodel 的路径。

