When designing a neural network, we often reuse classic structures as feature extractors. If the network has several inputs of the same nature, giving each input its own extractor makes the architecture bloated, so sharing parameters becomes an effective remedy.
Below, I walk through an example of how to share parameters between layers in Caffe.
Reference: https://github.com/BVLC/caffe/tree/master/examples/siamese
To share parameters, a shared layer must assign a name inside its param blocks when it is defined; two layers with identical structural settings will share their parameters as long as the name in their param blocks is the same. The snippet below, following the linked siamese example, shows how the shared layers name their parameters:
...
param { name: "conv1_w" ... }
param { name: "conv1_b" ... }
...
param { name: "conv2_w" ... }
param { name: "conv2_b" ... }
...
param { name: "ip1_w" ... }
param { name: "ip1_b" ... }
...
param { name: "ip2_w" ... }
param { name: "ip2_b" ... }
...
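As a quick sanity check, the prototxt can be parsed with Caffe's protobuf definitions to list which param names are reused across layers; any name that appears in more than one layer marks a shared parameter. This is only a small sketch, assuming pycaffe is on the PYTHONPATH and the network definition is saved as shared_net.prototxt (a hypothetical file name):

from collections import defaultdict

from caffe.proto import caffe_pb2
from google.protobuf import text_format

# Parse the network definition (the file name is an assumption for this sketch).
net_param = caffe_pb2.NetParameter()
with open('shared_net.prototxt') as f:
    text_format.Merge(f.read(), net_param)

# Collect which layers refer to each named parameter blob.
owners = defaultdict(list)
for layer in net_param.layer:
    for spec in layer.param:
        if spec.name:  # unnamed params are never shared
            owners[spec.name].append(layer.name)

# Any name used by more than one layer is a shared parameter.
for name, layers in owners.items():
    if len(layers) > 1:
        print('%s is shared by: %s' % (name, ', '.join(layers)))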
############################pyramid extractor network1#################################
layer { name: "PFN1_conv1" type: "Convolution" bottom: "img0" top: "PFN1_conv1"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_conv1_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_conv1_b"}
convolution_param { num_output: 16 pad: 1 kernel_size: 3
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN1_relu1" type: "ReLU" bottom: "PFN1_conv1" top: "PFN1_conv1" relu_param {negative_slope: 0.1}}
layer { name: "PFN1_downSample1" type: "Convolution" bottom: "PFN1_conv1" top: "PFN1_downSample1"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_downSample1_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_downSample1_b"}
convolution_param { num_output: 16 pad: 1 kernel_size: 3 stride: 2
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN1_reluDownSample1" type: "ReLU" bottom: "PFN1_downSample1" top: "PFN1_downSample1" relu_param {negative_slope: 0.1}}
layer { name: "PFN1_conv2" type: "Convolution" bottom: "PFN1_downSample1" top: "PFN1_conv2"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_conv2_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_conv2_b"}
convolution_param { num_output: 32 pad: 1 kernel_size: 3
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN1_relu2" type: "ReLU" bottom: "PFN1_conv2" top: "PFN1_conv2" relu_param {negative_slope: 0.1}}
layer { name: "PFN1_downSample2" type: "Convolution" bottom: "PFN1_conv2" top: "PFN1_downSample2"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_downSample2_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_downSample2_b"}
convolution_param { num_output: 32 pad: 1 kernel_size: 3 stride: 2
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN1_reluDownSample2" type: "ReLU" bottom: "PFN1_downSample2" top: "PFN1_downSample2" relu_param {negative_slope: 0.1}}
layer { name: "PFN1_conv3" type: "Convolution" bottom: "PFN1_downSample2" top: "PFN1_conv3"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_conv3_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_conv3_b"}
convolution_param { num_output: 64 pad: 1 kernel_size: 3
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN1_relu3" type: "ReLU" bottom: "PFN1_conv3" top: "PFN1_conv3" relu_param {negative_slope: 0.1}}
layer { name: "PFN1_downSample3" type: "Convolution" bottom: "PFN1_conv3" top: "PFN1_downSample3"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_downSample3_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_downSample3_b"}
convolution_param { num_output: 64 pad: 1 kernel_size: 3 stride: 2
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN1_reluDownSample3" type: "ReLU" bottom: "PFN1_downSample3" top: "PFN1_downSample3" relu_param {negative_slope: 0.1}}
The second pyramid extractor network below is structurally identical to the first; its param blocks simply reuse the same PFN1_* names, so the two branches share the same weights.
############################pyramid extractor network2#################################
layer { name: "PFN2_conv1" type: "Convolution" bottom: "img2" top: "PFN2_conv1"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_conv1_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_conv1_b"}
convolution_param { num_output: 16 pad: 1 kernel_size: 3
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN2_relu1" type: "ReLU" bottom: "PFN2_conv1" top: "PFN2_conv1" relu_param {negative_slope: 0.1}}
layer { name: "PFN2_downSample1" type: "Convolution" bottom: "PFN2_conv1" top: "PFN2_downSample1"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_downSample1_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_downSample1_b"}
convolution_param { num_output: 16 pad: 1 kernel_size: 3 stride: 2
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN2_reluDownSample1" type: "ReLU" bottom: "PFN2_downSample1" top: "PFN2_downSample1" relu_param {negative_slope: 0.1}}
layer { name: "PFN2_conv2" type: "Convolution" bottom: "PFN2_downSample1" top: "PFN2_conv2"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_conv2_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_conv2_b"}
convolution_param { num_output: 32 pad: 1 kernel_size: 3
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN2_relu2" type: "ReLU" bottom: "PFN2_conv2" top: "PFN2_conv2" relu_param {negative_slope: 0.1}}
layer { name: "PFN2_downSample2" type: "Convolution" bottom: "PFN2_conv2" top: "PFN2_downSample2"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_downSample2_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_downSample2_b"}
convolution_param { num_output: 32 pad: 1 kernel_size: 3 stride: 2
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN2_reluDownSample2" type: "ReLU" bottom: "PFN2_downSample2" top: "PFN2_downSample2" relu_param {negative_slope: 0.1}}
layer { name: "PFN2_conv3" type: "Convolution" bottom: "PFN2_downSample2" top: "PFN2_conv3"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_conv3_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_conv3_b"}
convolution_param { num_output: 64 pad: 1 kernel_size: 3
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN2_relu3" type: "ReLU" bottom: "PFN2_conv3" top: "PFN2_conv3" relu_param {negative_slope: 0.1}}
layer { name: "PFN2_downSample3" type: "Convolution" bottom: "PFN2_conv3" top: "PFN2_downSample3"
param { lr_mult: 1 decay_mult: 1 name: "PFN1_downSample3_w"}
param { lr_mult: 2 decay_mult: 0 name: "PFN1_downSample3_b"}
convolution_param { num_output: 64 pad: 1 kernel_size: 3 stride: 2
weight_filler {type: "msra"} bias_filler { type: "constant" } engine: CUDNN}}
layer { name: "PFN2_reluDownSample3" type: "ReLU" bottom: "PFN2_downSample3" top: "PFN2_downSample3" relu_param {negative_slope: 0.1}}