caffe 实战系列:proto文件格式以及含义解析:如何定义网络,如何设置网络参数(以AlexNet为例) 2016.3.30

(0)前言:

初学者往往不知道如何配置网络,或者面对这些参数无从下手,不知道它们是什么含义。下面我根据分析源码的经验,以AlexNet为例给出具体解释,希望能在定义网络方面给初学者一些帮助;此外还会说明去哪里查找网络的参数,以及这些参数是如何设置的。

以AlexNet为例:
首先给出配置实例:
# Name of the network described by this file.
name: "AlexNet"
# Training-phase data layer: has no bottom blobs and produces the "data" and
# "label" top blobs.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN # this layer is only included in the TRAIN phase
  }
  # Pre-processing applied to the input data: mirroring enabled, crop size 227,
  # and mean subtraction using the given mean file.
  transform_param {
    mirror: true
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param { # where the data comes from
    source: "examples/imagenet/ilsvrc12_train_lmdb" # path of the database
    batch_size: 256
    backend: LMDB # database type
  }
}
# Test-phase data layer: same tops ("data", "label") as the training data layer,
# but reads the validation database with a smaller batch size.
layer {
  name: "data"
  type: "Data"
  top: "data"
  top: "label"
  include { # this layer is only used in the TEST phase
    phase: TEST
  }
  transform_param { # no mirroring at test time; crop and mean file are unchanged
    mirror: false
    crop_size: 227
    mean_file: "data/ilsvrc12/imagenet_mean.binaryproto"
  }
  data_param {
    source: "examples/imagenet/ilsvrc12_val_lmdb" # path of the database
    batch_size: 50
    backend: LMDB # database type
  }
}
layer { # convolution layer
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  # Each `param` block holds the learning-rate and weight-decay multipliers for
  # ONE learnable blob of the layer, in order. Per the Caffe layer documentation,
  # a convolution layer has two learnable blobs: the first block applies to the
  # filter weights and the second to the bias. That is why there are two blocks;
  # it has nothing to do with `group`.
  param { # filter weights: global learning rate x1, global weight decay x1
    lr_mult: 1
    decay_mult: 1
  }
  param { # bias: global learning rate x2, weight decay disabled (x0)
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param { # convolution settings: output count, kernel, stride, initializers
    num_output: 96
    kernel_size: 11
    stride: 4
    # `group` is not set in this layer, so it keeps its default value; the two
    # `param` blocks above are still required for weights and bias.
    weight_filler { # weights initialized from a Gaussian with std 0.01
      type: "gaussian"
      std: 0.01
    }
    bias_filler { # bias initialized to the constant 0
      type: "constant"
      value: 0
    }
  }
}
layer { # ReLU layer; bottom and top are the same blob ("conv1")
  name: "relu1"
  type: "ReLU"
  bottom: "conv1"
  top: "conv1"
}
layer { # local response normalization (LRN) layer: conv1 -> norm1
  name: "norm1"
  type: "LRN"
  bottom: "conv1"
  top: "norm1"
  lrn_param { # fields are defined by the LRNParameter message
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
layer { # pooling layer: norm1 -> pool1
  name: "pool1"
  type: "Pooling"
  bottom: "norm1"
  top: "pool1"
  pooling_param { # max pooling with a 3x3 (square) kernel and stride 2
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Second convolution layer: pool1 -> conv2.
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param { # multipliers for the first learnable blob (filter weights, per Caffe docs)
    lr_mult: 1
    decay_mult: 1
  }
  param { # multipliers for the second learnable blob (bias): 2x learning rate, no decay
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 2
    kernel_size: 5
    group: 2# the convolution is split into 2 groups (number of groups, not a group size)
    weight_filler { # Gaussian initialization, std 0.01
      type: "gaussian"
      std: 0.01
    }
    bias_filler { # constant initialization, 0.1
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; bottom and top are the same blob ("conv2").
layer {
  name: "relu2"
  type: "ReLU"
  bottom: "conv2"
  top: "conv2"
}
# Local response normalization (LRN) layer: conv2 -> norm2.
layer {
  name: "norm2"
  type: "LRN"
  bottom: "conv2"
  top: "norm2"
  lrn_param { # fields are defined by the LRNParameter message
    local_size: 5
    alpha: 0.0001
    beta: 0.75
  }
}
# Pooling layer: norm2 -> pool2.
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "norm2"
  top: "pool2"
  pooling_param { # max pooling with a 3x3 (square) kernel and stride 2
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Third convolution layer: pool2 -> conv3. `group` is not set here.
layer {
  name: "conv3"
  type: "Convolution"
  bottom: "pool2"
  top: "conv3"
  param { # multipliers for the first learnable blob (filter weights, per Caffe docs)
    lr_mult: 1
    decay_mult: 1
  }
  param { # multipliers for the second learnable blob (bias): 2x learning rate, no decay
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    weight_filler { # Gaussian initialization, std 0.01
      type: "gaussian"
      std: 0.01
    }
    bias_filler { # constant initialization, 0
      type: "constant"
      value: 0
    }
  }
}
# ReLU layer; bottom and top are the same blob ("conv3").
layer {
  name: "relu3"
  type: "ReLU"
  bottom: "conv3"
  top: "conv3"
}
# Fourth convolution layer: conv3 -> conv4.
layer {
  name: "conv4"
  type: "Convolution"
  bottom: "conv3"
  top: "conv4"
  param { # multipliers for the first learnable blob (filter weights, per Caffe docs)
    lr_mult: 1
    decay_mult: 1
  }
  param { # multipliers for the second learnable blob (bias): 2x learning rate, no decay
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 384
    pad: 1
    kernel_size: 3
    group: 2# the convolution is split into 2 groups (number of groups, not a group size)
    weight_filler { # Gaussian initialization, std 0.01
      type: "gaussian"
      std: 0.01
    }
    bias_filler { # constant initialization, 0.1
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; bottom and top are the same blob ("conv4").
layer {
  name: "relu4"
  type: "ReLU"
  bottom: "conv4"
  top: "conv4"
}
# Fifth convolution layer: conv4 -> conv5.
layer {
  name: "conv5"
  type: "Convolution"
  bottom: "conv4"
  top: "conv5"
  param { # multipliers for the first learnable blob (filter weights, per Caffe docs)
    lr_mult: 1
    decay_mult: 1
  }
  param { # multipliers for the second learnable blob (bias): 2x learning rate, no decay
    lr_mult: 2
    decay_mult: 0
  }
  convolution_param {
    num_output: 256
    pad: 1
    kernel_size: 3
    group: 2# the convolution is split into 2 groups (number of groups, not a group size)
    weight_filler { # Gaussian initialization, std 0.01
      type: "gaussian"
      std: 0.01
    }
    bias_filler { # constant initialization, 0.1
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; bottom and top are the same blob ("conv5").
layer {
  name: "relu5"
  type: "ReLU"
  bottom: "conv5"
  top: "conv5"
}
# Pooling layer: conv5 -> pool5.
layer {
  name: "pool5"
  type: "Pooling"
  bottom: "conv5"
  top: "pool5"
  pooling_param { # max pooling with a 3x3 (square) kernel and stride 2
    pool: MAX
    kernel_size: 3
    stride: 2
  }
}
# Fully connected (inner product) layer: pool5 -> fc6, 4096 outputs.
layer {
  name: "fc6"
  type: "InnerProduct"
  bottom: "pool5"
  top: "fc6"
  param { # multipliers for the first learnable blob (weights, per Caffe docs)
    lr_mult: 1
    decay_mult: 1
  }
  param { # multipliers for the second learnable blob (bias): 2x learning rate, no decay
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler { # Gaussian initialization, std 0.005
      type: "gaussian"
      std: 0.005
    }
    bias_filler { # constant initialization, 0.1
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; bottom and top are the same blob ("fc6").
layer {
  name: "relu6"
  type: "ReLU"
  bottom: "fc6"
  top: "fc6"
}
# Dropout layer; bottom and top are the same blob ("fc6").
layer {
  name: "drop6"
  type: "Dropout"
  bottom: "fc6"
  top: "fc6"
  dropout_param {
    dropout_ratio: 0.5 # dropout ratio
  }
}
# Fully connected (inner product) layer: fc6 -> fc7, 4096 outputs.
layer {
  name: "fc7"
  type: "InnerProduct"
  bottom: "fc6"
  top: "fc7"
  param { # multipliers for the first learnable blob (weights, per Caffe docs)
    lr_mult: 1
    decay_mult: 1
  }
  param { # multipliers for the second learnable blob (bias): 2x learning rate, no decay
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 4096
    weight_filler { # Gaussian initialization, std 0.005
      type: "gaussian"
      std: 0.005
    }
    bias_filler { # constant initialization, 0.1
      type: "constant"
      value: 0.1
    }
  }
}
# ReLU layer; bottom and top are the same blob ("fc7").
layer {
  name: "relu7"
  type: "ReLU"
  bottom: "fc7"
  top: "fc7"
}
# Dropout layer; bottom and top are the same blob ("fc7").
layer {
  name: "drop7"
  type: "Dropout"
  bottom: "fc7"
  top: "fc7"
  dropout_param {
    dropout_ratio: 0.5 # dropout ratio
  }
}
# Final fully connected (inner product) layer: fc7 -> fc8, 1000 outputs.
layer {
  name: "fc8"
  type: "InnerProduct"
  bottom: "fc7"
  top: "fc8"
  param { # multipliers for the first learnable blob (weights, per Caffe docs)
    lr_mult: 1
    decay_mult: 1
  }
  param { # multipliers for the second learnable blob (bias): 2x learning rate, no decay
    lr_mult: 2
    decay_mult: 0
  }
  inner_product_param {
    num_output: 1000
    weight_filler { # Gaussian initialization, std 0.01
      type: "gaussian"
      std: 0.01
    }
    bias_filler { # constant initialization, 0
      type: "constant"
      value: 0
    }
  }
}
# Accuracy layer: takes the fc8 output and the labels, produces "accuracy".
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "fc8"
  bottom: "label"
  top: "accuracy"
  include {# this layer is only included in the TEST phase
    phase: TEST
  }
}
# Loss layer: takes the fc8 output and the labels, produces "loss".
# It has no `include` rule, so it is not restricted to a single phase.
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "fc8"
  bottom: "label"
  top: "loss"
}

(1)数据输入层预处理的参数transform_param的定义:


// Message that stores parameters used to apply transformation
// to the data layer's data
message TransformationParameter {
  // For data pre-processing, we can do simple scaling and subtracting the
  // data mean, if provided. Note that the mean subtraction is always carried
  // out before scaling.
  // Scale factor applied to pixel values: pixel = scale * pixel.
  optional float scale = 1 [default = 1];
  // Specify if we want to randomly mirror data.
  optional bool mirror = 2 [default = false];
  // Specify if we would like to randomly crop an image.
  // Size of the random crop.
  optional uint32 crop_size = 3 [default = 0];
  // mean_file and mean_value cannot be specified at the same time
  // Path of the mean file.
  optional string mean_file = 4;
  // if specified can be repeated once (would subtract it from all the channels)
  // or can be repeated the same number of times as channels
  // (would subtract them from the corresponding channel)
  // Mean value(s) to use instead of a mean file.
  repeated float mean_value = 5;
  // Force the decoded image to have 3 color channels.
  // (i.e. treat the data as color images)
  optional bool force_color = 6 [default = false];
  // Force the decoded image to have 1 color channels.
  // (i.e. treat the data as grayscale images)
  optional bool force_gray = 7 [default = false];
}

(2)数据输入层中数据源的参数data_param 定义

// Parameters describing the data source of a data layer (`data_param`).
message DataParameter {
  enum DB { // database type: LEVELDB or LMDB
    LEVELDB = 0;
    LMDB = 1;
  }
  // Specify the data source.
  // Path of the database.
  optional string source = 1;
  // Specify the batch size.
  optional uint32 batch_size = 4;
  // The rand_skip variable is for the data layer to skip a few data points
  // to avoid all asynchronous sgd clients to start at the same point. The skip
  // point would be set as rand_skip * rand(0,1). Note that rand_skip should not
  // be larger than the number of keys in the database.
  // DEPRECATED. Each solver accesses a different subset of the database.
  // NOTE(review): according to the article author, the implementation draws a
  // random number in [0, rand_skip-1] and skips that many records — confirm
  // against the data layer source.
  optional uint32 rand_skip = 7 [default = 0];
  // Which database backend the source uses.
  optional DB backend = 8 [default = LEVELDB];
  // DEPRECATED. See TransformationParameter. For data pre-processing, we can do
  // simple scaling and subtracting the data mean, if provided. Note that the
  // mean subtraction is always carried out before scaling.
  // These fields are obsolete; set them in TransformationParameter instead.
  optional float scale = 2 [default = 1];
  optional string mean_file = 3;
  // DEPRECATED. See TransformationParameter. Specify if we would like to randomly
  // crop an image.
  optional uint32 crop_size = 5 [default = 0];
  // DEPRECATED. See TransformationParameter. Specify if we want to randomly mirror
  // data.
  optional bool mirror = 6 [default = false];
  // Force the encoded image to have 3 color channels
  // (i.e. treat the stored images as color images)
  optional bool force_encoded_color = 9 [default = false];
  // Prefetch queue (Number of batches to prefetch to host memory, increase if
  // data access bandwidth varies).
  optional uint32 prefetch = 10 [default = 4];
}


(3)卷积层中有关于学习的参数

首先就是那个卷积层的param,实际上就是在LayerParameter中进行定义的,也就是说每个层都有这个参数
这是一个通用的参数。它为层中每个可学习的参数(例如权重、偏置)分别指定学习率倍数和权重衰减倍数;损失权重则由LayerParameter中的loss_weight字段单独设置。
// LayerParameter next available layer-specific ID: 139 (last added: tile_param)
message LayerParameter {
  optional string name = 1; // the layer name
  optional string type = 2; // the layer type
  repeated string bottom = 3; // the name of each bottom blob
  repeated string top = 4; // the name of each top blob

  // The train / test phase for computation.
  optional Phase phase = 10;

  // The amount of weight to assign each top blob in the objective.
  // Each layer assigns a default value, usually of either 0 or 1,
  // to each top blob.
  repeated float loss_weight = 5;

  // Specifies training parameters (multipliers on global learning constants,
  // and the name and other settings used for weight sharing).
  repeated ParamSpec param = 6;// 就是这货

这货的详细定义如下:
主要包括名字、维度检查的模式、学习率倍数lr_mult(默认是1,即直接使用全局学习率)、权重衰减倍数decay_mult(默认是1,即直接使用全局权重衰减;设为0才表示不做权重衰减)
// Per-parameter training settings: an optional name used for sharing, the
// dimension-check mode for shared parameters, and multipliers applied to the
// global learning rate and the global weight decay.
message ParamSpec {
  // The names of the parameter blobs -- useful for sharing parameters among
  // layers, but never required otherwise.  To share a parameter between two
  // layers, give it a (non-empty) name.
  optional string name = 1;

  // Whether to require shared weights to have the same shape, or just the same
  // count -- defaults to STRICT if unspecified.
  optional DimCheckMode share_mode = 2;
  enum DimCheckMode {
    // STRICT (default) requires that num, channels, height, width each match.
    STRICT = 0;
    // PERMISSIVE requires only the count (num*channels*height*width) to match.
    PERMISSIVE = 1;
  }

  // The multiplier on the global learning rate for this parameter.
  // With the default of 1.0 the global learning rate is used unchanged.
  optional float lr_mult = 3 [default = 1.0];

  // The multiplier on the global weight decay for this parameter.
  // With the default of 1.0 the global weight decay is used unchanged;
  // a value of 0 turns weight decay off for this parameter.
  optional float decay_mult = 4 [default = 1.0];
}

(4)卷积层中有关于卷积的参数

接下来介绍与卷积相关的参数,即在卷积层定义的convolution_param。
这货的定义是这样的:
// Parameters of a convolution layer (`convolution_param`).
message ConvolutionParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms

  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in all spatial dimensions, or once per spatial dimension.
  // Padding size (an amount, not an on/off flag).
  repeated uint32 pad = 3; // The padding size; defaults to 0
  // Kernel size.
  repeated uint32 kernel_size = 4; // The kernel size
  // Stride.
  repeated uint32 stride = 6; // The stride; defaults to 1

  // For 2D convolution only, the *_h and *_w versions may also be used to
  // specify both spatial dimensions.
  // This allows pad, kernel and stride to differ between height and width.
  optional uint32 pad_h = 9 [default = 0]; // The padding height (2D only)
  optional uint32 pad_w = 10 [default = 0]; // The padding width (2D only)
  optional uint32 kernel_h = 11; // The kernel height (2D only)
  optional uint32 kernel_w = 12; // The kernel width (2D only)
  optional uint32 stride_h = 13; // The stride height (2D only)
  optional uint32 stride_w = 14; // The stride width (2D only)

  // Number of groups g: as the `axis` comment below describes, each filter
  // then covers C/g input channels. This is a group count, not the size of
  // each group.
  optional uint32 group = 5 [default = 1]; // The group size for group conv

  // Initializers (fillers) for the weights and the bias.
  optional FillerParameter weight_filler = 7; // The filler for the weight
  optional FillerParameter bias_filler = 8; // The filler for the bias
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  // Which implementation (engine) performs the convolution: DEFAULT, CAFFE or
  // CUDNN. NOTE(review): this chooses between implementations, it is not a
  // CPU/GPU switch — the device mode is configured elsewhere in Caffe.
  optional Engine engine = 15 [default = DEFAULT];

  // The axis to interpret as "channels" when performing convolution.
  // Preceding dimensions are treated as independent inputs;
  // succeeding dimensions are treated as "spatial".
  // With (N, C, H, W) inputs, and axis == 1 (the default), we perform
  // N independent 2D convolutions, sliding C-channel (or (C/g)-channels, for
  // groups g>1) filters across the spatial axes (H, W) of the input.
  // With (N, C, D, H, W) inputs, and axis == 1, we perform
  // N independent 3D convolutions, sliding (C/g)-channels
  // filters across the spatial axes (D, H, W) of the input.
  // In other words, `axis` is the INDEX of the channel axis, not the number
  // of channels.
  optional int32 axis = 16 [default = 1];

  // Whether to force use of the general ND convolution, even if a specific
  // implementation for blobs of the appropriate number of spatial dimensions
  // is available. (Currently, there is only a 2D-specific convolution
  // implementation; for input blobs with num_axes != 2, this option is
  // ignored and the ND implementation will be used.)
  optional bool force_nd_im2col = 17 [default = false];
}

(5)卷积层中有关于初始化的参数

下面给出卷积层初始化的参数:

// Parameters of a weight/bias initializer ("filler").
message FillerParameter {
  // The filler type.
  optional string type = 1 [default = 'constant'];
  // Used by the constant filler.
  optional float value = 2 [default = 0]; // the value in constant filler
  // Used by the uniform filler: lower and upper bound.
  optional float min = 3 [default = 0]; // the min value in uniform filler
  optional float max = 4 [default = 1]; // the max value in uniform filler
  // Used by the Gaussian filler: mean and standard deviation.
  optional float mean = 5 [default = 0]; // the mean value in Gaussian filler
  optional float std = 6 [default = 1]; // the std value in Gaussian filler
  // The expected number of non-zero output weights for a given input in
  // Gaussian filler -- the default -1 means don't perform sparsification.
  optional int32 sparse = 7 [default = -1];
  // Normalize the filler variance by fan_in, fan_out, or their average.
  // Applies to 'xavier' and 'msra' fillers.
  enum VarianceNorm {
    FAN_IN = 0;
    FAN_OUT = 1;
    AVERAGE = 2;
  }
  optional VarianceNorm variance_norm = 8 [default = FAN_IN];
}

(6)局部归一化层参数lrn_param的定义

(该层实际上证明已经没啥用了,所以就不解释了,一般也不用)
// Message that stores parameters used by LRNLayer
// NOTE(review): the field meanings below follow the Caffe LRN layer
// documentation and are not spelled out in this excerpt — confirm there.
message LRNParameter {
  // Size of the local region that is normalized over.
  optional uint32 local_size = 1 [default = 5];
  // Scaling parameter of the normalization.
  optional float alpha = 2 [default = 1.];
  // Exponent of the normalization.
  optional float beta = 3 [default = 0.75];
  enum NormRegion {
    ACROSS_CHANNELS = 0;
    WITHIN_CHANNEL = 1;
  }
  // Whether the local region spans neighbouring channels or a spatial
  // neighbourhood within one channel.
  optional NormRegion norm_region = 4 [default = ACROSS_CHANNELS];
  // Additive constant of the normalization.
  optional float k = 5 [default = 1.];
}

(7)全连接层

Caffe中也称之为内积层,也有学习相关的参数以及初始化的参数:分别为param和inner_product_param

下面给出inner_product_param的定义,我们看到里面定义了FillerParameter类型的weight_filler和bias_filler
另外还定义了axis,默认为1.
// Parameters of an inner product (fully connected) layer
// (`inner_product_param`).
message InnerProductParameter {
  optional uint32 num_output = 1; // The number of outputs for the layer
  optional bool bias_term = 2 [default = true]; // whether to have bias terms
  optional FillerParameter weight_filler = 3; // The filler for the weight
  optional FillerParameter bias_filler = 4; // The filler for the bias

  // The first axis to be lumped into a single inner product computation;
  // all preceding axes are retained in the output.
  // May be negative to index from the end (e.g., -1 for the last axis).
  optional int32 axis = 5 [default = 1];
}


(8)池化层的参数

pooling_param的定义如下:

// Parameters of a pooling layer (`pooling_param`).
message PoolingParameter {
  enum PoolMethod { // the available pooling methods
    MAX = 0;
    AVE = 1;
    STOCHASTIC = 2;
  }
  optional PoolMethod pool = 1 [default = MAX]; // The pooling method
  // Pad, kernel size, and stride are all given as a single value for equal
  // dimensions in height and width or as Y, X pairs.
  // `pad` gives equal (square) padding; `pad_h` / `pad_w` allow a rectangular
  // one. The same applies to kernel_size and stride.
  optional uint32 pad = 4 [default = 0]; // The padding size (equal in Y, X)
  optional uint32 pad_h = 9 [default = 0]; // The padding height
  optional uint32 pad_w = 10 [default = 0]; // The padding width
  optional uint32 kernel_size = 2; // The kernel size (square)
  optional uint32 kernel_h = 5; // The kernel height
  optional uint32 kernel_w = 6; // The kernel width
  optional uint32 stride = 3 [default = 1]; // The stride (equal in Y, X)
  optional uint32 stride_h = 7; // The stride height
  optional uint32 stride_w = 8; // The stride width
  enum Engine {
    DEFAULT = 0;
    CAFFE = 1;
    CUDNN = 2;
  }
  // Which implementation (engine) performs the pooling.
  optional Engine engine = 11 [default = DEFAULT];
  // If global_pooling then it will pool over the size of the bottom by doing
  // kernel_h = bottom->height and kernel_w = bottom->width
  optional bool global_pooling = 12 [default = false];
}

(9)dropout层的参数

dropout_param的定义如下:
// Parameters of a dropout layer (`dropout_param`); the only field is the
// dropout ratio, which defaults to 0.5.
message DropoutParameter {
  optional float dropout_ratio = 1 [default = 0.5]; // dropout ratio
}
就一个参数,就是丢弃的数据的概率

(10)总结

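各个参数的含义可以参考caffe.proto。碰到不懂的参数,或者考虑使用某些参数的时候,也可以去caffe.proto里查一查你所用的层是否定义了这些参数。
此外:本例中的ReLU层没有设置任何参数(使用默认值);但caffe.proto中其实为ReLU定义了relu_param(ReLUParameter),其中的negative_slope可以用来实现Leaky ReLU。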

你可能感兴趣的:(C++,神经网络,计算机视觉,深度学习,代码阅读)