caffe——net.cpp——init()

上一节讲的是solver的初始化,在其过程中,调用了net.cpp的init函数,下面,来看一下它是
怎么干活的。

// Builds the whole network from in_param: filters layers by their
// include/exclude rules, inserts split layers, creates and sets up each layer
// while wiring its bottom/top blobs, registers the layer parameters, and then
// decides which layers and blobs take part in backward computation.
template <typename Dtype>
void Net<Dtype>::Init(const NetParameter& in_param) {
  // in_param is the NetParameter handed over from solver.cpp.
  CHECK(Caffe::root_solver() || root_net_)
      << "root_net_ needs to be set for all non-root solvers";
  // Set phase from the state.
  phase_ = in_param.state().phase();
  // (In the traced MNIST training run: phase_ = caffe::TRAIN.)
  // Filter layers based on their include/exclude rules and
  // the current NetState.
  NetParameter filtered_param;
  FilterNet(in_param, &filtered_param);
  // FilterNet inspects every layer of in_param: layers whose include/exclude
  // rules are satisfied are copied into filtered_param, the others are left
  // out. Put differently, it removes the excluded layers from in_param and
  // copies the remainder.
  LOG_IF(INFO, Caffe::root_solver())
      << "Initializing net from parameters: " << std::endl
      << filtered_param.DebugString();
  // Create a copy of filtered_param with splits added where necessary.
  NetParameter param;
  InsertSplits(filtered_param, &param);
  // InsertSplits reads the filtered net and writes the resulting net to param.
  // Basically, build all the layers and set up their connections.
  name_ = param.name();
  map<string, int> blob_name_to_idx;
  set<string> available_blobs;
  // Background on the set container:
  // http://blog.csdn.net/wangran51/article/details/8836160
  memory_used_ = 0;
  // For each layer, set up its input and output
  bottom_vecs_.resize(param.layer_size());
  // gdb view of bottom_vecs_ around the resize (traced MNIST run):
  //   before: std::vector of length 0, capacity 0
  //   after:  std::vector of length 9, capacity 9, every element being an
  //           empty std::vector (length 0, capacity 0)
  // Nine entries because the train net in that run has 9 layers.

  top_vecs_.resize(param.layer_size());
  bottom_id_vecs_.resize(param.layer_size());
  param_id_vecs_.resize(param.layer_size());
  top_id_vecs_.resize(param.layer_size());
  bottom_need_backward_.resize(param.layer_size());
  // Identifiers ending in '_' are Net members (see this->top_vecs_ below)
  // that get filled in while Init runs.
  for (int layer_id = 0; layer_id < param.layer_size(); ++layer_id) {
    // Process the layers one by one.
    // For non-root solvers, whether this layer is shared from root_net_.
    bool share_from_root = !Caffe::root_solver()
        && root_net_->layers_[layer_id]->ShareInParallel();
    // Inherit phase from net if unset.
    if (!param.layer(layer_id).has_phase()) {
      param.mutable_layer(layer_id)->set_phase(phase_);
    }
    // Setup layer. (LayerParameter is declared in caffe.proto.)
    const LayerParameter& layer_param = param.layer(layer_id);
    if (layer_param.propagate_down_size() > 0) {
      // propagate_down: Specifies on which bottoms the backpropagation should
      // be skipped. The size must be either 0 or equal to the number of
      // bottoms.
      CHECK_EQ(layer_param.propagate_down_size(),
          layer_param.bottom_size())
          << "propagate_down param must be specified "
          << "either 0 or bottom_size times ";
    }
    if (share_from_root) {
      LOG(INFO) << "Sharing layer " << layer_param.name() << " from root net";
      layers_.push_back(root_net_->layers_[layer_id]);
      layers_[layer_id]->SetShared(true);
    } else {
      layers_.push_back(LayerRegistry<Dtype>::CreateLayer(layer_param));
      // Create the layer from layer_param and append it to layers_
      // (CreateLayer itself is covered in the next post).
    }
    layer_names_.push_back(layer_param.name());
    LOG_IF(INFO, Caffe::root_solver())
        << "Creating Layer " << layer_param.name();
    bool need_backward = false;

    // Figure out this layer's input and output
    // (the layer exists now, so its bottoms/tops are wired up next).
    for (int bottom_id = 0; bottom_id < layer_param.bottom_size();
         ++bottom_id) {
      // See appendix 1 (AppendBottom).
      const int blob_id = AppendBottom(param, layer_id, bottom_id,
                                       &available_blobs, &blob_name_to_idx);
      // If a blob needs backward, this layer should provide it.
      need_backward |= blob_need_backward_[blob_id];
    }
    int num_top = layer_param.top_size();
    for (int top_id = 0; top_id < num_top; ++top_id) {
      // See appendix 2 (AppendTop).
      AppendTop(param, layer_id, top_id, &available_blobs, &blob_name_to_idx);
      // Collect Input layer tops as Net inputs.
      if (layer_param.type() == "Input") {
        const int blob_id = blobs_.size() - 1;
        net_input_blob_indices_.push_back(blob_id);
        net_input_blobs_.push_back(blobs_[blob_id].get());
      }
    }
    // If the layer specifies that AutoTopBlobs() -> true and the LayerParameter
    // specified fewer than the required number (as specified by
    // ExactNumTopBlobs() or MinTopBlobs()), allocate them here.
    // (layers_ is a vector<shared_ptr<Layer<Dtype> > >.)
    Layer<Dtype>* layer = layers_[layer_id].get();
    if (layer->AutoTopBlobs()) {
      const int needed_num_top =
          std::max(layer->MinTopBlobs(), layer->ExactNumTopBlobs());
      for (; num_top < needed_num_top; ++num_top) {
        // Add "anonymous" top blobs -- do not modify available_blobs or
        // blob_name_to_idx as we don't want these blobs to be usable as input
        // to other layers.
        AppendTop(param, layer_id, num_top, NULL, NULL);
      }
    }
    // After this layer is connected, set it up.
    if (share_from_root) {
      // Set up size of top blobs using root_net_
      const vector<Blob<Dtype>*>& base_top = root_net_->top_vecs_[layer_id];
      const vector<Blob<Dtype>*>& this_top = this->top_vecs_[layer_id];
      for (int top_id = 0; top_id < base_top.size(); ++top_id) {
        this_top[top_id]->ReshapeLike(*base_top[top_id]);
        LOG(INFO) << "Created top blob " << top_id << " (shape: "
            << this_top[top_id]->shape_string() <<  ") for shared layer "
            << layer_param.name();
      }
    } else {
      // Layer::SetUp is discussed in the next post.
      layers_[layer_id]->SetUp(bottom_vecs_[layer_id], top_vecs_[layer_id]);
    }
    LOG_IF(INFO, Caffe::root_solver())
        << "Setting up " << layer_names_[layer_id];

    // Update blob_loss_weights_ for every top of this layer.
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      if (blob_loss_weights_.size() <= top_id_vecs_[layer_id][top_id]) {
        // Grow blob_loss_weights_ so that the blob id is a valid index; new
        // entries start at 0.
        blob_loss_weights_.resize(top_id_vecs_[layer_id][top_id] + 1, Dtype(0));
      }
      // layer->loss(top_id) yields the loss weight of that top.
      // NOTE(review): presumably set via SetLossWeights during Layer::SetUp --
      // confirm against layer.hpp.
      blob_loss_weights_[top_id_vecs_[layer_id][top_id]] = layer->loss(top_id);
      LOG_IF(INFO, Caffe::root_solver())
          << "Top shape: " << top_vecs_[layer_id][top_id]->shape_string();
      // e.g. top_vecs_[0][0]->shape_string() = "64 1 28 28 (50176)"
      if (layer->loss(top_id)) {
        LOG_IF(INFO, Caffe::root_solver())
            << " with loss weight " << layer->loss(top_id);
      }
      memory_used_ += top_vecs_[layer_id][top_id]->count();
    }
    LOG_IF(INFO, Caffe::root_solver())
        << "Memory required for data: " << memory_used_ * sizeof(Dtype);
    const int param_size = layer_param.param_size();
    const int num_param_blobs = layers_[layer_id]->blobs().size();
    // param_size is the number of ParamSpec entries in layer_param;
    // num_param_blobs is the number of parameter blobs the layer holds.
    // The check below enforces param_size <= num_param_blobs.
    CHECK_LE(param_size, num_param_blobs)
        << "Too many params specified for layer " << layer_param.name();
    ParamSpec default_param_spec;
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      const ParamSpec* param_spec = (param_id < param_size) ?
          &layer_param.param(param_id) : &default_param_spec;
      // A parameter takes part in backpropagation only if its lr_mult is
      // non-zero.
      const bool param_need_backward = param_spec->lr_mult() != 0;
      // One parameter needing backward is enough to make the whole layer
      // need backward once this loop is done.
      need_backward |= param_need_backward;
      layers_[layer_id]->set_param_propagate_down(param_id,
                                                  param_need_backward);
    }
    for (int param_id = 0; param_id < num_param_blobs; ++param_id) {
      AppendParam(param, layer_id, param_id);  // See appendix 3.
    }
    // Finally, set the backward flag
    layer_need_backward_.push_back(need_backward);
    if (need_backward) {
      for (int top_id = 0; top_id < top_id_vecs_[layer_id].size(); ++top_id) {
        blob_need_backward_[top_id_vecs_[layer_id][top_id]] = true;
      }
    }
  }
  // End of the per-layer loop (its log output is shown in appendix 4).

  // Go through the net backwards to determine which blobs contribute to the
  // loss. We can skip backward computation for blobs that don't contribute
  // to the loss.
  // Also checks if all bottom blobs don't need backward computation (possible
  // because the skip_propagate_down param) and so we can skip bacward
  // computation for the entire layer
  set<string> blobs_under_loss;
  set<string> blobs_skip_backp;
  // Both sets are filled inside the loop below, which visits the layers from
  // the last one down to the first.
  for (int layer_id = layers_.size() - 1; layer_id >= 0; --layer_id) {
    bool layer_contributes_loss = false;
    // True means every top of this layer is in blobs_skip_backp, so the
    // layer needs no backward computation. Note this is the opposite sense
    // of LayerParameter.propagate_down in caffe.proto.
    bool layer_skip_propagate_down = true;
    // Examine every top of the layer.
    for (int top_id = 0; top_id < top_vecs_[layer_id].size(); ++top_id) {
      const string& blob_name = blob_names_[top_id_vecs_[layer_id][top_id]];
      // blobs_under_loss is populated further down, i.e. by the layers that
      // this reverse loop has already visited.
      if (layers_[layer_id]->loss(top_id) ||
          (blobs_under_loss.find(blob_name) != blobs_under_loss.end())) {
        layer_contributes_loss = true;
      }
      if (blobs_skip_backp.find(blob_name) == blobs_skip_backp.end()) {
        layer_skip_propagate_down = false;
      }
      if (layer_contributes_loss && !layer_skip_propagate_down)
        break;
    }
    // If this layer can skip backward computation, also all his bottom blobs
    // don't need backpropagation
    if (layer_need_backward_[layer_id] && layer_skip_propagate_down) {
      layer_need_backward_[layer_id] = false;
      for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
               ++bottom_id) {
        bottom_need_backward_[layer_id][bottom_id] = false;
      }
    }
    if (!layer_contributes_loss) { layer_need_backward_[layer_id] = false; }
    if (Caffe::root_solver()) {
      if (layer_need_backward_[layer_id]) {
        LOG(INFO) << layer_names_[layer_id] << " needs backward computation.";
      } else {
        LOG(INFO) << layer_names_[layer_id]
            << " does not need backward computation.";
      }
    }
    for (int bottom_id = 0; bottom_id < bottom_vecs_[layer_id].size();
         ++bottom_id) {
      if (layer_contributes_loss) {
        // The layer contributes to the loss, so record its bottom blob name
        // in blobs_under_loss.
        const string& blob_name =
            blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        blobs_under_loss.insert(blob_name);
      } else {
        bottom_need_backward_[layer_id][bottom_id] = false;
      }
      if (!bottom_need_backward_[layer_id][bottom_id]) {
        // This bottom needs no backpropagation: record its name in
        // blobs_skip_backp.
        const string& blob_name =
                   blob_names_[bottom_id_vecs_[layer_id][bottom_id]];
        blobs_skip_backp.insert(blob_name);
      }
    }
  }
  // Handle force_backward if needed.
  if (param.force_backward()) {
    for (int layer_id = 0; layer_id < layers_.size(); ++layer_id) {
      layer_need_backward_[layer_id] = true;
      for (int bottom_id = 0;
           bottom_id < bottom_need_backward_[layer_id].size(); ++bottom_id) {
        bottom_need_backward_[layer_id][bottom_id] =
            bottom_need_backward_[layer_id][bottom_id] ||
            layers_[layer_id]->AllowForceBackward(bottom_id);
        blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] =
            blob_need_backward_[bottom_id_vecs_[layer_id][bottom_id]] ||
            bottom_need_backward_[layer_id][bottom_id];
      }
      for (int param_id = 0; param_id < layers_[layer_id]->blobs().size();
           ++param_id) {
        layers_[layer_id]->set_param_propagate_down(param_id, true);
      }
    }
  }
  // In the end, all remaining blobs are considered output blobs.
  for (set<string>::iterator it = available_blobs.begin();
      it != available_blobs.end(); ++it) {
    LOG_IF(INFO, Caffe::root_solver())
        << "This network produces output " << *it;
    net_output_blobs_.push_back(blobs_[blob_name_to_idx[*it]].get());
    net_output_blob_indices_.push_back(blob_name_to_idx[*it]);
  }

  // Map every blob name to its index (blob_names_.size() = 9 in the traced
  // run).
  for (size_t blob_id = 0; blob_id < blob_names_.size(); ++blob_id) {
    blob_names_index_[blob_names_[blob_id]] = blob_id;
  }

  // Map every layer name to its index (layer_names_.size() = 9 in the traced
  // run).
  for (size_t layer_id = 0; layer_id < layer_names_.size(); ++layer_id) {
    layer_names_index_[layer_names_[layer_id]] = layer_id;
  }
  // gdb view after both loops (traced MNIST run):
  //   blob_names_index_  = {["conv1"] = 2, ["conv2"] = 4, ["data"] = 0,
  //                         ["ip1"] = 6, ["ip2"] = 7, ["label"] = 1,
  //                         ["loss"] = 8, ["pool1"] = 3, ["pool2"] = 5}
  //   layer_names_index_ = {["conv1"] = 1, ["conv2"] = 3, ["ip1"] = 5,
  //                         ["ip2"] = 7, ["loss"] = 8, ["mnist"] = 0,
  //                         ["pool1"] = 2, ["pool2"] = 4, ["relu1"] = 6}
  ShareWeights();
  debug_info_ = param.debug_info();
  LOG_IF(INFO, Caffe::root_solver()) << "Network initialization done.";
}

// Copies param into *param_filtered, keeping only the layers whose
// include/exclude rules are satisfied by param.state().
//
// A layer without include rules is kept unless one of its exclude rules
// matches; a layer with include rules is kept only if one of them matches.
template <typename Dtype>
void Net<Dtype>::FilterNet(const NetParameter& param,
    NetParameter* param_filtered) {
  NetState net_state(param.state());
  // Start from a full copy so every net-level field carries over, then
  // rebuild the layer list from scratch.
  param_filtered->CopyFrom(param);
  param_filtered->clear_layer();
  const int total_layers = param.layer_size();
  for (int layer_idx = 0; layer_idx < total_layers; ++layer_idx) {
    const LayerParameter& candidate = param.layer(layer_idx);
    const string& candidate_name = candidate.name();
    const int num_includes = candidate.include_size();
    const int num_excludes = candidate.exclude_size();
    CHECK(num_includes == 0 || num_excludes == 0)
          << "Specify either include rules or exclude rules; not both.";
    bool keep_layer;
    if (num_includes == 0) {
      // Included by default; the first matching exclude rule drops it.
      keep_layer = true;
      for (int rule = 0; rule < num_excludes; ++rule) {
        if (StateMeetsRule(net_state, candidate.exclude(rule),
                           candidate_name)) {
          keep_layer = false;
          break;
        }
      }
    } else {
      // Excluded by default; the first matching include rule keeps it.
      keep_layer = false;
      for (int rule = 0; rule < num_includes; ++rule) {
        if (StateMeetsRule(net_state, candidate.include(rule),
                           candidate_name)) {
          keep_layer = true;
          break;
        }
      }
    }
    if (keep_layer) {
      param_filtered->add_layer()->CopyFrom(candidate);
    }
  }
}

// Returns true iff `state` satisfies every constraint in `rule`:
//   - phase:      rule.phase(), when set, must equal state.phase();
//   - min_level:  state.level() must not be below rule.min_level(), when set;
//   - max_level:  state.level() must not be above rule.max_level(), when set;
//   - stage:      state must contain ALL of the rule's stages;
//   - not_stage:  state must contain NONE of the rule's not_stages.
// On the first violated constraint the reason is logged (root solver only)
// and false is returned. `layer_name` is used only in those log messages.
template <typename Dtype>
bool Net<Dtype>::StateMeetsRule(const NetState& state,
    const NetStateRule& rule, const string& layer_name) {
  // Check whether the rule is broken due to phase.
  if (rule.has_phase()) {
    if (rule.phase() != state.phase()) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState phase (" << state.phase()
          << ") differed from the phase (" << rule.phase()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to min level.
  if (rule.has_min_level()) {
    if (state.level() < rule.min_level()) {
      // The level is too LOW here, so the message says "below" (the
      // previous wording said "above", contradicting the condition).
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState level (" << state.level()
          << ") is below the min_level (" << rule.min_level()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to max level.
  if (rule.has_max_level()) {
    if (state.level() > rule.max_level()) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState level (" << state.level()
          << ") is above the max_level (" << rule.max_level()
          << ") specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to stage. The NetState must
  // contain ALL of the rule's stages to meet it.
  for (int i = 0; i < rule.stage_size(); ++i) {
    // Check that the NetState contains the rule's ith stage.
    bool has_stage = false;
    for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
      if (rule.stage(i) == state.stage(j)) { has_stage = true; }
    }
    if (!has_stage) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState did not contain stage '" << rule.stage(i)
          << "' specified by a rule in layer " << layer_name;
      return false;
    }
  }
  // Check whether the rule is broken due to not_stage. The NetState must
  // contain NONE of the rule's not_stages to meet it.
  for (int i = 0; i < rule.not_stage_size(); ++i) {
    // Check whether the NetState contains the rule's ith not_stage.
    bool has_stage = false;
    for (int j = 0; !has_stage && j < state.stage_size(); ++j) {
      if (rule.not_stage(i) == state.stage(j)) { has_stage = true; }
    }
    if (has_stage) {
      LOG_IF(INFO, Caffe::root_solver())
          << "The NetState contained a not_stage '" << rule.not_stage(i)
          << "' specified by a rule in layer " << layer_name;
      return false;
    }
  }
  return true;
}

附1:AppendBottom
// Helper for Net::Init: wires bottom number `bottom_id` of layer `layer_id`
// to the blob that was previously produced under the same name.
//
// Aborts with LOG(FATAL) when the name is not in *available_blobs. Otherwise
// it records the blob pointer and id for the layer, removes the name from
// *available_blobs, records whether this bottom needs backward, and returns
// the blob id.
template <typename Dtype>
int Net<Dtype>::AppendBottom(const NetParameter& param, const int layer_id,
    const int bottom_id, set<string>* available_blobs,
    map<string, int>* blob_name_to_idx) {
  const LayerParameter& spec = param.layer(layer_id);
  const string& bottom_name = spec.bottom(bottom_id);
  if (available_blobs->count(bottom_name) == 0) {
    LOG(FATAL) << "Unknown bottom blob '" << bottom_name << "' (layer '"
               << spec.name() << "', bottom index " << bottom_id << ")";
  }
  const int blob_id = (*blob_name_to_idx)[bottom_name];
  if (Caffe::root_solver()) {
    LOG(INFO) << layer_names_[layer_id] << " <- " << bottom_name;
  }
  // blobs_ holds shared_ptrs; the per-layer vector keeps the raw pointer
  // obtained through get().
  bottom_vecs_[layer_id].push_back(blobs_[blob_id].get());
  bottom_id_vecs_[layer_id].push_back(blob_id);
  available_blobs->erase(bottom_name);
  // An explicit propagate_down entry overrides the blob's own flag:
  // true means this bottom takes part in backpropagation, false skips it.
  bool propagate;
  if (spec.propagate_down_size() > 0) {
    propagate = spec.propagate_down(bottom_id);
  } else {
    propagate = blob_need_backward_[blob_id];
  }
  bottom_need_backward_[layer_id].push_back(propagate);
  return blob_id;
}

附2:AppendTop
// Helper for Net::Init: add a new top blob to the net.
//
// Handles top number `top_id` of layer `layer_id`:
//   - in-place (the top has the same name as the bottom at the same index):
//     the existing blob is reused as the top;
//   - the name already exists in *blob_name_to_idx but is not in-place:
//     fatal error, a blob may only be produced by one source;
//   - otherwise: a new Blob is created and registered in blobs_,
//     blob_names_, blob_need_backward_ and, if given, *blob_name_to_idx.
// When `top_id` is beyond the tops listed in the layer parameter the blob is
// named "(automatic)". `available_blobs` and `blob_name_to_idx` may be NULL,
// in which case they are simply not updated.
template <typename Dtype>
void Net<Dtype>::AppendTop(const NetParameter& param, const int layer_id,
                           const int top_id, set<string>* available_blobs,
                           map<string, int>* blob_name_to_idx) {
  // Read the layer's parameters through a reference; nothing here modifies
  // them, so a heap-allocated deep copy is unnecessary.
  const LayerParameter& layer_param = param.layer(layer_id);
  const string blob_name = (layer_param.top_size() > top_id) ?
      layer_param.top(top_id) : "(automatic)";
  // Check if we are doing in-place computation
  if (blob_name_to_idx && layer_param.bottom_size() > top_id &&
      blob_name == layer_param.bottom(top_id)) {
    // In-place computation
    LOG_IF(INFO, Caffe::root_solver())
        << layer_param.name() << " -> " << blob_name << " (in-place)";
    const int blob_id = (*blob_name_to_idx)[blob_name];
    top_vecs_[layer_id].push_back(blobs_[blob_id].get());
    top_id_vecs_[layer_id].push_back(blob_id);
  } else if (blob_name_to_idx &&
             blob_name_to_idx->find(blob_name) != blob_name_to_idx->end()) {
    // If we are not doing in-place computation but have duplicated blobs,
    // raise an error.
    LOG(FATAL) << "Top blob '" << blob_name
               << "' produced by multiple sources.";
  } else {
    // Normal output.
    if (Caffe::root_solver()) {
      // layer_param.name() is the layer's name; blob_name is the name of
      // the top blob.
      LOG(INFO) << layer_param.name() << " -> " << blob_name;
    }
    shared_ptr<Blob<Dtype> > blob_pointer(new Blob<Dtype>());
    // The new blob's id is its position in blobs_ (a vector of shared_ptrs
    // to Blob, empty before the first top is appended).
    const int blob_id = blobs_.size();
    blobs_.push_back(blob_pointer);
    blob_names_.push_back(blob_name);
    blob_need_backward_.push_back(false);
    // blob_name_to_idx is a local of Net::Init that links a layer's top
    // blob to later layers' bottoms by name. It gains one entry per "normal
    // output" top, e.g. {["data"] = 0} after the first one; names are unique
    // keys and ids are assigned as 0, 1, 2, ...
    if (blob_name_to_idx) { (*blob_name_to_idx)[blob_name] = blob_id; }
    // top_vecs_ stores the vectors containing the output for each layer;
    // top_id_vecs_ stores the matching blob ids.
    top_id_vecs_[layer_id].push_back(blob_id);
    top_vecs_[layer_id].push_back(blob_pointer.get());
  }
  if (available_blobs) { available_blobs->insert(blob_name); }
}
/* 总结:AppendTop主要干了以下几件事: 1.new了一个Blob对象,并用智能指针管理; 2.将blob的指针、名字等分别压入blobs_、blob_names_等向量; 3.更新map类型的blob_name_to_idx以及set类型的available_blobs; 现在只是一个初始化过程,还没有进行数据的处理,只是在搭框架。 */

附3:AppendParam函数
    // Helper for Net::Init: registers parameter blob `param_id` of layer
    // `layer_id` with the net.
    //
    // Every parameter gets a display name, an entry in params_, a slot in
    // param_id_vecs_[layer_id] and a (layer_id, param_id) pair in
    // param_layer_indices_. After that:
    //   - if the parameter is anonymous, or its name has not been seen yet,
    //     this layer owns it: the owner is recorded as -1 and a new learnable
    //     parameter with its lr/decay multipliers is added;
    //   - otherwise the parameter is shared with the earlier owner: shapes
    //     are checked and the owner's learnable parameter id is reused.
    template <typename Dtype>
    void Net<Dtype>::AppendParam(const NetParameter& param, const int layer_id,
                                 const int param_id) {
      // The ParamSpecs are read from the layer's own LayerParameter.
      const LayerParameter& spec = layers_[layer_id]->layer_param();
      const bool has_spec = spec.param_size() > param_id;
      const string shared_name = has_spec ? spec.param(param_id).name() : "";

      // Display name: the ParamSpec name when non-empty, else the param index.
      if (shared_name.empty()) {
        ostringstream index_as_text;
        index_as_text << param_id;
        param_display_names_.push_back(index_as_text.str());
      } else {
        param_display_names_.push_back(shared_name);
      }

      // Net-wide id of this parameter; assigned to every parameter whether
      // or not it is named or shared.
      const int net_param_id = params_.size();
      params_.push_back(layers_[layer_id]->blobs()[param_id]);
      // Per-layer list of indices into params_.
      param_id_vecs_[layer_id].push_back(net_param_id);
      param_layer_indices_.push_back(make_pair(layer_id, param_id));

      // Use a default ParamSpec when the layer lists fewer specs than blobs.
      ParamSpec fallback_spec;
      const ParamSpec* param_spec =
          has_spec ? &spec.param(param_id) : &fallback_spec;

      // The name is empty whenever the layer has no ParamSpec for this id, so
      // "anonymous or not seen before" covers every owning case.
      const bool owns_blob = shared_name.empty() ||
          param_names_index_.find(shared_name) == param_names_index_.end();
      if (owns_blob) {
        // This layer "owns" this parameter blob -- it is either anonymous
        // (i.e., not given a param_name) or explicitly given a name that we
        // haven't already seen. -1 marks "owned by this layer".
        param_owners_.push_back(-1);
        if (!shared_name.empty()) {
          // Only named parameters (the ones that may be shared later) are
          // entered into the name -> net_param_id map.
          param_names_index_[shared_name] = net_param_id;
        }
        const int learnable_param_id = learnable_params_.size();
        learnable_params_.push_back(params_[net_param_id].get());
        learnable_param_ids_.push_back(learnable_param_id);
        has_params_lr_.push_back(param_spec->has_lr_mult());
        has_params_decay_.push_back(param_spec->has_decay_mult());
        params_lr_.push_back(param_spec->lr_mult());
        params_weight_decay_.push_back(param_spec->decay_mult());
        return;
      }

      // Named param blob with name we've seen before: share params
      const int owner_net_param_id = param_names_index_[shared_name];
      param_owners_.push_back(owner_net_param_id);
      const pair<int, int>& owner_index =
          param_layer_indices_[owner_net_param_id];
      const int owner_layer_id = owner_index.first;
      const int owner_param_id = owner_index.second;
      if (Caffe::root_solver()) {
        LOG(INFO) << "Sharing parameters '" << shared_name
            << "' owned by "
            << "layer '" << layer_names_[owner_layer_id] << "', param "
            << "index " << owner_param_id;
      }
      Blob<Dtype>* this_blob = layers_[layer_id]->blobs()[param_id].get();
      Blob<Dtype>* owner_blob =
          layers_[owner_layer_id]->blobs()[owner_param_id].get();
      const bool permissive = has_spec &&
          (spec.param(param_id).share_mode() ==
           ParamSpec_DimCheckMode_PERMISSIVE);
      if (permissive) {
        // Permissive dimension checking -- only check counts are the same.
        CHECK_EQ(this_blob->count(), owner_blob->count())
            << "Cannot share param '" << shared_name << "' owned by layer '"
            << layer_names_[owner_layer_id] << "' with layer '"
            << layer_names_[layer_id] << "'; count mismatch. Owner layer param "
            << "shape is " << owner_blob->shape_string() << "; sharing layer "
            << "shape is " << this_blob->shape_string();
      } else {
        // Strict dimension checking -- all dims must be the same.
        CHECK(this_blob->shape() == owner_blob->shape())
            << "Cannot share param '" << shared_name << "' owned by layer '"
            << layer_names_[owner_layer_id] << "' with layer '"
            << layer_names_[layer_id] << "'; shape mismatch. Owner layer param "
            << "shape is " << owner_blob->shape_string() << "; sharing layer "
            << "expects shape " << this_blob->shape_string();
      }

      // Reuse the owner's learnable parameter id. Nothing is pushed into
      // learnable_params_ here, so a shared blob is stored there only once.
      const int learnable_param_id = learnable_param_ids_[owner_net_param_id];
      learnable_param_ids_.push_back(learnable_param_id);
      // Multipliers given on the sharing layer must agree with the ones
      // already recorded; if none were recorded yet, they are adopted.
      if (param_spec->has_lr_mult()) {
        if (has_params_lr_[learnable_param_id]) {
          CHECK_EQ(param_spec->lr_mult(), params_lr_[learnable_param_id])
              << "Shared param '" << shared_name << "' has mismatched lr_mult.";
        } else {
          has_params_lr_[learnable_param_id] = true;
          params_lr_[learnable_param_id] = param_spec->lr_mult();
        }
      }
      if (param_spec->has_decay_mult()) {
        if (has_params_decay_[learnable_param_id]) {
          CHECK_EQ(param_spec->decay_mult(),
                   params_weight_decay_[learnable_param_id])
              << "Shared param '" << shared_name << "' has mismatched decay_mult.";
        } else {
          has_params_decay_[learnable_param_id] = true;
          params_weight_decay_[learnable_param_id] = param_spec->decay_mult();
        }
      }
    }
    ps:借鉴的这个网址 http://blog.csdn.net/iamzhangzhuping/article/details/50537240

附4:
I0509 15:23:17.999642  6356 layer_factory.hpp:77] Creating layer mnist
[New Thread 0x7ffff0bc6700 (LWP 6357)]
I0509 15:23:18.007805  6356 net.cpp:91] Creating Layer mnist
I0509 15:23:18.007853  6357 db_lmdb.cpp:38] Opened lmdb examples/mnist/mnist_train_lmdb
I0509 15:23:18.007879  6356 net.cpp:399] mnist -> data
I0509 15:23:18.008003  6356 net.cpp:399] mnist -> label
I0509 15:23:18.008141  6356 data_layer.cpp:41] output data size: 64,1,28,28
I0509 15:23:18.008430  6356 base_data_layer.cpp:69] Initializing prefetch
[New Thread 0x7effebfff700 (LWP 6358)]
I0509 15:23:18.009194  6356 base_data_layer.cpp:72] Prefetch initialized.
I0509 15:23:18.009217  6356 net.cpp:141] Setting up mnist
I0509 15:23:18.009263  6356 net.cpp:148] Top shape: 64 1 28 28 (50176)
I0509 15:23:18.009282  6356 net.cpp:148] Top shape: 64 (64)
I0509 15:23:18.009294  6356 net.cpp:156] Memory required for data: 200960
I0509 15:23:18.009320  6356 layer_factory.hpp:77] Creating layer conv1
I0509 15:23:18.009393  6356 net.cpp:91] Creating Layer conv1
I0509 15:23:18.009428  6356 net.cpp:425] conv1 <- data
I0509 15:23:18.009490  6356 net.cpp:399] conv1 -> conv1
I0509 15:23:18.009726  6356 net.cpp:141] Setting up conv1
I0509 15:23:18.009752  6356 net.cpp:148] Top shape: 64 20 24 24 (737280)
I0509 15:23:18.009764  6356 net.cpp:156] Memory required for data: 3150080
I0509 15:23:18.009879  6356 layer_factory.hpp:77] Creating layer pool1
I0509 15:23:18.009918  6356 net.cpp:91] Creating Layer pool1
I0509 15:23:18.009935  6356 net.cpp:425] pool1 <- conv1
I0509 15:23:18.009965  6356 net.cpp:399] pool1 -> pool1
I0509 15:23:18.010017  6356 net.cpp:141] Setting up pool1
I0509 15:23:18.010040  6356 net.cpp:148] Top shape: 64 20 12 12 (184320)
I0509 15:23:18.010063  6356 net.cpp:156] Memory required for data: 3887360
I0509 15:23:18.010081  6356 layer_factory.hpp:77] Creating layer conv2
I0509 15:23:18.010113  6356 net.cpp:91] Creating Layer conv2
I0509 15:23:18.010128  6356 net.cpp:425] conv2 <- pool1
I0509 15:23:18.010161  6356 net.cpp:399] conv2 -> conv2
I0509 15:23:18.010467  6358 data_layer.cpp:102] Prefetch batch: 1 ms.
I0509 15:23:18.010498  6358 data_layer.cpp:103]      Read time: 0.112 ms.
I0509 15:23:18.010507  6358 data_layer.cpp:104] Transform time: 0.714 ms.
I0509 15:23:18.011415  6358 data_layer.cpp:102] Prefetch batch: 0 ms.
I0509 15:23:18.011430  6358 data_layer.cpp:103]      Read time: 0.076 ms.
I0509 15:23:18.011437  6358 data_layer.cpp:104] Transform time: 0.565 ms.
I0509 15:23:18.011806  6356 net.cpp:141] Setting up conv2
I0509 15:23:18.011836  6356 net.cpp:148] Top shape: 64 50 8 8 (204800)
I0509 15:23:18.011848  6356 net.cpp:156] Memory required for data: 4706560
I0509 15:23:18.011881  6356 layer_factory.hpp:77] Creating layer pool2
I0509 15:23:18.011915  6356 net.cpp:91] Creating Layer pool2
I0509 15:23:18.011934  6356 net.cpp:425] pool2 <- conv2
I0509 15:23:18.011976  6356 net.cpp:399] pool2 -> pool2
I0509 15:23:18.012018  6356 net.cpp:141] Setting up pool2
I0509 15:23:18.012035  6356 net.cpp:148] Top shape: 64 50 4 4 (51200)
I0509 15:23:18.012043  6356 net.cpp:156] Memory required for data: 4911360
I0509 15:23:18.012054  6356 layer_factory.hpp:77] Creating layer ip1
I0509 15:23:18.012079  6356 net.cpp:91] Creating Layer ip1
I0509 15:23:18.012122  6356 net.cpp:425] ip1 <- pool2
I0509 15:23:18.012156  6356 net.cpp:399] ip1 -> ip1
I0509 15:23:18.012449  6358 data_layer.cpp:102] Prefetch batch: 0 ms.
I0509 15:23:18.012465  6358 data_layer.cpp:103]      Read time: 0.099 ms.
I0509 15:23:18.012475  6358 data_layer.cpp:104] Transform time: 0.595 ms.
I0509 15:23:18.035526  6356 net.cpp:141] Setting up ip1
I0509 15:23:18.035575  6356 net.cpp:148] Top shape: 64 500 (32000)
I0509 15:23:18.035583  6356 net.cpp:156] Memory required for data: 5039360
I0509 15:23:18.035614  6356 layer_factory.hpp:77] Creating layer relu1
I0509 15:23:18.035656  6356 net.cpp:91] Creating Layer relu1
I0509 15:23:18.035681  6356 net.cpp:425] relu1 <- ip1
I0509 15:23:18.035698  6356 net.cpp:386] relu1 -> ip1 (in-place)
I0509 15:23:18.035717  6356 net.cpp:141] Setting up relu1
I0509 15:23:18.035727  6356 net.cpp:148] Top shape: 64 500 (32000)
I0509 15:23:18.035732  6356 net.cpp:156] Memory required for data: 5167360
I0509 15:23:18.035739  6356 layer_factory.hpp:77] Creating layer ip2
I0509 15:23:18.035755  6356 net.cpp:91] Creating Layer ip2
I0509 15:23:18.035764  6356 net.cpp:425] ip2 <- ip1
I0509 15:23:18.035806  6356 net.cpp:399] ip2 -> ip2
I0509 15:23:18.036211  6356 net.cpp:141] Setting up ip2
I0509 15:23:18.036257  6356 net.cpp:148] Top shape: 64 10 (640)
I0509 15:23:18.036262  6356 net.cpp:156] Memory required for data: 5169920
I0509 15:23:18.036274  6356 layer_factory.hpp:77] Creating layer loss
I0509 15:23:18.036298  6356 net.cpp:91] Creating Layer loss
I0509 15:23:18.036308  6356 net.cpp:425] loss <- ip2
I0509 15:23:18.036320  6356 net.cpp:425] loss <- label
I0509 15:23:18.036336  6356 net.cpp:399] loss -> loss
I0509 15:23:18.036363  6356 layer_factory.hpp:77] Creating layer loss
I0509 15:23:18.036408  6356 net.cpp:141] Setting up loss
I0509 15:23:18.036420  6356 net.cpp:148] Top shape: (1)
I0509 15:23:18.036427  6356 net.cpp:151]     with loss weight 1
I0509 15:23:18.036437  6356 net.cpp:156] Memory required for data: 5169924

你可能感兴趣的:(caffe)