Caffe Source Code Analysis 3: The InnerProduct Layer

First, a look at the interfaces caffe_cpu_gemm and caffe_cpu_gemv (declared in caffe/util/math_functions.hpp), on which the InnerProduct layer is built.


template<>
void caffe_cpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
    const float alpha, const float* A, const float* B, const float beta,
    float* C) {
  int lda = (TransA == CblasNoTrans) ? K : M;
  int ldb = (TransB == CblasNoTrans) ? N : K;
  cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
      ldb, beta, C, N);
}


Here TransA indicates whether A is transposed and TransB whether B is transposed.
const int M: the number of rows of A (and of C)
const int N: the number of columns of B (and of C)
const int K: the number of columns of A, which equals the number of rows of B
In other words: A is M*K, B is K*N, and C is M*N. If that still is not intuitive, I drew a diagram; once you see it, I am sure you will never forget.
[Figure 1: the shapes of A (M*K), B (K*N), and C (M*N) in caffe_cpu_gemm]

What does it remind you of? Vector addition: taken as a whole, alpha*A*B + beta*C is just one big vector addition (my own way of putting it, so go easy on me).

Function: C(matrix) ← alpha*A*B + beta*C. This function multiplies A by B (after transposing either one, if requested) and scales the resulting matrix by alpha. It then scales the matrix C by beta and stores the sum of these two products in C.
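To make this concrete, here is a minimal, self-contained sketch (plain C++ with a naive triple loop, not Caffe's code; the function name naive_gemm is my own) of what C ← alpha*A*B + beta*C computes in the no-transpose, row-major case:

#include <cstdio>

// Naive GEMM, no-transpose case: C <- alpha*A*B + beta*C,
// with A: M x K, B: K x N, C: M x N, all stored row-major.
void naive_gemm(int M, int N, int K, float alpha, const float* A,
                const float* B, float beta, float* C) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      float acc = 0.f;
      for (int k = 0; k < K; ++k) {
        acc += A[m * K + k] * B[k * N + n];  // dot product of row m of A with column n of B
      }
      C[m * N + n] = alpha * acc + beta * C[m * N + n];
    }
  }
}

int main() {
  const float A[6] = {1, 2, 3, 4, 5, 6};   // 2 x 3
  const float B[6] = {1, 0, 0, 1, 1, 1};   // 3 x 2
  float C[4] = {0, 0, 0, 0};               // 2 x 2
  naive_gemm(2, 2, 3, 1.f, A, B, 0.f, C);
  std::printf("%g %g\n%g %g\n", C[0], C[1], C[2], C[3]);  // prints 4 5 / 10 11
  return 0;
}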


Interface: caffe_cpu_gemv
template <>
void caffe_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
    const int N, const float alpha, const float* A, const float* B,
    const float beta, float* C);

Matrix-vector multiplication. A is M*N; if TransA == CblasNoTrans, then B is N*1 and C is M*1; if TransA == CblasTrans, then B is M*1 and C is N*1.


Function: C(vector) ← alpha*A*B + beta*C. This function multiplies the matrix A (transposed first, if requested) by the vector B and scales the resulting vector by alpha. It then scales the vector C by beta and stores the sum of these two products in C.
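Similarly, a minimal sketch (plain C++, not Caffe's code) of the no-transpose case, reusing the names B and C from the signature above for the input and output vectors:

// Naive GEMV, no-transpose case: C <- alpha*A*B + beta*C,
// with A: M x N (row-major), B: N x 1, C: M x 1.
void naive_gemv(int M, int N, float alpha, const float* A,
                const float* B, float beta, float* C) {
  for (int m = 0; m < M; ++m) {
    float acc = 0.f;
    for (int n = 0; n < N; ++n) {
      acc += A[m * N + n] * B[n];  // dot product of row m of A with B
    }
    C[m] = alpha * acc + beta * C[m];
  }
}

With CblasTrans the roles flip: the dot products run down the columns of A, so an M*1 input B yields an N*1 output C.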

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

/*
Input blob:  (M_, K_, 1, 1)
Output blob: (M_, N_, 1, 1)
Weight blob W: (N_, K_)
Bias blob b:   (N_)
*/

template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();
  N_ = num_output; // number of output neurons (the output feature dimension)
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  K_ = bottom[0]->count(axis); // number of input neurons (the input feature dimension)
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    weight_shape[0] = N_;
    weight_shape[1] = K_;
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  M_ = bottom[0]->count(0, axis); // M_ = batch_size
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());
  }
}
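// A worked example (hypothetical shapes): if bottom[0] has shape (64, 3, 28, 28)
// and axis == 1, then M_ = bottom[0]->count(0, 1) = 64 (the batch size) and
// K_ = bottom[0]->count(1) = 3*28*28 = 2352 (the flattened feature length),
// so top[0] is reshaped to (64, N_). The helper below (a sketch, not part of
// Caffe) mirrors how Blob::count() multiplies shape entries over [begin, end):
static int shape_count(const std::vector<int>& shape, int begin, int end) {
  int count = 1;
  for (int i = begin; i < end; ++i) count *= shape[i];
  return count;  // e.g. shape_count({64, 3, 28, 28}, 1, 4) == 2352
}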

// Computes Y = X*W' + b, where X is the input and Y the output
//  X: input, of dimension M_*K_
//  Y: output, of dimension M_*N_
//  W: weights, of dimension N_*K_; W_diff, the weight gradient, is also N_*K_
//  b: bias, of dimension N_*1

template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  //caffe_cpu_gemm computes C←αA×B+βC; the first two arguments control whether A and B are transposed
  //Here A (bottom_data) is M_xK_, B (weight') is K_xN_, and C (top_data) is M_xN_
  //The result is y = X*W', of dimension M_xN_

  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);
  // Computes y = y + bias_multiplier * b' (bias_multiplier is M_*1; b is N_*1, but its
  // storage is equivalent to 1*N_; top_data is M_*N_)
  // This effectively replicates b into an M_*N_ matrix, like MATLAB's repmat(b, [M_, 1]),
  // and adds it to top_data
  if (bias_term_) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}
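// For reference (a sketch, not part of Caffe): the forward pass above written
// as explicit loops, i.e. top = bottom * W' + 1 * b', with bottom M_ x K_,
// W N_ x K_ (row-major, as in the weight blob), b of length N_, top M_ x N_.
template <typename Dtype>
static void inner_product_forward_reference(const int M, const int N,
    const int K, const Dtype* bottom, const Dtype* W, const Dtype* b,
    Dtype* top) {
  for (int m = 0; m < M; ++m) {
    for (int n = 0; n < N; ++n) {
      Dtype acc = (b != NULL) ? b[n] : Dtype(0);    // bias term, if present
      for (int k = 0; k < K; ++k) {
        acc += bottom[m * K + k] * W[n * K + k];    // W is used transposed
      }
      top[m * N + n] = acc;
    }
  }
}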


template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Gradient with respect to weight  
    //A(top_diff'):N_*M_, B(bottom_data):M_*K_, C(W_diff):N_*K_  
    //W_diff = top_diff' * bottom_data
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias
    // top_diff: M_*N_, bias_multiplier: M_*1, b_diff: N_*1
    // b_diff = top_diff' * bias_multiplier; note how this gemv call differs from the gemm interface
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)0.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data
    // A(top_diff) M_*N_ , B(weight) N_*K_, C(bottom_diff) M_*K_
    // bottom_diff = top_diff * weight
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
        top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
        bottom[0]->mutable_cpu_diff());
  }
}
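// For reference (a sketch, not part of Caffe): the three gradients above as
// explicit loops, matching the three BLAS calls in Backward_cpu:
//   W_diff      = top_diff' * bottom     (N_ x K_)
//   b_diff      = top_diff' * ones(M_)   (N_ x 1)
//   bottom_diff = top_diff  * W          (M_ x K_)
template <typename Dtype>
static void inner_product_backward_reference(const int M, const int N,
    const int K, const Dtype* top_diff, const Dtype* bottom, const Dtype* W,
    Dtype* W_diff, Dtype* b_diff, Dtype* bottom_diff) {
  for (int n = 0; n < N; ++n) {
    b_diff[n] = Dtype(0);
    for (int k = 0; k < K; ++k) W_diff[n * K + k] = Dtype(0);
    for (int m = 0; m < M; ++m) {
      const Dtype g = top_diff[m * N + n];
      b_diff[n] += g;                                // summed over the batch
      for (int k = 0; k < K; ++k)
        W_diff[n * K + k] += g * bottom[m * K + k];  // accumulated outer product
    }
  }
  for (int m = 0; m < M; ++m) {
    for (int k = 0; k < K; ++k) {
      Dtype acc = Dtype(0);
      for (int n = 0; n < N; ++n)
        acc += top_diff[m * N + n] * W[n * K + k];   // row m of top_diff times W
      bottom_diff[m * K + k] = acc;
    }
  }
}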

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

}  // namespace caffe


