First, an introduction to the two BLAS wrapper interfaces this layer relies on, caffe_cpu_gemm and caffe_cpu_gemv:
template<>
void caffe_cpu_gemm<float>(const CBLAS_TRANSPOSE TransA,
    const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
    const float alpha, const float* A, const float* B, const float beta,
    float* C) {
  int lda = (TransA == CblasNoTrans) ? K : M;
  int ldb = (TransB == CblasNoTrans) ? N : K;
  cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
      ldb, beta, C, N);
}
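caffe_cpu_gemm computes C <- alpha * op(A) * op(B) + beta * C, where op() optionally transposes its argument; with CblasRowMajor, the leading dimension of each matrix is simply its stored row width. To make the call pattern concrete, here is a minimal standalone sketch (my own illustration, not Caffe code) that invokes cblas_sgemm the same way the forward pass below does, computing y = x * W' with x of shape 2x3 and W of shape 4x3; it assumes a cblas.h from OpenBLAS or ATLAS and a link flag such as -lopenblas.

#include <cblas.h>
#include <cstdio>

int main() {
  const int M = 2, N = 4, K = 3;            // M_ x K_ input, N_ x K_ weights
  float X[M * K] = {1, 2, 3,
                    4, 5, 6};               // input, row-major 2x3
  float W[N * K] = {1, 0, 0,
                    0, 1, 0,
                    0, 0, 1,
                    1, 1, 1};               // weights, row-major 4x3
  float Y[M * N] = {0};                     // output, 2x4

  // Same call shape as the forward pass: Y = 1.0 * X * W^T + 0.0 * Y.
  // With TransB == CblasTrans, ldb is K (the stored row width of W).
  cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, M, N, K,
              1.0f, X, K, W, K, 0.0f, Y, N);

  for (int i = 0; i < M; ++i) {
    for (int j = 0; j < N; ++j) printf("%g ", Y[i * N + j]);
    printf("\n");
  }
  return 0;  // prints: 1 2 3 6 / 4 5 6 15
}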
template <>
void caffe_cpu_gemv<float>(const CBLAS_TRANSPOSE TransA, const int M,
    const int N, const float alpha, const float* A, const float* x,
    const float beta, float* y) {
  // y = alpha * op(A) * x + beta * y, with A stored row-major as M x N.
  cblas_sgemv(CblasRowMajor, TransA, M, N, alpha, A, N, x, 1, beta, y, 1);
}
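With TransA == CblasTrans the routine computes y <- alpha * A' * x + beta * y, so multiplying by a vector of ones sums the columns of A; this is exactly how the bias gradient in Backward_cpu reduces top_diff over the batch. Another small standalone sketch (my own illustration) under the same cblas.h assumption:

#include <cblas.h>
#include <cstdio>

int main() {
  const int M = 2, N = 3;                   // A is M x N, row-major
  float A[M * N]  = {1, 2, 3,
                     4, 5, 6};              // think of this as top_diff
  float ones[M]   = {1, 1};                 // plays the role of bias_multiplier
  float colsum[N] = {0};                    // receives the column sums

  // colsum = A' * ones, i.e. each output entry is a column sum of A.
  cblas_sgemv(CblasRowMajor, CblasTrans, M, N, 1.0f, A, N, ones, 1,
              0.0f, colsum, 1);

  for (int j = 0; j < N; ++j) printf("%g ", colsum[j]);  // prints: 5 7 9
  printf("\n");
  return 0;
}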
Now for the layer implementation itself:

#include <vector>

#include "caffe/blob.hpp"
#include "caffe/common.hpp"
#include "caffe/filler.hpp"
#include "caffe/layer.hpp"
#include "caffe/util/math_functions.hpp"
#include "caffe/vision_layers.hpp"

namespace caffe {

/* Input blob:  (M_, K_, 1, 1)
   Output blob: (M_, N_, 1, 1)
   Weight matrix W: (N_, K_, 1, 1)
   Bias b:          (N_, 1, 1, 1) */
template <typename Dtype>
void InnerProductLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  const int num_output = this->layer_param_.inner_product_param().num_output();
  bias_term_ = this->layer_param_.inner_product_param().bias_term();
  N_ = num_output;  // number of output neurons (output feature dimension)
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  // Dimensions starting from "axis" are "flattened" into a single
  // length K_ vector. For example, if bottom[0]'s shape is (N, C, H, W),
  // and axis == 1, N inner products with dimension CHW are performed.
  K_ = bottom[0]->count(axis);  // number of input neurons (input feature dimension)
  // Check if we need to set up the weights
  if (this->blobs_.size() > 0) {
    LOG(INFO) << "Skipping parameter initialization";
  } else {
    if (bias_term_) {
      this->blobs_.resize(2);
    } else {
      this->blobs_.resize(1);
    }
    // Initialize the weights
    vector<int> weight_shape(2);
    weight_shape[0] = N_;
    weight_shape[1] = K_;
    this->blobs_[0].reset(new Blob<Dtype>(weight_shape));
    // fill the weights
    shared_ptr<Filler<Dtype> > weight_filler(GetFiller<Dtype>(
        this->layer_param_.inner_product_param().weight_filler()));
    weight_filler->Fill(this->blobs_[0].get());
    // If necessary, initialize and fill the bias term
    if (bias_term_) {
      vector<int> bias_shape(1, N_);
      this->blobs_[1].reset(new Blob<Dtype>(bias_shape));
      shared_ptr<Filler<Dtype> > bias_filler(GetFiller<Dtype>(
          this->layer_param_.inner_product_param().bias_filler()));
      bias_filler->Fill(this->blobs_[1].get());
    }
  }  // parameter initialization
  this->param_propagate_down_.resize(this->blobs_.size(), true);
}
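The M_/K_ bookkeeping above is just products over the shape vector: everything before axis multiplies into the batch size M_, and everything from axis onward is flattened into K_. A tiny standalone illustration (the count helper below is a hypothetical stand-in mirroring what Blob::count does):

#include <cstdio>
#include <vector>

// Product of shape[first, last), mirroring Blob::count(first, last).
static int count(const std::vector<int>& shape, int first, int last) {
  int c = 1;
  for (int i = first; i < last; ++i) c *= shape[i];
  return c;
}

int main() {
  std::vector<int> shape = {64, 3, 28, 28};  // (N, C, H, W)
  const int axis = 1;                        // default inner_product axis
  int M = count(shape, 0, axis);             // M_ = 64 independent samples
  int K = count(shape, axis, static_cast<int>(shape.size()));
  printf("M_ = %d, K_ = %d\n", M, K);        // K_ = 3*28*28 = 2352 inputs each
  return 0;
}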
template <typename Dtype>
void InnerProductLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
      const vector<Blob<Dtype>*>& top) {
  // Figure out the dimensions
  const int axis = bottom[0]->CanonicalAxisIndex(
      this->layer_param_.inner_product_param().axis());
  const int new_K = bottom[0]->count(axis);
  CHECK_EQ(K_, new_K)
      << "Input size incompatible with inner product parameters.";
  // The first "axis" dimensions are independent inner products; the total
  // number of these is M_, the product over these dimensions.
  M_ = bottom[0]->count(0, axis);  // M_ = batch size
  // The top shape will be the bottom shape with the flattened axes dropped,
  // and replaced by a single axis with dimension num_output (N_).
  vector<int> top_shape = bottom[0]->shape();
  top_shape.resize(axis + 1);
  top_shape[axis] = N_;
  top[0]->Reshape(top_shape);
  // Set up the bias multiplier
  if (bias_term_) {
    vector<int> bias_shape(1, M_);
    bias_multiplier_.Reshape(bias_shape);
    caffe_set(M_, Dtype(1), bias_multiplier_.mutable_cpu_data());
  }
}

// Computes y = x * W' + b, where x is the input and y the output.
// x: input, M_ x K_
// y: output, M_ x N_
// W: weights, N_ x K_ (the gradient W_diff has the same N_ x K_ shape)
// b: bias, N_ x 1
template <typename Dtype>
void InnerProductLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    const vector<Blob<Dtype>*>& top) {
  const Dtype* bottom_data = bottom[0]->cpu_data();
  Dtype* top_data = top[0]->mutable_cpu_data();
  const Dtype* weight = this->blobs_[0]->cpu_data();
  // caffe_cpu_gemm computes C <- alpha*A*B + beta*C; the first two arguments
  // control whether A and B are transposed.
  // Here A (bottom_data) is M_ x K_, B (weight') is K_ x N_, and
  // C (top_data) is M_ x N_, so y = x * W' has shape M_ x N_.
  caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, N_, K_, (Dtype)1.,
      bottom_data, weight, (Dtype)0., top_data);
  // y = y + bias_multiplier * b': bias_multiplier is M_ x 1 and b is stored
  // as 1 x N_ (b is logically N_ x 1, but the storage is equivalent), so the
  // product is M_ x N_. This effectively replicates b into an M_ x N_ matrix,
  // like MATLAB's repmat(b, [M_, 1]), and adds it to top_data.
  if (bias_term_) {
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, N_, 1, (Dtype)1.,
        bias_multiplier_.cpu_data(),
        this->blobs_[1]->cpu_data(), (Dtype)1., top_data);
  }
}

template <typename Dtype>
void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  if (this->param_propagate_down_[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    const Dtype* bottom_data = bottom[0]->cpu_data();
    // Gradient with respect to weight:
    // A (top_diff') is N_ x M_, B (bottom_data) is M_ x K_,
    // C (W_diff) is N_ x K_, so W_diff = top_diff' * bottom_data.
    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
  }
  if (bias_term_ && this->param_propagate_down_[1]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bias:
    // top_diff is M_ x N_, bias_multiplier is M_ x 1, b_diff is N_ x 1,
    // so b_diff = top_diff' * bias_multiplier. Note how the gemv call
    // differs from the gemm interface.
    caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
        bias_multiplier_.cpu_data(), (Dtype)0.,
        this->blobs_[1]->mutable_cpu_diff());
  }
  if (propagate_down[0]) {
    const Dtype* top_diff = top[0]->cpu_diff();
    // Gradient with respect to bottom data:
    // A (top_diff) is M_ x N_, B (weight) is N_ x K_,
    // C (bottom_diff) is M_ x K_, so bottom_diff = top_diff * weight.
    caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
        top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
        bottom[0]->mutable_cpu_diff());
  }
}

#ifdef CPU_ONLY
STUB_GPU(InnerProductLayer);
#endif

INSTANTIATE_CLASS(InnerProductLayer);
REGISTER_LAYER_CLASS(InnerProduct);

}  // namespace caffe
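To check the four matrix identities used above end to end, here is a naive loop version of the layer's math on tiny fixed sizes (my own sanity-check sketch, not Caffe code): y = x*W' + b for the forward pass, then W_diff = top_diff' * x, b_diff = top_diff' * ones, and bottom_diff = top_diff * W for the backward pass.

#include <cstdio>

int main() {
  const int M = 2, K = 3, N = 2;           // batch, inputs, outputs
  float x[M][K]  = {{1, 2, 3}, {4, 5, 6}};
  float W[N][K]  = {{1, 0, 1}, {0, 1, 0}};
  float b[N]     = {0.5f, -0.5f};
  float dy[M][N] = {{1, 1}, {1, 1}};       // stand-in upstream gradient

  float y[M][N] = {}, dW[N][K] = {}, db[N] = {}, dx[M][K] = {};

  for (int i = 0; i < M; ++i)              // forward: y = x*W' + b
    for (int j = 0; j < N; ++j) {
      y[i][j] = b[j];
      for (int k = 0; k < K; ++k) y[i][j] += x[i][k] * W[j][k];
    }
  for (int j = 0; j < N; ++j)              // W_diff = dy' * x
    for (int k = 0; k < K; ++k)
      for (int i = 0; i < M; ++i) dW[j][k] += dy[i][j] * x[i][k];
  for (int j = 0; j < N; ++j)              // b_diff = dy' * ones
    for (int i = 0; i < M; ++i) db[j] += dy[i][j];
  for (int i = 0; i < M; ++i)              // bottom_diff = dy * W
    for (int k = 0; k < K; ++k)
      for (int j = 0; j < N; ++j) dx[i][k] += dy[i][j] * W[j][k];

  printf("y[0][0]=%g y[1][1]=%g db[0]=%g dW[0][0]=%g dx[0][0]=%g\n",
         y[0][0], y[1][1], db[0], dW[0][0], dx[0][0]);
  return 0;
}

Every shape matches the gemm/gemv calls in the layer: dW is N_ x K_ like the weight blob, db has length N_, and dx is M_ x K_ like the bottom blob.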