// Annotated parameter list of the CBLAS single-precision general matrix
// multiply (the call site later in this file invokes it as cblas_sgemm).
// Per the alpha/beta descriptions below, the routine scales the product of
// A and B by alpha and the existing contents of C by beta.
// NOTE(review): the closing `);` of the declaration is missing from this listing.
// NOTE(review): the "value should be m" wording for lda/ldb/ldc looks like a
// column-major reading; the row-major wrapper caffe_cpu_gemm in this file
// passes the column count (K, N) instead — confirm against the BLAS reference.
void cblas_gemm (
const enum CBLAS_ORDER Order, // Specifies row-major (C) or column-major (Fortran) data ordering.
#Matrix A =
#[1 2 3]
#[4 5 6]
#Row-major stores values as {1,2,3,4,5,6}
#Col-major stores values as {1, 4, 2, 5, 3, 6}
const enum CBLAS_TRANSPOSE TransA,// Specifies whether to transpose matrix A.
const enum CBLAS_TRANSPOSE TransB, // Specifies whether to transpose matrix B.
const int M, // Number of rows in matrices A and C.
const int N,// Number of columns in matrices B and C.
const int K, // Number of columns in matrix A; number of rows in matrix B.
const float alpha, // Scaling factor for the product of matrices A and B.
const float *A, // Matrix A (read-only).
const int lda, // Leading dimension (stride) of matrix A: the size of the first dimension of matrix A; if you are passing a matrix A[m][n], the value should be m.
const float *B, // Matrix B (read-only).
const int ldb, // The size of the first dimension of matrix B; if you are passing a matrix B[m][n], the value should be m.
const float beta, // Scaling factor for matrix C.
float *C, // Matrix C; the only non-const matrix argument, so it receives the result.
const int ldc // The size of the first dimension of matrix C; if you are passing a matrix C[m][n], the value should be m.
(2)Caffe 中使用 cblas_gemm 的例子:
void ConvolutionLayer::Forward_cpu(const vector*>& bottom,const vector*>& top)
{ const Dtype* weight = this->blobs_[0]->cpu_data();
for (int i = 0; i < bottom.size(); ++i)
const Dtype* bottom_data = bottom[i]->cpu_data();
Dtype* top_data = top[i]->mutable_cpu_data();
for (int n = 0; n < this->num_; ++n)
this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
top_data + n * this->top_dim_);//把现在的矩阵和权重矩阵相乘,输出到top_dim,
if (this->bias_term_)
const Dtype* bias = this->blobs_[1]->cpu_data();
this->forward_cpu_bias(top_data + n * this->top_dim_, bias);
void BaseConvolutionLayer::forward_cpu_gemm(const Dtype* input,
const Dtype* weights, Dtype* output, bool skip_im2col)
const Dtype* col_buff = input;
if (!is_1x1_) {
if (!skip_im2col)
conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
col_buff = col_buffer_.cpu_data();
for (int g = 0; g < group_; ++g)
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
group_, conv_out_spatial_dim_, kernel_dim_,
(Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
(Dtype)0., output + output_offset_ * g);
void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const float alpha, const float* A, const float* B, const float beta,
float* C)
int lda = (TransA == CblasNoTrans) ? K : M;
int ldb = (TransB == CblasNoTrans) ? N : K;
cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
ldb, beta, C, N);//封装模块。
二、Caffe 的最小版本:四个变量+两个函数
// Minimal blob: a pair of raw float buffers.
// Both pointers start out null; the struct does not allocate or free them.
struct Blob {
  float* data_ = nullptr;  // Data; used during forward propagation.
  float* diff_ = nullptr;  // Gradient; used during backward propagation.
};
class Layer{
void forward(vector*bottom,vector*top);
Void backward(vector*bottom,vector*top);
$ sudo apt-get install libjsoncpp-dev
#cmake .
#make all
$ sudo make install
$ make all