一、矩阵运算
(1)库:cblas
// Single-precision general matrix multiply:
//   C = alpha * op(A) * op(B) + beta * C
// where op(X) is X itself or its transpose, as selected by TransA / TransB.
// NOTE: CBLAS exports the float variant as cblas_sgemm (cblas_dgemm for double);
// cblas_gemm is the generic spelling used in these notes.
void cblas_gemm(
    // Storage order of the matrix buffers: row-major (C) or column-major (Fortran).
    // Example: for the matrix
    //   [1 2 3]
    //   [4 5 6]
    // row-major stores the values as {1, 2, 3, 4, 5, 6} and
    // column-major stores them as {1, 4, 2, 5, 3, 6}.
    // C/C++ callers normally pass CblasRowMajor.
    const enum CBLAS_ORDER Order,
    // Whether to transpose matrix A before multiplying.
    const enum CBLAS_TRANSPOSE TransA,
    // Whether to transpose matrix B before multiplying.
    const enum CBLAS_TRANSPOSE TransB,
    // Number of rows in matrices op(A) and C.
    const int M,
    // Number of columns in matrices op(B) and C.
    const int N,
    // Number of columns in op(A); equal to the number of rows in op(B).
    const int K,
    // Scaling factor for the product of matrices A and B.
    const float alpha,
    // Input matrix A.
    const float *A,
    // Leading dimension (stride) of A: the number of elements between the starts of
    // consecutive rows (row-major) or consecutive columns (column-major) in memory.
    // For a densely packed row-major array A[m][n] this is n, not m.
    const int lda,
    // Input matrix B.
    const float *B,
    // Leading dimension (stride) of B; same convention as lda.
    const int ldb,
    // Scaling factor for matrix C. With beta == 0 the previous contents of C do not
    // contribute to the result.
    const float beta,
    // Input/output matrix C (M x N); overwritten with the result.
    float *C,
    // Leading dimension (stride) of C; same convention as lda.
    const int ldc
);
(2)Caffe 中调用 cblas_gemm(单精度版本为 cblas_sgemm)的例子:
caffe/src/caffe/layers/conv_layer.cpp(Forward_cpu)与 caffe/src/caffe/layers/base_conv_layer.cpp(forward_cpu_gemm)
template
void ConvolutionLayer::Forward_cpu(const vector*>& bottom,const vector*>& top)
{ const Dtype* weight = this->blobs_[0]->cpu_data();
for (int i = 0; i < bottom.size(); ++i)
{
const Dtype* bottom_data = bottom[i]->cpu_data();
Dtype* top_data = top[i]->mutable_cpu_data();
for (int n = 0; n < this->num_; ++n)
{
this->forward_cpu_gemm(bottom_data + n * this->bottom_dim_, weight,
top_data + n * this->top_dim_);//把现在的矩阵和权重矩阵相乘,输出到top_dim,
if (this->bias_term_)
{
const Dtype* bias = this->blobs_[1]->cpu_data();
this->forward_cpu_bias(top_data + n * this->top_dim_, bias);
}
}
}
}
template
void BaseConvolutionLayer::forward_cpu_gemm(const Dtype* input,
const Dtype* weights, Dtype* output, bool skip_im2col)
{
const Dtype* col_buff = input;
if (!is_1x1_) {
if (!skip_im2col)
{
conv_im2col_cpu(input, col_buffer_.mutable_cpu_data());
}
col_buff = col_buffer_.cpu_data();
}
for (int g = 0; g < group_; ++g)
{
caffe_cpu_gemm(CblasNoTrans, CblasNoTrans, conv_out_channels_ /
group_, conv_out_spatial_dim_, kernel_dim_,
(Dtype)1., weights + weight_offset_ * g, col_buff + col_offset_ * g,
(Dtype)0., output + output_offset_ * g);
}
}
caffe/src/caffe/util/math_functions.cpp
template<>
void caffe_cpu_gemm(const CBLAS_TRANSPOSE TransA,
const CBLAS_TRANSPOSE TransB, const int M, const int N, const int K,
const float alpha, const float* A, const float* B, const float beta,
float* C)
{
int lda = (TransA == CblasNoTrans) ? K : M;
int ldb = (TransB == CblasNoTrans) ? N : K;
cblas_sgemm(CblasRowMajor, TransA, TransB, M, N, K, alpha, A, lda, B,
ldb, beta, C, N);//封装模块。
}
二、Caffe 的最小版本:四个变量+两个函数
数据结构Blob:
// Minimal Blob: the data container that layers read from and write to.
struct Blob {
  vector<int> shape_;      // shape of the blob: [num, channel, width, height]
  float* data_ = nullptr;  // values, used during forward propagation
  float* diff_ = nullptr;  // gradients, used during backward propagation
};
层Layer:
// Blob is the data container of these notes; forward-declared here so that this
// snippet stands on its own.
struct Blob;

// Minimal Layer: one forward and one backward function operating on Blobs.
// NOTE(review): the element type of the vectors was lost in the original text;
// vector<Blob*> is a reconstruction modelled on Caffe's vector<Blob<Dtype>*> — confirm.
class Layer {
 public:
  // Forward propagation from the bottom blobs to the top blobs.
  void forward(vector<Blob*>* bottom, vector<Blob*>* top);
  // Backward propagation between the bottom and top blobs.
  void backward(vector<Blob*>* bottom, vector<Blob*>* top);

 protected:
  // Learnable parameters such as weight and bias, usually accessed as index 0 or 1.
  // The original note says they are used by fully connected / convolution layers
  // (presumably; the original wording is ambiguous).
  vector<Blob*> blob_;
};
使用课件定义的简化版的Blob、Layer和CBLAS库,尝试自己编写一个CNN框架,以LeNet的网络结构为例。
#安装json
$ sudo apt-get install libjsoncpp-dev
#安装线性运算库(Armadillo)
#在armadillo-7.500.2目录下
$ cmake .
$ make all
$ sudo make install
#编译MiniNet
#在MiniNet/cpp目录下
$ make all