Using BLAS for matrix multiplication
#define min(x,y) (((x) < (y)) ? (x) : (y))
#include <cstdio>
#include <cstdlib>
#include <iostream>
//extern "C"
//{
#include <cblas.h>
//}
using namespace std;

int main()
{
    const enum CBLAS_ORDER Order = CblasRowMajor;
    const enum CBLAS_TRANSPOSE TransA = CblasNoTrans;
    const enum CBLAS_TRANSPOSE TransB = CblasNoTrans;
    const int M = 4;        // rows of A, rows of C
    const int N = 2;        // columns of B, columns of C
    const int K = 3;        // columns of A, rows of B
    const float alpha = 1;
    const float beta = 0;
    const int lda = K;      // leading dimension of A (its column count, row-major)
    const int ldb = N;      // leading dimension of B (its column count, row-major)
    const int ldc = N;      // leading dimension of C (its column count, row-major)
    const float A[M * K] = {1, 2, 3,
                            4, 5, 6,
                            7, 8, 9,
                            8, 7, 6};
    const float B[K * N] = {5, 4,
                            3, 2,
                            1, 0};
    float C[M * N];

    cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta, C, ldc);

    for (int i = 0; i < M; i++)
    {
        for (int j = 0; j < N; j++)
        {
            cout << C[i * N + j] << " ";
        }
        cout << endl;
    }
    return EXIT_SUCCESS;
}
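For reference, with the A (4×3) and B (3×2) above, the row-major product C = A ∗ B printed by the loop is:

14 8
41 26
68 44
67 46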
g++ testblas.c++ -lopenblas -o testout
g++ testblas.c++ -lopenblas_piledriverp-r0.2.9 -o testout (linking against a locally built OpenBLAS)
Note: the library must be placed after the files whose functions reference it on the command line.
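For example (a hypothetical command, assuming GNU ld resolves symbols left to right), listing the library before the source file that uses it may fail with undefined references to cblas_sgemm:

g++ -lopenblas testblas.c++ -o testout    # may fail: library listed before the code that uses it
g++ testblas.c++ -lopenblas -o testout    # works: library listed after the code that uses it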
cblas_sgemm — multiplies two matrices (single precision).

void cblas_sgemm(
    const enum CBLAS_ORDER Order,       // Specifies row-major (C) or column-major (Fortran) data ordering.
                                        // typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
    const enum CBLAS_TRANSPOSE TransA,  // Specifies whether to transpose matrix A.
    const enum CBLAS_TRANSPOSE TransB,  // Specifies whether to transpose matrix B.
    const int M,                        // Number of rows in matrices A and C.
    const int N,                        // Number of columns in matrices B and C.
    const int K,                        // Number of columns in matrix A; number of rows in matrix B.
    const float alpha,                  // Scaling factor for the product of matrices A and B.
    const float *A,
    const int lda,                      // Leading dimension (stride) of A: its column count for row-major data, its row count for column-major data.
    const float *B,
    const int ldb,                      // Leading dimension (stride) of B, defined the same way.
    const float beta,                   // Scaling factor for matrix C.
    float *C,
    const int ldc                       // Leading dimension (stride) of C, defined the same way.
);

About lda, ldb and ldc (the strides): the elements of a matrix (i.e. a 2D array) are stored contiguously in memory, in either row-major or column-major order. The stride is the distance in memory between elements in adjacent rows (if row-major) or in adjacent columns (if column-major), so it is usually equal to the number of columns or rows of the matrix, respectively.

Matrix A = [1 2 3]
           [4 5 6]
Row-major stores the values as {1, 2, 3, 4, 5, 6}; the stride here is 3.
Column-major stores the values as {1, 4, 2, 5, 3, 6}; the stride here is 2.

Matrix B = [1 2 3]
           [4 5 6]
           [7 8 9]
Column-major storage is {1, 4, 7, 2, 5, 8, 3, 6, 9}; the stride here is 3.

Read more: http://www.physicsforums.com

Thus, the routine computes C ← α·op(A)·op(B) + β·C, where op(X) is X or its transpose depending on TransA/TransB.
typedef enum CBLAS_ORDER {CblasRowMajor=101, CblasColMajor=102} CBLAS_ORDER;
typedef enum CBLAS_TRANSPOSE {CblasNoTrans=111, CblasTrans=112, CblasConjTrans=113, CblasConjNoTrans=114} CBLAS_TRANSPOSE;
C = A ∗ B
Cᵀ = (A ∗ B)ᵀ = Bᵀ ∗ Aᵀ. Swapping the order of A and B therefore gives the transposed product directly, with no other changes needed (the result C is also transposed).
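A minimal sketch of this trick, reusing A, B, M, N, K, alpha and beta from the example above (so it would go right after the row-major call there): because a row-major array reinterpreted as column-major is its transpose, calling the column-major routine with B and A swapped yields the same row-major C.

float C2[M * N];
// Column-major call that computes Cᵀ (N x M):
// row-major B read as column-major is Bᵀ (N x K), row-major A read as column-major is Aᵀ (K x M),
// so the product is Bᵀ·Aᵀ = Cᵀ, and column-major Cᵀ with ldc = N is exactly the row-major C.
cblas_sgemm(CblasColMajor, CblasNoTrans, CblasNoTrans,
            N, M, K,
            alpha,
            B, N,
            A, K,
            beta,
            C2, N);
// C2 now holds the same values as the C computed by the row-major call above.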
Y←αAX + βY
cblas_sgemv
Multiplies a matrix by a vector (single precision).
void cblas_sgemv(
    const enum CBLAS_ORDER Order,
    const enum CBLAS_TRANSPOSE TransA,
    const int M,
    const int N,
    const float alpha,
    const float *A,
    const int lda,
    const float *X,
    const int incX,
    const float beta,
    float *Y,
    const int incY
);
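A small sketch of a cblas_sgemv call; the 2×3 matrix and the vectors below are made up for illustration and are not from the original post. With row-major storage, lda is the number of columns of A.

#include <cblas.h>
#include <iostream>

int main()
{
    const int M = 2, N = 3;               // A is M x N, row-major
    const float A[M * N] = {1, 2, 3,
                            4, 5, 6};
    const float x[N] = {1, 1, 1};
    float y[M] = {0, 0};

    // y <- 1.0 * A * x + 0.0 * y
    cblas_sgemv(CblasRowMajor, CblasNoTrans, M, N,
                1.0f, A, N, x, 1, 0.0f, y, 1);

    std::cout << y[0] << " " << y[1] << std::endl;   // prints: 6 15
    return 0;
}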
STL version
cblas_daxpy
Computes a constant times a vector plus a vector (double-precision).
On return, the contents of vector Y are replaced with the result. The value computed is (alpha * X[i]) + Y[i].
#include <cblas.h>
#include <vector>
#include <algorithm>
#include <iostream>

int main()
{
    blasint n = 10;
    blasint in_x = 1;
    blasint in_y = 1;

    std::vector<double> x(n);
    std::vector<double> y(n);

    double alpha = 10;

    std::fill(x.begin(), x.end(), 1.0);
    std::fill(y.begin(), y.end(), 2.0);

    cblas_daxpy(n, alpha, &x[0], in_x, &y[0], in_y);

    // Print y
    for (int j = 0; j < n; j++)
        std::cout << y[j] << "\t";
    std::cout << std::endl;
}
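Every x[i] is 1.0 and every y[i] starts at 2.0, so after the call each y[i] = 10 * 1.0 + 2.0 = 12, and the program prints 12 ten times.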
cublas
cublasStatus_t cublasCreate(cublasHandle_t *handle)
Return Value                     Meaning
CUBLAS_STATUS_SUCCESS            the initialization succeeded
CUBLAS_STATUS_NOT_INITIALIZED    the CUDA Runtime initialization failed
CUBLAS_STATUS_ALLOC_FAILED       the resources could not be allocated
cublasStatus_t cublasDestroy(cublasHandle_t handle)
Return Value                     Meaning
CUBLAS_STATUS_SUCCESS            the shutdown succeeded
CUBLAS_STATUS_NOT_INITIALIZED    the library was not initialized
cublasStatus_t cublasSgemm(
    cublasHandle_t handle,        // the only difference: handle to the cuBLAS library context
    cublasOperation_t transa,
    cublasOperation_t transb,
    int m, int n, int k,
    const float *alpha,
    const float *A, int lda,
    const float *B, int ldb,
    const float *beta,
    float *C, int ldc
)
For comparison, the CBLAS signature again:

void cblas_sgemm(
    const enum CBLAS_ORDER Order,
    const enum CBLAS_TRANSPOSE TransA,
    const enum CBLAS_TRANSPOSE TransB,
    const int M,
    const int N,
    const int K,
    const float alpha,
    const float *A, const int lda,
    const float *B, const int ldb,
    const float beta,
    float *C, const int ldc
);
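A minimal end-to-end sketch of the cuBLAS flow (create the handle, copy data to the device, call cublasSgemm, destroy the handle), reusing M, N, K, A and B from the CBLAS example; this is an illustration, not code from the original post. cuBLAS assumes column-major storage, so the Cᵀ = Bᵀ ∗ Aᵀ trick from above keeps the host arrays row-major. Checking the returned cublasStatus_t / cudaError_t values is omitted for brevity.

#include <cublas_v2.h>
#include <cuda_runtime.h>
#include <iostream>

int main()
{
    const int M = 4, N = 2, K = 3;
    const float A[M * K] = {1, 2, 3,  4, 5, 6,  7, 8, 9,  8, 7, 6};
    const float B[K * N] = {5, 4,  3, 2,  1, 0};
    float C[M * N];
    const float alpha = 1.0f, beta = 0.0f;

    // Device buffers.
    float *dA, *dB, *dC;
    cudaMalloc(&dA, sizeof(A));
    cudaMalloc(&dB, sizeof(B));
    cudaMalloc(&dC, sizeof(C));
    cudaMemcpy(dA, A, sizeof(A), cudaMemcpyHostToDevice);
    cudaMemcpy(dB, B, sizeof(B), cudaMemcpyHostToDevice);

    cublasHandle_t handle;
    cublasCreate(&handle);

    // Column-major call computing Cᵀ (N x M): swap A and B as in the CBLAS trick above.
    cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N,
                N, M, K,
                &alpha,
                dB, N,
                dA, K,
                &beta,
                dC, N);

    cudaMemcpy(C, dC, sizeof(C), cudaMemcpyDeviceToHost);

    cublasDestroy(handle);
    cudaFree(dA);
    cudaFree(dB);
    cudaFree(dC);

    // Same output as the CBLAS example: 14 8 / 41 26 / 68 44 / 67 46.
    for (int i = 0; i < M; i++)
    {
        for (int j = 0; j < N; j++)
            std::cout << C[i * N + j] << " ";
        std::cout << std::endl;
    }
    return 0;
}

Compile with something like (filename assumed): nvcc testcublas.cu -lcublas -o testcublas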