首先,命名规范:
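There are three levels of BLAS operations: Level 1 (vector operations, e.g. y = αx + y), Level 2 (matrix-vector operations, e.g. y = αAx + βy), and Level 3 (matrix-matrix operations, e.g. C = αAB + βC).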
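Each routine has a name which specifies the operation, the type of matrices involved and their precision. Some of the most common operations and their names are: DOT (scalar product, xᵀy), AXPY (vector sum, αx + y), MV (matrix-vector product, Ax), SV (matrix-vector solve, A⁻¹x), MM (matrix-matrix product, AB), and SM (matrix-matrix solve, A⁻¹B).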
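The types of matrices are: GE (general), GB (general band), SY (symmetric), SB (symmetric band), SP (symmetric packed), HE (Hermitian), HB (Hermitian band), HP (Hermitian packed), TR (triangular), TB (triangular band), and TP (triangular packed).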
Each operation is defined for four precisions: S (single real), D (double real), C (single complex), and Z (double complex).
Thus, for example, the name SGEMM stands for "single-precision general matrix-matrix multiply" and ZGEMM stands for "double-precision complex matrix-matrix multiply".
因此,例如,命名为SGEMM的函数意思为“单精度普通矩阵乘法”,ZGEMM为“双精度复数矩阵乘法”。
更多的,可以参考:http://www.netlib.org/blas/blasqr.pdf
下面这个例子是在csdn论坛上看到的,简单地改一下,可用GotoBlas2调用成功。
程序说明:下面的matrix.c 文件分别调用 C 代码, BLAS Level 1 函数 (ddot), BLAS Level 2 函数(dgemv) 与 BLAS Level 3的函数(DGEMM)完成矩阵计算: Yours_multiply 是 C 源代码,它直接依赖编译器生成优化代码。Ddot_Multiply,Dgemv_multiply使用Gotoblas2调用实现部分矩阵运算。Dgemm_multiply 直接调用GotoBlas2 的矩阵计算函数。
//Simple minded matrix multiply #include <stdio.h> #include <time.h> #include <stdlib.h> #include <common.h> //GotoBlas2 #include <cblas.h> //GotoBlas2 void print_arr(int N, char * name, double* array); void init_arr(int N, double* a); void Dgemm_multiply(double* a,double* b,double* c, int N); void Dgemv_multiply(double* a,double* b,double* c, int N); void Ddot_Multiply(double* a,double* b,double* c, int N); void Yours_multiply(double* a,double* b,double* c, int N); int main(int argc, char* argv[]) { clock_t start, stop; int i, j; int N; double* a; double* b; double* c; if(argc < 2) { printf("Enter matrix size N="); //please enter small number first to ensure that the //multiplication is correct! and then you may enter //a "reasonably" large number say like 500 or even 1000 scanf("%d",&N); } else { N = atoi(argv[1]); } a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Yours_multiply(a,b,c,N); stop = clock(); printf("roll_your_own_multiply(). Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); //DDOT multiply a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Ddot_Multiply(a,b,c,N); stop = clock(); printf("Ddot_Multiply(). 
Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); //DGEMV Multiply //reallcoate to force cash to be flushed a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Dgemv_multiply(a,b,c,N); stop = clock(); printf("Dgemv_multiply(). Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); //DGEMM Multiply //reallocate to force cash to be flushed a=(double*) malloc( sizeof(double)*N*N ); b=(double*) malloc( sizeof(double)*N*N ); c=(double*) malloc( sizeof(double)*N*N ); init_arr(N,a); init_arr(N,b); start = clock(); Dgemm_multiply(a,b,c,N); stop = clock(); printf("Dgemm_multiply(). 
Elapsed time = %g seconds/n", ((double)(stop - start)) / CLOCKS_PER_SEC); //print simple test case of data to be sure multiplication is correct if (N < 7) { print_arr(N,"a", a); print_arr(N,"b", b); print_arr(N,"c", c); } free(a); free(b); free(c); return 0; } //Brute force way of matrix multiply void Yours_multiply(double* a,double* b,double* c, int N) { int i, j, k; for (i=0;i<N*N;i++) c[i]=0; for (i = 0; i < N; i++) { for (j=0; j<N; j++) { for (k=0; k<N; k++) { c[N*i+j] += a[N*i+k] * b[N*k+j]; } } } } //The ddot way to matrix multiply void Ddot_Multiply(double* a,double* b,double* c, int N) { int i, j; int incx = 1; int incy = N; for (i = 0; i < N; i++) { for (j=0; j<N; j++) { c[N*i+j] = cblas_ddot(N,&a[N*i],incx,&b[j],incy); } } } //DGEMV way of matrix multiply void Dgemv_multiply(double* a,double* b,double* c, int N) { int i; double alpha = 1.0, beta = 0.; int incx = 1; int incy = N; for (i = 0; i < N; i++) { cblas_dgemv(CblasRowMajor,CblasNoTrans,N,N,alpha,a,N,&b[i],N,beta,&c[i],N); } } //DGEMM way. The PREFERED way, especially for large matrices void Dgemm_multiply(double* a,double* b,double* c, int N) { int i; double alpha = 1.0, beta = 0.; int incx = 1; int incy = N; cblas_dgemm(CblasRowMajor,CblasNoTrans,CblasNoTrans,N,N,N,alpha,b,N,a,N,beta,c,N); } //initialize array with random data void init_arr(int N, double* a) { int i,j; for (i=0; i< N;i++) { for (j=0; j<N;j++) { a[i*N+j] = (i+j+1)%10; //keep all entries less than 10. pleasing to the eye! } } } //print array to std out void print_arr(int N, char * name, double* array) { int i,j; printf("/n%s/n",name); for (i=0;i<N;i++) { for (j=0;j<N;j++) { printf("%g/t",array[N*i+j]); } printf("/n"); } }