pthread多线程加速示例(大型矩阵乘法)

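#include <pthread.h>    // NOTE(review): header name was lost in extraction; reconstructed from the pthread_create/pthread_join calls below
#include <cmnheader.h>  // NOTE(review): header name was lost in extraction; reconstructed from the cmnheader.h note after the listing - confirm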
#include "MatrixLib.h"
#pragma comment(lib,"MatrixLib.lib")
#pragma warning(disable:4996)

// Aborts the program when a call reported failure.
//   str   - description of the call that was checked; written to the log as-is.
//           Const-qualified because every caller passes a string literal.
//   value - return code of the checked call; 0 means success.
//   pflog - open log stream that receives the failure message.
// Returns normally only when value is 0; otherwise logs and calls exit(1).
void  checkResult(const char* str, int value, FILE* pflog)
{
	if (value == 0)
	{
		return;
	}
	fprintf(pflog, "Failed with %d at %s", value, str);
	exit(1);
}

/* Work description handed to one worker thread (see oneThread). */
typedef struct
{
	FILE *pflog;     /* log stream the worker writes its start-up line to */
	double **R;      /* result matrix, written as R[i][j] */
	double **A;      /* left operand, read as A[i][k] */
	double **B;      /* right operand, read as B[j][k] */
	int start_row;   /* first row of R this worker computes (inclusive) */
	int end_row;     /* one past the last row this worker computes (exclusive) */
	int num_col;     /* bound of both the column loop and the inner product loop */
} threadParm_t;

// Worker thread entry point.
//   param - pointer to a threadParm_t describing the slice of work.
// For every row i in [start_row, end_row) and every column j in [0, num_col)
// it stores sum over k of A[i][k] * B[j][k] into R[i][j]. B is indexed
// row-by-row, so both operands are walked along contiguous rows.
// Always returns NULL.
void *oneThread(void *param)
{
	threadParm_t *p = (threadParm_t *)param;
	// The format string has two %.8X fields, so exactly two unsigned int
	// arguments must be supplied (the original passed only one, which is
	// undefined behaviour). The second value is the address of this worker's
	// parameter block, which is distinct per thread and serves as its tag.
	fprintf(p->pflog, "# Thread  \'%.8X %.8X\'  is now running.\n",
		(unsigned int)getpid(), (unsigned int)(size_t)p);
	double** R = p->R;
	double** A = p->A;
	double** B = p->B;
	const int start_row = p->start_row;
	const int end_row = p->end_row;
	const int num_col = p->num_col;
	for (int i = start_row; i < end_row; ++i)
	{
		// Fetch the row pointers once per row instead of once per element.
		const double* a_row = A[i];
		double* r_row = R[i];
		for (int j = 0; j < num_col; ++j)
		{
			const double* b_row = B[j];
			double tmp = 0;
			for (int k = 0; k < num_col; ++k)
			{
				tmp += a_row[k] * b_row[k];
			}
			r_row[j] = tmp;
		}
	}

	return NULL;
}

void OneTry(const int N, const int C,FILE* pflog)
{
	fprintf(pflog,"== %4d * %4d Matrix Multiply, %d Threads. ==\n", N, N, C);
	clock_t start = clock();
	double** X = NewSquareMatrix(N);
	double** Y = NewSquareMatrix(N);
	double** Z = NewSquareMatrix(N);
	TransformSquareMat(Z, N); // 转置一次

	int start_row = 0, end_row = 0;
	int  inc_row = N / C;
	end_row = start_row + inc_row;
	int i, rc;
	pthread_t* threads = new pthread_t[C];
	threadParm_t* tparams = new threadParm_t[C];
	for (i = 0; i < C; ++i)
	{
		tparams[i].pflog = pflog;
		tparams[i].R = X;
		tparams[i].A = Y;
		tparams[i].B = Z;
		tparams[i].num_col = N;
		tparams[i].start_row = start_row;
		tparams[i].end_row = end_row;
		start_row = end_row + 1;
		end_row += inc_row;
		rc = pthread_create(&threads[i], NULL, oneThread, & tparams[i]);
		checkResult("!! pthread_create()\n", rc,pflog);
		fprintf(pflog,"**********  %2d of %2d threads created  **********\n", i + 1,C);
	}
	fprintf(pflog,"@ Waiting for worker threads' end...\n");
	int* status = new int[C];
	for (i = 0; i < C; ++i)
	{
		rc = pthread_join(threads[i], (void**)(&status[i]));
		checkResult("!! pthread_join()\n", rc,pflog);
	}
	fprintf(pflog,"@ Check all thread's results\n");
	for (i = 0; i < C; ++i)
	{
		if (status[i] != NULL)
		{
			fprintf(pflog,"!! Unexpected thread status\n");
		}
	}
	//TransformSquareMat(Z, N); // 恢复
	SafeDeleteSquareMat(X, N);
	SafeDeleteSquareMat(Y, N);
	SafeDeleteSquareMat(Z, N);
	clock_t finish = clock();
	fprintf(pflog,"@ All finished. Total time:%.8f(sec).\n\n",(finish-start)/(1.0*CLOCKS_PER_SEC));
}

// Program entry point: appends a timestamped header to trace_log.txt, runs
// one 4096 x 4096 multiplication on 32 threads and waits for a key press.
// Returns 0 on success, 1 when the log file cannot be opened.
int main(int argc, char **argv)
{
	(void)argc;
	(void)argv;
	FILE* pflog = fopen("trace_log.txt", "a");
	if (pflog == NULL)
	{
		// Every later step writes to the log, so there is nothing to do
		// without it.
		perror("trace_log.txt");
		return 1;
	}
	const int N = 4096, C = 32;
	printf("Matrix N=%d,Thread C=%d, now running...", N, C);
	// The message has no newline; flush so it is visible during the long run.
	fflush(stdout);
	time_t rawtime;
	time(&rawtime);
	struct tm* tminfo = localtime(&rawtime);
	if (tminfo != NULL)
	{
		// asctime() output already ends with a newline.
		fprintf(pflog, "NEW LOG @%s", asctime(tminfo));
	}
	else
	{
		fprintf(pflog, "NEW LOG @(time unavailable)\n");
	}
	// Use the same constants that were just announced on the console.
	OneTry(N, C, pflog);
	fflush(pflog);
	fclose(pflog);
	printf("finshed!\n");
	system("pause");
	return 0;
}


cmnheader.h同之前有关pthread的文章
MatrixLib.dll是自己写的,代码如下
MatrixLib.h文件内容

/*
* 矩阵操作的定义
* 用于导出DLL
*/
#ifndef MATRIX_LIB_H
#define MATRIX_LIB_H

// MAPI decorates every public function of the library.
// Define DLLEXPORT before including this header when building the DLL;
// leave it undefined when consuming the DLL.
#ifdef __cplusplus
// C++ build: extern "C" keeps the symbol names compatible with C.
#ifdef DLLEXPORT
#define MAPI extern "C" __declspec (dllexport)
// Without DLLEXPORT the symbols are imported.
#else  /* DLLEXPORT */
#define MAPI extern "C" __declspec (dllimport)
#endif /* DLLEXPORT  */
#else /* __cplusplus     */
// C build: same DLLEXPORT switch as the C++ branch. The original tested
// DLLIMPORT here, which selected dllexport for importers.
#ifdef DLLEXPORT
#define MAPI  __declspec (dllexport)
#else /* DLLEXPORT */
#define MAPI  __declspec (dllimport)
#endif /* DLLEXPORT */
#endif /* __cplusplus */

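#include <stdio.h>   // FILE and stdout are used by the declarations below
#include <stdlib.h>  // NOTE(review): header name was lost in extraction; reconstructed from the exit()/system() calls in the sample - confirm
#include <time.h>    // NOTE(review): header name was lost in extraction; reconstructed from the clock()/time() calls in the sample - confirm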

// Allocates an n x n matrix as an array of row pointers and returns it.
MAPI double** NewSquareMatrix(const int n);
// Transposes the n x n matrix mat.
MAPI void TransformSquareMat(double** mat,const int n);
// NOTE(review): the meaning of the double returned by the next four
// functions is not visible from this header - confirm against the
// implementation before relying on it.
MAPI double  SafeDeleteSquareMat(double** mat, const int n);
MAPI double  SquareMatMultiply(double** R, double** A, double** B, const int n);
MAPI double  SquareMatMultiplyTrans(double** R,double** A,double** B,const int n);
MAPI double  RndInitSquareMat(double **mat, const int n);
// Default arguments exist only in C++; C callers must pass the stream
// explicitly, so the defaults are compiled for C++ only.
#ifdef __cplusplus
MAPI void TraceLogInfo(char* strInfo, FILE* pflog = stdout);
MAPI void TraceLogDuration(double durationTime, FILE* pflog = stdout);
#else /* __cplusplus */
MAPI void TraceLogInfo(char* strInfo, FILE* pflog);
MAPI void TraceLogDuration(double durationTime, FILE* pflog);
#endif /* __cplusplus */

#endif /* MATRIX_LIB_H */


MatrixLib.cpp文件内容

// 启用DLLEXPORT宏定义,声明将要导出DLL
#define DLLEXPORT
#include "MatrixLib.h"

// Allocates an n x n matrix and returns its base address.
// The matrix is an array of n row pointers, each pointing to a separately
// allocated array of n doubles. The elements are left uninitialized.
MAPI double **NewSquareMatrix(const int n)
{
	double** rows = new double*[n];
	double** const rows_end = rows + n;
	for (double** row = rows; row != rows_end; ++row)
	{
		*row = new double[n];
	}
	return rows;
}

// 方阵的转置
MAPI void TransformSquareMat(double** mat,const int n)
{
    double tmp;
	for(int i=0;i


日志片段

NEW LOG @Fri Apr 18 16:42:44 2014
== 4096 * 4096 Matrix Multiply, 32 Threads. ==
**********   1 of 32 threads created  **********
**********   2 of 32 threads created  **********
**********   3 of 32 threads created  **********
**********   4 of 32 threads created  **********
# Thread  '00000C48 00B728A0'  is now running.
# Thread  '00000C48 00B72B80'  is now running.
**********   5 of 32 threads created  **********
# Thread  '00000C48 00B72E60'  is now running.
# Thread  '00000C48 00B73140'  is now running.
**********   6 of 32 threads created  **********
**********   7 of 32 threads created  **********
**********   8 of 32 threads created  **********
**********   9 of 32 threads created  **********
**********  10 of 32 threads created  **********
**********  11 of 32 threads created  **********
**********  12 of 32 threads created  **********
**********  13 of 32 threads created  **********
**********  14 of 32 threads created  **********
**********  15 of 32 threads created  **********
**********  16 of 32 threads created  **********
**********  17 of 32 threads created  **********
# Thread  '00000C48 00B73640'  is now running.
**********  18 of 32 threads created  **********
**********  19 of 32 threads created  **********
**********  20 of 32 threads created  **********
**********  21 of 32 threads created  **********
**********  22 of 32 threads created  **********
**********  23 of 32 threads created  **********
**********  24 of 32 threads created  **********
**********  25 of 32 threads created  **********
**********  26 of 32 threads created  **********
**********  27 of 32 threads created  **********
**********  28 of 32 threads created  **********
**********  29 of 32 threads created  **********
**********  30 of 32 threads created  **********
**********  31 of 32 threads created  **********
**********  32 of 32 threads created  **********
@ Waiting for worker threads' end...
# Thread  '00000C48 00B73920'  is now running.
# Thread  '00000C48 00B731E8'  is now running.
# Thread  '00000C48 00B73290'  is now running.
# Thread  '00000C48 00B73338'  is now running.
# Thread  '00000C48 00B74248'  is now running.
# Thread  '00000C48 00B747F0'  is now running.
# Thread  '00000C48 00B74AD0'  is now running.
# Thread  '00000C48 00B74DB0'  is now running.
# Thread  '00000C48 00B78AB0'  is now running.
# Thread  '00000C48 00B7B8A8'  is now running.
# Thread  '00000C48 00B7BAA0'  is now running.
# Thread  '00000C48 00B7B950'  is now running.
# Thread  '00000C48 00B704C0'  is now running.
# Thread  '00000C48 00B781E0'  is now running.
# Thread  '00000C48 00B7ACD8'  is now running.
# Thread  '00000C48 00B7AED0'  is now running.
# Thread  '00000C48 00B7AE28'  is now running.
# Thread  '00000C48 00B7B170'  is now running.
# Thread  '00000C48 00B7B800'  is now running.
# Thread  '00000C48 00B7AD80'  is now running.
# Thread  '00000C48 00B7B9F8'  is now running.
# Thread  '00000C48 00B7B218'  is now running.
# Thread  '00000C48 00B7AF78'  is now running.
# Thread  '00000C48 00B7B020'  is now running.
# Thread  '00000C48 00B7B0C8'  is now running.
# Thread  '00000C48 00B7B2C0'  is now running.
# Thread  '00000C48 00B74510'  is now running.
@ Check all thread's results
@ All finished. Total time:44.18600000(sec).


你可能感兴趣的:(Parallel,CUDA,etc.)