MPI Parallel Computing Learning Notes 5: Matrix Multiplication (Implementing Cannon's Algorithm)

1. Runtime environment: VS2017 + MPI

2. Algorithm principle: see the blog post https://www.cnblogs.com/chihaoyuIsnotHere/p/10553617.html

3. Key MPI technique used: virtual Cartesian process topology
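
The Cartesian topology maps the process ranks onto a 2-D grid, so each process can address its neighbors by grid coordinates instead of hand-computed rank arithmetic; with periodic boundaries, MPI_Cart_shift returns the wrap-around neighbors directly, which is exactly what Cannon's cyclic shifts need. Below is a minimal, self-contained sketch of this mechanism, separate from the full program further down (the file name cart_demo.cpp and the use of MPI_Dims_create are my own choices for the demo):

// cart_demo.cpp -- minimal sketch of a periodic 2-D Cartesian topology
#include <cstdio>
#include "mpi.h"

int main(int argc, char* argv[])
{
	MPI_Init(&argc, &argv);

	int nprocs, rank;
	MPI_Comm_size(MPI_COMM_WORLD, &nprocs);

	int dims[2] = { 0,0 };
	MPI_Dims_create(nprocs, 2, dims);          // e.g. 9 processes -> a 3 x 3 grid

	int periods[2] = { 1,1 };                  // wrap around in both dimensions
	MPI_Comm cart;
	MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &cart);

	MPI_Comm_rank(cart, &rank);
	int coord[2];
	MPI_Cart_coords(cart, rank, 2, coord);

	// neighbors one step away along dimension 1 (within a row);
	// because periods = 1, processes on the grid edge get the wrapped-around neighbor
	int left, right;
	MPI_Cart_shift(cart, 1, 1, &left, &right);

	printf("rank %d at (%d,%d): left=%d right=%d\n", rank, coord[0], coord[1], left, right);

	MPI_Comm_free(&cart);
	MPI_Finalize();
	return 0;
}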

4. Source code: the Matrix.h header is essentially the same as in the third article of this series, with only an operator+ overload added (a minimal sketch is given below).
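
The original post does not reproduce Matrix.h, so the listing below is only a minimal sketch of what it might look like: square matrices stored contiguously in row-major order (which the MPI calls such as &A(0, 0) rely on), with the constructor, ranCreate, operator(), operator+, operator* and operator<< that the program uses. Everything beyond those names is an assumption on my part, not the original header.

// Matrix.h -- minimal sketch (assumed implementation, see the note above)
#pragma once
#include <vector>
#include <cstdlib>
#include <iostream>

class Matrix
{
public:
	explicit Matrix(int n) : n_(n), data_(n * n, 0.0) {}   // zero-initialized

	// contiguous row-major storage, so &m(0, 0) can be handed to MPI directly
	double& operator()(int i, int j) { return data_[i * n_ + j]; }
	double  operator()(int i, int j) const { return data_[i * n_ + j]; }

	// fill with random values in [0, 1)
	void ranCreate()
	{
		for (double &v : data_)
			v = rand() / (RAND_MAX + 1.0);
	}

	Matrix operator+(const Matrix &rhs) const
	{
		Matrix res(n_);
		for (int i = 0; i < n_ * n_; ++i)
			res.data_[i] = data_[i] + rhs.data_[i];
		return res;
	}

	Matrix operator*(const Matrix &rhs) const
	{
		Matrix res(n_);                        // starts from zero
		for (int i = 0; i < n_; ++i)
			for (int k = 0; k < n_; ++k)
				for (int j = 0; j < n_; ++j)
					res(i, j) += (*this)(i, k) * rhs(k, j);
		return res;
	}

	friend std::ostream& operator<<(std::ostream &os, const Matrix &m)
	{
		for (int i = 0; i < m.n_; ++i)
		{
			for (int j = 0; j < m.n_; ++j)
				os << m(i, j) << ' ';
			os << '\n';
		}
		return os;
	}

private:
	int n_;
	std::vector<double> data_;
};

The main program: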

#include <iostream>
#include "Matrix.h"
#include <cmath>
#include "mpi.h"

using namespace std;

const int N = 600;
int myid, numprocs,upRank,downRank,leftRank,rightRank,part,num;
int masterNode;
int coord[2];
double start, finish;
MPI_Comm Cart_Comm_World;

// integer square root, rounded up so that floating-point truncation
// (e.g. sqrt(9) evaluating to 2.999...) cannot lose the exact root
int isqrt(int x)
{
	int ipart = (int)sqrt((double)x);
	if (ipart*ipart != x)
		++ipart;
	return ipart;
}

// Generate the full N x N matrix on the master block (0,0), repack it so that each
// num x num block is contiguous in memory, then scatter one block to every process.
void MatrixScatter(Matrix &A)
{
	double *memoryPool = nullptr;

	if (coord[0] == 0 && coord[1] == 0)
	{
		Matrix tmp(N);
		tmp.ranCreate();
		//cout << tmp << endl;
		memoryPool = new double[N*N];

		// copy block (k,h) into a contiguous run of num*num doubles,
		// in the row-major block order expected by MPI_Scatter
		int count = 0;
		for (int k = 0; k < part; ++k)
			for (int h = 0; h < part; ++h)
				for (int i = k * num; i < (k+1)*num; ++i)
					for (int j = h * num; j < (h+1)*num; ++j)
						memoryPool[count++] = tmp(i, j);
	}
	MPI_Scatter(memoryPool, num*num, MPI_DOUBLE, &A(0, 0), num*num, MPI_DOUBLE, masterNode, Cart_Comm_World);

	if (memoryPool)
		delete[] memoryPool;
}

//initial alignment: block A(i,j) is shifted left by i positions, block B(i,j) is shifted up by j positions
void cannonInit(Matrix &A, Matrix &B)
{
	int x = coord[0], y = coord[1];//row and column index of this block
	MPI_Status status;

	//neighbors x positions away along the row direction (dimension 1)
	MPI_Cart_shift(Cart_Comm_World, 1, x, &leftRank, &rightRank);
	if (leftRank != myid)
		MPI_Sendrecv_replace(&A(0, 0), num*num, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);

	//neighbors y positions away along the column direction (dimension 0)
	MPI_Cart_shift(Cart_Comm_World, 0, y, &upRank, &downRank);
	if (upRank != myid)
		MPI_Sendrecv_replace(&B(0, 0), num*num, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
}

// Main loop of Cannon's algorithm: part multiply-accumulate steps, each followed by
// a cyclic shift of A one block to the left and of B one block upward.
void cannonSolver(Matrix &A, Matrix &B)
{
	MPI_Cart_shift(Cart_Comm_World, 0, 1, &upRank, &downRank);
	MPI_Cart_shift(Cart_Comm_World, 1, 1, &leftRank, &rightRank);
	Matrix C(num);
	MPI_Status status;
	for (int i = 0; i < part; ++i)
	{
		C = C + A * B;
		MPI_Sendrecv_replace(&A(0, 0), num*num, MPI_DOUBLE, leftRank, 0, rightRank, 0, Cart_Comm_World, &status);
		MPI_Sendrecv_replace(&B(0, 0), num*num, MPI_DOUBLE, upRank, 0, downRank, 0, Cart_Comm_World, &status);
	}

	// gather the result blocks on the master and unpack them back into an N x N matrix
	double *memoryPool = nullptr;
	if (myid == masterNode)
		memoryPool = new double[N*N];
	MPI_Gather(&C(0, 0), num*num, MPI_DOUBLE, memoryPool, num*num, MPI_DOUBLE, masterNode, Cart_Comm_World);
	if (myid == masterNode)
	{
		Matrix result(N);
		int count = 0;
		for (int k = 0; k < part; ++k)
			for (int h = 0; h < part; ++h)
				for (int i = k * num; i < (k + 1)*num; ++i)
					for (int j = h * num; j < (h + 1)*num; ++j)
						result(i, j) = memoryPool[count++];
		//cout << result << endl;
		delete[] memoryPool;
	}
}

int main(int argc, char* argv[])
{
	MPI_Init(&argc, &argv);
	
	MPI_Comm_size(MPI_COMM_WORLD, &numprocs);


	// a single process: fall back to the plain serial multiplication as a timing baseline
	if (numprocs == 1)
	{
		Matrix A(N), B(N);
		A.ranCreate();
		B.ranCreate();
		start = MPI_Wtime();//generating the random matrices is very slow, so it is not included in the timing
		Matrix result = A * B;
		//cout << A * B << endl;
		finish = MPI_Wtime();
		cout << finish - start << endl;
		MPI_Finalize();
		
		return 0;
	}

	

	part = isqrt(numprocs);//number of blocks along each dimension; isqrt takes the integer square root to avoid floating-point error
	num = N / part;//dimension of each block

	//create a virtual Cartesian topology (periodic in both dimensions) to simplify communication
	int dims[2] = { part,part };
	int periods[2] = { 1,1 };
	MPI_Cart_create(MPI_COMM_WORLD, 2, dims, periods, 1, &Cart_Comm_World);
	MPI_Comm_rank(Cart_Comm_World, &myid);
	MPI_Cart_coords(Cart_Comm_World, myid, 2, coord);

	//rank of the process holding block (0,0); it acts as the root for scatter and gather
	int position[2] = { 0,0 };
	MPI_Cart_rank(Cart_Comm_World, position, &masterNode);

	Matrix A(num),B(num);
	MatrixScatter(A);
	
	MatrixScatter(B);
	start = MPI_Wtime();
	cannonInit(A, B);
	cannonSolver(A,B);
	finish = MPI_Wtime();
	if(myid == 0)
	   cout << finish - start << endl;

	MPI_Comm_free(&Cart_Comm_World);
	MPI_Finalize();
	return 0;
}
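
To reproduce the measurements below, launch the program through MPI's process launcher; with MS-MPI on Windows this is mpiexec. The executable name here is just a placeholder for whatever the VS2017 project produces, and the process count must be a perfect square (1, 4, 9, ...):

mpiexec -n 9 cannon.exe

With -n 1 the program takes the serial branch and times a plain A * B as the baseline for the speedup figures.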

5. Results (dim = 600; Cannon's algorithm still achieves a good speedup even at this relatively small problem size)

(Figure 1: run-time measurements)

Note: my PC has only 8 compute cores, so the 9-process run is slightly penalized by oversubscription.

 
