MPI其实是十分简单而又强大的并行库。只是这次让我花了半天的工夫才到出了一个一个微小的BUG,让我几乎崩溃。
原程序用于计算两个矩阵相乘。分配任务时,只把第一个矩阵分解传给若干个slave,第二个矩阵全传。虽然效率不高,但我的作业需要是把固定的矩阵大小改为可变的。
源代码:
#include "mpi.h" #include <stdio.h> #include <stdlib.h> #define NRA 62 /* number of rows in matrix A */ #define NCA 15 /* number of columns in matrix A */ #define NCB 7 /* number of columns in matrix B */ #define MASTER 0 /* taskid of first task */ #define FROM_MASTER 1 /* setting a message type */ #define FROM_WORKER 2 /* setting a message type */ int main(argc,argv) int argc; char *argv[]; ...{ int numtasks, /**//* number of tasks in partition */ taskid, /**//* a task identifier */ numworkers, /**//* number of worker tasks */ source, /**//* task id of message source */ dest, /**//* task id of message destination */ mtype, /**//* message type */ rows, /**//* rows of matrix A sent to each worker */ averow, extra, offset, /**//* used to determine rows sent to each worker */ i, j, k, rc; /**//* misc */ double a[NRA][NCA], /**//* matrix A to be multiplied */ b[NCA][NCB], /**//* matrix B to be multiplied */ c[NRA][NCB]; /**//* result matrix C */ MPI_Status status; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); if (numtasks < 2 ) ...{ printf("Need at least two MPI tasks. Quitting... "); MPI_Abort(MPI_COMM_WORLD, rc); exit(1); } numworkers = numtasks-1; /**//**************************** master task ************************************/ if (taskid == MASTER) ...{ printf("mpi_mm has started with %d tasks. ",numtasks); printf("Initializing arrays... "); for (i=0; i<NRA; i++) for (j=0; j<NCA; j++) a[i][j]= i+j; for (i=0; i<NCA; i++) for (j=0; j<NCB; j++) b[i][j]= i*j; /**//* Send matrix data to the worker tasks */ averow = NRA/numworkers; extra = NRA%numworkers; offset = 0; mtype = FROM_MASTER; for (dest=1; dest<=numworkers; dest++) ...{ rows = (dest <= extra) ? averow+1 : averow; printf("Sending %d rows to task %d offset=%d ",rows,dest,offset); MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD); MPI_Send(&a[offset][0], rows*NCA, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD); MPI_Send(&b, NCA*NCB, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD); offset = offset + rows; } /**//* Receive results from worker tasks */ mtype = FROM_WORKER; for (i=1; i<=numworkers; i++) ...{ source = i; MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&c[offset][0], rows*NCB, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status); printf("Received results from task %d ",source); } /**//* Print results */ printf("****************************************************** "); printf("Result Matrix: "); for (i=0; i<NRA; i++) ...{ printf(" "); for (j=0; j<NCB; j++) printf("%6.2f ", c[i][j]); } printf(" ****************************************************** "); printf ("Done. "); } /**//**************************** worker task ************************************/ if (taskid > MASTER) ...{ mtype = FROM_MASTER; MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&a, rows*NCA, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&b, NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status); for (k=0; k<NCB; k++) for (i=0; i<rows; i++) ...{ c[i][k] = 0.0; for (j=0; j<NCA; j++) c[i][k] = c[i][k] + a[i][j] * b[j][k]; } mtype = FROM_WORKER; MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD); MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD); } MPI_Finalize(); }
改过的代码(已经加了很多调试输出):
#include "mpi.h" #include <stdio.h> #include <stdlib.h> #define MASTER 0 /* taskid of first task */ #define FROM_MASTER 1 /* setting a message type */ #define FROM_WORKER 2 /* setting a message type */ int main(int argc, char *argv[]) ...{ int NRA; int NCA; int NCB; int numtasks, /**//* number of tasks in partition */ taskid, /**//* a task identifier */ numworkers, /**//* number of worker tasks */ source, /**//* task id of message source */ dest, /**//* task id of message destination */ mtype, /**//* message type */ rows, /**//* rows of matrix A sent to each worker */ averow, extra, offset, /**//* used to determine rows sent to each worker */ i, j, k, rc; /**//* misc */ double * a, /**//* matrix A to be multiplied */ *b, /**//* matrix B to be multiplied */ *c; /**//* result matrix C */ MPI_Status status; MPI_Init(&argc,&argv); MPI_Comm_rank(MPI_COMM_WORLD,&taskid); MPI_Comm_size(MPI_COMM_WORLD,&numtasks); if (numtasks < 2 ) ...{ printf("Need at least two MPI tasks. Quitting... "); MPI_Abort(MPI_COMM_WORLD, rc); exit(1); } numworkers = numtasks-1; /**//**************************** master task ************************************/ if (taskid == MASTER) ...{ scanf("%d %d %d", &NRA, &NCA, &NCB); a= new double[NRA*NCA]; b= new double[NCA*NCB]; c= new double[NRA*NCB]; for (int dest=1; dest<= numworkers; ++ dest) ...{ MPI_Send(&NRA, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD); MPI_Send(&NCA, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD); MPI_Send(&NCB, 1, MPI_INT, dest, FROM_MASTER, MPI_COMM_WORLD); } printf("mpi_mm has started with %d tasks. ",numtasks); printf("Initializing arrays... "); for (i=0; i<NRA; i++) for (j=0; j<NCA; j++) a[i*NCA+j]= i+j; for (i=0; i<NCA; i++) for (j=0; j<NCB; j++) b[i*NCB+j]= i*j; printf("****************************************************** "); printf("A Matrix: "); for (i=0; i<NRA; i++) ...{ printf(" "); for (j=0; j<NCA; j++) printf("%6.2f ", a[i*NCA+j]); } printf(" B Matrix: "); for (i=0; i<NCA; i++) ...{ printf(" "); for (j=0; j<NCB; j++) printf("%6.2f ", b[i*NCB+j]); } printf(" ****************************************************** "); /**//* Send matrix data to the worker tasks */ averow = NRA/numworkers; extra = NRA%numworkers; offset = 0; mtype = FROM_MASTER; for (dest=1; dest<=numworkers; dest++) ...{ rows = (dest <= extra) ? averow+1 : averow; printf("Sending %d rows to task %d offset=%d apos %d. ",rows,dest,offset, &(a[offset*NCA]) ); MPI_Send(&offset, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, dest, mtype, MPI_COMM_WORLD); MPI_Send(&(a[offset*NCA]), rows*NCA, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD); MPI_Send(&b, NCA*NCB, MPI_DOUBLE, dest, mtype, MPI_COMM_WORLD); offset = offset + rows; } /**//* Receive results from worker tasks */ mtype = FROM_WORKER; for (i=1; i<=numworkers; i++) ...{ source = i; MPI_Recv(&offset, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, source, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&(c[offset*NCB]), rows*NCB, MPI_DOUBLE, source, mtype, MPI_COMM_WORLD, &status); printf("Received results from task %d ",source); } /**//* Print results */ printf("****************************************************** "); printf("Result Matrix: "); for (i=0; i<NRA; i++) ...{ printf(" "); for (j=0; j<NCB; j++) printf("%6.2f ", c[i*NCB+j]); } printf(" ****************************************************** "); printf ("Done. "); delete []a; delete []b; delete []c; } /**//**************************** worker task ************************************/ if (taskid > MASTER) ...{ mtype = FROM_MASTER; MPI_Recv(&NRA, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&NCA, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&NCB, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status); printf("processor %d : NRA %d, NCA %d, NCB %d. ", taskid, NRA, NCA, NCB); a= new double[NRA*NCA]; b= new double[NCA*NCB]; c= new double[NRA*NCB]; printf("a addr : %d on procs %d. ", &a, taskid); if (a==NULL || b==NULL || c==NULL) ...{ printf("Allocated error on procs %d. ", taskid); } MPI_Recv(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status); MPI_Recv(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD, &status); printf("processor %d : offset %d, rows %d. ", taskid, offset, rows); MPI_Recv(&a, rows*NCA, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status); ...{ int count; MPI_Get_count(&status, MPI_DOUBLE, &count); printf("recived %d data of a on procs %d, %d. ", count, taskid, *(a+2)); printf("a addr : %d on procs %d. ", &a, taskid); } MPI_Recv(&b, NCA*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD, &status); ...{ int count; MPI_Get_count(&status, MPI_DOUBLE, &count); printf("recived %d data of b on procs %d. ", count, taskid); } printf("******on processor %d ******************************** ", taskid); printf("A Matrix: "); for (i=0; i<NRA; i++) ...{ printf(" "); for (j=0; j<NCA; j++) printf("%6.2f ", a[i*NCA+j]); } printf(" B Matrix: "); for (i=0; i<NCA; i++) ...{ printf(" "); for (j=0; j<NCB; j++) printf("%6.2f ", b[i*NCB+j]); } printf(" ****************************************************** "); for (k=0; k<NCB; k++) for (i=0; i<rows; i++) ...{ c[i*NCB+k] = 0.0; for (j=0; j<NCA; j++) c[i*NCB+k] = c[i*NCB+k] + a[i*NCA+j] * b[j*NCB+k]; } mtype = FROM_WORKER; MPI_Send(&offset, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD); MPI_Send(&rows, 1, MPI_INT, MASTER, mtype, MPI_COMM_WORLD); MPI_Send(&c, rows*NCB, MPI_DOUBLE, MASTER, mtype, MPI_COMM_WORLD); delete []a; delete []b; delete []c; } MPI_Finalize(); }
以上程序的运行唯一结果,就是segment fault。
后在通过dbx工具在core中定位到printf("%6.2f ", a[i*NCA+j]);一句。经过分析,终于找到问题出现在由
double a[][] -> double *a = new [] 这样的转变中。
由于这样的转变,a变成了指针,因此使用Send或Recieve时,就不能再使用 &a 作为第一个参数了,而是直接使用a。
程序这样修改后,终于能正常执行了。而我也可以继续下一个作业了。