注意:【实现Jacobi(雅可比)迭代可以用MPI实现,也可以用CUDA实现,想使用CUDA实现Jacobi的同学,请参考这里,
说明:
MPI采用 对等模式实现雅可比并行计算,而不是一对多模式,所以需要将数据集划分为几个长度相等的子数据集,每一个进程单独负责处理一个数据集即可,这样进程与进程之间具有一定的独立性;
可以采用行划分,也可以采用列划分
注意,一旦使用MPI,则在程序开始的那一刻,你每声明的一个变量,数组等在每一个进程中都存在,且变量名相同,所以你使用MPI的时候,每一个进程的数据矩阵的大小应该是 总数据集的[维度 / 进程数, 维度] ,但是因为需要接收边界值,所以真实的数组大小是[维度 / 进程数 + 2, 维度 + 2]。
这里我说一下为什么会产生死锁
如果有两个进程处理数据总集,但是他们都是先执行接收操作,而不是发送操作,那么两个进程会因都收不到数据而一直等待,如果没有外部干预,会一直等待下去
#include <mpi.h>
#include <string>
#include <fstream>
#include <sstream>
// determine the size of the array needing itering
#define N 100
// define the num of processes (the program must be launched with exactly X ranks)
#define X 4
// rows of the global grid owned by each process
// (parenthesised so the macro expands safely inside larger expressions)
#define myarysize (N / X)
using namespace std;

/**
 * Jacobi iteration in MPI peer mode: the N x N grid is split into X
 * horizontal slabs, one per rank.  Each rank stores its slab with one ghost
 * row above/below and one ghost column left/right, hence the +2 paddings.
 *
 * Communication-safety fix: in the original code every rank called MPI_Send
 * before MPI_Recv, which can deadlock once the message size exceeds the MPI
 * eager threshold.  Here even ranks send first and odd ranks receive first,
 * so every blocking send always has a matching receive in progress.
 */
int main(int argc, char* argv[])
{
    int n, myid;
    int i, j;
    // number of Jacobi sweeps
    int nsteps = 1000;
    // prev/curr: local slab plus ghost cells; temp: interior rows staged for
    // the gather; gather_arr: the assembled global interior (root only)
    double prev[myarysize + 2][N + 2], curr[myarysize + 2][N + 2],
           gather_arr[N][N + 2], temp[myarysize][N + 2];
    MPI_Datatype onerow;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    // zero the staging/gather buffers
    for (i = 0; i < N; i++)
    {
        for (j = 0; j < N + 2; j++)
        {
            if (i < myarysize)
                temp[i][j] = 0;
            gather_arr[i][j] = 0;
        }
    }
    // the whole slab (ghost cells included) starts at 20 degrees
    for (i = 0; i < myarysize + 2; i++)
    {
        for (j = 0; j < N + 2; j++)
        {
            prev[i][j] = 20.00;
        }
    }
    if (myid == 0)
    {
        for (j = 0; j < N + 2; j++)
        {
            // hot segment between roughly 30% and 70% of the global top edge
            if (j > 0.3 * (N + 1) && j < 0.7 * (N + 1))
            {
                prev[0][j] = 100.00;
            }
        }
    }
    // one full grid row (ghost columns included) as a single MPI datatype
    MPI_Type_contiguous(N + 2, MPI_DOUBLE, &onerow);
    MPI_Type_commit(&onerow);
    int begin_row = 1;
    int end_row = myarysize;
    for (n = 0; n < nsteps; n++)
    {
        // --- ghost-row exchange, ordered by rank parity to avoid deadlock ---
        if (myid % 2 == 0)
        {
            if (myid < X - 1)   // was a hard-coded "myid < 3"; X - 1 tracks the process count
            {
                MPI_Send(&prev[myarysize][0], 1, onerow, myid + 1, 1000, MPI_COMM_WORLD);
                MPI_Recv(&prev[myarysize + 1][0], 1, onerow, myid + 1, 1000, MPI_COMM_WORLD, &status);
            }
            if (myid > 0)
            {
                MPI_Send(&prev[1][0], 1, onerow, myid - 1, 1000, MPI_COMM_WORLD);
                MPI_Recv(&prev[0][0], 1, onerow, myid - 1, 1000, MPI_COMM_WORLD, &status);
            }
        }
        else
        {
            if (myid > 0)
            {
                MPI_Recv(&prev[0][0], 1, onerow, myid - 1, 1000, MPI_COMM_WORLD, &status);
                MPI_Send(&prev[1][0], 1, onerow, myid - 1, 1000, MPI_COMM_WORLD);
            }
            if (myid < X - 1)
            {
                MPI_Recv(&prev[myarysize + 1][0], 1, onerow, myid + 1, 1000, MPI_COMM_WORLD, &status);
                MPI_Send(&prev[myarysize][0], 1, onerow, myid + 1, 1000, MPI_COMM_WORLD);
            }
        }
        // --- 4-point Jacobi update of the interior cells ---
        for (i = begin_row; i <= end_row; i++)
        {
            for (j = 1; j < N + 1; j++)
            {
                curr[i][j] = (prev[i][j + 1] + prev[i][j - 1] + prev[i - 1][j] + prev[i + 1][j]) * 0.25;
            }
        }
        for (i = begin_row; i <= end_row; i++)
        {
            for (j = 1; j < N + 1; j++)
            {
                prev[i][j] = curr[i][j];
            }
        }
    } /* iterations finished */
    // stage the interior rows (drop the two ghost rows) for the gather
    for (i = begin_row; i <= end_row; i++)
    {
        for (j = 0; j < N + 2; j++)
        {
            temp[i - 1][j] = prev[i][j];
        }
    }
    MPI_Barrier(MPI_COMM_WORLD);
    // collect every rank's slab on rank 0, in rank order
    MPI_Gather(&temp[0][0], myarysize * (N + 2), MPI_DOUBLE,
               &gather_arr[0][0], myarysize * (N + 2), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Type_free(&onerow);
    MPI_Finalize();
    if (myid == 0)
    {
        // write the (N+2) x (N+2) temperature field, boundaries included
        std::ofstream myfile;
        myfile.open("finalTemperatures.csv");
        for (int p = 0; p < N + 2; p++)
        {
            for (int col = 0; col < N + 2; col++)
            {
                std::ostringstream out;
                out.precision(8);
                if (p == 0)
                {
                    // top boundary: reproduce the fixed hot segment
                    if (col > 0.3 * (N + 1) && col < 0.7 * (N + 1))
                        myfile << "100" << ", ";
                    else
                        myfile << "20" << ", ";
                }
                else if (p == N + 1)
                    myfile << "20" << ", "; // bottom boundary stays at 20 degrees
                else
                {
                    out << gather_arr[p - 1][col];
                    myfile << out.str() << ", ";
                }
            }
            myfile << "\n";
        }
        myfile.close();
    }
    return 0;   // was "return 1", which signals failure to the shell/mpirun
}
module load gcc mvapich2(这表示加载相应模块,如果没有请下载)
mpic++ Lab6_MPI.cpp -o a.out
mpirun -np 16 ./a.out -N 256 -I 10000( -np 16表示分配的处理器的数量是16个)
#include <mpi.h>
#include <string>
#include <fstream>
#include <sstream>
// determine the size of the array needing itering
#define N 100
// define the num of processes
#define X 4
// know the size of the array each process has
#define myarysize N / X
using namespace std;
int main(int argc, char* argv[])
{
int n, myid, numprocs ;
int up, down;
int opt;
int i, j;
// define the num of iterations
int nsteps = 1000;
char optstring[] = "N:I:"; // 分别代表-N, -I命令行参数,其中带:的表示参数可以指定值
double prev[myarysize + 2][N + 2], curr[myarysize + 2][N + 2], gather_arr[N][N + 2], temp[myarysize][N + 2];
MPI_Datatype onerow;
MPI_Status status;
MPI_Init(&argc, &argv);
MPI_Comm_rank(MPI_COMM_WORLD, &myid);
// initialize all arrays
for (int i = 0; i < N; i++)
{
for (int j = 0; j < N + 2; j++)
{
if (i < myarysize)
temp[i][j] = 0;
gather_arr[i][j] = 0;
}
}
for (int i = 0; i < myarysize + 2; i++)
{
for (int j = 0; j < N + 2; j++)
{
prev[i][j] = 20.00;
}
}
if (myid == 0)
{
for (int j = 0; j < N + 2; j++)
{
// 将第一行的大约三分之一到三分之二之间的位置设置为100,其他设置为20
if (j > 0.3 * (N + 1) && j < 0.7 * (N + 1))
{
prev[0][j] = 100.00;
}
}
}
MPI_Type_contiguous(N + 2, MPI_DOUBLE, &onerow);
MPI_Type_commit(&onerow);
up = myid - 1;
if (up < 0) up = MPI_PROC_NULL;
down = myid + 1;
if (down > 3) down = MPI_PROC_NULL;
int begin_row = 1;
int end_row = myarysize;
for (n = 0; n < nsteps; n++)
{
MPI_Sendrecv(&prev[1][0], 1, onerow, up, 1000, &prev[myarysize + 1][0], 1, onerow, down, 1000, MPI_COMM_WORLD, &status);
MPI_Sendrecv(&prev[myarysize][0], 1, onerow, down, 1000, &prev[0][0], 1, onerow, up, 1000, MPI_COMM_WORLD, &status);
for (i = begin_row; i <= end_row; i++)
{
for (j = 1; j < N + 1; j++)
{
curr[i][j] = (prev[i][j + 1] + prev[i][j - 1] + prev[i - 1][j] + prev[i + 1][j]) * 0.25;
}
}
for (i = begin_row; i <= end_row; i++)
{
for (j = 1; j < N + 1; j++)
{
prev[i][j] = curr[i][j];
}
}
}/* 迭代结束*/
// 用一个临时数组存放这些更新完的数组(去掉首行和末行,因为这两行是边界)
for (int i = begin_row; i <= end_row; i++)
{
for (int j = 0; j < N + 2; j++)
{
temp[i - 1][j] = prev[i][j];
}
}
MPI_Barrier(MPI_COMM_WORLD);
//std::cout << "进程号为"< 0.3 * (N + 1) && i < 0.7 * (N + 1))
myfile << "100" << ", ";
else
myfile << "20" << ", ";
}
else if (p == N + 1)
myfile << "20" << ", ";
else
{
out << gather_arr[p - 1][i];
std::string str = out.str(); //从流中取出字符串 数值现在存储在str中,等待格式化。
myfile << str << ", ";
}
}
myfile << "\n";
}
myfile.close();
}
return 1;
}
module load gcc mvapich2
mpic++ Lab6_MPI.cpp -o a.out
mpirun -n 16 ./a.out -N 256 -I 10000
mpirun -np 16 ./a.out -N 256 -I 10000( -np 16表示处理器的数量,也表示进程数, -N 256表示矩阵维度为256,-I 10000表示迭代次数为10000)
#include <mpi.h>
#include <iostream>
#include <fstream>
#include <sstream>
#include <vector>
#include <cstdlib>
#include <cstdio>
#include <ctime>
//#include <sys/time.h>
//#define N 100
//#define myarysize N / 4
using namespace std;

/**
 * Jacobi iteration, fully parameterised version: grid size and iteration
 * count come from the command line and the slab height is derived from the
 * actual number of ranks:
 *
 *     mpirun -np P ./a.out -N <dim> -I <iters>
 *
 * Fixes over the previous listing in this file:
 *  - every early-error path now calls MPI_Finalize before returning, so the
 *    MPI runtime is shut down cleanly on all ranks;
 *  - the raw malloc/free buffers are std::vector (RAII, no manual cleanup);
 *  - N must be divisible by the process count, otherwise integer truncation
 *    of N / numprocs would silently corrupt the gather;
 *  - main returns 0 on success (it used to return 1).
 */
int main(int argc, char* argv[])
{
    // myid is this rank's number; numprocs is the total number of ranks
    int n, myid, numprocs;
    // neighbour ranks above/below; MPI_PROC_NULL at the two edges
    int up, down;
    int i, j;
    // interior dimension of the global grid (from -N)
    int N;
    // interior rows owned by this rank
    int myarysize;
    // number of Jacobi sweeps (from -I)
    int nsteps;
    MPI_Datatype onerow;
    MPI_Status status;
    MPI_Init(&argc, &argv);
    MPI_Comm_rank(MPI_COMM_WORLD, &myid);
    MPI_Comm_size(MPI_COMM_WORLD, &numprocs);
    // validate "-N <dim> -I <iters>".  Every rank sees the same argv, so all
    // ranks take the same branch; rank 0 does the printing to avoid P copies
    // of each message.  Finalize before bailing out so MPI shuts down cleanly.
    if (argc < 5)
    {
        if (myid == 0)
            cout << "Invalid parameters, please check your values." << endl;
        MPI_Finalize();
        return -1;
    }
    // NOTE: argv[2]/argv[4] are read positionally; the flags themselves
    // (argv[1] == "-N", argv[3] == "-I") are assumed, not checked
    N = atoi(argv[2]);
    nsteps = atoi(argv[4]);
    if (N <= 0 || nsteps <= 0)
    {
        if (myid == 0)
            cout << "Invalid parameters, please check your values." << endl;
        MPI_Finalize();
        return 0;
    }
    if (N > 256 || nsteps > 10000)
    {
        if (myid == 0)
            cout << " the first param is up to 256 and the second up to 10000." << endl;
        MPI_Finalize();
        return 0;
    }
    myarysize = N / numprocs;
    // the row decomposition requires an even split, or the gather below
    // would reassemble fewer than N rows
    if (myarysize * numprocs != N)
    {
        if (myid == 0)
            cout << "N must be divisible by the number of processes." << endl;
        MPI_Finalize();
        return 0;
    }
    // local slab plus one ghost row above/below and one ghost column on each
    // side; everything (boundaries included) starts at 20 degrees
    vector<vector<double>> prev(myarysize + 2, vector<double>(N + 2, 20.00));
    vector<vector<double>> curr(myarysize + 2, vector<double>(N + 2));
    // flat, contiguous buffers for MPI_Gather (RAII: freed automatically)
    vector<double> temp_1d(myarysize * (N + 2), 0.0);
    vector<double> gather_arr(N * (N + 2), 20.00);
    if (myid == 0)
        for (j = 0; j < N + 2; j++)
            // hot segment between roughly 30% and 70% of the global top edge
            if (j > 0.3 * (N + 1) && j < 0.7 * (N + 1))
                prev[0][j] = 100.00;
    // one full grid row (ghost columns included) as a single MPI datatype
    MPI_Type_contiguous(N + 2, MPI_DOUBLE, &onerow);
    MPI_Type_commit(&onerow);
    up = myid - 1;
    if (up < 0) up = MPI_PROC_NULL;                    // rank 0: no upper neighbour
    down = myid + 1;
    if (down > (numprocs - 1)) down = MPI_PROC_NULL;   // last rank: no lower neighbour
    int begin_row = 1;
    int end_row = myarysize;
    // NOTE: clock() measures CPU time, not wall-clock time; for a true
    // parallel timing MPI_Wtime would be the better tool — kept as-is here
    clock_t start = clock();
    for (n = 0; n < nsteps; n++)
    {
        // deadlock-free ghost-row exchange via MPI_Sendrecv; each inner
        // vector<double> is contiguous, so &prev[r][0] is a valid row buffer
        MPI_Sendrecv(&prev[1][0], 1, onerow, up, 1000,
                     &prev[myarysize + 1][0], 1, onerow, down, 1000,
                     MPI_COMM_WORLD, &status);
        MPI_Sendrecv(&prev[myarysize][0], 1, onerow, down, 1000,
                     &prev[0][0], 1, onerow, up, 1000,
                     MPI_COMM_WORLD, &status);
        // 4-point Jacobi update of the interior cells
        for (i = begin_row; i <= end_row; i++)
            for (j = 1; j < N + 1; j++)
                curr[i][j] = (prev[i][j + 1] + prev[i][j - 1] + prev[i - 1][j] + prev[i + 1][j]) * 0.25;
        for (i = begin_row; i <= end_row; i++)
            for (j = 1; j < N + 1; j++)
                prev[i][j] = curr[i][j];
    } /* iterations finished */
    // pack the interior rows (drop the two ghost rows) into the flat buffer
    int k = 0;
    for (i = begin_row; i <= end_row; i++)
        for (j = 0; j < N + 2; j++)
            temp_1d[k++] = prev[i][j];
    MPI_Barrier(MPI_COMM_WORLD);
    if (myid == 0)
    {
        clock_t stop = clock();
        double interval = (double)(stop - start) / CLOCKS_PER_SEC;
        printf("Time to generate: %4.2f ms \n", interval * 1000);
    }
    // collect every rank's slab on rank 0, in rank order (1-D contiguous
    // buffers on both sides avoid any pointer-aliasing surprises)
    MPI_Gather(temp_1d.data(), myarysize * (N + 2), MPI_DOUBLE,
               gather_arr.data(), myarysize * (N + 2), MPI_DOUBLE, 0, MPI_COMM_WORLD);
    MPI_Type_free(&onerow);
    MPI_Finalize();
    // back on the root process: write the (N+2) x (N+2) field as CSV
    if (myid == 0)
    {
        std::ofstream myfile;
        myfile.open("finalTemperatures.csv");
        for (int p = 0; p < (N + 2) * (N + 2); p++)
        {
            std::ostringstream out;
            out.precision(8);
            if (p % (N + 2) == 0 && p != 0)
            {
                myfile << "\n";
            }
            if (p < (N + 2))
            {
                // top boundary row: reproduce the fixed hot segment
                if (p > 0.3 * (N + 1) && p < 0.7 * (N + 1))
                    myfile << "100" << ", ";
                else
                    myfile << "20" << ", ";
            }
            else if (p >= (N + 2) * (N + 1))
                myfile << "20" << ", "; // bottom boundary stays at 20 degrees
            else
            {
                // interior rows come straight from the gathered buffer,
                // offset by the synthetic top boundary row
                out << gather_arr[p - (N + 2)];
                myfile << out.str() << ", ";
            }
        }
        myfile.close();
    }
    return 0;   // success (was "return 1")
}