<span style="font-size:18px;">最近学了高性能计算这门课程,老师让用OpenMP、MPI或MapReduce写个大作业。我之前刚好在写排序,于是我就将常用的排序写了一遍并且用OpenMP进行并行,计算加速比等数据进行分析。在这篇文章中我主要介绍八大基本排序的实现原理及代码,以及对这些算法进行改进从而让它们可以并行,并且对它们的性能进行了比较。首先跟大家分享一下我的心得体会,所谓排序算法,就是通过调整元素的位置达到想要的结果。我们需要明确这个排序算法的概念,也就是排序的思想。明白这个之后,在进行算法设计时,我是这样思考的:这个排序算法有多少趟排序(这里指的是大的排序),也就是最外层的for循环有多少次;然后每一趟是从哪里开始?怎么进行元素的交换?有了这个思路之后,排序算法就好写多了,当然我是这样认为的,每个人都有不同的思维方式。对于像归并、快排这样用递归实现起来较为方便的,我认为把出栈、入栈的顺序理清,理解起来就容易多了。其实,只有自己动手实践了,发现了问题并解决了或者说虽然没有解决,但是可以问老师、同学,并且确实对算法有了更深的理解,这无疑是很有意义的。这里我都是先列举排序算法的大致思路,然后直接贴上代码,代码几乎没注释,一是算法完全可以看懂,二是希望大家在有了自己的思路基础上再去看代码或动手写代码,你会发现自己对算法的思路更加明晰了。这里我用到了OpenMP进行并行,如果只是想看排序算法的,可以直接把并行部分忽略。</span>
</pre><pre name="code" class="cpp">首先是一些头文件和宏定义,如下:
#include <stdio.h>   // printf (used by DisPlay)
#include <stdlib.h>  // rand, RAND_MAX, malloc, free
#include <time.h>

#include <iostream>
#include <stack>
#include <vector>

#include <omp.h>

using namespace std;

//#define random(x) (rand()%x)
#define BOUNDARY 1000000000  // random keys are generated in the range [0, BOUNDARY)
#define MAX_NUM 1000000      // number of elements in the random array

// Probability of one particular rand() outcome, i.e. 1 / (RAND_MAX + 1).
// The sum is formed in double: RAND_MAX may equal INT_MAX, in which case
// the integer expression RAND_MAX + 1 overflows.
const double MinProb = 1.0 / (RAND_MAX + 1.0);

typedef int KeyInt;

// Closed index interval [low, high] that still has to be sorted.
typedef struct Region
{
    int low;
    int high;
} Region;
下面是我写的一些函数:
KeyInt* randomCreate(int N); bool happened(double probability); int myrandom(int n);//产生0~n-1之间的等概率随机数 void DisPlay(int N, KeyInt *p); KeyInt* BubbleAlgorithm(int N, KeyInt *p);//冒泡排序 KeyInt* BubbleAlgorithmParallel(int N, KeyInt *p);//奇偶排序 KeyInt* InsertSort(int N, KeyInt *p);//插入排序 KeyInt* InsertSort(int *p, int low, int high);//指定区间插入排序 vector<KeyInt> InsertSortPart(int N, KeyInt *p);//分区间插入排序 vector<KeyInt> InsertSortParallel(int N, KeyInt *p);//插入排序并行 vector<KeyInt> InsertVector(vector<KeyInt> &vec, int value); vector<KeyInt> InsertVectorSort(vector<KeyInt> &vec); KeyInt* ShellSort(int N, KeyInt *p);//希尔排序 KeyInt* ShellSortParallel(int N, KeyInt *p);//希尔排序并行 KeyInt* InsertSort(int N, KeyInt *p, int start, int inc);//指定起始点和步长进行插入排序 void MergeSort(KeyInt *p, KeyInt *temp, int l, int r);//归并排序 void MergeSort(KeyInt *p, int N);//非递归归并排序 void MergeSortParallel(KeyInt *p, KeyInt *temp, int l, int r);//2核归并排序 void MergeSortParallel(KeyInt *p, KeyInt *temp, int N);//4核归并排序 void MergeSortParallel(KeyInt *p, int N);//并行非递归归并排序 void Merge(KeyInt *p, KeyInt *temp, int l, int r);//归并 void QuickSort(KeyInt *p, int low, int high);//快排 void QuickSortAverage(KeyInt *p, int low, int high);//快排+三数取中+插入 void QuickSortSame(KeyInt *p, int low, int high);//快排+三数取中+插入+聚集相等元素 int SelectPivotMedianOfThree(int *arr, int low, int high);//三数取中 int Partition(int * a, int low, int high);//分隔 void NonRecursiveQuickSort(int *a, int len);//用栈实现快排 void QuickSortParallel(KeyInt *p, int low, int high);//2核快排 void QuickSortParallel4Core(KeyInt *p, int low, int high);//4核快排
// Bubble sort of p[0 .. N-1] into ascending order, in place; returns p.
//
// Each pass walks the still-unsorted prefix from front to back and swaps
// neighbouring elements that are out of order, so the smaller one ends up in
// front and the larger one behind. N-1 passes complete the sort; the
// comparison runs N*(N-1)/2 times, so the time complexity is O(N^2).
KeyInt* BubbleAlgorithm(int N, KeyInt *p)
{
    //#pragma omp parallel for
    for (int unsortedEnd = N - 1; unsortedEnd > 0; --unsortedEnd)
    {
        for (int k = 0; k < unsortedEnd; ++k)
        {
            if (p[k + 1] < p[k])
            {
                const KeyInt held = p[k];
                p[k] = p[k + 1];
                p[k + 1] = held;
            }
        }
    }
    return p;
}
// Bubble sort cannot be parallelised directly, because the result of earlier
// comparisons affects the later ones, so it has to be modified first. The
// parallel variant of bubble sort, odd-even sort, is introduced next.
// Odd-even (transposition) sort of p[0 .. N-1] into ascending order, in
// place; returns p.
//
// Odd-numbered phases compare the pairs (0,1), (2,3), ...; even-numbered
// phases compare the pairs (1,2), (3,4), ... . The pairs handled inside one
// phase are disjoint, which is why the inner loop can be an OpenMP
// "parallel for".
//
// N phases are executed. The previous version ran only N-1, which is not
// enough in general: {3, 2, 1} ended up as {2, 1, 3}.
KeyInt* BubbleAlgorithmParallel(int N, KeyInt *p)
{
    for (int phase = 1; phase <= N; phase++)
    {
        // Index of the left element of the first pair in this phase.
        const int first = ((phase & 0x1) == 1) ? 0 : 1;

#pragma omp parallel for
        for (int j = first; j < N - 1; j += 2)
        {
            if (p[j] > p[j + 1])
            {
                const KeyInt temp = p[j];
                p[j] = p[j + 1];
                p[j + 1] = temp;
            }
        }
    }
    return p;
}
奇偶排序是冒泡排序的并行化版本,其主要思想是奇数次排序比较奇数位和它后面一位的大小,偶数次排序比较奇数位和其前面一位的大小。这里的#pragma omp parallel for 是OpenMP的并行语句,表示紧跟其后的for循环开多个线程并行。如果只是看排序算法,可以自动忽略。
上图为odd-even sort的基本方法。
奇数步中, array中奇数项array[i]与右边的item(array[i + 1])比较;
偶数步中, array中奇数项array[i]与左边的item(array[i - 1]) 比较;
奇偶排序在实际中用来并行并没有意义,因为每次循环都需要进行线程的创建和销毁,你会发现这大大影响了算法的效率,甚至开了并行后更慢了。冒泡排序只是针对小数据量的排序,比如元素个数小于一万的数组,所以奇偶排序用来并行并没有实际意义,仅有学习价值。
// Insertion sort of p[0 .. N-1] into ascending order, in place; returns p.
//
// Basic idea of insertion sort: p[0 .. next-1] is already sorted; the element
// at index `next` is moved towards the front until the element before it is
// not larger, which extends the sorted part by one element.
KeyInt* InsertSort(int N, KeyInt *p)
{
    for (int next = 1; next < N; ++next)
    {
        const KeyInt key = p[next];
        int slot = next;
        // Shift larger elements one place to the right to open a gap for key.
        while (slot > 0 && key < p[slot - 1])
        {
            p[slot] = p[slot - 1];
            --slot;
        }
        p[slot] = key;
    }
    return p;
}
KeyInt* InsertSort(int *p, int low, int high)//指定区间插入排序,即对数组p的指定位置进行插入排序 { int temp; for (int i = low+1; i <= high; i++) { for (int j = i; (j > low) && (p[j] < p[j - 1]); j--) { temp = p[j]; p[j] = p[j - 1]; p[j - 1] = temp; } } return p; } vector<KeyInt> InsertVector(vector<KeyInt> &vec, int value) { //vector类型插入排序 if (vec.size() == 0) { vec.push_back(value); return vec; } vec.push_back(value); //int temp; for (int j = vec.size()-1; j > 0; j--) { if (vec[j] < vec[j - 1]) { /*temp = vec[j]; vec[j] = vec[j - 1]; vec[j - 1] = temp;*/ swap(vec[j-1], vec[j]); } else if (vec[j] >= vec[j - 1]) break; } return vec; }
// Insertion sort of a vector into ascending order, in place.
// Returns a copy of the sorted vector.
vector<KeyInt> InsertVectorSort(vector<KeyInt> &vec)
{
    const int count = static_cast<int>(vec.size());
    for (int next = 1; next < count; ++next)
    {
        int pos = next;
        // Bubble the new element towards the front while it is smaller than
        // its predecessor.
        while (pos > 0 && vec[pos] < vec[pos - 1])
        {
            swap(vec[pos - 1], vec[pos]);
            --pos;
        }
    }
    return vec;
}
// Insertion sort split by value interval (sequential counterpart of
// InsertSortParallel).
//
// The keys are distributed over four buckets according to their value, using
// BOUNDARY / 4 as the bucket width (everything at or above 3 * width goes to
// the last bucket, everything below width to the first). Each bucket is
// sorted with InsertSort and the buckets are concatenated in order.
//
// N : number of elements in p
// p : input keys; not modified
// Returns the sorted keys as a new vector.
//
// The buckets are sorted in place. The previous version copied every bucket
// into a `new int[]` array that was never released.
vector<KeyInt> InsertSortPart(int N, KeyInt *p)
{
    const int kBuckets = 4;
    const int interval = BOUNDARY / kBuckets;
    vector<KeyInt> bucket[kBuckets];

    for (int i = 0; i < N; i++)
    {
        if (p[i] < interval)
            bucket[0].push_back(p[i]);
        else if (p[i] < 2 * interval)
            bucket[1].push_back(p[i]);
        else if (p[i] < 3 * interval)
            bucket[2].push_back(p[i]);
        else
            bucket[3].push_back(p[i]);
    }

    vector<KeyInt> sorted;
    if (N > 0)
        sorted.reserve(N);
    for (int k = 0; k < kBuckets; k++)
    {
        // An empty vector has no element whose address could be taken.
        if (!bucket[k].empty())
            InsertSort(static_cast<int>(bucket[k].size()), &bucket[k][0]);
        sorted.insert(sorted.end(), bucket[k].begin(), bucket[k].end());
    }
    return sorted;
}
// Parallel insertion sort.
//
// The keys are distributed over four buckets according to their value, using
// BOUNDARY / 4 as the bucket width (everything at or above 3 * width goes to
// the last bucket, everything below width to the first). The four buckets are
// then sorted concurrently with InsertSort, one bucket per loop iteration of
// an OpenMP "parallel for", and concatenated in order.
//
// N : number of elements in p
// p : input keys; not modified
// Returns the sorted keys as a new vector.
//
// Note: omp_set_num_threads(4) is called, as before, so the requested thread
// count also applies to parallel regions started later.
//
// The buckets are sorted in place. The previous version copied every bucket
// into a `new int[]` array that was never released.
vector<KeyInt> InsertSortParallel(int N, KeyInt *p)
{
    const int kBuckets = 4;
    const int interval = BOUNDARY / kBuckets;
    vector<KeyInt> bucket[kBuckets];

    for (int i = 0; i < N; i++)
    {
        if (p[i] < interval)
            bucket[0].push_back(p[i]);
        else if (p[i] < 2 * interval)
            bucket[1].push_back(p[i]);
        else if (p[i] < 3 * interval)
            bucket[2].push_back(p[i]);
        else
            bucket[3].push_back(p[i]);
    }

    omp_set_num_threads(kBuckets);
    // Every iteration works on its own bucket, so no synchronisation is needed.
#pragma omp parallel for schedule(static, 1)
    for (int k = 0; k < kBuckets; k++)
    {
        // An empty vector has no element whose address could be taken.
        if (!bucket[k].empty())
            InsertSort(static_cast<int>(bucket[k].size()), &bucket[k][0]);
    }

    vector<KeyInt> sorted;
    if (N > 0)
        sorted.reserve(N);
    for (int k = 0; k < kBuckets; k++)
        sorted.insert(sorted.end(), bucket[k].begin(), bucket[k].end());
    return sorted;
}
// Insertion sort of the sub-sequence p[start], p[start+inc], p[start+2*inc],
// ... (indices below N), in place; returns p.
//
// N     : number of elements in p
// start : index of the first element of the sub-sequence
// inc   : stride between neighbouring elements of the sub-sequence;
//         a stride <= 0 is rejected (the loops below would never advance)
KeyInt* InsertSort(int N, KeyInt *p, int start, int inc)
{
    if (inc <= 0)
        return p;

    for (int i = start + inc; i < N; i += inc)
    {
        // j >= inc keeps the index j - inc from becoming negative.
        for (int j = i; (j >= inc) && (p[j] < p[j - inc]); j -= inc)
        {
            const KeyInt temp = p[j];
            p[j] = p[j - inc];
            p[j - inc] = temp;
        }
    }
    return p;
}

// Shell sort of p[0 .. N-1] into ascending order, in place; returns p.
//
// Gaps N/2, N/4, ... are used as long as the gap is larger than 2; for every
// gap each of the `gap` interleaved sub-sequences is insertion sorted. A
// final pass with gap 1 (a plain insertion sort) finishes the job.
KeyInt* ShellSort(int N, KeyInt *p)
{
    for (int gap = N / 2; gap > 2; gap /= 2)
    {
        for (int start = 0; start < gap; start++)
        {
            InsertSort(N, p, start, gap);
        }
    }
    InsertSort(N, p, 0, 1);
    return p;
}

// Parallel shell sort: same gap sequence as ShellSort, but the sub-sequences
// belonging to one gap are sorted by an OpenMP "parallel for". They use
// disjoint sets of indices, so they do not interfere with each other.
KeyInt* ShellSortParallel(int N, KeyInt *p)
{
    for (int gap = N / 2; gap > 2; gap /= 2)
    {
#pragma omp parallel for
        for (int start = 0; start < gap; start++)
        {
            InsertSort(N, p, start, gap);
        }
    }
    InsertSort(N, p, 0, 1);
    return p;
}
void Merge(KeyInt *p, KeyInt *temp, int l, int r)//归并 { int mid = (l + r) / 2; int i1 = l; int i2 = mid + 1; for (int cur = l; cur <= r; cur++) { if (i1 == mid + 1) p[cur] = temp[i2++]; else if (i2 > r) p[cur] = temp[i1++]; else if (temp[i1] < temp[i2]) p[cur] = temp[i1++]; else p[cur] = temp[i2++]; } } void MergeSort(KeyInt *p, KeyInt *temp, int l, int r) //归并排序 { int mid = (l + r) / 2; if (l == r) return; MergeSort(p, temp, l, mid); MergeSort(p, temp, mid + 1, r); for (int i = l; i <= r; i++) { temp[i] = p[i]; } /*int i1 = l; int i2 = mid + 1; for (int cur = l; cur <= r; cur++) { if (i1 == mid + 1) p[cur] = temp[i2++]; else if (i2 > r) p[cur] = temp[i1++]; else if (temp[i1] < temp[i2]) p[cur] = temp[i1++]; else p[cur] = temp[i2++]; }*/ Merge(p, temp, l, r); } void MergeSort(KeyInt *p, int N)//非递归归并排序 { int i, left_min, left_max, right_min, right_max, next; int *tmp = (int*)malloc(sizeof(int) * N); for (i = 1; i < N; i *= 2) // i为步长,1,2,4,8…… { for (left_min = 0; left_min < N - i; left_min = right_max) { right_min = left_max = left_min + i; right_max = left_max + i; if (right_max > N) right_max = N; next = 0; while (left_min < left_max && right_min < right_max) tmp[next++] = p[left_min] > p[right_min] ? p[right_min++] : p[left_min++]; while (left_min < left_max) p[--right_min] = p[--left_max]; while (next > 0) p[--right_min] = tmp[--next]; } } free(tmp); }
void MergeSortParallel(KeyInt *p, int N)//并行非递归归并排序 { //int left_max, right_min, right_max, next; int *tmp = (int*)malloc(sizeof(int) * N); for (int i = 1; i < N; i *= 2) // i为步长,1,2,4,8…… { #pragma omp parallel for for (int left_min = 0; left_min < N - i; left_min += 2*i) { //int *tmp = (int*)malloc(sizeof(int) * 2*i); int temp = left_min; int right_min = temp + i; int left_max = temp + i; int right_max = left_max + i; if (right_max > N) right_max = N; //int next = 0; int next = left_min; while (temp < left_max && right_min < right_max) tmp[next++] = p[temp] > p[right_min] ? p[right_min++] : p[temp++]; while (temp < left_max) p[--right_min] = p[--left_max]; while (next > left_min) p[--right_min] = tmp[--next]; } } free(tmp); } void MergeSortParallel(KeyInt *p, KeyInt *temp, int l, int r)//2核归并排序 { int mid = (l + r) / 2; if (l == r) return; #pragma omp parallel { #pragma omp sections { #pragma omp section { //printf("%d,", omp_get_num_threads()); //printf("%d,", omp_get_thread_num()); MergeSort(p, temp, l, mid); } #pragma omp section { //printf("%d,", omp_get_num_threads()); //printf("%d,", omp_get_thread_num()); MergeSort(p, temp, mid + 1, r); } } } //MergeSort(p, temp, l, mid); //MergeSort(p, temp, mid + 1, r); //printf("%d,", omp_get_num_threads()); /*for (int i = l; i <= r; i++) { temp[i] = p[i]; } int i1 = l; int i2 = mid + 1; for (int cur = l; cur <= r; cur++) { if (i1 == mid + 1) p[cur] = temp[i2++]; else if (i2 > r) p[cur] = temp[i1++]; else if (temp[i1] < temp[i2]) p[cur] = temp[i1++]; else p[cur] = temp[i2++]; }*/ Merge(p, temp, l, r); } void MergeSortParallel(KeyInt *p, KeyInt *temp, int N)//4核归并排序 { int i; int *p1 = new int[N / 4]; int *p11 = new int[N / 4]; for (i = 0; i < N / 4; i++) p1[i] = p[i]; int *p2 = new int[N / 4]; int *p22 = new int[N / 4]; for (i = 0; i < N / 4; i++) p2[i] = p[i+N/4]; int *p3 = new int[N / 4]; int *p33 = new int[N / 4]; for (i = 0; i < N / 4; i++) p3[i] = p[i+N/4+N/4]; int *p4 = new int[N - N / 4 * 3]; int *p44 = new int[N - 
N / 4 * 3]; for (i = 0; i < (N - N / 4 * 3); i++) p4[i] = p[i+N/4+N/4+N/4]; #pragma omp parallel { #pragma omp sections { #pragma omp section { MergeSort(p1, p11, 0, N / 4-1); } #pragma omp section { MergeSort(p2, p22, 0, N / 4-1); } #pragma omp section { MergeSort(p3, p33, 0, N / 4 - 1); } #pragma omp section { MergeSort(p4, p44, 0, N - N / 4 * 3-1); } } } delete[] p11; delete[] p22; delete[] p33; delete[] p44; int* temp1 = new int[N / 4 + N / 4]; int* temp11 = new int[N / 4 + N / 4]; for (i = 0; i < N / 4; i++) { temp1[i] = p1[i]; temp11[i] = p1[i]; } delete[] p1; for (i = 0; i < N / 4; i++) { temp1[i + N / 4] = p2[i]; temp11[i + N / 4] = p2[i]; } delete[] p2; int* temp2 = new int[N-(N / 4 + N / 4)]; int* temp22 = new int[N - (N / 4 + N / 4)]; for (i = 0; i < N / 4; i++) { temp2[i] = p3[i]; temp22[i] = p3[i]; } delete[] p3; for (i = 0; i < (N - N / 4 * 3); i++) { temp2[i + N / 4] = p4[i]; temp22[i + N / 4] = p4[i]; } delete[] p4; Merge(temp1, temp11, 0, N / 4 + N / 4 - 1); Merge(temp2, temp22, 0, N - (N / 4 + N / 4) - 1); delete[] temp11; delete[] temp22; int* temp3 = new int[N]; int* temp33 = new int[N]; for (i = 0; i < N / 4 + N / 4; i++) { temp3[i] = temp1[i]; temp33[i] = temp1[i]; } delete[] temp1; for (i = 0; i < N - (N / 4 + N / 4); i++) { temp3[i + N / 4 + N / 4] = temp2[i]; temp33[i + N / 4 + N / 4] = temp2[i]; } delete[] temp2; Merge(temp3, temp33, 0, N-1); for (i = 0; i < N; i++) p[i] = temp3[i]; delete[] temp3; delete[] temp33; }
void QuickSort(KeyInt *p, int low, int high)//快排 { if (low >= high) { return; } int first = low; int last = high; int key = p[first];/*用字表的第一个记录作为枢轴*/ while (first < last) { while (first < last && p[last] >= key) { --last; } p[first] = p[last];/*将比第一个小的移到低端*/ while (first < last && p[first] <= key) { ++first; } p[last] = p[first]; /*将比第一个大的移到高端*/ } p[first] = key;/*枢轴记录到位*/ QuickSort(p, low, first - 1); QuickSort(p, first + 1, high); } void QuickSortAverage(KeyInt *p, int low, int high)//快排+三数取中+插入 { if (high - low + 1 < 20) { InsertSort(p, low, high); return; }//else时,正常执行快排 int first = low; int last = high; //int key = p[first];/*用字表的第一个记录作为枢轴*/ int key = SelectPivotMedianOfThree(p, low, high); while (first < last) { while (first < last && p[last] >= key) { --last; } p[first] = p[last];/*将比第一个小的移到低端*/ while (first < last && p[first] <= key) { ++first; } p[last] = p[first]; /*将比第一个大的移到高端*/ } p[first] = key;/*枢轴记录到位*/ QuickSortAverage(p, low, first - 1); QuickSortAverage(p, first + 1, high); } void QuickSortSame(KeyInt *p, int low, int high)//快排+三数取中+插入+聚集相等元素 { if (high - low + 1 < 20) { InsertSort(p, low, high); return; } int temp; int first = low; int last = high; int left = low; int right = high; int leftLen = 0; int rightLen = 0; //一次分割 int key = SelectPivotMedianOfThree(p, low, high);//使用三数取中法选择枢轴 while (low < high) { while (high > low && p[high] >= key) { if (p[high] == key)//处理相等元素 { //swap(p[right], p[high]); temp = p[right]; p[right] = p[high]; p[high] = temp; right--; rightLen++; } high--; } p[low] = p[high]; while (high > low && p[low] <= key) { if (p[low] == key) { //swap(p[left], p[low]); temp = p[left]; p[left] = p[low]; p[low] = temp; left++; leftLen++; } low++; } p[high] = p[low]; } p[low] = key; //一次快排结束 //把与枢轴key相同的元素移到枢轴最终位置周围 int i = low - 1; int j = first; while (j < left && p[i] != key) { //swap(p[i], p[j]); temp = p[i]; p[i] = p[j]; p[j] = temp; i--; j++; } i = low + 1; j = last; while (j > right && p[i] != key) { //swap(p[i], p[j]); temp = 
p[i]; p[i] = p[j]; p[j] = temp; i++; j--; } QuickSortSame(p, first, low - 1 - leftLen); QuickSortSame(p, low + 1 + rightLen, last); }
void QuickSortParallel(KeyInt *p, int low, int high)//2核快排 { p[0] = BOUNDARY / 2; /*for (int i = low; i <= high; i++) { if (abs(p[i] - BOUNDARY / 2) < 10) { int temp = p[i]; p[i] = p[0]; p[0] = temp; break; } }*/ int mid = Partition(p, low, high); #pragma omp parallel { #pragma omp sections { #pragma omp section { QuickSortAverage(p, low, mid-1); } #pragma omp section { QuickSortAverage(p, mid+1, high); } } } } void QuickSortParallel4Core(KeyInt *p, int low, int high)//4核快排 { p[0] = BOUNDARY / 2; /*for (int i = low; i <= high; i++) { if (abs(p[i] - BOUNDARY / 2) < 10) { int temp = p[i]; p[i] = p[0]; p[0] = temp; break; } }*/ int mid = Partition(p, low, high); p[low] = BOUNDARY / 4; int quarter1 = Partition(p, low, mid - 1); p[mid + 1] = BOUNDARY / 4 * 3; int quarter2 = Partition(p, mid + 1, high); #pragma omp parallel { #pragma omp sections { #pragma omp section { //double start1 = omp_get_wtime(); QuickSortAverage(p, low, quarter1-1); //double end1 = omp_get_wtime(); //printf("%lf\n", end1 - start1); } #pragma omp section { //double start2 = omp_get_wtime(); QuickSortAverage(p, quarter1 + 1, mid-1); //double end2 = omp_get_wtime(); //printf("%lf\n", end2 - start2); } #pragma omp section { //double start3 = omp_get_wtime(); QuickSortAverage(p, mid+1, quarter2-1); //double end3 = omp_get_wtime(); //printf("%lf\n", end3 - start3); } #pragma omp section { //double start4 = omp_get_wtime(); QuickSortAverage(p, quarter2+1, high); //double end4 = omp_get_wtime(); //printf("%lf\n", end4 - start4); } } } } /*函数作用:取待排序序列中low、mid、high三个位置上数据,选取他们中间的那个数据作为枢轴*/ int SelectPivotMedianOfThree(int *arr, int low, int high)//三数取中 { int temp; int mid = low + ((high - low) >> 1);//计算数组中间的元素的下标 //使用三数取中法选择枢轴 if (arr[mid] > arr[high])//目标: arr[mid] <= arr[high] { //swap(arr[mid], arr[high]); temp = arr[mid]; arr[mid] = arr[high]; arr[high] = temp; } if (arr[low] > arr[high])//目标: arr[low] <= arr[high] { //swap(arr[low], arr[high]); temp = arr[low]; arr[low] = arr[high]; arr[high] = 
temp; } if (arr[mid] > arr[low]) //目标: arr[low] >= arr[mid] { //swap(arr[mid], arr[low]); temp = arr[mid]; arr[mid] = arr[low]; arr[low] = temp; } //此时,arr[mid] <= arr[low] <= arr[high] return arr[low]; //low的位置上保存这三个位置中间的值 //分割时可以直接使用low位置的元素作为枢轴,而不用改变分割函数了 } int Partition(int * a, int low, int high)//分隔 { int pivotkey = a[low]; while (low<high) { while (low<high && a[high] >= pivotkey) --high; a[low] = a[high]; while (low<high && a[low] <= pivotkey) ++low; a[high] = a[low]; } //此时low==high a[low] = pivotkey; return low; } void NonRecursiveQuickSort(int *a, int len)//用栈实现快排 { stack<Region> regions;//定义一个栈变量 Region region; region.low = 0; region.high = len - 1; regions.push(region); while (!regions.empty()) { region = regions.top(); regions.pop(); int p = Partition(a, region.low, region.high); if (p - 1>region.low) { Region regionlow; regionlow.low = region.low; regionlow.high = p - 1; regions.push(regionlow); } if (p + 1<region.high) { Region regionhigh; regionhigh.low = p + 1; regionhigh.high = region.high; regions.push(regionhigh); } } }
KeyInt* randomCreate(int N) { int i = 0; KeyInt *p; p =(KeyInt*) malloc(N * sizeof(KeyInt)); for (i = 0; i < N; i++) p[i] = myrandom(BOUNDARY); //p[i] = random(BOUNDARY); return (p); } bool happened(double probability)//probability 0~1 { if (probability <= 0) { return false; } if (probability<MinProb) { return rand() == 0 && happened(probability*(RAND_MAX + 1)); } if (rand() <= probability*(RAND_MAX + 1)) { return true; } return false; } int myrandom(int n)//产生0~n-1之间的等概率随机数 { int t = 0; if (n <= RAND_MAX) { int R = RAND_MAX - (RAND_MAX + 1) % n;//尾数 t = rand(); while (t > R) { t = rand(); } return t % n; } else { int r = n % (RAND_MAX + 1);//余数 if (happened((double)r / n))//取到余数的概率 { return n - r + myrandom(r); } else { return rand() + myrandom(n / (RAND_MAX + 1))*(RAND_MAX + 1); } } } void DisPlay(int N, KeyInt *p) { for (int i = 0; i < 100; i++) printf("%d\n", p[i]); }
以上排序算法都是对随机函数rand()生成的随机数组进行排序的,我对各个排序的性能以及并行加速比进行了比较与分析。我们知道,rand()函数通常是通过线性同余法生成伪随机数,其范围是0~RAND_MAX;RAND_MAX的取值依赖于具体实现,在我使用的环境中是2^15-1(32767),这个范围对像快排这样的排序算法来说就显得较小。所以这里需要在rand()函数的基础上进行改进,就是我上面写的myrandom()函数,产生0~n-1范围内的等概率随机数。
这里我说明一下并行设计需要注意的一些问题:
1:算法能随CPU核数扩展,即CPU核数升级后不需要修改算法就可以取得加速比性能的线性增加。
2:算法能有一个较好的能耗效率,算法并不是越快越好,而是需要在速度和CPU能耗方面取得均衡,有时候为了追求效率,但是却让CPU能耗提高了许多。最好的做法是加速比能够达到一定目标的情况下尽量降低CPU能耗。也就是说不需要片面去追求将程序并行化。有些时候程序串行执行比并行执行慢不了多少,但是CPU能耗却降低了不少。
3:需要控制线程的粒度,否则线程粒度太细,频繁创建线程会导致大量的额外开销,从而使得效率大大降低。
4:在设计并行排序算法时,还要考虑内存管理的开销,由于并行算法使用了多个线程,如果内存分配和释放操作频繁的话,那么花费在这方面的开销将是非常巨大的。