目的:
在双核处理器上用多线程将快排运行速度加倍。
硬件条件:
电脑必须是双核及以上,这样两个线程才能真正分布到两个处理器上运行。
开发环境:
VC6.0
经典的快排如下(我做了一个非常小的修改,见程序中的注释):
#include <windows.h>
#include <process.h>
#include <time.h>
#include <iostream>
using namespace std;
int a[1000]={
1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
};
void quicksort(int p,int r);
// Partitions the global array a over the inclusive index range [p, r].
// a[r] is taken as the pivot; every element <= pivot is moved to the front
// of the range and the pivot is then placed directly after them.
// Returns the index at which the pivot ends up.
int partition(int p,int r)
{
    const int pivot = a[r];
    int boundary = p;   // first slot not yet known to hold a value <= pivot
    int scan = p;
    while (scan < r)
    {
        if (a[scan] <= pivot)
        {
            int held = a[boundary];
            a[boundary] = a[scan];
            a[scan] = held;
            ++boundary;
        }
        ++scan;
    }

    // The only difference from the textbook quicksort: an empty 1000 x 1000
    // nested loop whose sole purpose is to make the program run longer so the
    // elapsed time is easy to measure. The multithreaded version does the same.
    int outer = 0;
    while (outer < 1000)
    {
        int inner = 0;
        while (inner < 1000)
        {
            ++inner;
        }
        ++outer;
    }

    // Drop the pivot into its final position.
    int last = a[boundary];
    a[boundary] = a[r];
    a[r] = last;
    return boundary;
}
void quicksort(int p,int r)
{
if(p
int q = partition(p,r);
quicksort(p,q-1);
quicksort(q+1,r);
}
}
// Single-threaded benchmark: sorts the whole global array a with quicksort()
// and prints the elapsed time as the difference of two clock() readings
// (i.e. in clock ticks, CLOCKS_PER_SEC ticks per second).
int main()
{
    clock_t start = clock();
    quicksort(0,999);
    clock_t end = clock();

    double elapsed = (double)(end-start);
    // The tail of this statement and the closing brace of main() were lost in
    // the published listing; they are restored here.
    std::cout<<" time is : "<<elapsed<<std::endl;
    return 0;
}
多线程版本相对经典版本有两处改动。
第一,增加了一个函数 unsigned __stdcall quicksort_thread(void *)。它的排序逻辑和经典版本的 void quicksort(int p,int r) 完全一样,只是区间的两个端点改为从全局变量 first_thread_p、first_thread_r 读取,并在结束时把 thread_end 置为 1。之所以要有这个函数,只是因为需要用它作为子线程的入口函数;子线程只在启动时进入它一次,之后的递归都走正常的 void quicksort(int p,int r)。
第二,main函数也要改动一点。在main函数中先用partition(int p,int r)将原数据分成两部分,前一部分交给子线程处理,后一部分由主线程处理。(经典版是把全部数据都交给主线程处理。虽然经典版里程序员没有创建新线程,但进程本身的执行也是一个线程,为了和程序员手动创建的线程相区分,我把它叫做主线程。总之,经典版快排只有一个主线程,多线程版除了主线程之外还有一个子线程。)注意,如果主线程中这一次partition(int p,int r)没能把数据大致平分,也就是分界点不在中间附近,那么多线程带来的加速效果会大幅下降。比如说,如果划分的结果是前一部分只有1个元素,剩下的全部作为第二部分交给主线程处理,那么前一部分很快就被子线程处理完,而子线程处理完之后并不会去帮助主线程处理第二部分的数据,这样效率几乎没有提高。
代码如下:
#include <windows.h>
#include <process.h>
#include <time.h>
#include <iostream>
using namespace std;
// Inclusive index range of a[] handed to the worker thread.
// main() writes both values before it starts the thread.
int first_thread_p,first_thread_r;
// Completion flag used for synchronization: the worker thread sets it to 1
// when it has finished, and the main thread polls it. Declared volatile so
// the polling loop re-reads it from memory on every iteration instead of the
// compiler caching the value in a register.
volatile int thread_end=0;
int a[1000]={
1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,534
,1,3,20,43,2,7,4,87,34,65,12,77,11,34,6,7,8,53,2,23,199,55,43,4,34,76,433,6,4,3,2,55,6,3,66,32,5,67,32,2,6,66,7,8,7,9,0,8,7,62,3,4,5,6,7,9,3,4,5,6,2,45,6,7,98,5,34,2,4,6,78,9,67,59,64,22,4,5,60,70,89,61,4,322,23,226,744,8,6,65,533,422,344,264,233,367,434,444,55,20
};
void quicksort(int p,int r);
// Partitions the global array a over the inclusive index range [p, r].
// a[r] is taken as the pivot; every element <= pivot is moved to the front
// of the range and the pivot is then placed directly after them.
// Returns the index at which the pivot ends up.
int partition(int p,int r)
{
    const int pivot = a[r];
    int boundary = p;   // first slot not yet known to hold a value <= pivot
    int scan = p;
    while (scan < r)
    {
        if (a[scan] <= pivot)
        {
            int held = a[boundary];
            a[boundary] = a[scan];
            a[scan] = held;
            ++boundary;
        }
        ++scan;
    }

    // Empty 1000 x 1000 nested loop: it does no work and exists only to
    // lengthen the run so the elapsed time is easy to measure, exactly as in
    // the single-threaded version.
    int outer = 0;
    while (outer < 1000)
    {
        int inner = 0;
        while (inner < 1000)
        {
            ++inner;
        }
        ++outer;
    }

    // Drop the pivot into its final position.
    int last = a[boundary];
    a[boundary] = a[r];
    a[r] = last;
    return boundary;
}
// Entry point of the worker thread (signature required by _beginthreadex).
// Sorts the inclusive range [first_thread_p, first_thread_r] of the global
// array a, then sets thread_end to 1 to signal completion.
// The void* argument is unused; the range is passed through the globals.
// Always returns 0.
unsigned __stdcall quicksort_thread(void *)
{
    // The published listing duplicated quicksort()'s body here (with its
    // "if(p<r)" condition truncated); delegating performs the same work.
    quicksort(first_thread_p,first_thread_r);
    thread_end=1;
    return 0;
}
void quicksort(int p,int r)
{
if(p
int q = partition(p,r);
quicksort(p,q-1);
quicksort(q+1,r);
}
}
int main()
{
HANDLE hThread2;
unsigned int threadID1;
clock_t start, end,t;
int j=0,k=0;
double total_time;
start=clock();
t=partition(0,999);
first_thread_p=0;first_thread_r=t-1;
hThread2 = (HANDLE)_beginthreadex(NULL, 0, &quicksort_thread, NULL, 0, &threadID1);
quicksort(t+1,999);
while(thread_end==0)
{
}
end=clock();
total_time=(double)(end-start);
cout<<"total time is : "<
}
运行结果:
经典快排的运行时间如下(这里列出3次运行的数据;实际上运行了更多次,大部分结果都在这个范围,我去掉了个别和大多数结果差异特别大的数据):
3992
3993
3971
而经过简单改造得到的多线程版本快排的运行时间是(运行了3次,得到3组数据):
1883
2005
1883
通过比较可以看出,节省了近一半的时间。如果在运行时查看任务管理器,也可以看到,经典版程序的CPU利用率只有50%左右,说明只有一个处理器在工作,另一个处理器在睡大觉;运行多线程版(在这里其实是2个线程)时CPU利用率是100%,说明此时两个处理器都在运行。
——结语:多线程充分利用了多处理器的硬件条件,加速了快排的执行。
这是快排多线程版本一,它的局限在于:主线程第一次调用partition(int p,int r)时,必须将原数据划分为大小尽可能相等的两部分,原因上面解释过了。
我还有个快排的多线程的版本二,那是另一种思路,可以避开这个问题。
版本二的思路会比较复杂,但解决问题的思路更有意思一些,见我的另一篇博客《用多线程加速快排(方式二)》