磁盘文件排序 编程珠玑

  开始看编程珠玑了,第一个就是进行磁盘排序的问题,想到了也只是归并排序,但题目要求1M内存,这个算法不可行。编程珠玑写到使用位图(分两次操作读写可以成功实现,小于内存1M),详情看编程珠玑第一章。

题目:给定10^7数据,对大数据进行排序。要求内存只有1M,时间可以接受,较短。

解决方法:1.多路归并。2.位图操作。

在这里看了july的文章,写的真心不错。推荐:http://blog.csdn.net/v_JULY_v/article/details/6451990

其中有个生成不同随机数的程序附下:

 1 #include <iostream>  

 2 #include <time.h>  

 3 #include <assert.h>  

 4 using namespace std;  

 5   

 6 const int size = 10000000;  

 7 int num[size];  

 8   

 9 int main()  

10 {  

11     int n;  

12     FILE *fp = fopen("data.txt", "w");  

13     assert(fp);  

14   

15     for (n = 1; n <= size; n++)    

16         //之前此处写成了n=0;n<size。导致下面有一段小程序的测试数据出现了0,特此订正。  

17         num[n] = n;  

18     srand((unsigned)time(NULL));  

19     int i, j;  

20   

21     for (n = 0; n < size; n++)  

22     {  

23         i = (rand() * RAND_MAX + rand()) % 10000000;  

24         j = (rand() * RAND_MAX + rand()) % 10000000;  

25         swap(num[i], num[j]);  

26     }  

27   

28     for (n = 0; n < size; n++)  

29         fprintf(fp, "%d ", num[n]);  

30     fclose(fp);  

31     return 0;  

32 }  

位图程序实现排序,首先排前5000000(每个数字对应一个bit)个后排剩下的5000000数据(每次都小于1M),代码如下(模拟两次):

 1 #include <iostream>  

 2 #include <bitset>  

 3 #include <assert.h>  

 4 #include <time.h>  

 5 using namespace std;  

 6   

 7 const int max_each_scan = 5000000;  

 8   

 9 int main()  

10 {  

11     clock_t begin = clock();  

12     bitset<max_each_scan> bit_map;  

13     bit_map.reset();  

14       

15     // open the file with the unsorted data  

16     FILE *fp_unsort_file = fopen("data.txt", "r");  

17     assert(fp_unsort_file);  

18     int num;  

19   

20     // the first time scan to sort the data between 0 - 4999999  

21     while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  

22     {  

23         if (num < max_each_scan)  

24             bit_map.set(num, 1);  

25     }  

26       

27     FILE *fp_sort_file = fopen("sort.txt", "w");  

28     assert(fp_sort_file);  

29     int i;  

30       

31     // write the sorted data into file  

32     for (i = 0; i < max_each_scan; i++)  

33     {  

34         if (bit_map[i] == 1)  

35             fprintf(fp_sort_file, "%d ", i);  

36     }  

37       

38     // the second time scan to sort the data between 5000000 - 9999999  

39     int result = fseek(fp_unsort_file, 0, SEEK_SET);  

40     if (result)  

41         cout << "fseek failed!" << endl;  

42     else  

43     {  

44         bit_map.reset();  

45         while (fscanf(fp_unsort_file, "%d ", &num) != EOF)  

46         {  

47             if (num >= max_each_scan && num < 10000000)  

48             {  

49                 num -= max_each_scan;  

50                 bit_map.set(num, 1);  

51             }  

52         }  

53         for (i = 0; i < max_each_scan; i++)  

54         {  

55             if (bit_map[i] == 1)  

56                 fprintf(fp_sort_file, "%d ", i + max_each_scan);  

57         }  

58     }  

59       

60     clock_t end = clock();  

61     cout<<"用位图的方法,耗时:"<<endl;  

62     cout << (end - begin) / CLK_TCK << "s" << endl;  

63     fclose(fp_sort_file);  

64     fclose(fp_unsort_file);  

65     return 0;  

66 }  

你可能感兴趣的:(编程珠玑)