如果要排序的数据太多,就不能一次性加载到内存中进行排序,只能分而治之:先分块排序,然后再合并。 这里参考归并排序的过程简单地实现了一下,封装得还不够好。 另外在合并多个文件时偷了懒,感觉需要用平衡二叉树,但是实现起来比较麻烦,就先借用C++标准库中multiset的自动排序功能了。 STL的set、map、multiset都是有序容器,通常用红黑树实现,查找、插入、删除等操作的时间复杂度都是O(log n)。
mergesort.h
#ifndef __MERGESORT_H
#define __MERGESORT_H
#include <algorithm>
#include <cstddef>
#include <new>
#include <stdexcept>
#include <vector>
namespace MyNamespace
{

/// qsort-style comparison callback.
///
/// Receives pointers to two elements and returns a negative value, zero or a
/// positive value when the first element orders before, equal to or after the
/// second one.
typedef int (*Comparer)(const void *a, const void *b);

/// Three-way comparison of `*a` and `*b` that relies only on `operator<`.
///
/// @return -1 if `*a < *b`, 1 if `*b < *a`, otherwise 0.
template <typename T>
int defaultCompare(T *a, T *b)
{
    if (*a < *b) return -1;
    if (*b < *a) return 1;
    return 0;
}

/// Copies the first `n` elements of `b` into `a` by assignment, walking from
/// the last index down to the first. Does nothing when `n <= 0`.
template <typename T>
void arrayCopy(T *a, const T *b, int n)
{
    while (--n >= 0) a[n] = b[n];
}

/// Returns the smaller of two indices.
inline int minIndex(int a, int b)
{
    return a < b ? a : b;
}

/// Sorts `a[0..n)` in place with a bottom-up (iterative) merge sort.
///
/// The sort is stable: when two elements compare equal, the one from the left
/// run is emitted first.
///
/// @param a        Array to sort.
/// @param n        Number of elements in `a`; fewer than 2 means nothing to do.
/// @param comparer Ordering callback, or NULL to fall back to defaultCompare.
///                 Note that defaultCompare<T> is compiled either way, so `T`
///                 must provide `operator<` even when a comparer is supplied.
/// @return 0 on success; -1 if `a` is NULL while `n >= 2`, or if the scratch
///         buffer cannot be allocated.
template <typename T>
int mergeSort(T a[], int n, Comparer comparer)
{
    if (n < 2)
        return 0;
    if (a == NULL)
        return -1;

    // The scratch buffer is owned by a vector so that it is released on every
    // exit path, including an exception thrown by the comparer or by T's
    // assignment operator.
    std::vector<T> merged;
    try
    {
        merged.resize(static_cast<std::size_t>(n));
    }
    catch (const std::bad_alloc &)
    {
        return -1;
    }
    catch (const std::length_error &)
    {
        return -1;
    }

    // `width` is the length of the already-sorted runs; it doubles on every
    // pass (1, 2, 4, 8, ...) and is capped at n.
    int width = 1;
    while (width < n)
    {
        int out = 0;   // next free slot in the scratch buffer
        int left = 0;  // start of the current pair of runs
        while (left < n)
        {
            // The two runs are [left, mid) and [mid, right). The bounds are
            // clamped to n using subtraction, so the arithmetic cannot
            // overflow even when n is close to INT_MAX.
            const int mid = (width < n - left) ? left + width : n;
            const int right = (width < n - mid) ? mid + width : n;

            int i = left;
            int j = mid;
            while (i < mid && j < right)
            {
                const int order = (comparer != NULL)
                    ? comparer(a + i, a + j)
                    : defaultCompare(a + i, a + j);
                // `<= 0` keeps the left element first on ties (stability).
                if (order <= 0)
                    merged[out++] = a[i++];
                else
                    merged[out++] = a[j++];
            }
            // At most one of the two runs still has elements left.
            while (i < mid)
                merged[out++] = a[i++];
            while (j < right)
                merged[out++] = a[j++];

            left = right;
        }

        // Every pass writes all n elements, so the whole buffer is copied back.
        std::copy(merged.begin(), merged.end(), a);
        width = (width > n - width) ? n : width * 2;
    }
    return 0;
}

/// Sorts `a[0..n)` in ascending order as defined by `operator<`.
///
/// @return Same status code as the three-argument overload.
template <typename T>
int mergeSort(T a[], int n)
{
    return mergeSort(a, n, static_cast<Comparer>(NULL));
}

} // namespace MyNamespace
#endif //__MERGESORT_H
filemergesort.h
#ifndef __FILEMERGESORT_H
#define __FILEMERGESORT_H
#include "mergesort.h"
#include
#include
#include
#include
#include
#include
main.c
#include
#include
#include
#include "mergesort.h"
#include "filemergesort.h"
using namespace std;
int main()
{
FileMergeSort fileMergeSort("\n", 1024*1024*10);
int ret = fileMergeSort.sort("Quote.csv");
std::cout << ret << std::endl;
system("pause");
return 0;
}
Quote.csv
1373871611,SH600000,8.56,8.57,8.56,8.55,8.71,8.41,8.49,891040704.00,1038509.00
1373871665,SH600005,2.31,2.32,2.31,2.31,2.35,2.30,2.31,45215176.00,194893.00
1373871626,SH600009,12.81,12.82,12.81,12.86,13.07,12.73,12.80,39855712.00,30992.00
1373871665,SH600010,4.08,4.09,4.08,4.05,4.13,4.01,4.05,197604608.00,485515.00
1373871607,SH600011,5.46,5.47,5.47,5.45,5.56,5.43,5.50,123869376.00,226039.00
1373871626,SH600015,9.37,9.38,9.37,9.28,9.55,9.24,9.21,290867808.00,309105.00
1373871605,SH600016,9.15,9.16,9.16,9.16,9.41,9.00,9.11,1331242880.00,1451737.00
1373871607,SH600019,4.07,4.08,4.07,4.05,4.11,4.03,4.03,73100704.00,180051.00
1373871605,SH600022,1.63,1.64,1.64,1.62,1.65,1.61,1.63,19511916.00,119705.00
1373871600,SH600027,3.14,3.15,3.15,3.12,3.20,3.12,3.13,96782808.00,307247.00
1373871605,SH600028,4.55,4.56,4.56,4.59,4.62,4.52,4.59,283333152.00,619985.00
1373871611,SH600029,2.77,2.78,2.77,2.77,2.81,2.76,2.76,52826140.00,189341.00
1373871660,SH600030,10.96,10.97,10.96,10.80,11.29,10.65,10.54,1668629248.00,1518316.00
1373871603,SH600031,7.30,7.31,7.31,7.28,7.45,7.26,7.28,211298784.00,287873.00
1373871660,SH600036,11.66,11.67,11.65,11.74,11.91,11.55,11.62,789394304.00,674696.00
1373871603,SH600037,7.18,0.00,7.18,6.60,7.18,6.55,6.53,292799488.00,416500.00
......
Quote.csv.sorted
1373871600,SH600027,3.14,3.15,3.15,3.12,3.20,3.12,3.13,96782808.00,307247.00
1373871603,SH600031,7.30,7.31,7.31,7.28,7.45,7.26,7.28,211298784.00,287873.00
1373871603,SH600037,7.18,0.00,7.18,6.60,7.18,6.55,6.53,292799488.00,416500.00
1373871605,SH600016,9.15,9.16,9.16,9.16,9.41,9.00,9.11,1331242880.00,1451737.00
1373871605,SH600022,1.63,1.64,1.64,1.62,1.65,1.61,1.63,19511916.00,119705.00
1373871605,SH600028,4.55,4.56,4.56,4.59,4.62,4.52,4.59,283333152.00,619985.00
1373871607,SH600011,5.46,5.47,5.47,5.45,5.56,5.43,5.50,123869376.00,226039.00
1373871607,SH600019,4.07,4.08,4.07,4.05,4.11,4.03,4.03,73100704.00,180051.00
1373871611,SH600000,8.56,8.57,8.56,8.55,8.71,8.41,8.49,891040704.00,1038509.00
1373871611,SH600029,2.77,2.78,2.77,2.77,2.81,2.76,2.76,52826140.00,189341.00
1373871626,SH600009,12.81,12.82,12.81,12.86,13.07,12.73,12.80,39855712.00,30992.00
1373871626,SH600015,9.37,9.38,9.37,9.28,9.55,9.24,9.21,290867808.00,309105.00
1373871660,SH600030,10.96,10.97,10.96,10.80,11.29,10.65,10.54,1668629248.00,1518316.00
1373871660,SH600036,11.66,11.67,11.65,11.74,11.91,11.55,11.62,789394304.00,674696.00
1373871665,SH600005,2.31,2.32,2.31,2.31,2.35,2.30,2.31,45215176.00,194893.00
1373871665,SH600010,4.08,4.09,4.08,4.05,4.13,4.01,4.05,197604608.00,485515.00