《数据结构与算法分析——C语言描述》 第七章
初始化顺序串很有意思。数字以文本模式存放在文件中,每个数字大小不相同,字符长度也就不一样,想要提前知道一个文件里有多少个数字,只能把文件遍历一遍来计数。硬盘读写慢得要命,就算是SSD也才500MB/s左右,而内存(不包括cache命中)大约有20000MB/s。但多相合并要根据顺序串的数量按照斐波那契数列把它们分配到各个文件中,所以只能读一个数存一个数,边读边分配。
version1
这个版本有个问题:文件结尾是空格,就算用feof也不能判断已经到了文件结尾,结果多迭代了一次,添加了不必要的哑串。后来改成另一种模式:以“是否成功读入数字”作为循环的入口条件,并换成单循环。
void initRun(char *inputFileName) { int max_memory[M]; //初始化顺序串 FILE *ori = fopen(inputFileName, "r"); char name[20]; for (int i = 0; i < K; i++)//打开文件写 file[i] = fopen(fileName(name, i), "w"); int dummyNumCnt = 0; int writeNum = 0;//0表示t1,1表示t2,…… int fibonacci[K];//K阶斐波那契数列 initFibonacci(fibonacci);//初始化 memset(runLen, 0, sizeof(runLen));//所有文件的顺序串数量为0 int end = 0; while (!end) { for (writeNum = 0; writeNum < K; writeNum++) {//一个一个文件来 while (runNum[writeNum] < fibonacci[K - 1 - writeNum]) {//每个文件逐次的满足斐波那契数列 if (end == 0) { int readNum = 0; while (readNum < M && fscanf(ori, "%d", &max_memory[readNum]) != EOF) { readNum++; } if (readNum < M) { end = 1; dummyNumCnt += (M - readNum); for (int i = readNum; i < M; i++) max_memory[i] = 0; } quickSort_my(max_memory, M); //交替写到tb1或写到tb2…… write(max_memory, readNum, file[writeNum]); } else { memset(max_memory, 0, sizeof(max_memory)); write(max_memory, M, file[writeNum]); dummyNumCnt += M; } runNum[writeNum]++; } } updateFibonacci(fibonacci); } fclose(ori); for (int i = 0; i < K; i++) fclose(file[i]); }
version2
实在想不到怎样用K路合并,想到的是一个文件对另外K-1个文件进行2路合并,宏定义的K为3的时候没有问题。K改成4、其他的就出问题了。代码写得一坨屎。。。这个断断续续写了我六七天了。深感智商之捉急。
想了一下,K大于等于4出问题的原因是:仅靠最长文件的序号、写文件的序号、读文件的序号,并不能判断一轮循环中哪些文件已经处理过,得另外用数据结构来表示(记录)每个文件的状态。
#include <stdio.h> #include <stdlib.h> #include<string.h> #include<queue> #include"fatal.h" #define M 3 #define K 3 typedef int ElementType; void insertionSort(int *a, int n) { int j, p; int temp; for (p = 1; p < n; p++) { temp = a[p]; for (j = p; j > 0 && temp < a[j - 1]; j--) a[j] = a[j - 1]; a[j] = temp; } } void swap_my(ElementType *a, ElementType *b) { ElementType temp; temp = *a; *a = *b; *b = temp; } ElementType median3(ElementType a[], int left, int right) { int center = (left + right) / 2; if (a[left] > a[center]) swap_my(&a[left], &a[center]); if (a[left] > a[right]) swap_my(&a[left], &a[right]); if (a[center] > a[right]) swap_my(&a[center], &a[right]); swap_my(&a[center], &a[right - 1]); return a[right - 1]; } #define CUTOFF (3) void qsort_my(ElementType a[], int left, int right) { if (left + CUTOFF <= right) { int i, j; ElementType pivot; pivot = median3(a, left, right); i = left; j = right - 1; while (1) { while (a[++i] < pivot) {} while (a[--j] > pivot) {} if (i < j) swap_my(&a[i], &a[j]); else break; } swap_my(&a[i], &a[right - 1]); qsort_my(a, left, i - 1); qsort_my(a, i + 1, right); } else insertionSort(a + left, right - left + 1); } void quickSort_my(ElementType a[], int n) { qsort_my(a, 0, n - 1); } FILE* file[K + 1];//文件指针数组 int runLen[K + 1];//每个文件对应的顺序串长度 int runNum[K + 1];//每个文件的顺序串数量 char name[200];//生成的名字 void write(int *a, int n, FILE *out) { for (int i = 0; i < n; i++) { fprintf(out, "%d ", a[i]); } } char* fileName(char *buf, int i) { strcpy(buf, "T"); char num[5]; strcat(buf, _itoa(i + 1, num, 10)); return buf; } typedef std::pair<int, int> Pair_int; auto cmp = [](const Pair_int& left, const Pair_int& right) { return (left.first) > (right.first); };//lambda表达式,算是一种比较精简的函数吧 int RandInt(int i, int j) { int temp; temp = (int)(i + (1.0*rand() / RAND_MAX)*(j - i)); return temp; } void getRandomInt(int *A, int n) { for (int i = 0; i < n; i++) { A[i] = i + 1; } for (int i = 1; i < n; i++) { //std::swap(A[i], A[RandInt(0, i)]); int randAdrr = 
RandInt(0, i); int t = A[i]; A[i] = A[randAdrr]; A[randAdrr] = t; } } #define N 100 void writeRandIntToFile() { int a[N]; getRandomInt(a, N); FILE *fp = fopen("ta1", "w"); for (int &i : a) fprintf(fp, "%d ", i); fclose(fp); } void initFibonacci(int *arr) {//给K阶的斐波那契数列的初始化 int i; for (i = 0; i < K - 2; i++) arr[i] = 0; arr[i] = 1;//k-2 arr[i + 1] = 1;//k-1 } void updateFibonacci(int *arr) { int sum = 0; for (int i = 0; i < K - 1; i++) { sum += arr[i]; arr[i] = arr[i + 1]; } arr[K - 1] += sum; } void handleRun(int &readNum, int &writeNum, int *fibonacci, int *max_memory) { readNum = 0; quickSort_my(max_memory, M); if (runNum[writeNum] < fibonacci[K - 1 - writeNum]) { write(max_memory, M, file[writeNum]); runNum[writeNum]++; if (runNum[writeNum] == fibonacci[K - 1 - writeNum]) {//判断是否写满了当前迭代的斐波那契数列 writeNum++; if (writeNum == K) { updateFibonacci(fibonacci); writeNum = 0; } } } else {//当前的允许顺序串数量为0,进行下一次迭代 writeNum = 0; updateFibonacci(fibonacci); handleRun(readNum, writeNum, fibonacci, max_memory); } } void initRun(char *inputFileName) { int max_memory[M];//模拟的最大内存 //初始化顺序串 FILE *ori = fopen(inputFileName, "r"); char name[20]; for (int i = 0; i < K; i++)//打开文件写 file[i] = fopen(fileName(name, i), "w"); int dummyNumCnt = 0;//记录的哑元数量 int writeNum = 0;//0表示t1,1表示t2,…… int fibonacci[K];//K阶斐波那契数列 initFibonacci(fibonacci);//初始化 for (int i = 0; i < K; i++)//每个文件的顺序串长度初始为M runLen[i] = M; int readNum = 0;//最大内存的下标 while (fscanf(ori, "%d", &max_memory[readNum]) != EOF) { readNum++; if (readNum < M)//还没读满最大的内存 continue; handleRun(readNum, writeNum, fibonacci, max_memory); } if (readNum != 0) {//一个顺序串未读满,补上0 dummyNumCnt += (M - readNum); while (readNum < M) max_memory[readNum++] = 0; handleRun(readNum, writeNum, fibonacci, max_memory); } memset(max_memory, 0, M*sizeof(int));//初始化哑串 while (writeNum < K) { if (runNum[writeNum] < fibonacci[K - 1 - writeNum]) {//是否写满了当前迭代的斐波那契数列 write(max_memory, M, file[writeNum]); runNum[writeNum]++; } else { writeNum++; } } fclose(ori); for (int 
i = 0; i < K; i++) fclose(file[i]); } int isFinish() { int cnt = 0; for (int i = 0; i < K + 1; i++) { if (runNum[i] >= 1) cnt++; if (cnt >= 2) { return 0; } } return 1; } void mergeRun(int longest, int read, int write) { int i, j; int a, b; int hasNum1 = 0, hasNum2 = 0; for (i = 0, j = 0; i < runLen[longest] && j < runLen[read];) { if (hasNum1 == 0) { fscanf(file[longest], "%d", &a); hasNum1 = 1; } if (hasNum2 == 0) { fscanf(file[read], "%d", &b); hasNum2 = 1; } if (a < b) { /*if (a < 0) Error("error1");*/ fprintf(file[write], "%d ", a); hasNum1 = 0; i++; } else { /*if (b < 0) Error("error2");*/ fprintf(file[write], "%d ", b); hasNum2 = 0; j++; } } while (i < runLen[longest]) { if (hasNum1 == 0) fscanf(file[longest], "%d", &a); fprintf(file[write], "%d ", a); hasNum1 = 0; i++; } while (j < runLen[read]) { if (hasNum2 == 0) fscanf(file[read], "%d", &b); fprintf(file[write], "%d ", b); hasNum2 = 0; j++; } } int nextReadNum(int longestNum, int writeNum, int now) { for (int i = now + 1; i < K + 1; i++) if (runNum[i]>0 && i != longestNum && i != writeNum) return i; return -1; } int firstReadNum(int longestNum, int writeNum) { int max = 0; int pos = -1; for (int i = 0; i < K + 1; i++) if (runNum[i]>max && i != longestNum && i != writeNum) { pos = i; max = runNum[i]; } return pos; } int main() { writeRandIntToFile(); char inputFileName[20] = "ta1"; //scanf("%s", inputFileName); initRun(inputFileName); int oldLongestNum=-1; int longestNum = 0; int nextLongestNum; int writeNum = K; int readNum = 1; int testCnt = 0; int oldLongestOpenTag = 0; while (!isFinish()) { int cnt = 0; file[longestNum] = fopen(fileName(name, longestNum), "r"); nextLongestNum = writeNum; while (cnt < K - 1 && !isFinish()) { cnt++; file[writeNum] = fopen(fileName(name, writeNum), "w"); if (readNum != oldLongestNum || (readNum == oldLongestNum && oldLongestOpenTag == 0)) file[readNum] = fopen(fileName(name, readNum), "r"); //runNum[writeNum] = 0; while (runNum[readNum] > 0) { mergeRun(longestNum, 
readNum, writeNum); runNum[longestNum]--; runNum[readNum]--; runNum[writeNum]++; testCnt++; } runLen[writeNum] = runLen[readNum] + runLen[longestNum]; fclose(file[writeNum]); int oldwriteNum = writeNum; writeNum = readNum; fclose(file[readNum]); readNum = nextReadNum(longestNum, oldwriteNum, readNum);//??? if(readNum==-1) readNum= nextReadNum(longestNum, oldwriteNum, -1); } if (runNum[longestNum] == 0) { fclose(file[longestNum]); oldLongestOpenTag = 0; } else oldLongestOpenTag = 1; oldLongestNum = longestNum; longestNum = nextLongestNum; readNum = firstReadNum(longestNum, writeNum);//findBiggesetNum } }
version3
算是终结这个了,用了两个堆,一个队列来表示未处理的文件,已处理的文件,空的文件。逻辑很清晰,代码很优美。
#include <stdio.h> #include <stdlib.h> #include<string.h> #include<queue> #include"fatal.h" #define M 3//最大的内存 #define K 8//K路排序 #define N 222//要排序的数字量,1—N typedef int ElementType; void insertionSort(int *a, int n) { int j, p; int temp; for (p = 1; p < n; p++) { temp = a[p]; for (j = p; j > 0 && temp < a[j - 1]; j--) a[j] = a[j - 1]; a[j] = temp; } } void swap_my(ElementType *a, ElementType *b) { ElementType temp; temp = *a; *a = *b; *b = temp; } ElementType median3(ElementType a[], int left, int right) { int center = (left + right) / 2; if (a[left] > a[center]) swap_my(&a[left], &a[center]); if (a[left] > a[right]) swap_my(&a[left], &a[right]); if (a[center] > a[right]) swap_my(&a[center], &a[right]); swap_my(&a[center], &a[right - 1]); return a[right - 1]; } #define CUTOFF (3) void qsort_my(ElementType a[], int left, int right) { if (left + CUTOFF <= right) { int i, j; ElementType pivot; pivot = median3(a, left, right); i = left; j = right - 1; while (1) { while (a[++i] < pivot) {} while (a[--j] > pivot) {} if (i < j) swap_my(&a[i], &a[j]); else break; } swap_my(&a[i], &a[right - 1]); qsort_my(a, left, i - 1); qsort_my(a, i + 1, right); } else insertionSort(a + left, right - left + 1); } void quickSort_my(ElementType a[], int n) { qsort_my(a, 0, n - 1); } FILE* file[K + 1];//文件指针数组 int runLen[K + 1];//每个文件对应的顺序串长度 int runNum[K + 1];//每个文件的顺序串数量 char name[200];//生成的名字 typedef std::pair<int, int> Pair_int;//first是序号,second是runNum auto cmp = [](const Pair_int& left, const Pair_int& right) { return (left.second) < (right.second); };//lambda表达式,算是一种比较精简的函数吧,比较的位置是pair的第二个 std::queue<int> nullFile;//空文件列表 std::priority_queue<Pair_int, std::vector<Pair_int>, decltype(cmp)>fileHeap1(cmp);//这里不知道怎么用数组 std::priority_queue<Pair_int, std::vector<Pair_int>, decltype(cmp)>fileHeap2(cmp); void write(int *a, int n, FILE *out) { for (int i = 0; i < n; i++) { fprintf(out, "%d ", a[i]); } } char* fileName(char *buf, int i) { strcpy(buf, "T"); char num[5]; strcat(buf, _itoa(i + 1, 
num, 10)); return buf; } int RandInt(int i, int j) { int temp; temp = (int)(i + (1.0*rand() / RAND_MAX)*(j - i)); return temp; } void getRandomInt(int *A, int n) { for (int i = 0; i < n; i++) { A[i] = i + 1; } for (int i = 1; i < n; i++) { //std::swap(A[i], A[RandInt(0, i)]); int randAdrr = RandInt(0, i); int t = A[i]; A[i] = A[randAdrr]; A[randAdrr] = t; } } void writeRandIntToFile() { int a[N]; getRandomInt(a, N); FILE *fp = fopen("ta1", "w"); for (int &i : a) fprintf(fp, "%d ", i); fclose(fp); } void initFibonacci(int *arr) {//给K阶的斐波那契数列的初始化 int i; for (i = 0; i < K - 2; i++) arr[i] = 0; arr[i] = 1;//k-2 arr[i + 1] = 1;//k-1 } void updateFibonacci(int *arr) { int sum = 0; for (int i = 0; i < K - 1; i++) { sum += arr[i]; arr[i] = arr[i + 1]; } arr[K - 1] += sum; } void handleRun(int &readNum, int &writeNum, int *fibonacci, int *max_memory) { readNum = 0; quickSort_my(max_memory, M); if (runNum[writeNum] < fibonacci[K - 1 - writeNum]) { write(max_memory, M, file[writeNum]); runNum[writeNum]++; if (runNum[writeNum] == fibonacci[K - 1 - writeNum]) {//判断是否写满了当前迭代的斐波那契数列 writeNum++; if (writeNum == K) { updateFibonacci(fibonacci); writeNum = 0; } } } else {//当前的允许顺序串数量为0,进行下一次迭代 writeNum = 0; updateFibonacci(fibonacci); handleRun(readNum, writeNum, fibonacci, max_memory); } } void initRun(char *inputFileName) { int max_memory[M];//模拟的最大内存 //初始化顺序串 FILE *ori = fopen(inputFileName, "r"); char name[20]; for (int i = 0; i < K; i++)//打开文件写 file[i] = fopen(fileName(name, i), "w"); int dummyNumCnt = 0;//记录的哑元数量 int writeNum = 0;//0表示t1,1表示t2,…… int fibonacci[K];//K阶斐波那契数列 initFibonacci(fibonacci);//初始化 for (int i = 0; i < K; i++)//每个文件的顺序串长度初始为M runLen[i] = M; int readNum = 0;//最大内存的下标 while (fscanf(ori, "%d", &max_memory[readNum]) != EOF) { readNum++; if (readNum < M)//还没读满最大的内存 continue; handleRun(readNum, writeNum, fibonacci, max_memory); } if (readNum != 0) {//一个顺序串未读满,补上0 dummyNumCnt += (M - readNum); while (readNum < M) max_memory[readNum++] = 0; handleRun(readNum, 
writeNum, fibonacci, max_memory); } memset(max_memory, 0, M*sizeof(int));//初始化哑串 while (writeNum < K) { if (runNum[writeNum] < fibonacci[K - 1 - writeNum]) {//是否写满了当前迭代的斐波那契数列 write(max_memory, M, file[writeNum]); runNum[writeNum]++; } else { writeNum++; } } fclose(ori); for (int i = 0; i < K; i++) { fclose(file[i]); if (runNum[i] > 0) { fileHeap1.push(std::make_pair(i,runNum[i])); } else { nullFile.push(i);//把空的放到空文件队列中 } } nullFile.push(K);//第K+1个磁带是空的 } void mergeRun(int longest, int read, int write) { int i, j; int a, b; int hasNum1 = 0, hasNum2 = 0; for (i = 0, j = 0; i < runLen[longest] && j < runLen[read];) { if (hasNum1 == 0) { fscanf(file[longest], "%d", &a); hasNum1 = 1; } if (hasNum2 == 0) { fscanf(file[read], "%d", &b); hasNum2 = 1; } if (a < b) { /*if (a < 0) Error("error1");*/ fprintf(file[write], "%d ", a); hasNum1 = 0; i++; } else { /*if (b < 0) Error("error2");*/ fprintf(file[write], "%d ", b); hasNum2 = 0; j++; } } while (i < runLen[longest]) { if (hasNum1 == 0) fscanf(file[longest], "%d", &a); fprintf(file[write], "%d ", a); hasNum1 = 0; i++; } while (j < runLen[read]) { if (hasNum2 == 0) fscanf(file[read], "%d", &b); fprintf(file[write], "%d ", b); hasNum2 = 0; j++; } } int main() { writeRandIntToFile(); char inputFileName[20] = "ta1";//要排序的文件 //scanf("%s", inputFileName); initRun(inputFileName);//初始化顺序串 auto * notHandle = &fileHeap1;//没有处理的 auto * hasHandle = &fileHeap2;//已经处理的 int oldLongestNum; int longestNum; int writeNum; int readNum; int oldLongestOpenTag = 0; while ((*notHandle).size()>1) {//直到合并为1个文件 //打开最长的文件 longestNum = (*notHandle).top().first; (*notHandle).pop(); file[longestNum] = fopen(fileName(name, longestNum), "r"); while (!(*notHandle).empty()) {//合并剩下的文件 writeNum = nullFile.front();//弹出一个空文件 nullFile.pop(); file[writeNum] = fopen(fileName(name, writeNum), "w"); readNum = (*notHandle).top().first;//弹出一个文件 (*notHandle).pop(); if (readNum != oldLongestNum || (readNum == oldLongestNum && oldLongestOpenTag == 
0))//上次的迭代最长的文件可能没读完,不需要重新打开 file[readNum] = fopen(fileName(name, readNum), "r"); while (runNum[readNum] > 0) {//把文件合并完 mergeRun(longestNum, readNum, writeNum); runNum[longestNum]--; runNum[readNum]--; runNum[writeNum]++; } runLen[writeNum] = runLen[readNum] + runLen[longestNum];//合并后顺序串为原来的两个顺序串长度加起来 (*hasHandle).push({ writeNum,runNum[writeNum] });//合并的,放到已处理中,下一次的循环的时候再处理 fclose(file[writeNum]); nullFile.push(readNum);//读完就是空文件了,放到空文件队列中 fclose(file[readNum]); } if (runNum[longestNum] == 0) {//看看最长的文件读完没 nullFile.push(longestNum);//读完了 fclose(file[longestNum]); oldLongestOpenTag = 0; } else { oldLongestOpenTag = 1;//没读完 (*hasHandle).push({ longestNum,runNum[longestNum] });//放到已经处理的文件堆中 } oldLongestNum = longestNum;//记录这次的最长的文件 std::swap(notHandle, hasHandle);//交换指针,这次已处理完的文件成为下一次未处理的文件 } }