外部排序 多路合并

《数据结构与算法分析——C语言描述》  第七章


这个还是挺有意思的,用很少的内存空间给很多的数排序,复杂度是logk(N/M)



#include <stdio.h>
#include <stdlib.h>
#include<string.h>
#include<queue>
//#include"fatal.h"
#define M 3
#define K 3


typedef int ElementType;

void insertionSort(int *a, int n) {
	int j, p;
	int temp;

	for (p = 1; p < n; p++) {
		temp = a[p];
		for (j = p; j > 0 && temp < a[j - 1]; j--)
			a[j] = a[j - 1];
		a[j] = temp;
	}
}

void swap_my(ElementType *a, ElementType *b) {
	ElementType temp;
	temp = *a;
	*a = *b;
	*b = temp;
}

ElementType median3(ElementType a[], int left, int right) {
	int center = (left + right) / 2;
	if (a[left] > a[center])
		swap_my(&a[left], &a[center]);
	if (a[left] > a[right])
		swap_my(&a[left], &a[right]);
	if (a[center] > a[right])
		swap_my(&a[center], &a[right]);
	swap_my(&a[center], &a[right - 1]);
	return a[right - 1];
}




#define CUTOFF (3)  

void qsort_my(ElementType a[], int left, int right) {
	if (left + CUTOFF <= right) {
		int i, j;
		ElementType pivot;
		pivot = median3(a, left, right);

		i = left;
		j = right - 1;
		while (1) {
			while (a[++i] < pivot) {}

			while (a[--j] > pivot) {}

			if (i < j)
				swap_my(&a[i], &a[j]);
			else
				break;
		}
		swap_my(&a[i], &a[right - 1]);
		qsort_my(a, left, i - 1);
		qsort_my(a, i + 1, right);
	}
	else
		insertionSort(a + left, right - left + 1);
}

void quickSort_my(ElementType a[], int n) {
	qsort_my(a, 0, n - 1);
}

FILE* file[2 * K];//文件指针数组
char name[200];//生成的名字
int runlen;//顺序串长度

void write(int *a, int n, FILE *out) {
	for (int i = 0; i < n; i++) {
		fprintf(out, "%d ", a[i]);
	}
}


char* fileName(char *buf, int part, int i) {
	if (part == 0)
		strcpy(buf, "ta");
	else
		strcpy(buf, "tb");
	char num[5];
	strcat(buf, itoa(i + 1, num, 10));
	return buf;
}

void open(int part, char* type) {//打开文件
	if (part == 0) {
		for (int i = 0; i < K; i++) {
			file[i] = fopen(fileName(name, part, i), type);
		}
	}
	else {
		for (int i = 0; i < K; i++) {
			file[K + i] = fopen(fileName(name, part, i), type);
		}
	}

}



void close(int part) {//关闭文件
	if (part == 0)
		for (int i = 0; i < K; i++)
			fclose(file[i]);
	else
		for (int i = 0; i < K; i++)
			fclose(file[K + i]);
}

int isAllEnd(int readPart) {//读取的文件是否全部都读完了
	for (int i = 0; i < K; i++)
		if (!feof(file[readPart*K + i]))
			return 0;
	return 1;
}

int isAllBiggerThanRunLen(int * cursor) {
	for (int i = 0; i < K; i++) {
		if (cursor[i] < runlen)
			return 0;
	}
	return 1;
}


typedef std::pair<int, int> Pair_int;
auto cmp = [](const Pair_int& left, const Pair_int& right) { return (left.first) > (right.first); };//lambda表达式,算是一种比较精简的函数吧


int RandInt(int i, int j) {
	int temp;
	temp = (int)(i + (1.0*rand() / RAND_MAX)*(j - i));
	return temp;
}

void getRandomInt(int *A, int n) {
	for (int i = 0; i < n; i++) {
		A[i] = i + 1;
	}

	for (int i = 1; i < n; i++) {
		//std::swap(A[i], A[RandInt(0, i)]);      
		int randAdrr = RandInt(0, i);
		int t = A[i];
		A[i] = A[randAdrr];
		A[randAdrr] = t;
	}
}

#define N 222


void writeRandIntToFile() {
	int a[N];
	getRandomInt(a, N);
	FILE *fp = fopen("ta1", "w");
	for (int &i : a)
		fprintf(fp, "%d ", i);
	fclose(fp);
}

int main() {

	
	writeRandIntToFile();

	int max_memory[M];

	//初始化顺序串

	FILE *ta1 = fopen("ta1", "r");
	open(1, "w");
	int n = 0;
	int writeNum = 0;//0表示tb1,1表示tb2,……
	while (!feof(ta1)) {
		int readNum = 0;
		while (readNum < M && fscanf(ta1, "%d", &max_memory[readNum]) != EOF) {
			n++;
			readNum++;
		}
		quickSort_my(max_memory, readNum);
		//交替写到tb1或写到tb2……
		write(max_memory, readNum, file[K + writeNum]);
		writeNum = (writeNum + 1) % K;
	}
	fclose(ta1);
	close(1);

	//K路排序
	runlen = M;
	int readpart = 1;//0表示ta1,ta2,1表示tb1,tb2
	while (runlen < n) {
		if (readpart == 1) {
			open(0, "w");
			open(1, "r");
		}
		else {
			open(0, "r");
			open(1, "w");
		}
		writeNum = 0;//写到哪,一半中的 第一个磁盘还是第二个磁盘
		while (!isAllEnd(readpart)) {
			int cursor[K];
			memset(cursor, 0, sizeof(cursor));


			int num;
			int isHasNumNoWrite[K];//0表示目前没有缓存数字
			memset(isHasNumNoWrite, 0, sizeof(isHasNumNoWrite));
			int writePart = readpart == 0 ? 1 : 0;
			std::priority_queue<Pair_int, std::vector<Pair_int>, decltype(cmp)> h(cmp);
			while (!isAllBiggerThanRunLen(cursor)) {//分别取出K个顺序串合并
				int isInsertNew = 0;
				for (int i = 0; i < K; i++) {
					if (isHasNumNoWrite[i] == 0 && !feof(file[readpart*K + i]) && cursor[i] < runlen) {
						if (fscanf(file[readpart*K + i], "%d", &num) == 1) {
							isHasNumNoWrite[i] = 1;
							h.push({ num,i });
							isInsertNew = 1;
						}

					}

				}
				if (isInsertNew == 0 && h.empty())//这里可是细节,这个最后只剩下空格没有数字的话也不算文件结尾,feof返回0
					break;
				Pair_int p = h.top();
				h.pop();
				fprintf(file[writePart*K + writeNum], "%d ", p.first);
				cursor[p.second]++;
				isHasNumNoWrite[p.second] = 0;
			}
			writeNum = (writeNum + 1) % K;
		}
		close(0);
		close(1);
		runlen *= K;
		readpart = (readpart == 0) ? 1 : 0;
	}
}


你可能感兴趣的:(外部排序 多路合并)