C++实现外部排序(置换-选择排序+败者树+最佳归并树),其中最佳归并树使用STL priority_queue实现

1.具体概念细节可以参考博文(外部排序,置换-选择排序,败者树,最佳归并树)

http://data.biancheng.net/view/76.html

2.C++实现

头文件

#pragma once

#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <queue>
#include <sstream>
#include <string>
using namespace std;


#pragma region 类型定义
// One slot of the simulated in-memory work area.
struct WorkArea {
	int mergeSegNum; // number of the merge segment (run) this value belongs to
	int value;       // the record value itself
	int key;         // auxiliary field used when adjusting the loser tree
};
// Pointer alias used for the dynamically allocated array of slots.
typedef WorkArea *WorkSpace;
// Describes one merge segment when building the optimal merge tree.
struct SegTag {
	int SegNum; // merge segment number
	int length; // number of records in that segment

	// Deliberately inverted comparison: an element compares "less" when it
	// is LONGER, so std::priority_queue (a max-heap on operator<) yields the
	// shortest segment first.
	bool operator<(const SegTag &other) const {
		return other.length < length;
	}
};
typedef SegTag segTag;
// Pairs an input file stream with the number of the segment file it reads.
struct streamInfomation {
	std::ifstream ismSource; // input stream over one segment file
	int segNum;              // number of the file the stream belongs to
};
// Pointer alias used for the dynamically allocated array of stream records.
typedef streamInfomation *StreamInfo;
#pragma endregion


class ExternalSortEntity
{
public:
	ExternalSortEntity();
	~ExternalSortEntity();
	void GenNum();//往指定地址文件中写入随机数
	void GetMergeSegments();//获取归并段
	void AccordingWayNumGenLoserTree(StreamInfo sInfo, const int &treeLength);//多路归并也借用败者树
	void AddVirSegment();//判断是否要添加虚段
	void AccordingWayNumAdjustLoserTree(int elemIndex, const int &treeLength);
	void Merge();//将归并段归并
private:
	void GetFileName(int mergeSegNum);//根据归并段生成文件名
	void GenLoserTree();//生成败者树,使用置换-选择排序获取初始归并段
	void AdjustLoserTree(int elemIndex);//调整败者树

private:
#pragma region 常量
	const int WORKSPACECAPCITY = 4;//模拟内存工作区大小
	const int RANDOMNUMRANGE = 213;//随机数范围
	const int RECORDNUM = 24; //记录数目
	const int MAXTEXTTOTAL = RECORDNUM % WORKSPACECAPCITY == 0 ? (RECORDNUM / WORKSPACECAPCITY) : (RECORDNUM / WORKSPACECAPCITY + 1);//存放归并段的文件个数最大值
	const int WAYNUM = 3;//归并路数
	const string SOURCEPATH = "D:\\TestData\\source.txt";//初始文件地址
	const string TEXTHEAD = "D:\\TestData\\target";//用于动态生成目标文件名
	const string TEXTTAIL = ".txt";
#pragma endregion
	ofstream osmSource;//初始文件(存放随机数)对应输出流
	ifstream ismSource;//初始文件输入流
	ofstream *osmTarget;//用于往目标文件(存放归并段的文件)写数据
	ostringstream oss;//用于构造文件名
	int *loserTree;//败者树
	WorkSpace workSpace;//模拟内存块
	streampos pos;//记录文件指针位置
	string fileName;//文件名
	priority_queue pQueue;//优先队列从小到大排
	StreamInfo sInfo;//存放文件流和对应文件编号
};

源文件

#include "ExternalSortEntity.h"

// Allocates the segment output streams, the loser tree and the work area,
// and opens the source file for writing.
// sInfo is explicitly set to nullptr so the pointer never holds an
// indeterminate value before Merge() assigns it.
ExternalSortEntity::ExternalSortEntity()
	: osmTarget(new ofstream[MAXTEXTTOTAL + 1]),    // one extra entry: segment numbers start at 1
	  loserTree(new int[WORKSPACECAPCITY]),
	  workSpace(new WorkArea[WORKSPACECAPCITY]),
	  sInfo(nullptr)
{
	this->osmSource.open(SOURCEPATH, ios::out);
}

// Releases the arrays allocated by the constructor (or re-allocated later).
// GetMergeSegments() resets these pointers to nullptr after freeing them, so
// deleting here is safe whether or not it ran (delete[] nullptr is a no-op).
// sInfo is allocated and released inside Merge(), so it is not touched here.
ExternalSortEntity::~ExternalSortEntity()
{
	delete[] this->loserTree;
	this->loserTree = nullptr;
	delete[] this->workSpace;
	this->workSpace = nullptr;
	delete[] this->osmTarget;
	this->osmTarget = nullptr;
}

// Writes RECORDNUM pseudo-random numbers to the source file, separated by
// single spaces with no trailing separator, then closes the stream.
void ExternalSortEntity::GenNum() {
	srand((unsigned)time(nullptr));
	for (int i = 0; i < RECORDNUM; i++) {
		if (i > 0) {
			this->osmSource << " "; // separator goes between values only
		}
		// Multiply in long long: rand() * rand() as int overflows (undefined
		// behaviour, possibly negative results) when RAND_MAX is large.
		const long long product = static_cast<long long>(rand()) * rand();
		this->osmSource << product % RANDOMNUMRANGE;
	}
	this->osmSource.close(); // all numbers written; release the file
}


void ExternalSortEntity:: GetFileName(int mergeSegNum) {
	this->oss << TEXTHEAD << mergeSegNum << TEXTTAIL;
	this->fileName = this->oss.str();
	this->oss.clear();
	this->oss.str("");//流内容清空
}

void ExternalSortEntity::GenLoserTree() {
	int num;//缓存从流中读的数据
			//工作区和败者树赋初值
	for (int i = 0; i < WORKSPACECAPCITY; i++) {
		this->loserTree[i] = this->workSpace[i].value = this->workSpace[i].mergeSegNum = this->workSpace[i].key = 0;
	}
	this->ismSource.open(SOURCEPATH, ios::in);//打开输入流
	
	int index = WORKSPACECAPCITY - 1;//文件往内存区时读入数据进模拟内存块的下标索引
	while (index >= 0 && this->ismSource >> num) {//从文件往内存中读入WORKSPACECAPCITY个数
		this->pos = this->ismSource.tellg();//记录文件指针指向的下一个数的地址,不使用文件指针位置的话,最后一次执行循环条件文件会时文件指针多后移一位
		this->workSpace[index].value = num;//对应内存块写入数据
		this->workSpace[index].mergeSegNum = 1;//设置初始归并段号为1
		this->AdjustLoserTree(index);//写入一个数的索引进败者树后,对败者树进行调整
		index--;//下一个数的内存块索引
	}
}

void ExternalSortEntity::AdjustLoserTree(int elemIndex) {//elemIndex为需要调整数的在内存块中的索引
	// loserTree[parentNode]为workSpace[elemIndex]的双亲节点,temp暂存败者索引
	int parentNode, temp;
	for (parentNode = (elemIndex + WORKSPACECAPCITY) / 2, temp = this->loserTree[parentNode]; parentNode > 0; parentNode /= 2, temp = this->loserTree[parentNode]) {
		// 段号小者 或者 段号相等且关键字更小的为胜者
		if (this->workSpace[temp].mergeSegNum < this->workSpace[elemIndex].mergeSegNum || (this->workSpace[temp].mergeSegNum == this->workSpace[elemIndex].mergeSegNum && this->workSpace[temp].value < this->workSpace[elemIndex].value)) {
			int s;
			s = elemIndex;
			elemIndex = this->loserTree[parentNode]; //elemIndex指示新的胜利者
			this->loserTree[parentNode] = s;
		}
	}
	this->loserTree[0] = elemIndex; // 最后的冠军
}

//败者树建立完毕后开始执行获取归并段
void ExternalSortEntity::GetMergeSegments() {
	this->GenLoserTree();
	int num;缓存从流中读的数据
	int *length = new int[MAXTEXTTOTAL];//记录对应归并段长度
	for (int i = 0; i < MAXTEXTTOTAL; i++) {
		length[i] = 0;
	}
	WorkArea min = this->workSpace[this->loserTree[0]];//存放败者树的冠军workArea[loserTree[0]]
	this->ismSource.seekg(this->pos);//将文件指针指向到第WORKCAPCOCITY+1数
	//开始生成归并段
	while (this->ismSource >> num) {
		if (!this->osmTarget[min.mergeSegNum].is_open()) {
			this->GetFileName(min.mergeSegNum);
			this->osmTarget[min.mergeSegNum].open(this->fileName, ios::out);
		}
		int temp;
		temp = min.mergeSegNum;
		this->osmTarget[min.mergeSegNum] << min.value;//往对应文件写值
		length[min.mergeSegNum]++;
		if (num < min.value) {//如果新读入内存的数小于败者树冠军,将该数放入下一个归并段
			this->workSpace[this->loserTree[0]].mergeSegNum++;
		}
		this->workSpace[this->loserTree[0]].value = num;//将新读入的数替换已经写入文件的数(败者树冠军)
		this->AdjustLoserTree(this->loserTree[0]);//重新调整败者树,调整位置为新读入内存树的索引
		min = this->workSpace[this->loserTree[0]];//调整完毕后,min重新赋值
		if (min.mergeSegNum == temp) {//相等表示并未切换到下一个归并段,数之间用空格隔开
			this->osmTarget[min.mergeSegNum] << " ";
		}
	}
	//文件中数写入完毕,下面将内存中数写入对应文件(归并段);
	for (int i = 0; i < WORKSPACECAPCITY; i++) {
		if (!this->osmTarget[min.mergeSegNum].is_open()) {
			this->GetFileName(min.mergeSegNum);
			this->osmTarget[min.mergeSegNum].open(this->fileName, ios::out);
		}
		int temp;
		temp = min.mergeSegNum;
		this->osmTarget[min.mergeSegNum] << min.value;
		length[min.mergeSegNum]++;
		this->workSpace[this->loserTree[0]].mergeSegNum = MAXTEXTTOTAL + 1;//内存中数写入文件完毕,将当前数的段号调整为最大段号+1,就是为了在之后几次调整败者树时,它始终未败者;
		this->AdjustLoserTree(this->loserTree[0]);
		min = this->workSpace[this->loserTree[0]];
		if (min.mergeSegNum == temp) {
			this->osmTarget[min.mergeSegNum] << " ";
		}
	}
	//关闭流,并将段号,长度存入优先队列,方便使用最佳归并算法完成归并
	for (int i = 0; i < MAXTEXTTOTAL; i++) {
		if (this->osmTarget[i].is_open()) {
			segTag wTag;
			wTag.SegNum = i;
			wTag.length = length[i];
			this->pQueue.push(wTag);
			this->osmTarget[i].close();
		}
	}
	//释放资源
	delete[] workSpace;
	workSpace = nullptr;
	delete[] loserTree;
	loserTree = nullptr;
	delete[] osmTarget;
	osmTarget = nullptr;
	delete[] length;
}

// Adds zero-length virtual segments (SegNum 0, length 0) so that
// (segment count - 1) is a multiple of (WAYNUM - 1), letting every merge
// pass combine exactly WAYNUM segments.
void ExternalSortEntity::AddVirSegment() {
	// Nothing to pad when there are no segments; this also avoids the
	// unsigned wrap-around of size() - 1 on an empty queue.
	if (this->pQueue.empty()) {
		return;
	}
	const int remainder = static_cast<int>((this->pQueue.size() - 1) % (WAYNUM - 1));
	if (remainder == 0) {
		return; // already fits a full WAYNUM-ary merge tree
	}
	const int addNum = WAYNUM - remainder - 1;
	for (int i = 0; i < addNum; i++) {
		segTag sTag = { 0,0 };
		this->pQueue.push(sTag);
	}
}

// Builds the loser tree for one multi-way merge pass.
// sInfo      - array of WAYNUM stream records; only open streams contribute.
// treeLength - number of leaves, i.e. the size of loserTree and workSpace.
// The first value of each open stream is loaded into a leaf (highest leaf
// first), tagged with the stream's segment number and key 1, and the tree is
// adjusted after each load.
void ExternalSortEntity::AccordingWayNumGenLoserTree(StreamInfo sInfo, const int &treeLength) {
	int num = 0; // initialised so a failed read never stores an indeterminate value
	// Reset the loser tree and the work area.
	for (int i = 0; i < treeLength; i++) {
		this->loserTree[i] = this->workSpace[i].value = this->workSpace[i].mergeSegNum = this->workSpace[i].key = 0;
	}
	int index = treeLength - 1;
	// Read through the sInfo parameter; it shadows the member of the same
	// name, which was being used instead of the argument.
	for (int i = 0; i < WAYNUM && index >= 0; i++) {
		if (sInfo[i].ismSource.is_open()) {
			sInfo[i].ismSource >> num;
			this->workSpace[index].value = num;
			this->workSpace[index].mergeSegNum = sInfo[i].segNum;
			this->workSpace[index].key = 1;
			this->AccordingWayNumAdjustLoserTree(index, treeLength);
			index--;
		}
	}
}

void ExternalSortEntity::AccordingWayNumAdjustLoserTree(int elemIndex, const int &treeLength) {
	int parentNode, temp;
	// ls[parentNode]为workSpace[elemIndex]的双亲节点,temp暂存败者索引
	for (parentNode = (elemIndex + treeLength) / 2, temp = loserTree[parentNode]; parentNode > 0; parentNode /= 2, temp = loserTree[parentNode]) {
		//段号大者 或者 段号相等且关键字更小的为胜者
		if (this->workSpace[temp].key < this->workSpace[elemIndex].key || (this->workSpace[temp].key == this->workSpace[elemIndex].key && this->workSpace[temp].value < this->workSpace[elemIndex].value)) {
			int s;
			s = elemIndex;
			elemIndex = this->loserTree[parentNode]; //elemIndex指示新的胜利者
			this->loserTree[parentNode] = s;
		}
	}
	this->loserTree[0] = elemIndex; // 最后的冠军
}
void ExternalSortEntity::Merge() {
	//添加虚拟段
	AddVirSegment();
	ofstream osmMerge;
	this->sInfo = new streamInfomation[WAYNUM];
	int afterMergeSegMentNum = MAXTEXTTOTAL + 1;//归并后产生的文件编号
	while (this->pQueue.size() > 1) {
		//打开wayNum个输入流
		int totalLength = 0;//进行归并的数的总个数
		int count = 0;//记录打开的流的个数
		for (int i = 0; i < WAYNUM; i++) {//开始打开要进行归并的文件
			if (this->pQueue.top().SegNum != 0) {
				this->GetFileName(this->pQueue.top().SegNum);
				this->sInfo[i].ismSource.open(this->fileName, ios::in);
				this->sInfo[i].segNum = this->pQueue.top().SegNum;
				count++;
			}
			totalLength += this->pQueue.top().length;
			this->pQueue.pop();//删除队列中已经被读取的segTag
		}
		//分别从流中读取一个数到内存生成败者树
		this->loserTree = new int[count];
		this->workSpace = new WorkArea[count];
		this->AccordingWayNumGenLoserTree(this->sInfo, count);
		this->GetFileName(afterMergeSegMentNum);
		osmMerge.open(this->fileName, ios::out);//打开文件用于存储wayNum路归并后的结果
		for (int i = 0; iworkSpace[this->loserTree[0]].value << " ";
			}
			else
			{
				osmMerge << this->workSpace[this->loserTree[0]].value;
			}
			//写入完毕后判断写入文件的数属于哪一个归并段,之后从那个归并段读数写入内存workSpace[loserTree[0]]
			for (int j = 0; j < WAYNUM; j++) {
				if (this->sInfo[j].segNum == this->workSpace[this->loserTree[0]].mergeSegNum) {
					if (this->sInfo[j].ismSource.peek() == EOF) {//用于判断文件是否读完
						this->workSpace[this->loserTree[0]].key++;//读完将辅助变量+1,在之后的败者树调整时,它一直为败者
					}
					else
					{
						this->sInfo[j].ismSource >> this->workSpace[this->loserTree[0]].value;
					}
				}
			}
			//再对败者树调整使workSpace[loserTree[0]]为冠军即最小值
			this->AccordingWayNumAdjustLoserTree(this->loserTree[0], count);
		}
		segTag sTag = { afterMergeSegMentNum,totalLength };
		this->pQueue.push(sTag);//将归并产生的文件编号和长度写入队列,用于下一次归并
		afterMergeSegMentNum++;//下一个归并文件编号
		osmMerge.close();
		//关闭流
		for (int i = 0; i < WAYNUM; i++) {
			if (this->sInfo[i].ismSource.is_open()) {
				this->sInfo[i].ismSource.close();
			}
		}
	}
	delete[] sInfo;
	sInfo = nullptr;
}

入口测试

#include"ExternalSortEntity.h"
#include"windows.h"
void process() {
	long t1 = GetTickCount();
	ExternalSortEntity SEntity;
	SEntity.GenNum();
	SEntity.GetMergeSegments();
	SEntity.Merge();
	long t2 = GetTickCount();
	cout << "运行时间:" << (t2 - t1)<<"ms"<< endl;
}
// Entry point: run the sort once, then keep the console window open.
int main() {
	process();
	system("pause");
	return 0;
}

 

你可能感兴趣的:(C/C++,总结,外部排序)