(九)《数据结构与算法》 青岛大学-王卓 哈弗曼树与哈弗曼编码及其应用

哈弗曼树与哈弗曼编码及其应用

这个周末对哈弗曼编码进行了编写,本来打算自己写,结果发现笨的一批,全程参考此博客,而且还耗费了两天时间。写在这里主要算是给自己一个记录吧,毕竟也花费时间整了。

参考链接: https://blog.csdn.net/qq_40738840/article/details/85406330?ops_request_misc=&request_id=&biz_id=102&utm_term=%E5%93%88%E5%BC%97%E6%9B%BC%E7%BC%96%E7%A0%81%E4%B8%8E%E8%A7%A3%E7%A0%81&utm_medium=distribute.pc_search_result.none-task-blog-2allsobaiduweb~default-7-85406330.142v10control,157v4control&spm=1018.2226.3001.4187

此博主的程序中在读取二进制文件时,会出错,我本人做了些小小的改动,已经可以正常运行了。

/*

	功能:实现对文件的压缩
	程序流程:
	1、对文件进行IO加载;
	2、对输入的字符进行统计,并计算每个字符的权重,保存在weight文件中;
	3、进行构造哈夫曼树,且进行创建哈夫曼编码;
		1、哈夫曼树的创建:
			构造森林全是根,选用两小造新树;
			删除两小添新人,重复2、3剩单根。
			根据输入的权重,进行两小结合。
		2、哈夫曼编码:
			左分支标0,右分支标1;
			把从根到每个叶子的标号连接起来,作为叶子代表的字符编码;
			保存到二进制文件中;
		3、解码:
			加载二进制文件到哈弗曼结构中;
			通过查找0、1寻找叶子节点;
*/

#include <climits>		// INT_MAX
#include <cstdlib>		// atoi(), exit()
#include <fstream>
#include <iostream>
#include <stack>
#include <string>		// getline()
#include <vector>
using namespace std;

// One node of the array-based Huffman tree. Nodes refer to each other by array
// index; the rest of this file treats index 0 as "no node".
struct HTNode
{
	int ascii;		// character code of the symbol stored in a leaf
	int weight;		// occurrence count (leaf) or sum of both children (internal)
	int parent;		// index of the parent node, 0 while the node has none
	int Ich;		// index of the left child, 0 for a leaf
	int rch;		// index of the right child, 0 for a leaf
};

// A tree is handled as a pointer to the first element of a node array.
using HuffmanTree = HTNode*;

// Counts how often every byte value occurs in the file at inputPath and writes
// the resulting table to outputPath.
//
// inputPath   text file to analyse; the program exits with status 1 if it
//             cannot be opened.
// outputPath  receives the number of distinct symbols on the first line,
//             followed by one "<char> <count>" line per symbol in ascending
//             byte order.
// n           out: number of distinct byte values found.
//
// The file is read line by line, so line terminators are not counted.
void WeightCalculate(string inputPath, string outputPath,int &n)
{
	int cof[256] = {0};		// occurrence count, indexed by byte value

	fstream ifs(inputPath, ios::in);
	if (!ifs)
	{
		cout << "文件打开失败!" << endl;
		exit(1);
	}

	string s;
	while (getline(ifs, s))
	{
		for (size_t i = 0; i < s.size(); i++)
		{
			// Plain char may be signed: bytes >= 0x80 (e.g. UTF-8 text) would be
			// negative and index outside the table, so go through unsigned char.
			cof[static_cast<unsigned char>(s[i])]++;
		}
	}
	ifs.close();

	// Number of distinct symbols.
	n = 0;
	for (size_t i = 0; i < 256; i++)
	{
		if (cof[i] != 0) ++n;
	}
	cout << "字符总个数:" << n  << endl;

	fstream ofs(outputPath, ios::out);
	if (!ofs)
	{
		cout << "文件打开失败!" << endl;
		return;				// nothing can be written; n is still valid for the caller
	}
	ofs << n <<endl;
	for (size_t i = 0; i < 256; i++)
	{
		if (cof[i] != 0)
		{
			ofs << static_cast<char>(i) << " " << cof[i] << endl;
		}
	}
	ofs.close();
}

// Builds the Huffman tree for the symbols listed in the weight file and writes
// both the tree and the resulting code table to disk.
//
// HTree                out: newly allocated array of 2*n nodes. Index 0 is unused,
//                      1..n are the leaves, n+1..2n-1 the internal nodes and
//                      2n-1 the root. The caller owns the array (delete[]).
// n                    number of distinct symbols expected in the weight file.
// weightFilePath       input: a header line with the symbol count, then one
//                      "<char> <count>" line per symbol.
// huffmanTreePath      output: n, then one " ascii weight parent Ich rch" line
//                      per node.
// huffmanCodeFilePath  output: one "ascii weight code" line per symbol.
//
// When two nodes are merged the lighter one becomes the left child; on equal
// weights the node with the smaller index goes left. With a single symbol the
// tree is one node and that symbol's code is empty.
void CreateHuffman(HuffmanTree &HTree,int n,string weightFilePath,string huffmanTreePath,string huffmanCodeFilePath)
{
	if (n < 0) n = 0;					// a negative count would make the allocation size invalid
	const int total = 2 * n;
	HTree = new HTNode[total]();		// value-initialised: every field of every node is 0

	fstream ifs(weightFilePath, ios::in);
	if (!ifs) cout << "weight文件加载失败" << endl;

	string s;
	getline(ifs, s);					// skip the header line holding the symbol count

	// Load at most n leaves so that extra lines can never write past the array.
	int count = 1;
	while (count <= n && getline(ifs, s))
	{
		if (s.size() < 3) continue;		// not a "<char> <count>" line
		// Go through unsigned char so bytes >= 0x80 are stored as 128..255
		// instead of negative values.
		HTree[count].ascii = static_cast<unsigned char>(s[0]);
		string weight = s.substr(2);	// everything after "<char> "
		HTree[count].weight = atoi(weight.c_str());
		cout << "截取的字符串:" << weight << endl;
		cout << "总行字符串" << s << endl;
		cout << HTree[count].ascii << "   " << HTree[count].weight << endl;
		++count;
	}
	ifs.close();

	// Repeatedly merge the two lightest parentless nodes into a new node i.
	for (int i = n + 1; i < total; i++)
	{
		int min1 = INT_MAX, min2 = INT_MAX;		// smallest and second smallest weight
		int index1 = 0, index2 = 0;				// and the nodes that carry them

		for (int j = 1; j < i; j++)
		{
			if (HTree[j].parent != 0) continue;	// already merged into a subtree
			if (min1 > HTree[j].weight)
			{
				min2 = min1;					// previous minimum becomes the runner-up
				index2 = index1;
				min1 = HTree[j].weight;
				index1 = j;
			}
			else if (min2 > HTree[j].weight)
			{
				min2 = HTree[j].weight;
				index2 = j;
			}
		}

		HTree[i].weight = HTree[index1].weight + HTree[index2].weight;
		HTree[index1].parent = HTree[index2].parent = i;

		// Lighter node on the left; ties are broken by the smaller index.
		const bool firstGoesLeft =
			HTree[index1].weight < HTree[index2].weight ||
			(HTree[index1].weight == HTree[index2].weight && index1 < index2);
		HTree[i].Ich = firstGoesLeft ? index1 : index2;
		HTree[i].rch = firstGoesLeft ? index2 : index1;
	}

	// Dump the whole node table.
	fstream outFile(huffmanTreePath, ios::out);
	if (!outFile) cout << "哈弗曼树文件打开失败!" << endl;
	outFile << n << endl;
	for (int i = 1; i < total; i++)
	{
		outFile << " " << HTree[i].ascii << " " << HTree[i].weight << " " << HTree[i].parent << " " << HTree[i].Ich << " " << HTree[i].rch << endl;
	}
	outFile.close();

	// Write the code table: symbol, weight, code.
	fstream codeFile(huffmanCodeFilePath, ios::out);
	if (!codeFile) cout << "哈弗曼编码表.txt,打开失败!" << endl;

	for (int i = 1; i <= n; i++)
	{
		// Walk from the leaf up to the root; the bits are collected in reverse
		// (leaf-to-root) order and flipped when written.
		string reversedBits;
		for (int j = i; HTree[j].parent != 0; j = HTree[j].parent)
		{
			const int p = HTree[j].parent;
			if (j == HTree[p].Ich) reversedBits.push_back('0');	// reached through the left branch
			if (j == HTree[p].rch) reversedBits.push_back('1');	// reached through the right branch
		}

		codeFile << HTree[i].ascii << " " << HTree[i].weight << " ";
		codeFile << string(reversedBits.rbegin(), reversedBits.rend());
		codeFile << endl;
	}
	codeFile.close();
}

// Encodes the source file with the code table produced by CreateHuffman.
//
// resoureFilePath  text file to encode; read line by line, so line terminators
//                  are not encoded.
// huffmanCodePath  code table, one "ascii weight code" line per symbol.
// binaryFilePath   output file made of fixed 20-byte records. Each record holds
//                  up to 19 '0'/'1' characters followed by NUL padding.
//
// Every input character produces at least one record. A code longer than 19
// bits is continued in the following record(s); Decode keeps its position in
// the tree from one record to the next, so it reads such a code correctly.
void Code(string resoureFilePath,string huffmanCodePath, string binaryFilePath)
{
	const size_t kRecordSize = 20;						// record size Decode reads
	const size_t kBitsPerRecord = kRecordSize - 1;		// one byte is kept for the NUL

	// Load the code table, indexed by byte value.
	string codeList[256];
	fstream table(huffmanCodePath, ios::in);
	if (!table)
	{
		cout << "哈弗曼文件打开错误!" << endl;
		return;
	}
	int ch = 0, w = 0;
	string bits;
	while (table >> ch >> w >> bits)
	{
		// Accept both 0..255 and the negative form a signed char produces,
		// and ignore anything that cannot be a byte value.
		if (ch < -128 || ch > 255) continue;
		codeList[static_cast<unsigned char>(ch)] = bits;
	}
	table.close();

	fstream src(resoureFilePath, ios::in);
	if (!src)
	{
		cout << "打开源文件失败!" << endl;
		return;
	}

	ofstream ofs(binaryFilePath, ios::binary);
	if (!ofs)
	{
		cout << "二进制文件打开失败!" << endl;
		return;
	}

	string line;
	while (getline(src, line))
	{
		for (size_t i = 0; i < line.size(); i++)
		{
			// unsigned char keeps bytes >= 0x80 inside the table.
			const string &symbolCode = codeList[static_cast<unsigned char>(line[i])];
			size_t pos = 0;
			do
			{
				const size_t remaining = symbolCode.size() - pos;
				const size_t len = remaining < kBitsPerRecord ? remaining : kBitsPerRecord;
				char record[kRecordSize] = {};			// zero-filled: no stray stack bytes reach the file
				symbolCode.copy(record, len, pos);
				ofs.write(record, kRecordSize);
				pos += len;
			} while (pos < symbolCode.size());
		}
	}
	src.close();
	ofs.close();
}

// 再次进行解码
void Decode(string binaryFilePath,string huffmanTreePath, string decodePath)
{
	char code[20];
	fstream ifs(huffmanTreePath, ios::in);		// 打开哈弗曼树文件
	if (!ifs) cout << "无法打开哈弗曼树文件!" << endl;
	int n;		// 节点数
	ifs >> n;   // n个节点
	HuffmanTree HTree;		//创建哈弗曼树
	HTree = new HTNode[2*n];		// 申请内存
	for (size_t i = 1; i < 2*n; i++)
	{
		ifs >> HTree[i].ascii >> HTree[i].weight >> HTree[i].parent >> HTree[i].Ich >> HTree[i].rch;
	}
	ifs.close();

	// 处理编码信息
	ifs.open(binaryFilePath, ios::in|ios::binary);	
	if (!ifs) cout << "二进制文件打开错误!" << endl;

	fstream ofs(decodePath, ios::out);		// 将文件保存到该路径下
	if (!ofs) cout << "打开decode.txt失败!" << endl;

	// 解码开始
	char ch;
	int root = 2 * n - 1;
	while (true)
	{
		ifs.read((char*)code, 20 * sizeof(char));
		if (!ifs) break;
		for (size_t i = 0; code[i]!='\0'; i++)
		{
			ch = code[i];
			if (ch == '0') root = HTree[root].Ich;				// 碰到0,向左走,走到叶子节点,那么这个值就是字符
			else if (ch == '1') root = HTree[root].rch;

			if (HTree[root].Ich == 0)					// 不管左子树还是右子树,他们的孩子都是0,所以随便找一个就可以了。
			{
				char cht = HTree[root].ascii;
				ofs << cht;
				root = 2 * n - 1;
			}
		}
	}	
	ofs.close();
	ifs.close();
}


int main()
{
	string inputPath = "data.txt";			// 加载源文件
	string outputPath = "weight.txt";		// 输出权重文件
	string huffmanTree = "HuffmanTree.txt";	// 哈弗曼树
	string huffmanCodePath = "huffmanCodePath.txt";	// 输出哈弗曼编码文件
	string binaryFilePath = "binaryFilePath.dat";		// 二进制文件
	string decodePath = "decode.txt";			// 解码路径
	int n = 0;
	WeightCalculate(inputPath, outputPath,n);
	HuffmanTree HT;
	CreateHuffman(HT, n,outputPath,huffmanTree,huffmanCodePath);
	Code(inputPath, huffmanCodePath, binaryFilePath);
	Decode(binaryFilePath, huffmanTree, decodePath);
	return 0;
}

加载的文件内容,如下:
Life is picturesque, there are thick ink splashing, there are light pen light description; Life is like a song, there are light chanting and shallow singing, there are also Huang zhongdalu. You don’t see the life of Li Taibai and Du Gongbu. They have created the greatness of life; Pei Duofei, Shelley’s life, light pen light description, finally write the true meaning of life.

你可能感兴趣的:(数据结构与算法,数据结构,算法,c++)