基于哈夫曼编码思想的编码/解码小程序

最近在做课程设计,实现了一个哈夫曼编码/解码的小 demo,在此分享。
功能描述大多写在注释里,各模块相对独立,可以拆分出来修改使用。
支持中文、英文和阿拉伯数字;其他语言尚未测试。

//哈夫曼:基于贪心算法的哈夫曼树与哈夫曼编码
#include "pch.h"
// The header names on the original include lines were lost; the list below is
// what the code in this file actually uses.
#include <cstddef>
#include <cstdio>
#include <fstream>
#include <functional>
#include <iostream>
#include <queue>
#include <string>
#include <vector>
using namespace std;

// One node of the Huffman tree. Leaf nodes describe a symbol read from the
// input together with its occurrence count; internal nodes carry the combined
// count of their children and the child links.
struct word
{
	char a = 0;           // first (or only) byte of the symbol; non-negative for single-byte symbols
	char b = 0;           // second byte; only meaningful when `a` is negative (double-byte symbol, e.g. Chinese)
	int times = 0;        // number of occurrences (for internal nodes: sum of both children)
	string haffman;       // Huffman code as a string of '0'/'1' characters
	int num = 0;          // branch label: 0 for a left child, 1 for a right child
	int index = 0;        // position of this node in the global node array
	word* left = NULL;    // left child, NULL for a leaf
	word* right = NULL;   // right child, NULL for a leaf

	// Orders nodes by occurrence count; together with greater<word> this
	// makes priority_queue behave as a min-heap on `times`.
	bool operator >(const word &other) const
	{
		return times > other.times;
	}
};

// Looks up the symbol (a, b) in the global `words` vector and returns its
// position, or -1 when the symbol is not present.
int find(char a, char b);
// Depth-first traversal that stores the accumulated Huffman code on every
// leaf node reachable from `node`; `str` is the code built so far.
void getover(string str, word* node);



// Min-heap of node copies ordered by occurrence count (smallest on top).
priority_queue< word, vector<word>, greater<word> > q;
// Every node of the tree: symbol records added by initWords(), followed by
// the internal nodes added by CreatTree().
vector<word*> words;
// Relative paths.
// The input is opened read-only so a write-protected file can still be read.
fstream fin("haffmanIn.txt", ios::in);
// The output is opened write-only with truncation: the default fstream mode
// (in | out) fails when the file does not exist yet and never discards stale
// content left over from a previous, longer run.
fstream fout("haffmanOut.txt", ios::out | ios::trunc);
// Absolute paths
//fstream fin("D:\\课程实验\\算法设计\\Algorithm\\哈夫曼\\haffmanIn.txt");
//fstream fout("D:\\课程实验\\算法设计\\Algorithm\\哈夫曼\\haffmanOut.txt");

// Root of the Huffman tree; stays NULL until CreatTree() has built the tree.
word* head = NULL;
// The encoded text as a string of '0'/'1' characters, filled by OutputCode().
string sec;



// Reads the input stream `fin` to its end and fills the global `words` vector
// with one record per distinct symbol, counting occurrences in `times`.
// A byte whose `char` value is negative is treated as the first byte of a
// double-byte symbol (e.g. a Chinese character) and the following byte is
// consumed as its second half. Afterwards every record's `index` is set to
// its position in `words`.
void initWords()
{
	while (fin.peek() != EOF) {
		char a = fin.get();
		// Start every symbol with a clean second byte so that a single-byte
		// record never inherits the trail byte of an earlier double-byte one.
		char b = 0;
		if (a < 0) {
			b = fin.get();
		}

		int index = find(a, b);
		if (index == -1) {
			// First occurrence of this symbol
			word *w = new word();
			w->a = a;
			w->b = b;
			w->times = 1;
			words.push_back(w);
		}
		else
		{
			words[index]->times++;
		}
	}

	for (size_t i = 0; i < words.size(); i++)
	{
		words[i]->index = static_cast<int>(i);
	}
}
//vector中查找本字符,返回位置,否则返回-1
int find(char a, char b) {
	if (words.size() != 0) {
		//非中文
		if (a >= 0) {
			for (int i = 0; i < words.size(); i++) {
				if (words[i]->a == a) return i;
			}
		}
		//中文
		else
		{
			for (int i = 0; i < words.size(); i++) {
				if (words[i]->a == a && words[i]->b == b) return i;
			}
		}
	}
	return -1;
}

//使用优先队列构建haffman编码树
void CreatTree()
{
	//获取小根堆
	for (int i = 0; i < words.size(); i++)
	{
		q.push(*words[i]);
	}

	//依次出队 再入队
	while (q.size() != 1)
	{
		word *a;//出队的两个
		word *b;

		word *c = new word();//入队的一个

		a = words[q.top().index];
		q.pop();
		b = words[q.top().index];
		q.pop();

		//设置左孩子属性
		a->num = 0;
		//设置右孩子属性
		b->num = 1;

		//设置父节点
		c->left = a;
		c->right = b;
		c->times = a->times + b->times;

		//父节点加入队列 和 数组
		c->index = words.size();
		words.push_back(c);
		if (q.empty())
		{
			head = c;
			break;
		}
		q.push(*c);
	}
	//深度遍历,获取叶子节点haffman编码
	getover("", head);
}

// Walks the subtree rooted at `node` depth-first. `str` is the code collected
// on the way down: "0" is added for a step to the left child and "1" for a
// step to the right child. When a node without children is reached, `str` is
// stored as that node's Huffman code.
void getover(string str, word* node)
{
	const bool isLeaf = (node->left == NULL && node->right == NULL);
	if (isLeaf)
	{
		node->haffman = str;
		return;
	}

	if (node->left != NULL)
	{
		getover(str + "0", node->left);
	}
	if (node->right != NULL)
	{
		getover(str + "1", node->right);
	}
}

//打印所有字符的编码
void paintHaffman() {
	for (int i = 0; i < words.size(); i++) {
		//当左右孩子都为空时才是叶子节点(字符)
		if (words[i]->left == NULL && words[i]->right == NULL) {
			if (words[i]->a == 10)
			{
				cout << "\\n";
			}
			else if (words[i]->a == 32)
			{
				cout << "space";
			}
			else
			{
				cout << words[i]->a;
			}
			if (words[i]->a < 0)
				cout << words[i]->b;
			cout << "\t" << words[i]->times << "\t" << words[i]->haffman << endl;
		}
	}
}

// Prints the leaves of the subtree rooted at `node` in left-to-right order.
// Each line holds the symbol, its occurrence count and its Huffman code,
// separated by tabs; a line feed is shown as \n and a blank as "space".
void OutputTree(word* node) {
	const bool hasLeft = node->left != NULL;
	const bool hasRight = node->right != NULL;

	// Internal node: descend into whichever children exist
	if (hasLeft || hasRight)
	{
		if (hasLeft)
		{
			OutputTree(node->left);
		}
		if (hasRight)
		{
			OutputTree(node->right);
		}
		return;
	}

	// Leaf node: print the symbol itself
	const char first = node->a;
	if (first == 10)
	{
		cout << "\\n";
	}
	else if (first == 32)
	{
		cout << "space";
	}
	else
	{
		cout << first;
	}
	if (first < 0)
	{
		cout << node->b;
	}
	cout << "\t" << node->times << "\t" << node->haffman << endl;
}

// Re-reads the input stream from the beginning and rebuilds `sec` as the
// concatenation of the Huffman codes of all symbols in the text.
void OutputCode() {
	// The earlier full read left the stream at end-of-file (and possibly in a
	// failed state); reset it, otherwise the rewind may be ignored.
	fin.clear();
	fin.seekg(0);
	// Start from an empty bit string so a repeated call does not append twice
	sec.clear();

	while (fin.peek() != EOF)
	{
		char a = fin.get();
		char b = 0;
		if (a < 0) {// first byte of a double-byte symbol: read the second byte
			b = fin.get();
		}
		int index = find(a, b);
		if (index == -1) {
			// Symbol missing from the table: skip it rather than index
			// words[-1]
			continue;
		}
		sec.append(words[index]->haffman);
	}
}
//数值分析
void getComrAadio() {
	unsigned long long origin = 0;
	unsigned long long  haffman = 0;
	unsigned long long sum = 0;
	for (int i = 0; i < (words.size() + 1) / 2; i++) {
		sum += words[i]->num;
		if (words[i]->a < 0)
		{
			origin += words[i]->num << 4;
		}
		else
		{
			origin += words[i]->num << 3;
		}
		haffman += words[i]->num*words[i]->haffman.length();
	}
	//平均码长
	cout << "平均码长:" << endl;
	cout << "\t原文件:" << (double)origin / sum << endl;
	cout << "\thaffman:" << (double)haffman / sum << endl;
	//压缩率
	cout << "压缩率:" << (double)haffman / origin << endl;
}

// Decodes the bit string `sec` by walking the Huffman tree from `head`:
// '0' follows the left child, any other character the right child. Each time
// a leaf is reached its symbol is written to `fout` (both bytes for a
// double-byte symbol) and the walk restarts at the root.
// Bits left over at the end that do not complete a code are ignored, and
// decoding stops if a bit leads to a missing child.
void dehaffman() {
	// Without a tree, or with a root that has no children, there is no path
	// to follow bit by bit.
	if (head == NULL || (head->left == NULL && head->right == NULL))
	{
		return;
	}

	word* p = head;
	for (size_t i = 0; i < sec.length(); i++) {
		p = (sec[i] == '0') ? p->left : p->right;
		if (p == NULL)
		{
			// The bit string does not match this tree; stop rather than
			// dereference a null child.
			break;
		}
		if (p->left == NULL && p->right == NULL)
		{
			fout << p->a;
			if (p->a < 0)
			{
				fout << p->b;
			}
			p = head;
		}
	}
}

int main() {
	//string filestr;
	//cin >> filestr;
	initWords();
	CreatTree();

	//按照录入顺序输出
	//paintHaffman();

	//按照树状输出
	//OutputTree(head);
	cout << endl;

	//输出haffman编码的二进制文档
	OutputCode();
	//cout << sec;


	//输出压缩率
	cout << endl;
	getComrAadio();

	//将解码的文件放入文档
	dehaffman();
}

你可能感兴趣的:(基于哈夫曼编码思想的编码/解码小程序)