Huffman编码

     Huffman编码是最流行的基于统计学的数据压缩方法,下面我们简单介绍它的实现步骤:
1. 将信源符号按照概率递减顺序排列;
2. 取两个概率最小的符号分别分配以“0”和“1”,然后把它们的概率相加,并作为一个新的符号的概率,与其它未分配符号按照(1)重新排列;
3. 重复(1)(2)过程,直至所有概率相加得1;
4. 寻找从每一个信源符号到概率为1处的路径,记录下路径上的“0”和“1”;
5. 写出每一个符号的“0”和“1”序列(从树根到信源符号节点)。

缺点:

    从理论上讲,采用Huffman编码可以获得最佳编码效果,但是在实际中,由于计算机中存储和处理的最小数据单位是比特(bit),因此在某些情况下,实际的压缩编码效果往往达不到理论的压缩比。例如:信源符号{X, Y},其对应的概率为{2/3, 1/3},则根据理论计算,概率为 $p$ 的符号的最佳码长为 $-\log_2 p$,因此符号X, Y的最佳码长分别是:X: $-\log_2(2/3) \approx 0.585$ (bit),Y: $-\log_2(1/3) \approx 1.585$ (bit)。
    这表明,要获得最佳效果,符号{X, Y}的码字长度应分别为0.585 bit和1.585 bit,而计算机不可能有非整数位出现,只能按整数位进行,即采用哈夫曼编码对{X,Y}进行编码,得{X,Y}的码字分别为0和1,也就是两符号的信息编码长度都为1。可见,对于大概率符号X并未赋予较短的码字,实际编码效果没有达到理论编码效果。由上述分析可见,Huffman编码的主要缺点在于其编码方法是对每个符号进行编码,每个符号的码长只能是整数。为此提出算术编码,以解决计算机中必须以整数位进行编码的问题。

编码实现:

头文件

// Huffman.h: interface for the Huffman class.
//
//////////////////////////////////////////////////////////////////////

#if !defined(AFX_HUFFMAN_H__08A1863A_6641_4FE9_9596_5EEBE76B53F7__INCLUDED_)
#define AFX_HUFFMAN_H__08A1863A_6641_4FE9_9596_5EEBE76B53F7__INCLUDED_

#if _MSC_VER > 1000
#pragma once
#endif // _MSC_VER > 1000

#include <string>

/***********************数据结构***********************/

// Node of the Huffman tree. Nodes refer to each other by array index
// rather than by pointer.
struct HuffTreeNode
{
	unsigned int weight;	// weight of this node
	unsigned int parent;	// index of the parent node
	unsigned int lchild;	// index of the left child
	unsigned int rchild;	// index of the right child
};

// Pointer to a HuffTreeNode (used as the base of a node array).
typedef HuffTreeNode *HuffTree;

// One entry of the character table: a character, its weight, and its
// code as a C string.
struct CharMapNode
{
	char c;					// the character
	unsigned int weight;	// weight of the character
	char *code;				// code string for the character
};

// Pointer to a CharMapNode (used as the base of an entry array).
typedef CharMapNode *CharMap;


/*************************类定义****************************/

// Huffman encoder/decoder. Holds a character table (character, weight,
// code), the Huffman tree built from it, the plain text and its 0/1 code
// string.
class Huffman
{
public:
	Huffman();
	virtual ~Huffman();

	// --- character table set-up ---
	void SetCharMap(CharMap m, int number);	// install a character table
	void CountCharsWeight();				// derive characters and weights from the text
	void InputCharsWeight();				// read characters and weights from standard input
	void MakeCharMap();						// build the tree and the per-character codes

	// --- encoding / decoding ---
	void Encode();							// text -> code
	void Decode();							// code -> text

	// --- file I/O ---
	void ReadTextFromFile(char *filename);
	void ReadCodeFromFile(char *filename);
	void SaveTextToFile(char *filename);
	void SaveCodeToFile(char *filename);

	// --- console output ---
	void PrintText();
	void PrintCode();
	void PrintCharWeight();
	void PrintCharCode();

private:
	// Picks the two parentless nodes of least weight among huffTree[1..n].
	void select(int n, int &s1, int &s2);

	HuffTree huffTree;	// Huffman tree
	CharMap chars;		// character table
	int n;				// number of characters in the table
	std::string text;	// plain text
	std::string code;	// encoded text
};

#endif // !defined(AFX_HUFFMAN_H__08A1863A_6641_4FE9_9596_5EEBE76B53F7__INCLUDED_)

 

Huffman类

// Huffman.cpp: implementation of the Huffman class.
//
//////////////////////////////////////////////////////////////////////

#include "Huffman.h"

#include <cstring>
#include <fstream>
#include <iostream>

using namespace std;

//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////

// Creates an empty object: no tree, no character table, zero characters.
// The text and code strings are default-constructed (empty).
Huffman::Huffman()
	: huffTree(NULL),
	  chars(NULL),
	  n(0)
{
}

// Destructor. The body is empty: none of the memory reachable through
// huffTree, chars or the per-character code pointers is released here.
Huffman::~Huffman()
{

}

// Encodes `text` into `code` by concatenating the code string of every
// character of the text, in order.
//
// Characters that do not appear in the table chars[1..n] contribute nothing
// to the output. If a character appears more than once in the table, only
// its first entry is used, so each input character yields exactly one code.
// With no table installed the result is an empty code string.
void Huffman::Encode()
{
	code = "";
	if (chars == NULL || n <= 0)
		return;

	// slot[b] is the index in chars[1..n] of the first entry whose character
	// has byte value b, or 0 when that byte is not in the table. Building it
	// once avoids rescanning the whole table for every character of the text.
	// The size assumes 8-bit chars.
	int slot[256] = {0};
	for (int j = 1; j <= n; ++j)
	{
		const unsigned char b = static_cast<unsigned char>(chars[j].c);
		if (slot[b] == 0)
			slot[b] = j;
	}

	for (string::size_type i = 0; i != text.size(); ++i)
	{
		const int j = slot[static_cast<unsigned char>(text[i])];
		if (j != 0)
			code += chars[j].code;	// appends the NUL-terminated code string
	}
}

// Installs `m` as the character table and `number` as its entry count.
// Only the pointer is stored; the array itself is not copied.
void Huffman::SetCharMap(CharMap m, int number)
{
	this->n = number;
	this->chars = m;
}

// Scans huffTree[1..n] for nodes whose parent is 0 and reports the two of
// least weight: s1 receives the index of the lightest, s2 the index of the
// next lightest. An output stays 0 when no such node was found for it.
// On equal weights the node met first keeps its position.
void Huffman::select(int n, int &s1, int &s2)
{
	s2 = 0;
	s1 = 0;
	for (int idx = 1; idx <= n; ++idx)
	{
		if (huffTree[idx].parent != 0)
			continue;				// already merged into a subtree

		const unsigned int w = huffTree[idx].weight;
		if (s1 == 0)
		{
			s1 = idx;				// first candidate seen
		}
		else if (w < huffTree[s1].weight)
		{
			s2 = s1;				// old minimum becomes the runner-up
			s1 = idx;
		}
		else if (s2 == 0 || w < huffTree[s2].weight)
		{
			s2 = idx;
		}
	}
}

void Huffman::PrintCharWeight()
{
	for (int i = 1; i <= n; ++i)
	{
	/*	switch (chars[i].c)
		{
		case '\t':
			cout << "\\t";
			break;
		case '\n':
			cout << "\\n";
			break;
		default:*/
			cout << chars[i].c;
		//	break;
		//}
			cout << "——" << chars[i].weight << endl;
	}
}

void Huffman::PrintCharCode()
{
	for (int i = 1; i <= n; ++i)
	{
		/*switch (chars[i].c)
		{
		case '\t':
			cout << "\\t";
			break;
		case '\n':
			cout << "\\n";
			break;
		default:*/
			cout << chars[i].c;
		//	break;
		//}
			cout << "——" << chars[i].code << endl;
	}
}

//输出文本串
void Huffman::PrintText()
{
	cout << text << endl;
}

//输出0-1编码
void Huffman::PrintCode()
{
	cout << code << endl;
}

// Builds the Huffman tree from the weights in chars[1..n] and stores a
// freshly allocated, NUL-terminated code string in every chars[i].code.
//
// - Does nothing when there is no table or no character.
// - A table with a single character gets the one-bit code "0"; no tree is
//   built in that case, because there is nothing to merge.
// - A tree left over from an earlier call is released before the new one is
//   allocated.
//
// Code strings already attached to the table are overwritten without being
// freed: the other functions that create table entries do not necessarily
// initialise `code`, so this function cannot tell whether the old pointer is
// valid.
void Huffman::MakeCharMap()
{
	if (n < 1 || chars == NULL)
		return;

	if (n == 1)
	{
		chars[1].code = new char[2];
		chars[1].code[0] = '0';
		chars[1].code[1] = '\0';
		return;
	}

	const int m = 2 * n - 1;			// number of nodes in the Huffman tree
	delete[] huffTree;					// drop the tree of a previous call, if any
	huffTree = NULL;
	huffTree = new HuffTreeNode[m + 1];	// slot 0 is unused

	// Leaves 1..n take the character weights; internal nodes start at 0.
	int i;
	for (i = 1; i <= m; ++i)
	{
		huffTree[i].weight = (i <= n) ? chars[i].weight : 0;
		huffTree[i].parent = 0;
		huffTree[i].lchild = 0;
		huffTree[i].rchild = 0;
	}

	// Build the tree: repeatedly merge the two lightest parentless nodes.
	for (i = n + 1; i <= m; ++i)
	{
		int s1, s2;
		select(i - 1, s1, s2);
		huffTree[s1].parent = huffTree[s2].parent = i;
		huffTree[i].lchild = s1;
		huffTree[i].rchild = s2;
		huffTree[i].weight = huffTree[s1].weight + huffTree[s2].weight;
	}

	// Derive each code by walking from the leaf up to the root, filling the
	// scratch buffer from its end. A code has at most n-1 symbols, plus '\0'.
	char *cd = new char[n];
	cd[n - 1] = '\0';
	for (i = 1; i <= n; ++i)
	{
		int start = n - 1;
		int c, f;
		for (c = i, f = huffTree[i].parent; f != 0; c = f, f = huffTree[f].parent)
		{
			if (huffTree[f].lchild == c)	// left branch is '0'
				cd[--start] = '0';
			else							// right branch is '1'
				cd[--start] = '1';
		}
		chars[i].code = new char[n - start];	// code length + terminator
		strcpy(chars[i].code, &cd[start]);
	}
	delete[] cd;	// allocated with new[], so it must be released with delete[]
}

//从文件读入原文
void Huffman::ReadTextFromFile(char *filename)
{
	ifstream infile(filename);
	if(!infile)
	{
		cerr << "无法打开文件!" <<endl;
		return;
	}
	char c;
	while(infile.get(c))
	{
		text += c;
	}
}

//将编码存入文件
void Huffman::SaveCodeToFile(char *filename)
{
	ofstream outfile(filename);
	if (!outfile)
	{
		cerr << "保存文件出错!" << endl;
		return;
	}
	outfile << code;
}

//从文件读入编码
void Huffman::ReadCodeFromFile(char *filename)
{
	ifstream infile(filename);
	if (!infile)
	{
		cerr << "无法打开文件!" <<endl;
		return;
	}
	infile >> code;
}

//将0-1编码串解码
void Huffman::Decode()
{
	text = "";
	string::size_type i,count;
	for (i = 0; i < code.size(); i += count)
	{
		//每个字符的编码结果最长n-1,从1至n-1依次尝试
		for (count = 1; count < n; ++count)
		{
			for (int j = 1; j <= n; ++j)
				if (code.substr(i, count) == chars[j].code)//code.substr(a,b)表示对字符串code截取从第a个到第b个,依次与chars[].code比较
				{
					text += chars[j].c;  //text表示原文存放的数组名,作指针使用
					goto next;
				}
		}
next:
		;
	}
}

//统计原文中各字符的权值
void Huffman::CountCharsWeight()
{
	if (text.empty())
		return;
	if (chars != NULL)
		delete chars;
	int i = 0;
	n = 0;
	chars = new CharMapNode[2];
	chars[1].c = text[i];
	chars[1].weight = 1;
	++n;
	for (i = 1; i != text.size(); ++i)
	{
		int j;
		for (j = 1; j <= n; ++j)	//遍历当前字符表,如果已存在该字符,权值+1
		{
			if (text[i] == chars[j].c)
			{
				++chars[j].weight;
				break;
			}
		}
		if (j > n)		//该字符不存在,添加该字符
		{
			++n;
			CharMap newchars = new CharMapNode[n + 1];
			memcpy(newchars, chars, n * sizeof(CharMapNode));
			delete chars;
			chars = newchars;
			chars[n].c = text[i];
			chars[n].weight = 1;
		}
	}
}

//输入字符和对应权值
void Huffman::InputCharsWeight()
{
	cout << "请输入字符集大小n(n>1):" << endl;
	cin >> n;
	if (chars != NULL)
		delete chars;
	chars = new CharMapNode[n+1];	//0号单元未使用
	cout << "请输入字符和权值:" << endl;
	for (int i = 1; i <= n; ++i) 
	{
		cin.ignore();		//清除输入缓冲区
		cin.get(chars[i].c);	//输入单个字符,可以是空白符
		cin >> chars[i].weight;
	}
}

void Huffman::SaveTextToFile(char *filename)
{
	ofstream outfile(filename);
	if (!outfile)
	{
		cerr << "保存文件出错!" << endl;
		return;
	}
	outfile << text;
}

 

主函数

#include <iostream>
#include "Huffman.h"

using namespace std;

int main()
{
	Huffman huffman;
	huffman.ReadTextFromFile("text.txt");
	/****************第一步输入字符和对应权值******************/
	cout << "请选择:  1.程序自动统计字符和权值(推荐)  2.用户输入" << endl;
	int r;
	do
	{
		cin >> r;
	}
	while((r != 1) && (r != 2));
	if (r == 1)
		huffman.CountCharsWeight();
	else
		huffman.InputCharsWeight();

	cout << "字符及对应权值:" << endl;
	huffman.PrintCharWeight(); //计算每个字符对应的权值
	system("pause");
	cout << endl;

	/****************第二步建哈弗曼树,输出字符与编码的对应关系******************/
	huffman.MakeCharMap();  //实现哈弗曼编码,对应存入chars[i].c-chars[i].code
	cout << "字符及对应的编码:" << endl;
	huffman.PrintCharCode();  //打印每个字符及其对应的编码,即chars[i].c-chars[i].code
	system("pause");
	cout << endl;

	/****************第三步对字符进行编码,将结果输出并存入文件******************/
	cout << "对原文进行编码:" << endl;
	cout << "原文:" << endl;
	huffman.PrintText();  //输出文本串
	huffman.Encode();  //对文本串进行编码
	cout << "编码:" << endl;
	huffman.PrintCode();
	huffman.SaveCodeToFile("code.txt");
	system("pause");
	cout << endl;

	/****************第四步从文件读入0、1代码串解码后输出并存入文件******************/
	cout << "对编码进行解码:" << endl;
	huffman.ReadCodeFromFile("code.txt");
	cout << "编码:" << endl;
	huffman.PrintCode();
	huffman.Decode();
	cout << "原文:" << endl;
	huffman.PrintText();
	huffman.SaveTextToFile("resulttext.txt");
	cout << "\n Over ^_^" << endl;
	system("pause");

	return 0;
}


输出界面

 
 
 

 

你可能感兴趣的:(数据结构,String,struct,delete,System,interface)