【数据压缩】Huffman原理与代码实现

转载请注明出处:http://blog.csdn.net/luoshixian099/article/details/50374452

勿在浮沙筑高台

  Huffman算法也是一种无损压缩算法,但与上篇文章LZW压缩算法不同,Huffman需要得到每种字符出现概率的先验知识。通过计算字符序列中每种字符出现的频率,为每种字符进行唯一的编码设计,使得频率高的字符占的位数短,而频率低的字符长,来达到压缩的目的。通常可以节省20%~90%的空间,很大程度上依赖数据的特性!Huffman编码是变长编码,即每种字符对应的编码长度不唯一。

前缀码:任何一个字符的编码都不是同一字符集中另一种字符编码的前缀。Huffman编码为最优前缀码,即压缩后数据量最小。

---------------------------------------------------------------------------------------------------------------

 Huffman算法:

  1.统计字符序列的每种字符的频率,并为每种字符建立一个节点,节点权重为其频率;

  2.初始化最小优先队列中,把上述的结点全部插入到队列中;

  3.取出优先队列的前两种符号节点,并从优先队列中删除;

  4.新建一个父节点,并把上述两个节点作为其左右孩子节点,父节点的权值为左右节点之和;

  5.如果此时优先队列为空,则退出并返回父节点的指针!否则把父节点插入到优先队列中,重复步骤3;

----------------------------------------------------------------------------------------------------------------

  通过上述建造的Huffman树,可以看到,每种字符结点都是叶子结点,编码方法:从根节点开始向左定义编码'0',向右定义为'1',遍历到叶子结点所得到的二值码串,即为此种字符的编码值。由于字符码字为前缀码,在译码过程中,每种字符可以参照Huffman树被唯一的译码出,但是前缀码的缺点是,错误具有传播功能,当有1位码字错误,此后的译码过程很可能都不正确;

【数据压缩】Huffman原理与代码实现_第1张图片

 代码实现:

优先队列采用堆排序算法

【数据压缩】Huffman原理与代码实现_第2张图片

/*
CSDN 勿在浮沙筑高台 
http://blog.csdn.net/luoshixian099
数据压缩--Huffman编码 2015年12月21日 
*/
//main.cpp
#include <iostream>
#include <vector>
#include "compress.h"
using namespace std;
void ShowCode(PNode root, vector<char> &code);//显示编码
void FreeTREE(PNode root);
int main()
{
	char A[] = "EEEEEEBBBBBBBAAADDDDDCCCCCCCCC";//原始数据
	UINT Length = sizeof(A)-1;
	Priority_Q queue(A, Length); //建立优先队列-堆排序
	//输入每组字符的频率
	for (UINT i = 0; i <= queue.Heap_Size;i++)
	{
		cout << (char)(queue.table[i]->key) << "  Frequency:  " << queue.table[i]->Frequency << endl;
	}
	cout << "--------------------" << endl;
	PNode root = Build_Huffman_Tree(queue);//构建Huffman树
	vector<char> code;
	ShowCode(root, code); //显示编码数据
	FreeTREE(root);
	return 0;
}
void FreeTREE(PNode root)//释放内存
{
	if ( root!=NULL)
	{
		FreeTREE(root->_left);
		FreeTREE(root->_right);
		delete root;
	}
}
void ShowCode(PNode root,vector<char> &code)
{
   if (root!=NULL)
   {
	   if (root->_left == NULL && root->_right == NULL)  //叶子结点
	   {
		   cout << (char)(root->key) << "  code : " ;
		   for (UINT i = 0; i < code.size() ; i++)
		   {
			   cout << (int)code[i];
		   }
		   cout << endl;
		   return;
	   }
	   code.push_back(0);
	   ShowCode(root->_left,code);
	   code[code.size()-1] = 1;
	   ShowCode(root->_right,code);
	   code.resize(code.size()-1);
   }
}
/*
compress.cpp
*/
#include "compress.h"
Priority_Q::Priority_Q(char *A,int Length) //统计各种字符的频率
{
	for (int i = 0; i < 256; i++)
	{
		table[i] = new Node;
	}
	Heap_Size = 0;
	for (int i = 0; i < Length; i++)  //统计字符频率
	{
		bool Flag = true;
		for (int j = 0; j < Heap_Size; j++)
		{
           if ( table[j]->key == *(A+i) )
           {
			   table[j]->Frequency = table[j]->Frequency + 1;
			   Flag = false;
			   break;
           }
		}
		if (Flag)  //加入新的字符
		{
			table[Heap_Size]->key = *(A + i);
			table[Heap_Size]->Frequency = table[Heap_Size]->Frequency + 1;
			Heap_Size++;
		}
	}
	Heap_Size--;
	Build_Min_Heap(Heap_Size); //建立优先队列
}
void Priority_Q::Build_Min_Heap(UINT Length)//建立优先队列
{
	for (int i = (int)(Length / 2); i >= 0; i--)
	{
		Min_Heapify(i);
	}
}
void Priority_Q::Min_Heapify(UINT i)
{
	UINT Smaller = i;
	UINT Left = 2 * i + 1;
	UINT Right = 2 * i + 2;

	if (Left <= Heap_Size && table[Left]->Frequency < table[i]->Frequency)  //判断是否小于其孩子的值
	{
		Smaller = Left;
	}
	if (Right <= Heap_Size && table[Right]->Frequency < table[Smaller]->Frequency)
	{
		Smaller = Right;
	}
	if (Smaller != i)                      //如果小于,就与其中最大的孩子调换位置
	{
		Swap(i, Smaller);
		Min_Heapify(Smaller);
	}
}
void Priority_Q::Swap(int x, int y) //交换两个元素的数据
{
	PNode temp = table[x];
	table[x] = table[y];
	table[y] = temp;
}
PNode CopyNode(PNode _src, PNode _dst)//拷贝数据
{
	_dst->Frequency = _src->Frequency;
	_dst->key = _src->key;
	_dst->_left = _src->_left;
	_dst->_right = _src->_right;
	return _dst;
}
PNode Priority_Q::Extract_Min()  //输出队列最前结点
{
	if (Heap_Size == EMPTY)
		return NULL;
	if (Heap_Size == 0)
	{
		Heap_Size = EMPTY;
		return table[0];
	}
	if (Heap_Size >= 0)                 
	{
		Swap(Heap_Size, 0);
		Heap_Size--;
		Min_Heapify(0);
	}
	return table[Heap_Size+1];
}
void Priority_Q::Insert(PNode pnode)//优先队列的插入
{
	Heap_Size++;
	CopyNode(pnode, table[Heap_Size]);
	delete pnode;
	UINT i = Heap_Size;
	while ( i > 0 && table[Parent(i)]->Frequency > table[i]->Frequency )
	{
		Swap(i, Parent(i));
		i = Parent(i);
	}
}
Priority_Q::~Priority_Q()
{
	for (int i = 0; i < 256; i++)
	{
		delete table[i];
	}
}
PNode Build_Huffman_Tree(Priority_Q &queue) //建立Huffman树
{
	PNode parent=NULL,left=NULL,right=NULL;
	while (queue.Heap_Size != EMPTY)
	{
		left = new Node;
		right = new Node;
		parent = new Node;
		CopyNode(queue.Extract_Min(), left); //取出两个元素
		CopyNode(queue.Extract_Min(), right);
		//复制左右节点数据
		parent->Frequency = right->Frequency + left->Frequency;//建立父节点
		parent->_left = left;
		parent->_right = right;
		if (queue.Heap_Size == EMPTY)
			break;
		queue.Insert(parent);  //再插入回优先队列
	}
	return parent;
}
/*
compress.h
*/
#ifndef COMPRESS
#define COMPRESS
#include <vector>
#define UINT unsigned int 
#define UCHAR unsigned char 
#define EMPTY 0xFFFFFFFF
#define Parent(i) (UINT)(((i) - 1) / 2)
typedef struct Node   //结点
{
	Node::Node():key(EMPTY), Frequency(0),_left(NULL),_right(NULL){}
	UINT key;
	UINT Frequency;
	struct Node * _left;
	struct Node * _right;
}Node,*PNode;
class Priority_Q  //优先队列
{
public:
	Priority_Q(char *A, int Length);
	~Priority_Q();
	void Insert(PNode pnode); //插入
	PNode Extract_Min();  //取出元素
	UINT Heap_Size;  //队列的长度
	PNode table[256];  //建立256种结点
private:
	void Build_Min_Heap(UINT Length); //建立队列
	void Swap(int x, int y);   //交换两个元素
	void Min_Heapify(UINT i); //维护优先队列的性质
	
};
PNode Build_Huffman_Tree(Priority_Q &queue);//构建优先队列
#endif // COMPRESS

参考:

    http://wenku.baidu.com/view/04a8a13b580216fc700afd2e.html

    http://blog.csdn.net/abcjennifer/article/details/8020695


你可能感兴趣的:(Huffman,优先队列,数据压缩)