C++实现哈夫曼编码--使用哈夫曼编码树压缩和解压缩

      压缩就是位域的操作,假设A对应0000,B对应1111,则AB压缩后为00001111即为0x0F,AB原本为2个字节,压缩后变为1个字节。其它数据类似一样的压缩操作即可。

      解压缩就是取出每一个位,如果是0,则走到哈夫曼编码树的左孩子,如果是1,则走到哈夫曼编码树的右孩子,接着判断是否走到了叶子节点,如果是,输出叶子节点对应的编码值即可。依次类推,解压出全部数据。

      如下的代码只是为了更好的演示压缩和解压过程,基本没有太多考虑效率等问题。

      

#ifndef _HANFUMAN_H_
#define _HANFUMAN_H_

typedef struct _t_HANFUMAN_TREE
{
	unsigned char data;    //编码的数据值,0-255之间,如果不是叶子节点,设置为0
	unsigned short weight; //编码数字的权重,可以是出现的概率,这里使用data出现的次数

	_t_HANFUMAN_TREE* parent;
	_t_HANFUMAN_TREE* left;
	_t_HANFUMAN_TREE* right;
}HANFUMAN_TREE,*PHANFUMAN_TREE;

#define MAX_CODE_BYTES 16

#define INDEX_TYPE_TREE 0x00
#define INDEX_TYPE_INFO 0x01

typedef struct _t_HANFUMAN_SELECT_HELPER
{
	_t_HANFUMAN_SELECT_HELPER()
	{
		Init();
	}
	void Init()
	{
		firstMinIndex = -1;
		secondMinIndex = -1;
		firstMinType = INDEX_TYPE_TREE; //默认值为子树类型
		secondMinType = INDEX_TYPE_TREE; //默认值为子树类型
	}
	int firstMinIndex; 
	int secondMinIndex;
	unsigned char firstMinType;
	unsigned char secondMinType;
}HANFUMAN_SELECT_HELPER,*PHANFUMAN_SELECT_HELPER;

typedef struct _t_DATA_INFO
{
	unsigned char data;  
	unsigned short times; //data出现的次数
}DATA_INFO,*PDATA_INFO;

typedef struct _t_HANFUMAN_CODE_ITEM
{
	unsigned char data[MAX_CODE_BYTES]; //最长表示MAX_CODE_BYTES*8长度的编码位域  
	unsigned short codeLen;//编码的位域长度
}HANFUMAN_CODE_ITEM,*PHANFUMAN_CODE_ITEM;

BOOL TestHanfuMan();

//创建哈夫曼编码树
PHANFUMAN_TREE CreateHanfuManTree(PDATA_INFO pDataInfo,int len);
void EnumHanfuManCode(PHANFUMAN_TREE tree);
void DestroyTree(PHANFUMAN_TREE tree);

#endif

#include 
#include 
#include 
#include 

#include "HanfuMan.h"

#define TEST_DATA_LEN 16

void InsertSort(PDATA_INFO pDataInfo,int len,unsigned char data,unsigned short dataTimes);
//哈夫曼编码,返回值为编码后的数据位数 一个字节有8位
int HanfuManEncode(unsigned char* data,int dataLen,unsigned char **encodeData);
//哈夫曼解压缩,返回值为解压缩后的数据字节数
int HanfuManDecode(PHANFUMAN_TREE tree,unsigned char* data,int dataBitLen,unsigned char **decodeData);

//编码表,用于0-255之间
static HANFUMAN_CODE_ITEM g_HanfuManCodeTable[256] = {0};

BOOL TestHanfuMan()
{
	BOOL bRet = FALSE;
	unsigned char *p = (unsigned char*)new unsigned char[TEST_DATA_LEN];
	if(!p)
	{
		return FALSE;
	}
	memset(p,0,TEST_DATA_LEN);

	srand(time(NULL));
	for(int i=0;iright; //如果是1,走到右子树
		}
		else
		{
			pTree = pTree->left; //如果是0,走到左子树
		}

		//叶子节点,则输出解码数据
		if(!pTree->left && !pTree->right)
		{
			(*decodeData)[decodeIndex++] = pTree->data;
			pTree = tree;
		}
	}

	return decodeIndex;
}

void InsertSort(PDATA_INFO pDataInfo,int len,unsigned char data,unsigned short dataTimes)
{
	if(0 == len)
	{
		pDataInfo[0].data = data;
		pDataInfo[0].times = dataTimes;
		return;
	}

	int inserIndex = 0;
	//使用插入排序
	for(inserIndex=0;inserIndex= pDataInfo[inserIndex].times)
		{
			continue;
		}
		break;
	}

	for(int i=len-1;i>=inserIndex;i--)
	{
		memcpy(&pDataInfo[i+1],&pDataInfo[i],sizeof(DATA_INFO));
	}
	//插入新数据
	pDataInfo[inserIndex].data = data;
	pDataInfo[inserIndex].times = dataTimes;
}

void InsertSortTree(PHANFUMAN_TREE *pSubTree,int subTreeCount,PHANFUMAN_TREE insertTree)
{
	if(0 == subTreeCount)
	{
		pSubTree[0] = insertTree;
		return;
	}

	int inserIndex = 0;
	//使用插入排序
	for(inserIndex=0;inserIndexweight >= (pSubTree[inserIndex])->weight)
		{
			continue;
		}
		break;
	}

	for(int i=subTreeCount-1;i>=inserIndex;i--)
	{
		pSubTree[i+1] = pSubTree[i];
	}
	//插入新数据
	pSubTree[inserIndex] = insertTree;
}

void RefreshSubTrees(PHANFUMAN_TREE *pSubTree,int subTreeCount,PHANFUMAN_TREE mergeTree)
{
	for(int i=2;idata = 0;

	pLeftSubTree->parent = mergeRoot;
	mergeRoot->weight = pLeftSubTree->weight;
	//pLeftSubTree 默认不为空
	if(pRightSubTree)
	{
		mergeRoot->weight += pRightSubTree->weight;
		pRightSubTree->parent = mergeRoot;
	}

	mergeRoot->parent = NULL;
	mergeRoot->left = pLeftSubTree;
	mergeRoot->right = pRightSubTree;

	return mergeRoot;
}

//创建新树,用于创建叶子节点
PHANFUMAN_TREE CreateLeaf(PDATA_INFO pDataInfo)
{
	PHANFUMAN_TREE leafTree = new HANFUMAN_TREE;

	if(!leafTree)
	{
		return NULL;
	}

	leafTree->data = pDataInfo->data;
	leafTree->weight = pDataInfo->times;

	leafTree->parent = NULL;
	leafTree->left = NULL;
	leafTree->right = NULL;

	return leafTree;
}

//创建哈夫曼编码树
PHANFUMAN_TREE CreateHanfuManTree(PDATA_INFO pDataInfo,int len)
{
	if(len<=0)
	{
		return NULL;
	}

	int dataIndex = 0;
	//最多只可能出现len+1/2个子树,用于保存编码过程可能出现的全部子树的根节点指针
	PHANFUMAN_TREE *pSubTree = (PHANFUMAN_TREE*) new PHANFUMAN_TREE[(len+1)/2];
	PHANFUMAN_TREE root = NULL;
	int subTreeCount = 0; //子树的个数
	HANFUMAN_SELECT_HELPER  selectHelper;

	memset(pSubTree,0,sizeof(PHANFUMAN_TREE)*((len+1)/2));

	while(dataIndex=2)
		{
			selectHelper.firstMinIndex = 0;
			selectHelper.secondMinIndex = 1;	
		}
		else
		{
			if(subTreeCount>=1)
			{
				selectHelper.firstMinIndex = 0;
			}
		}

		if(-1 == selectHelper.firstMinIndex)
		{
			selectHelper.firstMinIndex = dataIndex;
			selectHelper.firstMinType = INDEX_TYPE_INFO;
			if(++dataIndexweight)
			{
				selectHelper.secondMinIndex = selectHelper.firstMinIndex;

				selectHelper.firstMinIndex = dataIndex;
				selectHelper.firstMinType = INDEX_TYPE_INFO;

				if( (++dataIndexweight  ) )
				{
					selectHelper.secondMinIndex = dataIndex++;
					selectHelper.secondMinType = INDEX_TYPE_INFO;
				}
			}
			else
			{
				if( (-1==selectHelper.secondMinIndex) || (pDataInfo[dataIndex].times < (pSubTree[selectHelper.secondMinIndex])->weight))
				{
					selectHelper.secondMinIndex = dataIndex++;
					selectHelper.secondMinType = INDEX_TYPE_INFO;
				}
			}
		}//至此,已经选择出了2个最小权重的

		if(INDEX_TYPE_TREE == selectHelper.firstMinType && INDEX_TYPE_TREE == selectHelper.secondMinType)
		{
			//合并2棵子树
			PHANFUMAN_TREE mergeTree = MergeTree(pSubTree[0],pSubTree[1]);
			if(!mergeTree)
			{
				exit(0);
			}
			RefreshSubTrees(pSubTree,subTreeCount,mergeTree);

			subTreeCount--;
		}
		if(INDEX_TYPE_TREE == selectHelper.firstMinType && INDEX_TYPE_INFO == selectHelper.secondMinType)
		{
			PHANFUMAN_TREE newLeaf = CreateLeaf(&pDataInfo[selectHelper.secondMinIndex]);
			if(!newLeaf)
			{
				exit(0);
			}
			PHANFUMAN_TREE mergeTree = MergeTree(pSubTree[0],newLeaf);
			if(!mergeTree)
			{
				exit(0);
			}
			for(int i=1;i1)
	{
		//合并2棵子树
		PHANFUMAN_TREE mergeTree = MergeTree(pSubTree[0],pSubTree[1]);
		if(!mergeTree)
		{
			exit(0);
		}
		RefreshSubTrees(pSubTree,subTreeCount,mergeTree);

		subTreeCount--;
	}

	//最后子树中只剩下一课,这棵树即为编码树
	PHANFUMAN_TREE tree = pSubTree[0];
	delete [] pSubTree;
	
	return tree;
}

//释放树
void DestroyTree(PHANFUMAN_TREE tree)
{
	if(!tree)
	{
		return;
	}

	DestroyTree(tree->left); //刪除左子树
	DestroyTree(tree->right);//删除右子树

	delete tree; //删除根节点
	tree = NULL;
}

//通过叶子的父节点向上
void PrintHanfuManCode(PHANFUMAN_TREE tree,int *codeLen,unsigned char data)
{
	if(!tree)
	{
		return;
	}

	PHANFUMAN_TREE parent = tree->parent;
	if(!parent)
	{
		return;
	}

	PrintHanfuManCode(parent,codeLen,data);
	if(parent->left == tree)
	{
		(*codeLen)++;
		printf("0");
		//默认值就是为0,因此编码表元素不需要设置数据,长度增加1个位域即可
		g_HanfuManCodeTable[data].codeLen++;
	}
	else
	{
		(*codeLen)++;
		printf("1");
		//需要设置编码表元素的第g_HanfuManCodeTable[data].codeLen位为1
		int byteIndex = g_HanfuManCodeTable[data].codeLen/8;
		int bitIndexOfByte = g_HanfuManCodeTable[data].codeLen%8;
		unsigned char bitHelper[8] = {0x01,0x02,0x04,0x08,0x10,0x20,0x40,0x80};
		g_HanfuManCodeTable[data].data[byteIndex] |= bitHelper[bitIndexOfByte];
		g_HanfuManCodeTable[data].codeLen++;
		//如果某个字段的编码位域为1101,则设置后,g_HanfuManCodeTable[data].data[0]第1位为1,第二位为1,第三位为0,第四为为1,即1011
		//如果某个字段的编码位域为1011 1111 1101,则设置后,g_HanfuManCodeTable[data].data[0] = 1111 1101
		//g_HanfuManCodeTable[data].data[1] = 0000 1011 即保存的顺序和我们阅读的顺序刚好相反了
	}
}


//通过二次遍历编码树,枚举得到每个data的哈夫曼编码
void EnumHanfuManCode(PHANFUMAN_TREE tree)
{
	if(!tree)
	{
		return;
	}

	//叶子节点
	if(!tree->left && !tree->right)
	{
		int codeLen = 0;
		printf("data value = 0x%2x    HanfuMan Code = ",tree->data);
		PrintHanfuManCode(tree,&codeLen,tree->data);
		printf("   CodeLen = %d\r\n",codeLen);
		return;
	}
	
	if(tree->left)
	{
		EnumHanfuManCode(tree->left);
	}

	if(tree->right)
	{
		EnumHanfuManCode(tree->right);
	}
}
#include 
#include 
#include "HanfuMan.h"


int main(int agrc,char* argv[])
{
	TestHanfuMan();
	
	getchar();
	return 0;
}

         这里,是通过随机生成的压缩数据,测试数据大小  #define TEST_DATA_LEN 16。测试结果如下:

         C++实现哈夫曼编码--使用哈夫曼编码树压缩和解压缩_第1张图片

你可能感兴趣的:(数据结构与算法)