数据结构树--哈夫曼树

决心写一个哈夫曼树,巩固自己学的数据结构


 一、最优树的定义

 结点的路径长度定义为:

      从根结点到该结点的路径上

      分支的数目。

树的路径长度定义为:

      树中每个结点的路径长度之和。


  树的带权路径长度定义为:

     树中所有叶子结点的带权路径长度之和

     $WPL(T) = \sum_{k=1}^{n} w_k l_k$(对所有叶子结点求和,其中 $w_k$ 为第 k 个叶子结点的权值,$l_k$ 为该叶子结点的路径长度)


     在所有含 n 个叶子结点、并带相同权

值的 m 叉树中,必存在一棵其带权路径

长度取最小值的树,称为“最优树”。


二、如何构造最优树

(1)根据给定的 n个权值 {w1, w2,…, wn},构造 n 棵二叉树的集合

       F ={T1,   T2,  …, Tn}

    其中每棵二叉树中均只含一个带权值

$w_i$ 的根结点,其左、右子树为空树


     (2) F中选取其根结点的权值为最

     小的两棵二叉树,分别作为左、  

     右子树构造一棵新的二叉树,并

     置这棵新的二叉树根结点的权值

     为其左、右子树根结点的权值之

     和;


      (3)F中删去这两棵树,同时加入

      刚生成的新树;


     重复(2)(3)两步,直至F中只

     含一棵树为止。



忙完攻防大赛之后自己终于可以敲敲代码了。

把拖了两个礼拜的哈夫曼编码搞定了。哈哈。

代码除了书上的算法6.12 HuffmanCoding(建立哈夫曼树并构造哈夫曼编码)之外,其它全部都是自己写的。

算法本身不难,就是多了对文件的操作。一些细节的地方自己还是调试了很长时间。

如果大家对哈夫曼树不是很清楚,下面两篇博客讲的很清楚:

http://www.thecodeway.com/blog/?p=870

http://www.cnblogs.com/syblogs/articles/2020145.html

简单描述下题目:

一个完整的哈夫曼编码、译码系统:

I  初始化,建立哈夫曼树,存于文件hfmTree中

E 编码 利用建好的哈夫曼树,对文件ToBeTran中的正文进行编码,将结果存入文件CodeFile中

D 译码 利用建好的哈夫曼树将文件CodeFile中的代码进行译码,结果存入TextFile中

P 印代码文件 将文件CodeFile以紧凑格式显示在终端上。同时将此字符形式的编码文件写入文件CodePrin中

T 打印哈夫曼树,显示在终端上

Q 退出


最后,上代码:

huffman_define.h

#include <stdio.h>
#include <malloc.h>
#include <string.h>

//Status codes and limits shared by the Huffman sources.
//OK, ERROR and OVERFLOW are not referenced anywhere in the visible code.
#define  OK 1  
#define  ERROR 0  
#define OVERFLOW -1  
//Weight stored in the unused slot HT[0] by HuffmanCoding; Select starts its
//minimum search from index 0, so this value acts as the initial upper bound.
#define MAXWEIGHT 1000
  
//Basic aliases: Status is the return type of Select; TElemType is not
//referenced in the visible code.
typedef int Status;  
typedef char TElemType;  

//One node of the Huffman tree, stored in a 1-based array (HuffmanTree).
//parent/lchild/rchild hold array indices; the code treats index 0 as "none".
typedef struct{
	unsigned int weight;	//weight of the node
	unsigned int parent;	//index of the parent node, 0 while the node has no parent
	unsigned int lchild;	//index of the left child, 0 for a leaf
	unsigned int rchild;	//index of the right child, 0 for a leaf
}HTNode, *HuffmanTree;

//Table of code strings: HuffmanCode[i] is the NUL-terminated code of symbol i.
typedef char **HuffmanCode;

//functions

//Looks among HT[1..i] for the two nodes without a parent that have the
//smallest weights and stores their indices in *s1 and *s2.
//Returns 0 after a selection, 2 when no selection is possible.
Status Select(HuffmanTree HT, int i, unsigned int *s1, unsigned int *s2);

//Builds a Huffman tree in HT from the n weights in w and stores the code
//string of every symbol in HC[1..n]. Does nothing when n <= 1.
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, int * w, int n);



huffman_define.cpp

#include "huffman_define.h"


//Select the two parentless nodes with the smallest weights.
//
//Scans HT[1..i] and considers only nodes whose parent field is 0.
//  HT  - 1-based node array
//  i   - highest index to examine
//  s1  - receives the index of the lightest candidate
//  s2  - receives the index of the lightest remaining candidate (never *s1)
//
//Returns 0 when both indices were stored, 2 when fewer than two parentless
//nodes exist; in that case *s1 and *s2 are left untouched.
//
//Index 0 is used locally as "nothing found yet", so the result does not
//depend on any sentinel weight in HT[0] and weights of any size are handled.
//Ties are resolved in favour of the lower index.
Status Select(HuffmanTree HT, int i, unsigned int *s1, unsigned int *s2)
{
	unsigned int first = 0;
	unsigned int second = 0;

	//lightest parentless node
	for (int j = 1; j <= i; j++)
	{
		if (HT[j].parent != 0)
			continue;

		if (first == 0 || HT[j].weight < HT[first].weight)
			first = (unsigned int)j;
	}
	if (first == 0)
		return 2;

	//lightest parentless node other than the first one
	for (int j = 1; j <= i; j++)
	{
		if (HT[j].parent != 0 || (unsigned int)j == first)
			continue;

		if (second == 0 || HT[j].weight < HT[second].weight)
			second = (unsigned int)j;
	}
	if (second == 0)
		return 2;

	*s1 = first;
	*s2 = second;
	return 0;
}

//Build a Huffman tree and the code table for n weighted symbols.
//
//  HT - receives the tree: a malloc'ed array of 2n-1 nodes stored at
//       indices 1..2n-1 (leaves at 1..n, root at 2n-1); slot 0 is a sentinel
//  HC - receives the code table: HC[i] (1 <= i <= n) is a malloc'ed,
//       NUL-terminated string of '0'/'1' for the i-th weight; HC[0] is NULL
//  w  - array of n weights
//  n  - number of symbols
//
//When n <= 1 nothing is built and HT/HC are left untouched.
//When an allocation fails everything allocated here is released and both
//HT and HC are set to NULL.
//The caller owns the returned memory; previous values of HT/HC are not freed.
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, int *w, int n){
	if (n <= 1)
		return;

	const int m = 2 * n - 1;	//total number of nodes in the tree

	HuffmanTree tree = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
	HuffmanCode codes = (HuffmanCode)malloc((n + 1) * sizeof(char *));
	char *cd = (char *)malloc(n * sizeof(char));	//scratch buffer, a code has at most n-1 digits
	if (tree == NULL || codes == NULL || cd == NULL)
	{
		free(tree);
		free(codes);
		free(cd);
		HT = NULL;
		HC = NULL;
		return;
	}

	//slot 0 is not a real node; its weight is kept as an upper bound for
	//minimum searches that start from index 0
	tree[0].weight = MAXWEIGHT;
	tree[0].parent = 0;
	tree[0].lchild = 0;
	tree[0].rchild = 0;

	//leaves take the given weights, internal nodes start empty
	for (int i = 1; i <= m; ++i)
	{
		tree[i].weight = (i <= n) ? (unsigned int)w[i - 1] : 0;
		tree[i].parent = 0;
		tree[i].lchild = 0;
		tree[i].rchild = 0;
	}

	//create huffman tree: repeatedly join the two lightest parentless nodes
	for (int i = n + 1; i <= m; ++i)
	{
		unsigned int s1 = 0;
		unsigned int s2 = 0;

		if (2 == Select(tree, i - 1, &s1, &s2))
			break;
		tree[s1].parent = i;
		tree[s2].parent = i;
		tree[i].lchild = s1;
		tree[i].rchild = s2;
		tree[i].weight = tree[s1].weight + tree[s2].weight;
	}

	//get every symbol's code by walking from its leaf up to the root;
	//the digits are written backwards from the end of the scratch buffer
	codes[0] = NULL;
	cd[n - 1] = '\0';
	for (int i = 1; i <= n; ++i)
	{
		int start = n - 1;
		unsigned int c = (unsigned int)i;

		//start > 0 keeps the write inside cd even if the tree is malformed
		for (unsigned int f = tree[c].parent; f != 0 && start > 0; c = f, f = tree[f].parent)
			cd[--start] = (tree[f].lchild == c) ? '0' : '1';

		codes[i] = (char *)malloc((n - start) * sizeof(char));
		if (codes[i] == NULL)
		{
			for (int k = 1; k < i; ++k)
				free(codes[k]);
			free(codes);
			free(tree);
			free(cd);
			HT = NULL;
			HC = NULL;
			return;
		}
		strcpy(codes[i], &cd[start]);
	}
	free(cd);

	HT = tree;
	HC = codes;
}//HuffmanCoding

huffman.cpp

#include "huffman_define.h"

//functions

//Reads the symbols and weights from stdin, builds tree and codes, writes the tree to hfmTree; returns n.
int Init(HuffmanTree &HT, HuffmanCode &HC, char a[], int b[]);
//Translates the text in ToBeTran into codes, one code per line, written to CodeFile.
void Encoding(HuffmanCode HC, int n, char a[]);
//Translates the codes in CodeFile back into characters written to TextFile.
void Decoding(HuffmanCode HC, int n, char a[]);
//Shows the content of CodeFile on the terminal and copies it to CodePrin.
void Print();
//Shows tree nodes on the terminal and writes them to TreePrin.
void TreePrint(HuffmanTree HT, int n);

//filepaths
//NOTE: every array holds 20 bytes; the 19-character paths below fill them
//completely (including the terminating NUL), so a longer path needs a
//larger array.

//not referenced in the visible code
char TEST[20]  = "D:\\txt\\TEST.txt";

char hfmTree[20]  = "D:\\txt\\hufmTree.txt";	//tree table written by Init
char ToBeTran[20] = "D:\\txt\\ToBeTran.txt";	//plain text read by Encoding
char CodeFile[20] = "D:\\txt\\CodeFile.txt";	//codes written by Encoding, read by Decoding and Print
char TextFile[20] = "D:\\txt\\TextFile.txt";	//decoded text written by Decoding
char CodePrin[20] = "D:\\txt\\CodePrin.txt";	//compact code listing written by Print
char TreePrin[20] = "D:\\txt\\TreePrin.txt";	//tree table written by TreePrint

//Interactive menu loop of the Huffman encode/decode system.
//
//Reads one command character per round:
//  I build the tree, E encode, D decode, P print code file,
//  T print tree, Q quit.
//Returns 0 on Q or when stdin reaches end of file.
int main()
{
	int n = 0;			//number of symbols, 0 until Init succeeded
	char a[100] = {0};	//symbols
	int  b[100] = {0};	//weights

	//start out empty so that commands issued before 'I' can be detected
	HuffmanTree HT = NULL;
	HuffmanCode HC = NULL;
	char choise = 0;

	while(1)
	{
		printf("\n\n-------------------------------------------------------------------------\n");
		printf("It's huffman coding_encoding system. Please Input your chioise\n");
		printf("I: Initialization\nE:Encoding\nD:Decoding\nP:Print\nT:Tree printing\nQ:Quit\n");

		//the leading blank skips the newline left over from earlier input;
		//stop instead of spinning forever once stdin is exhausted
		if (scanf(" %c", &choise) != 1)
			return 0;

		switch(choise)
		{
		case ('I'):
			n = Init(HT, HC, a, b);
			break;

		case ('E'):
			if (HC == NULL)
				printf("No huffman tree yet, please run I first\n");
			else
				Encoding(HC, n, a);
			break;

		case ('D'):
			if (HC == NULL)
				printf("No huffman tree yet, please run I first\n");
			else
				Decoding(HC, n, a);
			break;

		case ('P'):
			Print();
			break;

		case ('T'):
			if (HT == NULL)
				printf("No huffman tree yet, please run I first\n");
			else
				TreePrint(HT, n);
			break;

		case ('Q'):
			return 0;

		default:
			printf("Input wrong, please inpint again!~~\n");
			break;
		}
	}

	return 0;
}

//Read the alphabet from stdin, build tree and codes, dump the tree to hfmTree.
//
//  HT, HC - receive the tree and the code table built by HuffmanCoding
//  a      - receives the symbols (room for 100 entries, cleared first)
//  b      - receives the weights (room for 100 entries)
//
//Input format: the count n, then n pairs "symbol weight", every pair
//preceded by exactly one separator character (normally the newline).
//
//Returns n on success. Returns 0 when the input is invalid (n outside
//2..100, unreadable pair, negative weight) or when the tree could not be
//built. Failing to open the dump file is reported but does not invalidate
//the tree that was built in memory.
int Init(HuffmanTree &HT, HuffmanCode &HC, char a[], int b[])
{
	enum { CAPACITY = 100 };	//size of the caller's a[] and b[] arrays
	int n = 0;

	memset(a, 0, CAPACITY);

	printf("please input n:\n");
	//HuffmanCoding builds nothing for fewer than two symbols, and more
	//than CAPACITY symbols would overflow a[] and b[]
	if (scanf("%d", &n) != 1 || n < 2 || n > CAPACITY)
	{
		printf("n must be a number between 2 and %d\n", CAPACITY);
		return 0;
	}

	printf("please input %d code and weight:\n", n);
	for (int i = 0; i < n; i++)
	{
		getchar();	//drop the single separator in front of the symbol
		if (scanf("%c", &a[i]) != 1 || scanf("%d", &b[i]) != 1 || b[i] < 0)
		{
			printf("invalid code or weight at entry %d\n", i + 1);
			return 0;
		}
	}

	HuffmanCoding(HT, HC, b, n);
	if (HT == NULL || HC == NULL)
	{
		printf("cannot build the huffman tree\n");
		return 0;
	}

	FILE *f1 = fopen(hfmTree, "w+");
	if (f1 == NULL)
	{
		printf("cannot open %s\n", hfmTree);
		return n;
	}
	//the tree occupies indices 1..2n-1, the last one being the root
	for (int i = 1; i <= 2 * n - 1; i++)
	{
		fprintf(f1, "%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
	}
	fclose(f1);
	return n;
}


//Encode the text in ToBeTran and write the result to CodeFile.
//
//  HC - code table, HC[i+1] is the code of symbol a[i]
//  n  - number of symbols
//  a  - the symbols
//
//Every input character found in a[] produces one line holding its code;
//characters that are not in a[] are skipped. If either file cannot be
//opened a message is printed and nothing is encoded.
void Encoding(HuffmanCode HC, int n, char a[])
{
	FILE *f1 = fopen(ToBeTran, "r");	//the input is only read
	if (f1 == NULL)
	{
		printf("cannot open %s\n", ToBeTran);
		return;
	}

	FILE *f2 = fopen(CodeFile, "w+");
	if (f2 == NULL)
	{
		printf("cannot open %s\n", CodeFile);
		fclose(f1);
		return;
	}

	int ch = 0;
	while ((ch = fgetc(f1)) != EOF)
	{
		for (int i = 0; i < n; i++)
		{
			if (a[i] == (char)ch)
			{
				fprintf(f2, "%s\n", HC[i+1]);
				break;
			}
		}
	}
	fclose(f1);
	fclose(f2);
}

//Decode the codes in CodeFile and write the text to TextFile.
//
//  HC - code table, HC[i] is the code of symbol a[i-1]
//  n  - number of symbols
//  a  - the symbols
//
//CodeFile is read as whitespace-separated words; every word equal to a
//code in HC produces the matching character, other words are skipped.
//If either file cannot be opened a message is printed and nothing is
//decoded.
void Decoding(HuffmanCode HC, int n, char a[])
{
	char temp[100] = {0};

	FILE *f1 = fopen(CodeFile, "r");	//the input is only read
	if (f1 == NULL)
	{
		printf("cannot open %s\n", CodeFile);
		return;
	}

	FILE *f2 = fopen(TextFile, "w+");
	if (f2 == NULL)
	{
		printf("cannot open %s\n", TextFile);
		fclose(f1);
		return;
	}

	//%99s keeps an over-long word from running past the end of temp
	while (fscanf(f1, "%99s", temp) == 1)
	{
		for (int i = 1; i <= n; i++)
		{
			if (!strcmp(temp, HC[i]))
			{
				fprintf(f2, "%c", a[i-1]);
				break;
			}
		}
	}
	fclose(f1);
	fclose(f2);
}

//Show CodeFile in compact form on the terminal and copy it to CodePrin.
//
//The codes are printed back to back; a line break is inserted as soon as
//50 or more code digits have been written on the current line. If either
//file cannot be opened a message is printed and nothing else happens.
void Print()
{
	int count = 0;	//digits written on the current output line
	char temp[100] = {0};

	FILE *f1 = fopen(CodeFile, "r");	//the input is only read
	if (f1 == NULL)
	{
		printf("cannot open %s\n", CodeFile);
		return;
	}

	FILE *f2 = fopen(CodePrin, "w+");
	if (f2 == NULL)
	{
		printf("cannot open %s\n", CodePrin);
		fclose(f1);
		return;
	}

	//%99s keeps an over-long word from running past the end of temp
	while (fscanf(f1, "%99s", temp) == 1)
	{
		printf("%s", temp);
		fprintf(f2, "%s", temp);

		//measure the word while it is still in the buffer
		count += (int)strlen(temp);
		if (count >= 50)
		{
			printf("\n");
			fprintf(f2, "\n");
			count = 0;
		}
	}
	printf("\n");
	fprintf(f2, "\n");
	fclose(f1);
	fclose(f2);
}

//Print the Huffman tree on the terminal and write it to TreePrin.
//
//  HT - 1-based node array
//  n  - number of leaves (symbols)
//
//One line per node: index, weight, parent, left child, right child.
//All 2n-1 nodes are listed (leaves 1..n followed by the internal nodes,
//the root last). Nothing is listed when HT is NULL or n < 2, because no
//tree exists in that case; TreePrin is still created/emptied.
void TreePrint(HuffmanTree HT, int n)
{
	FILE *f1 = fopen(TreePrin, "w+");
	if (f1 == NULL)
	{
		printf("cannot open %s\n", TreePrin);
		return;
	}

	const int total = (HT != NULL && n >= 2) ? 2 * n - 1 : 0;
	for (int i = 1; i <= total; i++)
	{
		printf("%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
		fprintf(f1, "%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
	}
	fclose(f1);
}

自己做的主要工作是实现了Select 函数,就是从已经存在的节点中选取两个权重最小的节点。

另外,书上的算法有点小问题。

    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));   //0 not used?

这里它申请了 m +  1 个节点大小的空间。根据书上的算法,他是从第1个开始的。没有用第0个节点。

所以我们要稍微改进下书上的代码:

for (p = HT + 1, i = 1; i <= n; ++i, ++p, ++w)

对于第0个节点,我们可以这样用:

#define MAXWEIGHT 1000

HT[0].weight = MAXWEIGHT;

然后在Select函数里面每次赋初值的时候将

s1 = s2 = 0;

这样就比较方便了。



你可能感兴趣的:(数据结构树--哈夫曼树)