重温经典之赫夫曼(Huffman)编码

 

先看看赫夫曼树
假设有n个权值{w1,w2,…,wn},构造一个有n个叶子结点的二叉树,每个叶子结点权值为wi,则其中带权路径长度WPL最小的二叉树称作赫夫曼树或最优二叉树。
 
赫夫曼树的构造,赫夫曼最早给出了带有一般规律的算法,俗称赫夫曼算法。如下:
(1)根据给定的n个权值{w1,w2,…,wn}构造n棵二叉树的集合F={T1,T2,…,Tn},其中Ti中只有一个权值为wi的根结点,左右子树为空。
(2)在F中选取两棵根结点的权值最小的树作为左、右子树以构造一棵新的二叉树,且置新的二叉树的根结点的权值为左、右子树上根结点的权值之和。
(3)在F中删除这两棵树,同时将新得到的二叉树加入到F中。
(4)重复(2)和(3)直到F中只含一棵树为止,这棵树就是赫夫曼树。

 

例如下图便是赫夫曼树的构造过程。其中,根结点上标注的是所赋的权值。

 

 

设计一棵赫夫曼树,由此得到的二进制前缀编码就是赫夫曼编码。那么什么是前缀编码呢?所谓前缀编码,就是若要设计长短不等的编码,则必须是任意一个字符的编码都不是另一个字符编码的前缀。所以我们可以利用二叉树来设计二进制的前缀编码。

 

假设需要传送的字符为:A B A C C D A。如下图就是一个前缀编码的示例。

 

说了这么多理论,总该实践一下了,下面是赫夫曼编码的具体实现代码:

#include <assert.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define NUM 256 /* number of leaves: one per possible byte value */

enum {
	NODE_COUNT = 2 * NUM - 1, /* a full binary tree with NUM leaves */
	ROOT = NODE_COUNT - 1,    /* the last node created is the root */
	BUF_LEN = 100             /* size of the I/O staging buffer in bytes */
};

/*
 * One node of the Huffman tree, stored in a flat array.
 * parent/lchild/rchild are array indices; -1 means "none".
 * Indices 0..NUM-1 are the leaves (index == byte value).
 */
typedef struct {
	int weight;
	int parent, lchild, rchild;
} HTNode, *HuffmanTree;

/*
 * Find the two parentless nodes with the smallest weights among T[0..len].
 *
 * On return *s1 is the index of the lightest such node and *s2 the index of
 * the second lightest. If fewer than two parentless nodes exist, the missing
 * result(s) are set to -1 instead of scanning past the end of the array.
 */
void Select(HuffmanTree T, int len, int *s1, int *s2)
{
	*s1 = -1;
	*s2 = -1;
	for (int i = 0; i <= len; i++) {
		if (T[i].parent != -1)
			continue; /* already merged into a subtree */
		if (*s1 == -1 || T[i].weight < T[*s1].weight) {
			*s2 = *s1;
			*s1 = i;
		} else if (*s2 == -1 || T[i].weight < T[*s2].weight) {
			*s2 = i;
		}
	}
}

/*
 * Debug helper: print the 8 bits of ch, most significant first, with a
 * comma after the high nibble and a trailing space.
 */
void show_binary(char ch)
{
	/* Work on an unsigned copy so shifting never touches a sign bit. */
	unsigned char bits = (unsigned char)ch;

	for (int i = 0; i < 8; i++) {
		printf((bits & 0x80) ? "1" : "0");
		if (i == 3)
			printf(",");
		bits = (unsigned char)(bits << 1);
	}
	printf(" ");
}

/*
 * Count how often each byte value occurs in psrc into HT[0..NUM-1].weight.
 * Returns 0 on success, -1 on read error or if the input holds more than
 * INT_MAX bytes (the int weights could not represent the total).
 */
static int count_weights(FILE *psrc, HuffmanTree HT)
{
	long total = 0;
	int ch; /* int, not char: fgetc returns EOF out of band */

	while ((ch = fgetc(psrc)) != EOF) {
		if (total == INT_MAX)
			return -1;
		total++;
		HT[ch].weight++;
	}
	return ferror(psrc) ? -1 : 0;
}

/* Repeatedly merge the two lightest parentless nodes until only ROOT remains. */
static void build_tree(HuffmanTree HT)
{
	for (int i = NUM; i < NODE_COUNT; i++) {
		int s1, s2;

		Select(HT, i - 1, &s1, &s2);
		HT[s1].parent = i;
		HT[s2].parent = i;
		HT[i].lchild = s1;
		HT[i].rchild = s2;
		HT[i].weight = HT[s1].weight + HT[s2].weight;
	}
}

/*
 * Build the '0'/'1' code string of every leaf by walking from the leaf up to
 * the root and filling a scratch buffer from the back.
 * HC must hold NUM entries, all NULL on entry. Returns 0 on success, -1 if
 * an allocation fails (entries allocated so far stay in HC for the caller
 * to free).
 */
static int build_codes(const HTNode *HT, char **HC)
{
	char cd[NUM];

	cd[NUM - 1] = '\0';
	for (int i = 0; i < NUM; i++) {
		int start = NUM - 1;

		for (int c = i, f = HT[i].parent; f != -1; c = f, f = HT[f].parent)
			cd[--start] = (HT[f].lchild == c) ? '0' : '1';

		size_t len = (size_t)(NUM - start); /* includes the terminator */
		HC[i] = malloc(len);
		if (HC[i] == NULL)
			return -1;
		memcpy(HC[i], &cd[start], len);
	}
	return 0;
}

/*
 * Encode every byte of psrc with its code from HC and write the packed bit
 * stream (MSB first) to pdst. The final partial byte, if any, is padded with
 * zero bits on the right. Returns 0 on success, -1 on I/O error.
 */
static int encode_stream(FILE *psrc, FILE *pdst, char **HC)
{
	unsigned char buff[BUF_LEN];
	size_t used = 0;       /* complete bytes waiting in buff */
	unsigned char cur = 0; /* byte currently being assembled */
	int nbits = 0;         /* number of bits already in cur */
	int ch;

	while ((ch = fgetc(psrc)) != EOF) {
		for (const char *p = HC[ch]; *p != '\0'; p++) {
			cur = (unsigned char)((cur << 1) | (*p == '1'));
			if (++nbits < 8)
				continue;
			buff[used++] = cur;
			cur = 0;
			nbits = 0;
			if (used == BUF_LEN) {
				if (fwrite(buff, 1, used, pdst) != used)
					return -1;
				used = 0;
			}
		}
	}
	if (ferror(psrc))
		return -1;

	/* Only emit a trailing byte when there really are leftover bits. */
	if (nbits > 0)
		buff[used++] = (unsigned char)(cur << (8 - nbits));
	if (used > 0 && fwrite(buff, 1, used, pdst) != used)
		return -1;
	return 0;
}

/*
 * Decode exactly `symbols` symbols from the bit stream in pdst (read from
 * its current position) by walking the tree from ROOT, and write each
 * decoded byte to pdeciphering. Stopping after `symbols` keeps the zero
 * padding of the last byte from being decoded as data.
 * Returns 0 on success, -1 on I/O error or truncated input.
 */
static int decode_stream(FILE *pdst, FILE *pdeciphering, const HTNode *HT,
			 long symbols)
{
	unsigned char buff[BUF_LEN];
	int node = ROOT;
	size_t got;

	while (symbols > 0 && (got = fread(buff, 1, BUF_LEN, pdst)) > 0) {
		for (size_t b = 0; b < got && symbols > 0; b++) {
			unsigned char byte = buff[b];

			for (int bit = 0; bit < 8 && symbols > 0; bit++) {
				node = (byte & 0x80) ? HT[node].rchild : HT[node].lchild;
				if (HT[node].lchild == -1) { /* reached a leaf */
					if (fputc(node, pdeciphering) == EOF)
						return -1;
					node = ROOT;
					symbols--;
				}
				byte = (unsigned char)(byte << 1);
			}
		}
	}
	return (symbols == 0 && !ferror(pdst)) ? 0 : -1;
}

/*
 * Huffman round trip: compress psrc into pdst, then decompress pdst into
 * pdeciphering.
 *
 * psrc          readable, rewindable stream with the original data
 * pdst          stream opened for update (read + write); receives the
 *               packed code bits and is read back for decoding
 * pdeciphering  writable stream receiving the decoded bytes
 *
 * The code table is not stored in pdst; decoding uses the tree that is
 * still in memory. All three streams are closed before returning, on
 * success and on failure alike. Errors are reported on stderr.
 */
void HuffmanCoding(FILE *psrc, FILE *pdst, FILE *pdeciphering)
{
	HuffmanTree HT = NULL;
	char **HC = NULL;

	if (psrc == NULL || pdst == NULL || pdeciphering == NULL) {
		fprintf(stderr, "HuffmanCoding: invalid stream\n");
		goto cleanup;
	}

	HT = malloc(NODE_COUNT * sizeof *HT);
	HC = calloc(NUM, sizeof *HC); /* zeroed so cleanup can free every slot */
	if (HT == NULL || HC == NULL) {
		fprintf(stderr, "HuffmanCoding: out of memory\n");
		goto cleanup;
	}

	for (int i = 0; i < NODE_COUNT; i++) {
		HT[i].weight = (i < NUM) ? 0 : -1;
		HT[i].parent = -1;
		HT[i].lchild = -1;
		HT[i].rchild = -1;
	}

	if (count_weights(psrc, HT) != 0) {
		fprintf(stderr, "HuffmanCoding: cannot read source (or too large)\n");
		goto cleanup;
	}
	rewind(psrc);

	build_tree(HT);

	if (build_codes(HT, HC) != 0) {
		fprintf(stderr, "HuffmanCoding: out of memory\n");
		goto cleanup;
	}

	if (encode_stream(psrc, pdst, HC) != 0) {
		fprintf(stderr, "HuffmanCoding: error while encoding\n");
		goto cleanup;
	}

	/*
	 * An update stream needs a flush or reposition between writing and
	 * reading; do both explicitly before reading the data back.
	 */
	if (fflush(pdst) != 0) {
		fprintf(stderr, "HuffmanCoding: error while flushing\n");
		goto cleanup;
	}
	rewind(pdst);

	/* The root weight is the sum of all leaf weights, i.e. the symbol count. */
	if (decode_stream(pdst, pdeciphering, HT, HT[ROOT].weight) != 0)
		fprintf(stderr, "HuffmanCoding: error while decoding\n");

cleanup:
	if (HC != NULL) {
		for (int i = 0; i < NUM; i++)
			free(HC[i]);
	}
	free(HC);
	free(HT);
	if (pdst != NULL)
		fclose(pdst);
	if (psrc != NULL)
		fclose(psrc);
	if (pdeciphering != NULL)
		fclose(pdeciphering);
}

/*
 * Ask for three file names (source, compressed output, decoded output) and
 * run the Huffman round trip on them. Returns 0 on success, -1 if a name
 * cannot be read or a file cannot be opened.
 */
int main(void)
{
	char srcfile[100], dstfile[100], deciphering[100];

	/* Field width 99 leaves room for the terminator in each 100-byte buffer. */
	printf("Input source file:");
	if (scanf("%99s", srcfile) != 1)
		return -1;
	printf("Input dest file:");
	if (scanf("%99s", dstfile) != 1)
		return -1;
	printf("Input deciphering file:");
	if (scanf("%99s", deciphering) != 1)
		return -1;

	/* Binary mode: the data is arbitrary bytes, not text. */
	FILE *psrc = fopen(srcfile, "rb");
	FILE *pdst = fopen(dstfile, "w+b");
	FILE *pdeciphering = fopen(deciphering, "wb");

	if (psrc == NULL || pdst == NULL || pdeciphering == NULL) {
		printf("file opened failed\n");
		if (psrc != NULL)
			fclose(psrc);
		if (pdst != NULL)
			fclose(pdst);
		if (pdeciphering != NULL)
			fclose(pdeciphering);
		return -1;
	}

	HuffmanCoding(psrc, pdst, pdeciphering); /* closes all three streams */
	return 0;
}

你可能感兴趣的:(c,算法,struct,File,null,input)