先看看赫夫曼树
假设有n个权值{w1,w2,…,wn},构造一个有n个叶子结点的二叉树,每个叶子结点权值为wi,则其中带权路径长度WPL最小的二叉树称作赫夫曼树或最优二叉树。
赫夫曼树的构造,赫夫曼最早给出了带有一般规律的算法,俗称赫夫曼算法。如下:
(1)根据给定的n个权值{w1,w2,…,wn}构造n棵二叉树的集合F={T1,T2,…,Tn},其中Ti中只有一个权值为wi的根结点,左右子树为空。
(2)在F中选取两棵根结点权值最小的树作为左、右子树以构造一棵新的二叉树,且置新的二叉树的根结点的权值为其左、右子树上根结点的权值之和。
(3)在F中删除这两棵树,同时将新得到的二叉树加入到F中。
(4)重复(2)和(3)直到F中只含一棵树为止,这棵树就是赫夫曼树。
例如下图便是赫夫曼树的构造过程。其中,根结点上标注的是所赋的权值。
设计一棵赫夫曼树,由此得到的二进制前缀编码就是赫夫曼编码。那么什么是前缀编码呢?所谓前缀编码,就是若要设计长短不等的编码,则必须是任意一个字符的编码都不是另一个字符编码的前缀。所以我们可以利用二叉树来设计二进制的前缀编码。
假设需要传送的字符为:A B A C C D A。如下图就是一个前缀编码的示例。
说了这么多理论,总该实践一下了,下面是赫夫曼编码的具体实现代码:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>

#define NUM 256                     /* one leaf per possible byte value */
#define NODE_COUNT (2 * NUM - 1)    /* a full binary tree with NUM leaves */
#define ROOT (NODE_COUNT - 1)       /* the last node built is the root */
#define IO_BUF 100                  /* size of the bit-packing I/O buffer */

/*
 * One node of the Huffman tree, stored in a flat array.
 * Indices 0..NUM-1 are leaves (the index is the byte value), indices
 * NUM..NODE_COUNT-1 are internal nodes.  A link value of -1 means "none".
 */
typedef struct {
    int weight;
    int parent, lchild, rchild;
} HTNode, *HuffmanTree;

/*
 * Find the two lightest parentless nodes among T[0..len].
 *
 * On return *s1 is the index of the lightest root and *s2 the index of the
 * second lightest.  Ties keep the lower index first, because a later node
 * only replaces an earlier one when it is strictly lighter.  If fewer than
 * two roots exist the missing result is set to -1.
 */
void Select(HuffmanTree T, int len, int *s1, int *s2)
{
    int lightest = -1;
    int runner_up = -1;

    for (int i = 0; i <= len; i++) {
        if (T[i].parent != -1) {
            continue;               /* already merged into a subtree */
        }
        if (lightest == -1 || T[i].weight < T[lightest].weight) {
            runner_up = lightest;
            lightest = i;
        } else if (runner_up == -1 || T[i].weight < T[runner_up].weight) {
            runner_up = i;
        }
    }

    *s1 = lightest;
    *s2 = runner_up;
}

/*
 * Debug helper: print the 8 bits of ch, most significant bit first, with a
 * comma between the two nibbles and a trailing space (e.g. "0100,0001 ").
 */
void show_binary(char ch)
{
    /* Work on an unsigned copy so that shifting never touches a sign bit. */
    unsigned char bits = (unsigned char)ch;

    for (int i = 0; i < 8; i++) {
        if (bits & 0x80) {
            printf("1");
        } else {
            printf("0");
        }
        if (i == 3) {
            printf(",");
        }
        bits = (unsigned char)(bits << 1);
    }
    printf(" ");
}

/*
 * Count byte frequencies in psrc (read to EOF) and build the Huffman tree.
 * Returns a malloc'ed array of NODE_COUNT nodes, or NULL on allocation
 * failure.  The caller frees it.  The root's weight equals the number of
 * bytes read (weights are int, so inputs beyond INT_MAX bytes are not
 * supported).
 */
static HuffmanTree build_tree(FILE *psrc)
{
    HuffmanTree HT = malloc(NODE_COUNT * sizeof *HT);
    if (HT == NULL) {
        return NULL;
    }

    for (int i = 0; i < NODE_COUNT; i++) {
        HT[i].weight = 0;
        HT[i].parent = -1;
        HT[i].lchild = -1;
        HT[i].rchild = -1;
    }

    /* fgetc yields 0..255 or EOF; keeping it in an int makes it both a
     * valid array index and distinguishable from EOF. */
    int ch;
    while ((ch = fgetc(psrc)) != EOF) {
        HT[ch].weight++;
    }

    for (int i = NUM; i < NODE_COUNT; i++) {
        int s1, s2;
        Select(HT, i - 1, &s1, &s2);
        assert(s1 >= 0 && s2 >= 0); /* at least two roots remain here */
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }
    return HT;
}

/* Release a code table produced by build_codes (NULL is accepted). */
static void free_codes(char **HC)
{
    if (HC == NULL) {
        return;
    }
    for (int i = 0; i < NUM; i++) {
        free(HC[i]);
    }
    free(HC);
}

/*
 * Derive the code string ('0' = left, '1' = right) of every leaf by walking
 * from the leaf up to the root.  Returns a table of NUM NUL-terminated
 * strings, or NULL on allocation failure.  Free with free_codes().
 */
static char **build_codes(const HTNode *HT)
{
    char **HC = calloc(NUM, sizeof *HC);
    if (HC == NULL) {
        return NULL;
    }

    /* The deepest possible leaf has NUM-1 edges above it, so NUM chars
     * (including the terminator) always suffice. */
    char cd[NUM];
    cd[NUM - 1] = '\0';

    for (int i = 0; i < NUM; i++) {
        int start = NUM - 1;
        for (int c = i, f = HT[i].parent; f != -1; c = f, f = HT[f].parent) {
            cd[--start] = (HT[f].lchild == c) ? '0' : '1';
        }

        size_t len = (size_t)(NUM - start);     /* includes the NUL */
        HC[i] = malloc(len);
        if (HC[i] == NULL) {
            free_codes(HC);
            return NULL;
        }
        memcpy(HC[i], &cd[start], len);
    }
    return HC;
}

/*
 * Encode psrc (from its current position to EOF) into pdst as a packed bit
 * stream, most significant bit first.  The final byte is zero-padded on the
 * right.  Returns 0 on success, -1 on an I/O error.
 */
static int encode(FILE *psrc, FILE *pdst, char **HC)
{
    unsigned char buff[IO_BUF] = {0};
    size_t used = 0;    /* number of completed bytes in buff */
    int nbits = 0;      /* bits already placed in buff[used] */
    int ch;

    while ((ch = fgetc(psrc)) != EOF) {
        for (const char *bit = HC[ch]; *bit != '\0'; bit++) {
            buff[used] = (unsigned char)((buff[used] << 1) | (*bit == '1'));
            if (++nbits == 8) {
                nbits = 0;
                if (++used == IO_BUF) {
                    if (fwrite(buff, 1, IO_BUF, pdst) != IO_BUF) {
                        return -1;
                    }
                    used = 0;
                }
                buff[used] = 0;
            }
        }
    }

    if (nbits > 0) {
        /* Left-align the bits of the trailing partial byte. */
        buff[used] = (unsigned char)(buff[used] << (8 - nbits));
        used++;
    }
    if (used > 0 && fwrite(buff, 1, used, pdst) != used) {
        return -1;
    }
    return ferror(psrc) ? -1 : 0;
}

/*
 * Decode the bit stream in pdst back into bytes written to pdeciphering.
 * Exactly HT[ROOT].weight symbols are produced, so the padding bits in the
 * last byte are never interpreted as data.  Returns 0 on success, -1 on an
 * I/O error or a truncated stream.
 */
static int decode(FILE *pdst, FILE *pdeciphering, const HTNode *HT)
{
    unsigned char buff[IO_BUF];
    long remaining = HT[ROOT].weight;   /* total number of encoded symbols */
    int node = ROOT;
    size_t got;

    while (remaining > 0 && (got = fread(buff, 1, IO_BUF, pdst)) > 0) {
        for (size_t i = 0; i < got && remaining > 0; i++) {
            for (int bit = 7; bit >= 0 && remaining > 0; bit--) {
                node = ((buff[i] >> bit) & 1) ? HT[node].rchild
                                              : HT[node].lchild;
                assert(node >= 0 && node < NODE_COUNT);
                if (HT[node].lchild == -1) {    /* reached a leaf */
                    if (fputc(node, pdeciphering) == EOF) {
                        return -1;
                    }
                    node = ROOT;
                    remaining--;
                }
            }
        }
    }
    return (remaining == 0 && !ferror(pdst)) ? 0 : -1;
}

/*
 * Huffman-encode psrc into pdst, then decode pdst into pdeciphering.
 *
 * psrc         - readable, seekable stream (it is read twice via rewind).
 * pdst         - stream opened for update ("w+b"); receives the bit stream
 *                and is then read back.
 * pdeciphering - writable stream; receives the decoded bytes.
 *
 * All three streams are closed before returning, on success and on failure.
 * Failures are reported on stderr; the function itself returns nothing.
 */
void HuffmanCoding(FILE *psrc, FILE *pdst, FILE *pdeciphering)
{
    const char *err = NULL;
    char **HC = NULL;
    HuffmanTree HT = build_tree(psrc);

    if (HT == NULL) {
        err = "out of memory";
        goto done;
    }
    if (ferror(psrc)) {
        err = "error reading source file";
        goto done;
    }

    HC = build_codes(HT);
    if (HC == NULL) {
        err = "out of memory";
        goto done;
    }

    rewind(psrc);
    if (encode(psrc, pdst, HC) != 0) {
        err = "error while encoding";
        goto done;
    }

    /* Flush the written data before switching the stream to reading. */
    if (fflush(pdst) != 0) {
        err = "error flushing destination file";
        goto done;
    }
    rewind(pdst);

    if (decode(pdst, pdeciphering, HT) != 0) {
        err = "error while decoding";
    }

done:
    free_codes(HC);
    free(HT);
    if (fclose(pdst) != 0 && err == NULL) {
        err = "error closing destination file";
    }
    fclose(psrc);
    if (fclose(pdeciphering) != 0 && err == NULL) {
        err = "error closing deciphering file";
    }
    if (err != NULL) {
        fprintf(stderr, "HuffmanCoding: %s\n", err);
    }
}

/*
 * Prompt for the three file names, open the files in binary mode and run
 * the encode/decode round trip.  Returns 0 on success, -1 if the input
 * could not be read or a file could not be opened.
 */
int main(void)
{
    char srcfile[100], dstfile[100], deciphering[100];

    /* The %99s width keeps scanf from overrunning the 100-byte buffers. */
    printf("Input source file:");
    if (scanf("%99s", srcfile) != 1) {
        fprintf(stderr, "failed to read file name\n");
        return -1;
    }
    printf("Input dest file:");
    if (scanf("%99s", dstfile) != 1) {
        fprintf(stderr, "failed to read file name\n");
        return -1;
    }
    printf("Input deciphering file:");
    if (scanf("%99s", deciphering) != 1) {
        fprintf(stderr, "failed to read file name\n");
        return -1;
    }

    /* Binary mode: the data is raw bytes, not text. */
    FILE *psrc = fopen(srcfile, "rb");
    FILE *pdst = fopen(dstfile, "w+b");
    FILE *pdeciphering = fopen(deciphering, "wb");

    if (psrc == NULL || pdst == NULL || pdeciphering == NULL) {
        printf("file opened failed\n");
        if (psrc != NULL) {
            fclose(psrc);
        }
        if (pdst != NULL) {
            fclose(pdst);
        }
        if (pdeciphering != NULL) {
            fclose(pdeciphering);
        }
        return -1;
    }

    HuffmanCoding(psrc, pdst, pdeciphering);
    return 0;
}