转自:Tanky Woo 转载地址:http://www.wutianqi.com/?p=2848
Huffman树,中文霍夫曼树或哈夫曼树,又称最优二叉树,是一种带权路径长最短的树。
1.从原始元素集合T中拿出两个频度最小的元素组成一个二叉树,二叉树的根为这两个节点频度的和。
2.然后从集合T中删除这两个元素,把根元素加入到T集合中,如此反复直集合T为空。
1.优先级队列
2.顺序存储结构
3.二叉链表(本文所用)
typedef struct{ unsigned int weight; unsigned int parent, lchild, rchild; }HTNode, *HuffmanTree; //动态分配数组存储哈夫曼树 typedef char* HuffmanCode; //动态分配数组存储哈夫曼编码表
//选出weight最小的两个结点,s1保存最小的,s2保存第二小的 void SelectMin(HuffmanTree HT, int nNode) { int i, j; for(i = 1; i <= nNode; i++) if(!HT[i].parent) { s1 = i; break; } for(j = i+1; j <= nNode; j++) if(!HT[j].parent) { s2 = j; break; } for(i = 1; i <= nNode; i++) if((HT[i].weight < HT[s1].weight) && (!HT[i].parent) && (s2 != i)) s1 = i; for(j = 1; j <= nNode; j++) if((HT[j].weight < HT[s2].weight) && (!HT[j].parent) && (s1 != j)) s2 = j; // 以上只筛选出最小的两个,这里保证s1的weight比s2的小 if(HT[s1].weight > HT[s2].weight) { int tmp = s1; s1 = s2; s2 = tmp; } }
可以分为三部分,分别是Huffman树的初始化,创建,以及编码
代码里已经详细注释了:
// w[]存放nNode个字符的权值(均大于0),构造哈夫曼树HT, // 并求出nNode个字符的哈夫曼编码HC void HuffmanCoding(HuffmanTree &HT, HuffmanCode HC[], int *w, int nNode) { int i, j; char *hfcode; int p; int cdlen; if(nNode < 1) return; m = 2*nNode-1; //哈夫曼树的结点数 /////////////////////////////以下是求Huffman树的初始化///////////////////////////// HT = (HTNode*) malloc ((m+1) *sizeof(HTNode)); //0号单元未用 for(i = 1; i <= nNode; i++) //初始化 { HT[i].weight = w[i-1]; HT[i].parent = 0; HT[i].lchild = 0; HT[i].rchild = 0; } for(i = nNode+1; i <= m; i++) { HT[i].weight = 0; HT[i].parent = 0; HT[i].lchild = 0; HT[i].rchild = 0; } puts("\n哈夫曼树的构造过程如下所示: "); printf("HT初态:\n 结点 weight parent lchild rchild"); for(i = 1; i <= nNode; i++) printf("\n%4d%8d%8d%8d%8d", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild); printf("按任意键,继续..."); getchar(); /////////////////////////////以下是求Huffman树的构建///////////////////////////// for(i = nNode+1; i <= m; i++) { // 建立哈夫曼树 // 在HT[1..i-1]中选择parent为0且weight最小的两个节点 // 其序号分别是s1和s2 SelectMin(HT, i-1); HT[s1].parent = i; HT[s2].parent = i; cout << "S1 && S2: " << HT[s1].weight << " " << HT[s2].weight << endl; HT[i].lchild = s1; HT[i].rchild = s2; HT[i].weight = HT[s1].weight + HT[s2].weight; printf("\nselect: s1 = %d s2 = %d\n", s1, s2); printf("结点 weight parent lchild rchild"); for(j = 1; j <= i; j++) printf("\n%4d%8d%8d%8d%8d", j, HT[j].weight, HT[j].parent, HT[j].lchild, HT[j].rchild); printf("按任意键,继续..."); getchar(); } /////////////////////////////以下是求Huffman树的编码///////////////////////////// // 可以看看算法导论上对于DFS求路径的方法,分三色:白,灰,黑,这里weight=0,1,2和那里思想是类似的 // 可以拿7,5,2,4这组数据来模拟下面的过程,以求更简单理解 // 从根出发 //递归遍历哈夫曼树,求哈夫曼编码 hfcode = (char *) malloc (nNode * sizeof(char)); //分配求编码的工作空间 p = m; cdlen = 0; for(i = 1; i <= m; i++) HT[i].weight = 0; //遍历哈夫曼树时用作结点状态的标志 while(p) //退出条件:p = 结点m的parent,即为0 { if(HT[p].weight == 0) //向左 { HT[p].weight = 1; if(HT[p].lchild != 0) { p = HT[p].lchild; hfcode[cdlen++] = '0'; } else if(HT[p].rchild == 0) { HC[p] = (char *) malloc ((cdlen+1) * sizeof(char)); hfcode[cdlen] = '\0'; //保证后面的不会被复制 strcpy(HC[p], hfcode); //复制编码 } } else if(HT[p].weight == 1) //向右 { HT[p].weight = 2; if(HT[p].rchild != 0) { p = HT[p].rchild; hfcode[cdlen++] = '1'; } } else { // HT[p].weight == 2 退回到父结点,编码长度减一 HT[p].weight = 0; p = HT[p].parent; --cdlen; } } }
/* * Tanky Woo * Blog: www.WuTianQi.com * Description: Huffman Tree * Date: 2011.12.3 */ #include <iostream> using namespace std; int m, s1, s2; // m是总结点个数,s1,s2用于筛选出最小和第二小的两个数 typedef struct{ unsigned int weight; unsigned int parent, lchild, rchild; }HTNode, *HuffmanTree; //动态分配数组存储哈夫曼树 typedef char* HuffmanCode; //动态分配数组存储哈夫曼编码表 void HuffmanCoding(HuffmanTree &HT, HuffmanCode HC[], int *w, int nNode); int main() { HuffmanTree HT; // 哈夫曼树 HuffmanCode *HC; // 保存哈夫曼编码 int *w, nNode, i; // w记录权值 puts("输入结点数: "); scanf("%d", &nNode); HC = (HuffmanCode *) malloc (nNode* sizeof(HuffmanCode)); w = (int *) malloc (nNode * sizeof(int)); printf("输入 %d 个结点的权值\n", nNode); for(i = 0; i < nNode; i++) scanf("%d", &w[i]); HuffmanCoding(HT, HC, w, nNode); puts("\n各结点的哈夫曼编码:"); for(i = 1; i <= nNode; i++) printf("%2d(%4d):%s\n", i, w[i-1], HC[i]); getchar(); } //选出weight最小的两个结点,s1保存最小的,s2保存第二小的 void SelectMin(HuffmanTree HT, int nNode) { int i, j; for(i = 1; i <= nNode; i++) if(!HT[i].parent) { s1 = i; break; } for(j = i+1; j <= nNode; j++) if(!HT[j].parent) { s2 = j; break; } for(i = 1; i <= nNode; i++) if((HT[i].weight < HT[s1].weight) && (!HT[i].parent) && (s2 != i)) s1 = i; for(j = 1; j <= nNode; j++) if((HT[j].weight < HT[s2].weight) && (!HT[j].parent) && (s1 != j)) s2 = j; // 以上只筛选出最小的两个,这里保证s1的weight比s2的小 if(HT[s1].weight > HT[s2].weight) { int tmp = s1; s1 = s2; s2 = tmp; } } // w[]存放nNode个字符的权值(均大于0),构造哈夫曼树HT, // 并求出nNode个字符的哈夫曼编码HC void HuffmanCoding(HuffmanTree &HT, HuffmanCode HC[], int *w, int nNode) { int i, j; char *hfcode; int p; int cdlen; if(nNode < 1) return; m = 2*nNode-1; //哈夫曼树的结点数 /////////////////////////////以下是求Huffman树的初始化///////////////////////////// HT = (HTNode*) malloc ((m+1) *sizeof(HTNode)); //0号单元未用 for(i = 1; i <= nNode; i++) //初始化 { HT[i].weight = w[i-1]; HT[i].parent = 0; HT[i].lchild = 0; HT[i].rchild = 0; } for(i = nNode+1; i <= m; i++) { HT[i].weight = 0; HT[i].parent = 0; HT[i].lchild = 0; HT[i].rchild = 0; } puts("\n哈夫曼树的构造过程如下所示: "); printf("HT初态:\n 结点 weight parent lchild rchild"); for(i = 1; i <= nNode; i++) printf("\n%4d%8d%8d%8d%8d", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild); printf("按任意键,继续..."); getchar(); /////////////////////////////以下是求Huffman树的构建///////////////////////////// for(i = nNode+1; i <= m; i++) { // 建立哈夫曼树 // 在HT[1..i-1]中选择parent为0且weight最小的两个节点 // 其序号分别是s1和s2 SelectMin(HT, i-1); HT[s1].parent = i; HT[s2].parent = i; cout << "S1 && S2: " << HT[s1].weight << " " << HT[s2].weight << endl; HT[i].lchild = s1; HT[i].rchild = s2; HT[i].weight = HT[s1].weight + HT[s2].weight; printf("\nselect: s1 = %d s2 = %d\n", s1, s2); printf("结点 weight parent lchild rchild"); for(j = 1; j <= i; j++) printf("\n%4d%8d%8d%8d%8d", j, HT[j].weight, HT[j].parent, HT[j].lchild, HT[j].rchild); printf("按任意键,继续..."); getchar(); } /////////////////////////////以下是求Huffman树的编码///////////////////////////// // 可以看看算法导论上对于DFS求路径的方法,分三色:白,灰,黑,这里weight=0,1,2和那里思想是类似的 // 可以拿7,5,2,4这组数据来模拟下面的过程,以求更简单理解 // 从根出发 //递归遍历哈夫曼树,求哈夫曼编码 hfcode = (char *) malloc (nNode * sizeof(char)); //分配求编码的工作空间 p = m; cdlen = 0; for(i = 1; i <= m; i++) HT[i].weight = 0; //遍历哈夫曼树时用作结点状态的标志 while(p) //退出条件:p = 结点m的parent,即为0 { if(HT[p].weight == 0) //向左 { HT[p].weight = 1; if(HT[p].lchild != 0) { p = HT[p].lchild; hfcode[cdlen++] = '0'; } else if(HT[p].rchild == 0) { HC[p] = (char *) malloc ((cdlen+1) * sizeof(char)); hfcode[cdlen] = '\0'; //保证后面的不会被复制 strcpy(HC[p], hfcode); //复制编码 } } else if(HT[p].weight == 1) //向右 { HT[p].weight = 2; if(HT[p].rchild != 0) { p = HT[p].rchild; hfcode[cdlen++] = '1'; } } else { // HT[p].weight == 2 退回到父结点,编码长度减一 HT[p].weight = 0; p = HT[p].parent; --cdlen; } } }
欢迎大家一起探讨。
严蔚敏《数据结构》
最后给出一个题目:
http://acm.hdu.edu.cn/showproblem.php?pid=1053
HuffmanTree的题目,不过没用到求具体编码,所以相对要简单多了。
结题报告:
http://www.wutianqi.com/?p=2514(二叉链表做法)
http://www.cnblogs.com/Open_Source/archive/2010/07/10/1904936.html(STL优先级队列做法)