/*
  Name: Huffman coding
  Copyright: First published on goal00001111's column; free to redistribute,
             provided that the author and the source are credited.
  Author: goal00001111
  Date: 16-12-08 21:16
  Description: Huffman coding
  This program compresses data with Huffman coding. It reads a string
  sourceCode -- to keep things easy to follow the string is, for now, expected
  to contain only upper-case letters and spaces; it is easy to extend this to
  every character if you wish -- computes the weight of every letter that
  occurs, builds the Huffman code for them and prints the Huffman tree.
  The leaves of the Huffman tree are stored in an ordered binary tree, and the
  compressed form of the original string, destCode, is printed as a string of
  '0' and '1' characters. destCode is then decoded with the Huffman tree to
  obtain objCode; comparing objCode with sourceCode shows that they are
  identical, i.e. encoding and decoding succeeded.
  Finally the ordered binary tree and the Huffman tree are destroyed.
  A highlight of this program is that the Huffman nodes still waiting to be
  merged are kept in a binary heap, so extracting the minimum costs log(n).
  For details on Huffman coding see Wikipedia:
  http://zh.wikipedia.org/wiki/%E5%93%88%E5%A4%AB%E6%9B%BC%E7%BC%96%E7%A0%81
  and the data-structure self-study site:
  http://student.zjzk.cn/course_ware/data_structure/web/shu/shu6.6.2.1.htm
  For details on binary heaps see Baidu Baike:
  http://baike.baidu.com/view/668854.html
*/
#include <cstdlib>
#include <iostream>
#include <string>
#include <vector>

using namespace std;

typedef char ElemType;

// A source symbol together with its weight (number of occurrences).
typedef struct sNode
{
    double weight;
    ElemType data;
} *Source;

// A Huffman tree node. The tree lives in an array whose slot 0 is unused;
// lc/rc are the array indices of the children and 0 means "no child".
typedef struct hNode
{
    double weight;
    ElemType data;
    int lc, rc;
} *HuffmanTree;

// A node of the binary search tree (keyed by data) that maps a symbol to its
// Huffman code, stored as a string of '0'/'1' characters.
typedef struct cNode
{
    ElemType data;
    string str;
    struct cNode *lc, *rc;
} *Btree;

HuffmanTree CreateHuffmanTree(const Source w, int n);      // build a Huffman tree
void BuildHeap(HuffmanTree t, int n);                      // turn t[1..n] into a min-heap
void PercDown(HuffmanTree t, int pos, int n);              // sift-down helper for the heap
void DeleteMin(HuffmanTree t, int len);                    // remove the heap root and restore the heap
void InsertHfNode(HuffmanTree t, int len, struct hNode x); // insert x so that the heap has len elements
void Preorder(HuffmanTree t, int p);                       // pre-order traversal of the Huffman tree
void Postorder(Btree & t, HuffmanTree a, int n);           // walk the Huffman tree and record every leaf's code
bool InsertBtNode(Btree & t, Btree s);                     // insert node s into binary search tree t
void Inorder(Btree t);                                     // in-order traversal of the binary search tree
Btree Search(Btree p, ElemType data);                      // find the node whose key is data
string Coding(string s, Btree t);                          // encode s with the codes stored in t
string Decode(string s, HuffmanTree hT);                   // decode s with the Huffman tree
void DestroyBTree(Btree & t);                              // destroy a binary search tree
void DestroyHfmanTree(HuffmanTree & t, int n);             // destroy a Huffman tree

// Reads one line from standard input, prints the weight table, the Huffman
// tree, the code table, the encoded bit string and the decoded text.
// Characters other than 'A'..'Z' and ' ' are ignored: they get no weight and
// are therefore absent from both the encoded and the decoded output.
int main()
{
    string sourceCode;
    getline(cin, sourceCode, '\n');

    const int MAX = 27; // the alphabet: 26 upper-case letters plus the space
    struct sNode w[MAX];

    // Initialise every symbol with weight zero; the space takes the last slot.
    w[MAX - 1].data = ' ';
    w[MAX - 1].weight = 0;
    for (int i = MAX - 2; i >= 0; i--)
    {
        w[i].data = static_cast<ElemType>('A' + i);
        w[i].weight = 0;
    }

    // Count the occurrences of every symbol of the alphabet.
    for (string::size_type i = 0; i < sourceCode.size(); i++)
    {
        const char c = sourceCode[i];
        if (c == ' ')
            w[MAX - 1].weight++;
        else if (c >= 'A' && c <= 'Z')
            w[c - 'A'].weight++;
        // Anything else would index outside w, so it is skipped.
    }

    // Compact the table so that only symbols that actually occur remain.
    int n = 0;
    for (int i = 0; i < MAX; i++)
    {
        if (w[i].weight > 0)
            w[n++] = w[i];
    }

    for (int i = 0; i < n; i++)
        cout << w[i].weight << " " << w[i].data << '\n';

    // Build the Huffman tree (NULL when no symbol occurred).
    HuffmanTree hT = CreateHuffmanTree(w, n);

    // Pre-order dump: node weights, plus the symbol for leaves.
    Preorder(hT, 1);
    cout << endl;

    // Collect the code of every leaf into a binary search tree and print it.
    Btree bT = NULL;
    Postorder(bT, hT, n);
    Inorder(bT);

    // Encode the input with the code table.
    string destCode = Coding(sourceCode, bT);
    cout << destCode << endl;

    // Decode the bit string again with the Huffman tree.
    string objCode = Decode(destCode, hT);
    cout << objCode << endl;

    DestroyBTree(bT);
    DestroyHfmanTree(hT, n);

    system("pause");
    return 0;
}

// Builds a Huffman tree for the n weighted symbols w[0..n-1].
//
// The result is a new[]-allocated array of 2*n nodes: slot 0 is unused, the
// root ends up in slot 1 and the remaining nodes fill slots 2..2*n-1. Release
// it with DestroyHfmanTree. Returns NULL when w is NULL or n <= 0.
HuffmanTree CreateHuffmanTree(const Source w, int n)
{
    if (w == NULL || n <= 0)
        return NULL;

    HuffmanTree hT = new hNode[2 * n](); // value-initialised; slot 0 is never used
    for (int i = 0; i < n; i++)
    {
        hT[i + 1].data = w[i].data;
        hT[i + 1].weight = w[i].weight;
        hT[i + 1].lc = hT[i + 1].rc = 0;
    }
    BuildHeap(hT, n); // min-heap over slots 1..n

    struct hNode add;
    add.data = ElemType(); // internal nodes carry no symbol
    add.weight = 0;
    add.lc = add.rc = 0;

    int heapSize = n; // the heap occupies hT[1..heapSize]
    int top = n;      // highest slot in use
    while (heapSize > 1)
    {
        // The smallest node becomes the left child; park it above the heap.
        hT[++top] = hT[1];
        add.weight = hT[1].weight;
        add.lc = top;
        DeleteMin(hT, heapSize--);

        // The next smallest becomes the right child; it goes into the slot
        // the heap has just given up.
        hT[heapSize + 1] = hT[1];
        add.weight += hT[1].weight;
        add.rc = heapSize + 1;
        DeleteMin(hT, heapSize--);

        // The merged node goes back into the heap.
        InsertHfNode(hT, ++heapSize, add);
    }
    return hT;
}

// Rearranges t[1..len] into a min-heap ordered by weight.
void BuildHeap(HuffmanTree t, int len)
{
    // Slots above len/2 have no children, so sifting them down does nothing.
    for (int i = len / 2; i > 0; i--)
        PercDown(t, i, len);
}

// Sifts the node at index pos down the heap t[1..len] until neither child is
// lighter than it.
void PercDown(HuffmanTree t, int pos, int len)
{
    const struct hNode moving = t[pos];
    while (pos * 2 <= len)
    {
        int child = pos * 2;
        // Pick the lighter child when a right child exists.
        if (child != len && t[child + 1].weight < t[child].weight)
            child++;
        if (!(moving.weight > t[child].weight))
            break;
        t[pos] = t[child];
        pos = child;
    }
    t[pos] = moving;
}

// Removes the root of the heap t[1..len], leaving a heap in t[1..len-1].
// Slot len itself is left untouched, so the caller may reuse it.
void DeleteMin(HuffmanTree t, int len)
{
    // Move the last element to the root and sift it down the shrunken heap.
    t[1] = t[len];
    PercDown(t, 1, len - 1);
}

// Inserts x into a heap so that it afterwards occupies t[1..len]; slot len
// must be free on entry.
void InsertHfNode(HuffmanTree t, int len, struct hNode x)
{
    int i = len;
    // Pull heavier ancestors down until the right spot for x is found.
    while (i / 2 > 0 && t[i / 2].weight > x.weight)
    {
        t[i] = t[i / 2];
        i /= 2;
    }
    t[i] = x;
}

// Stores the pair (symbol, code) as a new node of the search tree t. The
// node is discarded again when the symbol is already present.
static void RecordLeafCode(Btree & t, ElemType symbol, const string & code)
{
    Btree s = new struct cNode;
    s->data = symbol;
    s->str = code;
    s->lc = s->rc = NULL;
    if (!InsertBtNode(t, s))
        delete s;
}

// Walks the Huffman tree a (root in slot 1) without recursion and stores the
// code of every leaf in the binary search tree t. A step to a left child
// contributes '0' and a step to a right child contributes '1'.
//
// n is the number of leaves; it is only used to pre-size the work buffers.
// A tree that consists of a single leaf gets the one-bit code "0", so that
// every symbol is encoded with at least one bit. Does nothing when a is NULL
// or n <= 0.
void Postorder(Btree & t, HuffmanTree a, int n)
{
    if (a == NULL || n <= 0)
        return;

    if (a[1].lc <= 0) // the root itself is the only leaf
    {
        RecordLeafCode(t, a[1].data, "0");
        return;
    }

    vector<int> ancestors;  // indices of the nodes on the path from the root
    vector<int> rightTaken; // per ancestor: 1 once its right subtree was entered
    string path;            // one '0'/'1' per ancestor: the code so far
    ancestors.reserve(n);
    rightTaken.reserve(n);
    path.reserve(n);

    bool fresh = true; // true while p refers to a leaf not yet recorded
    int p = 1;
    while (a[p].lc > 0 || !ancestors.empty())
    {
        // Descend to the left as far as possible.
        while (a[p].lc > 0)
        {
            fresh = true;
            ancestors.push_back(p);
            rightTaken.push_back(0);
            path += '0';
            p = a[p].lc;
        }

        if (fresh)
            RecordLeafCode(t, a[p].data, path);

        if (!ancestors.empty())
        {
            if (rightTaken.back() == 0)
            {
                // Switch to the right subtree of the innermost ancestor; it
                // stays on the stack until that subtree is finished.
                fresh = true;
                p = a[ancestors.back()].rc;
                rightTaken.back() = 1;
                path[path.size() - 1] = '1';
            }
            else
            {
                // Both subtrees are done: drop the ancestor. p still refers
                // to a leaf that was already recorded.
                fresh = false;
                ancestors.pop_back();
                rightTaken.pop_back();
                path.erase(path.size() - 1);
            }
        }
    }
}

// Prints the subtree rooted at index p in pre-order, one node per line: the
// weight for an internal node, the weight followed by the symbol for a leaf.
// Does nothing when t is NULL.
void Preorder(HuffmanTree t, int p)
{
    if (t == NULL)
        return;

    if (t[p].lc > 0)
    {
        cout << t[p].weight << '\n';
        Preorder(t, t[p].lc);
        Preorder(t, t[p].rc);
    }
    else
    {
        cout << t[p].weight << " " << t[p].data << '\n';
    }
}

// Inserts node s into the binary search tree t, ordered by data. Returns
// false, leaving the tree untouched, when the key is already present; the
// caller keeps ownership of s in that case.
bool InsertBtNode(Btree & t, Btree s)
{
    Btree *slot = &t;
    while (*slot != NULL)
    {
        if ((*slot)->data > s->data)
            slot = &(*slot)->lc;
        else if ((*slot)->data < s->data)
            slot = &(*slot)->rc;
        else
            return false;
    }
    *slot = s;
    return true;
}

// Prints every "symbol : code" pair of the search tree in ascending key order.
void Inorder(Btree t)
{
    if (t == NULL)
        return;

    Inorder(t->lc);
    cout << t->data << " : " << t->str << '\n';
    Inorder(t->rc);
}

// Returns the node of search tree p whose key equals data, or NULL when
// there is none.
Btree Search(Btree p, ElemType data)
{
    while (p != NULL && p->data != data)
        p = (p->data > data) ? p->lc : p->rc;
    return p;
}

// Encodes s by concatenating the code of each of its characters as stored in
// t. Characters that have no entry in t are skipped.
string Coding(string s, Btree t)
{
    string dest;
    for (string::size_type i = 0; i < s.size(); i++)
    {
        const Btree hit = Search(t, s[i]);
        if (hit != NULL)
            dest += hit->str;
    }
    return dest;
}

// Decodes the bit string s with the Huffman tree hT (root in slot 1): '0'
// moves to the left child, any other character moves to the right child, and
// every leaf reached emits its symbol.
//
// Trailing bits that do not complete a symbol are dropped. When the tree is
// a single leaf, every bit decodes to that leaf's symbol. Returns an empty
// string when hT is NULL.
string Decode(string s, HuffmanTree hT)
{
    string dest;
    if (hT == NULL)
        return dest;

    if (hT[1].lc <= 0) // single-symbol tree: one bit per symbol
    {
        dest.assign(s.size(), hT[1].data);
        return dest;
    }

    string::size_type i = 0;
    while (i < s.size())
    {
        int p = 1;
        while (hT[p].lc > 0 && i < s.size())
            p = (s[i++] == '0') ? hT[p].lc : hT[p].rc;

        if (hT[p].lc > 0) // ran out of bits in the middle of a symbol
            break;
        dest += hT[p].data;
    }
    return dest;
}

// Deletes every node of the binary search tree and resets t to NULL.
void DestroyBTree(Btree & t)
{
    if (t == NULL)
        return;

    DestroyBTree(t->lc);
    DestroyBTree(t->rc);
    delete t;
    t = NULL;
}

// Releases a tree obtained from CreateHuffmanTree and resets t to NULL.
// n is kept for interface compatibility only: the whole tree is one new[]
// array, so it has to be released with a single delete[].
void DestroyHfmanTree(HuffmanTree & t, int n)
{
    (void)n;
    delete[] t;
    t = NULL;
}