决心写一个哈夫曼树,巩固自己学的数据结构
一、最优树的定义
结点的路径长度定义为:
从根结点到该结点的路径上
分支的数目。
树的路径长度定义为:
树中每个结点的路径长度之和。
树的带权路径长度定义为:
树中所有叶子结点的带权路径长度之和
$WPL(T) = \sum_{k} w_k l_k$(对所有叶子结点求和,其中 $w_k$ 为第 k 个叶子结点的权值,$l_k$ 为其路径长度)。
在所有含 n 个叶子结点、并带相同权
值的 m 叉树中,必存在一棵其带权路径
长度取最小值的树,称为“最优树”。
二、如何构造最优树
(1)根据给定的 n个权值 {w1, w2,…, wn},构造 n 棵二叉树的集合
F ={T1, T2, …, Tn},
其中每棵二叉树中均只含一个带权值
为 $w_i$ 的根结点,其左、右子树为空树;
(2)在 F中选取其根结点的权值为最
小的两棵二叉树,分别作为左、
右子树构造一棵新的二叉树,并
置这棵新的二叉树根结点的权值
为其左、右子树根结点的权值之
和;
(3)从F中删去这两棵树,同时加入
刚生成的新树;
重复(2)和(3)两步,直至F中只
含一棵树为止。
忙完攻防大赛之后自己终于可以敲敲代码了。
把拖了两个礼拜的哈夫曼编码搞定了。哈哈。
代码除了书上的6.12算法 HuffmanCoding(建立哈夫曼树和构造哈夫曼编码)之外,其它全部都是自己写的。
算法本身不难,就是多了对文件的操作。一些细节的地方自己还是调试了很长时间。
如果大家对哈夫曼树不是很清楚,下面两篇博客讲的很清楚:
http://www.thecodeway.com/blog/?p=870
http://www.cnblogs.com/syblogs/articles/2020145.html
简单描述下题目:
一个完整的哈夫曼编码、译码系统:
I 初始化,建立哈夫曼树,存于文件hfmTree中
E 编码 利用建好的哈夫曼树,对ToBeTran中的正文进行编码,将结果存入CodeFile中
D 译码 利用建好的哈夫曼树将文件CodeFile中的代码进行译码,结果存入TextFile中
P 印代码文件 将文件CodeFile以紧凑格式显示在终端上。同时将此字符形式的编码文件写入文件CodePrin中
T 打印哈夫曼树,显示在终端上
Q 退出
最后,上代码:
huffman_define.h
#ifndef HUFFMAN_DEFINE_H
#define HUFFMAN_DEFINE_H

/*
 * Shared declarations for the Huffman coding example.
 *
 * NOTE: the prototypes below take reference parameters, so this header must
 * be compiled as C++.
 */

#include <stdio.h>
#include <stdlib.h> /* malloc/free (standard replacement for <malloc.h>) */
#include <string.h>

//define
#define OK 1
#define ERROR 0
#define OVERFLOW -1
/* Weight stored in the unused node HT[0] by HuffmanCoding. */
#define MAXWEIGHT 1000

//typedef
typedef int Status;
typedef char TElemType;

/*
 * One node of the Huffman tree, stored in an array.
 * Index 0 is used as the "no node" value for parent/lchild/rchild, which is
 * why real nodes start at index 1.
 */
typedef struct {
    unsigned int weight;
    unsigned int parent, lchild, rchild;
} HTNode, *HuffmanTree;

/* Array of NUL-terminated code strings; entry i holds the code of symbol i. */
typedef char **HuffmanCode;

//functions

/*
 * Among nodes HT[1..i] whose parent is 0, store the indices of the two
 * smallest weights in *s1 and *s2. Returns 2 when no such node exists.
 */
Status Select(HuffmanTree HT, int i, unsigned int *s1, unsigned int *s2);

/*
 * Build the Huffman tree HT and the code table HC for the n weights in w.
 */
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, int *w, int n);

#endif /* HUFFMAN_DEFINE_H */
#include "huffman_define.h"

/*
 * Select the two parentless nodes with the smallest weights.
 *
 * HT  node array; entries 1..i are examined.
 * i   highest index to examine.
 * s1  receives the index of the smallest-weight parentless node.
 * s2  receives the index of the next smallest one (different from *s1),
 *     or 0 when only one parentless node exists.
 *
 * Returns 2 (and leaves *s1 / *s2 untouched) when no node in 1..i is
 * parentless, 0 otherwise.
 *
 * The minimum is tracked with an explicit "nothing found yet" state (index 0)
 * instead of comparing against HT[0].weight. The sentinel approach silently
 * failed for any weight >= MAXWEIGHT, which internal nodes reach quickly
 * because their weight is the sum of their children. Ties are still resolved
 * in favour of the lowest index.
 */
Status Select(HuffmanTree HT, int i, unsigned int *s1, unsigned int *s2)
{
    unsigned int first = 0;  /* 0 == no candidate yet */
    unsigned int second = 0;

    for (int j = 1; j <= i; j++) {
        if (HT[j].parent != 0)
            continue;
        if (first == 0 || HT[j].weight < HT[first].weight)
            first = (unsigned int)j;
    }
    if (first == 0)
        return 2;

    for (int j = 1; j <= i; j++) {
        if (HT[j].parent != 0 || (unsigned int)j == first)
            continue;
        if (second == 0 || HT[j].weight < HT[second].weight)
            second = (unsigned int)j;
    }

    *s1 = first;
    *s2 = second;
    return 0;
}

/*
 * Build a Huffman tree and the code of every symbol.
 *
 * HT  receives a malloc'ed array of 2n nodes; nodes 1..n are the leaves in
 *     the order of w, nodes n+1..2n-1 are internal, node 2n-1 is the root.
 *     Node 0 is not part of the tree.
 * HC  receives a malloc'ed array of n+1 pointers; HC[1..n] are malloc'ed
 *     NUL-terminated strings of '0'/'1', HC[0] is NULL.
 * w   the n symbol weights.
 * n   number of symbols.
 *
 * When n <= 1 or an allocation fails, nothing is left allocated and both HT
 * and HC are set to NULL so the caller can detect the failure.
 * The caller owns HT, HC and every HC[i] and must free them.
 */
void HuffmanCoding(HuffmanTree &HT, HuffmanCode &HC, int *w, int n)
{
    HT = NULL;
    HC = NULL;
    if (n <= 1)
        return;

    const int m = 2 * n - 1; /* total number of tree nodes */

    HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode));
    if (HT == NULL)
        return;

    /* Node 0 is never linked into the tree; keep it fully initialised. */
    HT[0].weight = MAXWEIGHT;
    HT[0].parent = 0;
    HT[0].lchild = 0;
    HT[0].rchild = 0;

    for (int i = 1; i <= m; ++i) {
        HT[i].weight = (i <= n) ? (unsigned int)w[i - 1] : 0;
        HT[i].parent = 0;
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }

    /* Repeatedly merge the two lightest roots into a new internal node. */
    for (int i = n + 1; i <= m; ++i) {
        unsigned int s1 = 0;
        unsigned int s2 = 0;

        if (2 == Select(HT, i - 1, &s1, &s2))
            break;
        HT[s1].parent = i;
        HT[s2].parent = i;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    /* A code has at most n-1 symbols, so n chars hold it plus the NUL. */
    HC = (HuffmanCode)malloc((n + 1) * sizeof(char *));
    char *cd = (char *)malloc(n * sizeof(char));
    if (HC == NULL || cd == NULL) {
        free(cd);
        free(HC);
        free(HT);
        HC = NULL;
        HT = NULL;
        return;
    }
    HC[0] = NULL;
    cd[n - 1] = '\0';

    /* Walk from each leaf up to the root, filling the code back to front. */
    for (int i = 1; i <= n; ++i) {
        int start = n - 1;
        unsigned int c = (unsigned int)i;

        for (unsigned int f = HT[i].parent; f != 0; c = f, f = HT[f].parent)
            cd[--start] = (HT[f].lchild == c) ? '0' : '1';

        HC[i] = (char *)malloc((n - start) * sizeof(char));
        if (HC[i] == NULL) {
            for (int k = 1; k < i; ++k)
                free(HC[k]);
            free(cd);
            free(HC);
            free(HT);
            HC = NULL;
            HT = NULL;
            return;
        }
        strcpy(HC[i], &cd[start]);
    }
    free(cd);
}//HuffmanCoding
huffman.cpp
#include "huffman_define.h"

/*
 * Interactive Huffman encoding/decoding tool.
 *
 * Menu commands: I builds the tree from typed symbol/weight pairs, E encodes
 * ToBeTran into CodeFile, D decodes CodeFile into TextFile, P prints CodeFile
 * compactly, T prints the tree, Q quits.
 */

enum {
    MAX_SYMBOLS = 100, /* capacity of the symbol and weight arrays in main() */
    TOKEN_SIZE = 100,  /* buffer size for one code word read from CodeFile */
    PRINT_WIDTH = 50   /* Print() starts a new line once this many digits are out */
};

//functions
int Init(HuffmanTree &HT, HuffmanCode &HC, char a[], int b[]);
void Encoding(HuffmanCode HC, int n, char a[]);
void Decoding(HuffmanCode HC, int n, char a[]);
void Print();
void TreePrint(HuffmanTree HT, int n);

//filepaths
char TEST[20] = "D:\\txt\\TEST.txt";
char hfmTree[20] = "D:\\txt\\hufmTree.txt";
char ToBeTran[20] = "D:\\txt\\ToBeTran.txt";
char CodeFile[20] = "D:\\txt\\CodeFile.txt";
char TextFile[20] = "D:\\txt\\TextFile.txt";
char CodePrin[20] = "D:\\txt\\CodePrin.txt";
char TreePrin[20] = "D:\\txt\\TreePrin.txt";

/* Free a tree/code table built for n symbols and reset both handles to NULL. */
static void ReleaseTree(HuffmanTree &HT, HuffmanCode &HC, int n)
{
    if (HC != NULL) {
        for (int i = 1; i <= n; i++)
            free(HC[i]);
        free(HC);
    }
    free(HT);
    HC = NULL;
    HT = NULL;
}

/* Menu loop: reads one command character per line until Q or end of input. */
int main()
{
    int n = 0;
    char a[MAX_SYMBOLS];
    int b[MAX_SYMBOLS];
    HuffmanTree HT = NULL;
    HuffmanCode HC = NULL;
    char choise;

    while (1) {
        printf("\n\n-------------------------------------------------------------------------\n");
        printf("It's huffman coding_encoding system. Please Input your chioise\n");
        printf("I: Initialization\nE:Encoding\nD:Decoding\nP:Print\nT:Tree printing\nQ:Quit\n");
        /* Stop on end of input instead of looping forever on a stale value. */
        if (scanf("%c", &choise) != 1)
            break;
        switch (choise) {
        case ('I'):
            /* Drop the previous tree first so re-initialising does not leak. */
            ReleaseTree(HT, HC, n);
            n = Init(HT, HC, a, b);
            break;
        case ('E'):
            Encoding(HC, n, a);
            break;
        case ('D'):
            Decoding(HC, n, a);
            break;
        case ('P'):
            Print();
            break;
        case ('T'):
            TreePrint(HT, n);
            break;
        case ('Q'):
            ReleaseTree(HT, HC, n);
            return 0;
        default:
            printf("Input wrong, please inpint again!~~\n");
            break;
        }
        getchar(); /* consume the newline that follows the command */
    }
    ReleaseTree(HT, HC, n);
    return 0;
}

/*
 * Read the symbol count and the symbol/weight pairs from stdin, build the
 * Huffman tree and code table, and dump the tree to the hfmTree file.
 *
 * a receives the symbols, b the weights; both must hold MAX_SYMBOLS entries.
 * Returns the number of symbols, or 0 when the input is invalid or the tree
 * could not be built (HT and HC are then not usable).
 */
int Init(HuffmanTree &HT, HuffmanCode &HC, char a[], int b[])
{
    int n = 0;

    memset(a, 0, MAX_SYMBOLS);
    printf("please input n:\n");
    /* A tree needs at least two symbols, and a/b hold at most MAX_SYMBOLS. */
    if (scanf("%d", &n) != 1 || n < 2 || n > MAX_SYMBOLS) {
        printf("n must be between 2 and %d\n", (int)MAX_SYMBOLS);
        return 0;
    }

    printf("please input %d code and weight:\n", n);
    for (int i = 0; i < n; i++) {
        getchar(); /* skip the separator left by the previous number */
        if (scanf("%c", &a[i]) != 1 || scanf("%d", &b[i]) != 1 || b[i] < 0) {
            printf("invalid code/weight pair\n");
            return 0;
        }
    }

    HuffmanCoding(HT, HC, b, n);
    if (HT == NULL || HC == NULL) {
        printf("failed to build the huffman tree\n");
        return 0;
    }

    FILE *f1 = fopen(hfmTree, "w+");
    if (f1 == NULL) {
        /* The in-memory tree is still valid, only the dump is skipped. */
        printf("cannot open %s\n", hfmTree);
        return n;
    }
    /* The tree has 2n-1 nodes; the last one is the root and must be included. */
    for (int i = 1; i <= 2 * n - 1; i++) {
        fprintf(f1, "%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
    }
    fclose(f1);
    return n;
}

/*
 * Encode the text in ToBeTran: each character found in a[0..n-1] is written
 * to CodeFile as its code followed by a newline. Other characters are skipped.
 */
void Encoding(HuffmanCode HC, int n, char a[])
{
    if (n <= 0 || HC == NULL) {
        printf("huffman tree is not initialized\n");
        return;
    }

    FILE *f1 = fopen(ToBeTran, "r+");
    if (f1 == NULL) {
        printf("cannot open %s\n", ToBeTran);
        return;
    }
    FILE *f2 = fopen(CodeFile, "w+");
    if (f2 == NULL) {
        printf("cannot open %s\n", CodeFile);
        fclose(f1);
        return;
    }

    int ch;
    while ((ch = fgetc(f1)) != EOF) {
        for (int i = 0; i < n; i++) {
            if (a[i] == (char)ch) {
                fprintf(f2, "%s\n", HC[i + 1]); /* HC is 1-based */
                break;
            }
        }
    }
    fclose(f1);
    fclose(f2);
}

/*
 * Decode CodeFile: each whitespace-separated code word that matches an entry
 * of HC is written to TextFile as the corresponding symbol of a.
 */
void Decoding(HuffmanCode HC, int n, char a[])
{
    if (n <= 0 || HC == NULL) {
        printf("huffman tree is not initialized\n");
        return;
    }

    FILE *f1 = fopen(CodeFile, "r+");
    if (f1 == NULL) {
        printf("cannot open %s\n", CodeFile);
        return;
    }
    FILE *f2 = fopen(TextFile, "w+");
    if (f2 == NULL) {
        printf("cannot open %s\n", TextFile);
        fclose(f1);
        return;
    }

    char temp[TOKEN_SIZE];
    /* The width keeps an over-long token from overflowing temp. */
    while (fscanf(f1, "%99s", temp) == 1) {
        for (int i = 1; i <= n; i++) {
            if (!strcmp(temp, HC[i])) {
                fprintf(f2, "%c", a[i - 1]);
                break;
            }
        }
    }
    fclose(f1);
    fclose(f2);
}

/*
 * Show CodeFile in compact form on the terminal and copy the same output to
 * CodePrin: code words are concatenated, and a line break is emitted each
 * time at least PRINT_WIDTH digits have accumulated on the current line.
 */
void Print()
{
    FILE *f1 = fopen(CodeFile, "r+");
    if (f1 == NULL) {
        printf("cannot open %s\n", CodeFile);
        return;
    }
    FILE *f2 = fopen(CodePrin, "w+");
    if (f2 == NULL) {
        printf("cannot open %s\n", CodePrin);
        fclose(f1);
        return;
    }

    size_t count = 0;
    char temp[TOKEN_SIZE];
    while (fscanf(f1, "%99s", temp) == 1) {
        printf("%s", temp);
        fprintf(f2, "%s", temp);
        /* Measure the token before reusing the buffer, or the wrap never fires. */
        count += strlen(temp);
        if (count >= PRINT_WIDTH) {
            printf("\n");
            fprintf(f2, "\n");
            count = 0;
        }
    }
    printf("\n");
    fprintf(f2, "\n");
    fclose(f1);
    fclose(f2);
}

/*
 * Print every node of the tree (index, weight, parent, left child, right
 * child) to the terminal and to TreePrin. n is the number of symbols, so the
 * tree has 2n-1 nodes.
 */
void TreePrint(HuffmanTree HT, int n)
{
    if (n <= 0 || HT == NULL) {
        printf("huffman tree is not initialized\n");
        return;
    }

    FILE *f1 = fopen(TreePrin, "w+");
    if (f1 == NULL) {
        printf("cannot open %s\n", TreePrin);
        return;
    }
    for (int i = 1; i <= 2 * n - 1; i++) {
        printf("%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
        fprintf(f1, "%d\t%u\t%u\t%u\t%u\n", i, HT[i].weight, HT[i].parent, HT[i].lchild, HT[i].rchild);
    }
    fclose(f1);
}
另外,书上的算法有点小问题。
HT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode)); //0 not used?
这里它申请了 m + 1 个节点大小的空间。根据书上的算法,他是从第1个开始的。没有用第0个节点。
所以我们要稍微改进下书上的代码:
for (p = HT + 1, i = 1; i <= n; ++i, ++p, ++w)
对于第0个节点,我们可以这样用:
#define MAXWEIGHT 1000
HT[0].weight = MAXWEIGHT;
然后在Select函数里面每次赋初值的时候令
s1 = s2 = 0;
这样就比较方便了。