7-9 Huffman Codes(30 分)
In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000.
The next line gives a positive integer M (≤1000), followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
思路主要有几点:
1、每个字母的编码长度都<=n-1, n是字母个数,也就是题目中的N。
2、Huffman树不唯一,但是带权路径长度(WPL)是唯一的,根据第二行给出的频率计算WPL与之后每组输入的WPL对比。 (另一个方法:WPL=所有非叶节点的权值之和)
3、建树判断是否有前缀码。
4、判断是否有不同字母的编码相同。这种情况下仅靠建树判断前缀码是无效的,因为相同的编码在树中的位置是重合的。
这是我一开始没有考虑到的。
需要注意的有:
1、输入的字符顺序并不是乱的,是按给出频率的顺序输入的。
2、scanf读入数据时,格式很重要,不要漏了\n,\n的位置也很重要。
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Capacity of every fixed-size array in this file. The problem statement
 * limits N to 63 symbols and each code to 63 characters, so 65 leaves room
 * for 1-based indexing and a terminating '\0'. */
#define maxsize 65
/* Handle to a binary tree node (used for both the Huffman tree and the
 * prefix-check tree). */
typedef struct treenode* Tree;
/* Fixed-size array of tree-node handles; despite the name it stores Tree
 * pointers, not ints. */
typedef Tree intArray[maxsize];
/* Handle to the min-heap of tree nodes. */
typedef struct HNode* MinHeap;
/* Fixed-size character buffer holding one submitted code string. */
typedef char str[maxsize];
/* Binary min-heap of tree nodes ordered by freq. */
struct HNode {
/* Heap slots; the heap routines in this file use indices 1..size. */
intArray Data;
/* Number of nodes currently stored in the heap. */
int size;
};
/* Binary tree node. */
struct treenode {
/* Weight of the node: a symbol frequency for leaves, the sum of the two
 * children for merged Huffman nodes; 0 is used for unmarked nodes of the
 * prefix-check tree. */
int freq;
/* Child followed for a '0' bit (NULL when absent). */
Tree left;
/* Child followed for a '1' bit (NULL when absent). */
Tree right;
};
/* NOTE(review): not referenced by any code visible in this file. */
struct string {
char str[maxsize];
int length;
};
MinHeap CreateHeap(int n,int F[]) {
MinHeap H=(MinHeap)malloc( sizeof(struct HNode) );
int i;
char b;
H->size=0;
for(i=0;i<=n;i++) {
Tree T=(Tree)malloc( sizeof(struct treenode) );
T->freq=0;
T->left=T->right=NULL;
H->Data[i]=T;
}
scanf("%c %d",&b,&F[1]);
H->Data[++H->size]->freq=F[1];
for(i=2;i<=n;i++) {
scanf(" %c %d",&b,&F[i]);
H->Data[++H->size]->freq=F[i];
}
return H;
}
/*
 * PercDown - sift the node at index `parent` down until the subtree rooted
 * there satisfies the min-heap order on freq.
 *
 * The node being moved is held aside while smaller children are shifted up
 * into the hole, and is written back once at its final position.
 */
void PercDown(MinHeap H,int parent) {
    Tree moving = H->Data[parent];
    int hole = parent;

    while (hole * 2 <= H->size) {
        int smaller = hole * 2;

        /* Prefer the right child only when it is strictly lighter. */
        if (smaller + 1 <= H->size &&
            H->Data[smaller]->freq > H->Data[smaller + 1]->freq) {
            smaller += 1;
        }
        if (moving->freq <= H->Data[smaller]->freq) {
            break;
        }
        H->Data[hole] = H->Data[smaller];
        hole = smaller;
    }
    H->Data[hole] = moving;
}
/*
 * BuildMinHeap - turn the unordered contents of H->Data[1..size] into a
 * min-heap by sifting down every internal node, last one first.
 */
void BuildMinHeap(MinHeap H) {
    int idx = H->size / 2;

    while (idx > 0) {
        PercDown(H, idx);
        idx--;
    }
}
/*
 * Delete - remove and return the node at the top of the heap (slot 1).
 *
 * The last node is moved into slot 1 and sifted down when anything remains.
 * Returns NULL when the heap is empty.
 */
Tree Delete(MinHeap H) {
    Tree top;

    if (H->size == 0) {
        return NULL;
    }

    top = H->Data[1];
    H->Data[1] = H->Data[H->size];
    H->size -= 1;

    if (H->size != 0) {
        PercDown(H, 1);
    }
    return top;
}
/*
 * Insert - add node T to the heap.
 *
 * A hole is opened at the new last slot and moved up while the parent is
 * strictly heavier than T; T is then stored in the hole.
 */
void Insert(MinHeap H,Tree T) {
    int hole;

    H->size += 1;
    hole = H->size;

    while (hole / 2 > 0 && H->Data[hole / 2]->freq > T->freq) {
        H->Data[hole] = H->Data[hole / 2];
        hole /= 2;
    }
    H->Data[hole] = T;
}
/*
 * Huffman - build a Huffman tree from the nodes stored in heap H.
 *
 * Performs size-1 merges: each step removes the two lightest nodes, makes
 * them the children of a fresh node whose freq is their sum, and inserts
 * that node back into the heap.
 *
 * NOTE(review): Delete() only yields the lightest node if H already obeys
 * the min-heap order, so the caller is presumably expected to run
 * BuildMinHeap(H) first - confirm against the caller.
 *
 * Returns the node created by the last merge (the tree root), which is also
 * left as the single remaining element of the heap. Returns NULL when the
 * heap holds fewer than two nodes or when an allocation fails.
 */
Tree Huffman(MinHeap H) {
    int i, n = H->size;
    Tree T = NULL;

    for (i = 1; i < n; i++) {
        T = malloc(sizeof *T);
        if (T == NULL)
            return NULL;
        T->left = Delete(H);
        T->right = Delete(H);
        T->freq = T->left->freq + T->right->freq;
        Insert(H, T);
    }
    return T;
}
/*
 * WPL - weighted path length of the subtree rooted at T, where T itself sits
 * at the given depth.
 *
 * A node without a left child is treated as a leaf and contributes
 * freq * depth; any other node contributes the sum of its two subtrees.
 * T must not be NULL.
 */
int WPL(Tree T,int depth) {
    int below;

    if (T->left == NULL) {
        return depth * T->freq;
    }
    below = depth + 1;
    return WPL(T->left, below) + WPL(T->right, below);
}
/*
 * BuildTree - build a binary trie from the code strings Q[1..num].
 *
 * Each code is walked from the root: '0' descends left, '1' descends right,
 * and missing nodes are created with freq 0. Any other character is skipped.
 * The node reached at the end of code j has its freq set to F[j].
 *
 * Returns the root of the newly allocated trie.
 */
Tree BuildTree(int num,str* Q[],int F[]) {
    Tree root;
    Tree cursor;
    Tree *slot;
    const char *bit;
    int idx;

    root = (Tree)malloc(sizeof(struct treenode));
    root->freq = 0;
    root->left = root->right = NULL;

    for (idx = 1; idx <= num; idx++) {
        cursor = root;
        for (bit = *Q[idx]; *bit != '\0'; bit++) {
            /* Select the child link this bit refers to. */
            if (*bit == '0') {
                slot = &cursor->left;
            } else if (*bit == '1') {
                slot = &cursor->right;
            } else {
                continue;
            }

            if (*slot == NULL) {
                Tree fresh = (Tree)malloc(sizeof(struct treenode));
                fresh->freq = 0;
                fresh->left = fresh->right = NULL;
                *slot = fresh;
            }
            cursor = *slot;
        }
        cursor->freq = F[idx];
    }
    return root;
}
/*
 * Destroy - free every node of the tree rooted at T (post-order).
 * A NULL tree is accepted and ignored.
 */
void Destroy(Tree T) {
    if (T == NULL) {
        return;
    }
    Destroy(T->left);
    Destroy(T->right);
    free(T);
}
/*
 * TreeTravel - look for a node that is marked (freq != 0) yet still has a
 * child, i.e. a code that is a proper prefix of another code.
 *
 * flag carries the result found so far; once it is non-zero the remaining
 * subtrees are not examined. Returns the (possibly updated) flag.
 */
int TreeTravel(Tree root,int flag) {
    int has_child;

    if (root == NULL || flag) {
        return flag;
    }

    flag = TreeTravel(root->left, flag);
    flag = TreeTravel(root->right, flag);

    has_child = (root->left != NULL) || (root->right != NULL);
    if (root->freq != 0 && has_child) {
        flag = 1;
    }
    return flag;
}
int Stringcompare(str* Q[],int n) {
int i,j,flag;
for(i=1;inum-1)
flag=1;
else
sum+=strlen(*Q[j])*F[j];
}
if(flag) printf("No\n");
else if(!Stringcompare(Q,num)) printf("No\n");
else if(sum!=wpl) {
printf("No\n");
}
else {
root=BuildTree(num,Q,F);
flag=TreeTravel(root,0);
if(flag) printf("No\n");
else printf("Yes\n");
Destroy(root);
}
}
//system("pause");
return 0;
}