题目链接:https://pintia.cn/problem-sets/900290821590183936/problems/914044227287445504
本小白刚刚起步,最近在刷浙江大学PAT上的题目,做到了哈夫曼树的这一题,绞尽脑汁,网上百度的C代码(可能有的C++可以实现,但是我还不会C++)拉到PTA上去跑也没有能实现的,于是下决心一定要自己做出来,把每个细节都考虑到,一步步实现终于做出来了,每个测试点都正确,成就感满满的,一激动就开了个CSDN账号来分享一下C语言实现的Huffman Codes,仅供像我这样的小白参考一下。
原题:
05-树9 Huffman Codes(30 分)
In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
我的原代码:
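// Note on building the tree: whenever a node moves between H->Elements[] and a
// HuffmanTree pointer, copy the whole struct (H->Elements[i] = *T), not just the
// weight (H->Elements[i].weight = T->weight). Copying only the weight drops the
// left/right child pointers, which disconnects the tree and leads to a
// segmentation fault.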
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * One node of the Huffman tree.
 *
 * weight : the value the heap orders nodes by.
 * left   : left child, or NULL.
 * right  : right child, or NULL.
 *
 * A node whose two child pointers are both NULL is treated as a leaf.
 */
struct TreeNode {
    int weight;
    struct TreeNode *left;
    struct TreeNode *right;
};

/* Pointer-to-node alias used throughout the file. */
typedef struct TreeNode *HuffmanTree;
/*
 * Array-backed binary min-heap of tree nodes.
 *
 * Elements : array of nodes stored by value (not an array of pointers), so
 *            every heap move copies a node together with its child pointers.
 * Size     : number of nodes currently stored.
 * Capacity : maximum number of nodes the array was sized for.
 */
struct HeapStruct {
    struct TreeNode *Elements;
    int Size;
    int Capacity;
};

/* Pointer-to-heap alias used throughout the file. */
typedef struct HeapStruct *MinHeap;
MinHeap MinHeap_Create(int Maxsize) //construct a MinHeap
{
MinHeap H=(MinHeap)malloc(sizeof(struct HeapStruct));
H->Elements=(HuffmanTree)malloc((Maxsize+1)*sizeof(struct TreeNode)); //one more space for Elements[0]
H->Size=0;
H->Capacity=Maxsize;
H->Elements[0].weight=-1; //哨兵
return H;
}
/*
 * Insert a copy of node *T into min-heap H.
 *
 * The node is copied by value, child pointers included, so the caller keeps
 * ownership of T itself and may reuse or release it afterwards.
 *
 * If the heap is already at capacity nothing is inserted and a diagnostic is
 * written to stderr (not stdout, so it cannot get mixed into the program's
 * answer output).
 */
void Insert(MinHeap H, HuffmanTree T)
{
    if (H->Size == H->Capacity) {
        fputs("最小堆已满", stderr);
        return;
    }

    /*
     * Percolate up: open a hole at the new last position and slide parents
     * down into it while they are heavier than the new node. Elements[0] is
     * expected to hold a sentinel lighter than any real weight, which stops
     * the loop once the hole reaches the root (i == 1, i / 2 == 0).
     */
    int i = ++H->Size;
    for (; H->Elements[i / 2].weight > T->weight; i /= 2)
        H->Elements[i] = H->Elements[i / 2];

    /* Whole-struct copy keeps the left/right links of the inserted node. */
    H->Elements[i] = *T;
}
MinHeap BuildMinHeap(int Weight[],int Maxsize,char CH[])
{
int i;
MinHeap H=MinHeap_Create(Maxsize);
HuffmanTree Temp=(HuffmanTree)malloc(sizeof(struct TreeNode));
for(i=0;iweight=Weight[CH[i]];
Temp->left=NULL;
Temp->right=NULL;
Insert(H,Temp);
}
free(Temp);
return H;
}
/*
 * Remove the lightest node from min-heap H.
 *
 * Returns a newly malloc'd copy of that node (child pointers preserved); the
 * caller owns the returned node. Returns NULL, leaving the heap untouched,
 * if the heap is empty (a diagnostic is written to stderr) or if the copy
 * cannot be allocated.
 */
HuffmanTree DeleteMin(MinHeap H)
{
    if (H->Size == 0) {
        fputs("最小堆已空", stderr);
        return NULL;
    }

    HuffmanTree MinItem = malloc(sizeof *MinItem);
    if (MinItem == NULL)
        return NULL;
    *MinItem = H->Elements[1];

    /* Take the last item out (shrinking the heap) and re-seat it from the root. */
    struct TreeNode last = H->Elements[H->Size--];

    int Parent;
    int Child;
    for (Parent = 1; Parent * 2 <= H->Size; Parent = Child) {
        Child = Parent * 2;
        /* Pick the lighter of the two children when a right child exists. */
        if (Child != H->Size &&
            H->Elements[Child].weight > H->Elements[Child + 1].weight)
            Child++;
        /* `last` is no heavier than the lighter child: the hole is its place. */
        if (last.weight <= H->Elements[Child].weight)
            break;
        H->Elements[Parent] = H->Elements[Child];
    }
    H->Elements[Parent] = last;

    return MinItem;
}
/*
 * Build a Huffman tree from the nodes currently stored in min-heap H.
 *
 * Repeatedly removes the two lightest nodes, joins them under a new parent
 * whose weight is their sum, and puts the parent back; Size nodes need
 * Size - 1 such merges. The heap is consumed in the process.
 *
 * Returns the root of the tree (a malloc'd node owned by the caller), or
 * NULL if H is NULL or a node could not be obtained from the heap.
 */
HuffmanTree Huffman(MinHeap H)
{
    if (H == NULL)
        return NULL;

    /* Fix the merge count up front: H->Size changes inside the loop. */
    int merges = H->Size - 1;

    for (int i = 0; i < merges; i++) {
        /*
         * Insert() stores the parent by value, so a stack node is enough;
         * only the children handed back by DeleteMin() live on the heap.
         */
        struct TreeNode parent;
        parent.left = DeleteMin(H);
        parent.right = DeleteMin(H);
        if (parent.left == NULL || parent.right == NULL) {
            free(parent.left);
            free(parent.right);
            return NULL;
        }
        parent.weight = parent.left->weight + parent.right->weight;
        Insert(H, &parent);
    }

    /* The single remaining node is the root. */
    return DeleteMin(H);
}
/*
 * Weighted path length of the subtree rooted at T, where T itself sits at
 * depth Depth: the sum over all leaves of (leaf weight * leaf depth).
 *
 * T must be non-NULL, and every non-leaf node must have both children.
 */
int Calwpl(HuffmanTree T, int Depth)
{
    int is_leaf = (T->left == NULL) && (T->right == NULL);
    if (is_leaf)
        return Depth * T->weight;

    int left_cost = Calwpl(T->left, Depth + 1);
    int right_cost = Calwpl(T->right, Depth + 1);
    return left_cost + right_cost;
}
/*
 * Report whether either string is a prefix of the other.
 *
 * a, b : NUL-terminated strings.
 *
 * Returns 1 if a is a prefix of b or b is a prefix of a (equal strings
 * included), 0 otherwise. The check is symmetric in its arguments.
 */
int isPreFix(char a[], char b[])
{
    /*
     * Advance while the characters match. The loop must test the characters,
     * not the pointers: stopping at a's terminator keeps equal strings from
     * being scanned past their ends (if *a is non-NUL and equals *b, then *b
     * is non-NUL too, so one terminator test covers both).
     */
    while (*a != '\0' && *a == *b) {
        a++;
        b++;
    }
    /* One string ran out before any mismatch: it is a prefix of the other. */
    return (*a == '\0' || *b == '\0') ? 1 : 0;
}
/*
 * Report whether any code in s[0..n-1] is a prefix of another.
 *
 * s : n NUL-terminated code strings, one per row of 200 chars.
 *
 * Returns 1 if some pair conflicts, 0 if the set is prefix-free.
 */
int HasPreFix(char s[][200], int n)
{
    for (int i = 0; i < n; i++) {
        /*
         * isPreFix() checks both directions, so each unordered pair needs
         * testing only once; starting at i + 1 also avoids comparing a code
         * with itself.
         */
        for (int j = i + 1; j < n; j++) {
            if (isPreFix(s[i], s[j]))
                return 1;
        }
    }
    return 0;
}
/*
void test(HuffmanTree T)
{
if(T->left!=NULL&&T->right!=NULL)
{
printf("*%d %d*",T->left->weight,T->right->weight);
test(T->left);
test(T->right);
}
else
printf("GG");
return;
}
*/
/*
 * Read the symbol frequencies, compute the optimal weighted path length via
 * a Huffman tree, then judge each submission: print "Yes" when its total
 * encoded length equals the optimum and no code is a prefix of another,
 * otherwise "No".
 *
 * Returns 0 on success, 1 on malformed input or allocation failure.
 */
int main(void)
{
    /* CODE_BUF must match the row width in HasPreFix's parameter type. */
    enum { MAX_SYMBOLS = 100, CODE_BUF = 200 };

    int n;
    int freq[256] = {0};
    char CH[MAX_SYMBOLS];

    if (scanf("%d", &n) != 1 || n < 1 || n > MAX_SYMBOLS)
        return 1;

    for (int i = 0; i < n; i++) {
        int f;
        /* Leading space skips the whitespace left over from the previous read. */
        if (scanf(" %c %d", &CH[i], &f) != 2)
            return 1;
        freq[(unsigned char)CH[i]] = f;
    }

    MinHeap H = BuildMinHeap(freq, n, CH);
    if (H == NULL)
        return 1;
    HuffmanTree T = Huffman(H);
    if (T == NULL)
        return 1;
    int wpl = Calwpl(T, 0);

    int k;
    if (scanf("%d", &k) != 1)
        return 1;

    while (k-- > 0) {
        char str[MAX_SYMBOLS][CODE_BUF];
        int thiswpl = 0;

        for (int i = 0; i < n; i++) {
            char ch;
            /* Width 199 = CODE_BUF - 1 leaves room for the terminator. */
            if (scanf(" %c %199s", &ch, str[i]) != 2)
                return 1;
            thiswpl += freq[(unsigned char)ch] * (int)strlen(str[i]);
        }

        if (wpl == thiswpl && !HasPreFix(str, n))
            printf("Yes\n");
        else
            printf("No\n");
    }
    return 0;
}