In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the Ndistinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i]
is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i]
is the frequency of c[i]
and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i]
is the i
-th character and code[i]
is an non-empty string of no more than 63 '0's and '1's.
Output Specification:
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
主要思路:
1、通过小根堆构建Huffman树
2、通过Huffman树计算最小编码长度
3、判断输入的编码是否符合要求(1、编码长度与Huffman编码长度相同 2、前缀编码)
#include
#include
using namespace std;
#define MaxNum 64
struct TreeNode//树的结点
{
int Weight=0;
TreeNode *Left=nullptr;
TreeNode *Right=nullptr;
};
struct HeapNode//堆
{
TreeNode Data[MaxNum];
int Size=0;
};
HeapNode *CreateHeap(int N)//创建一个新的小根堆
{
HeapNode *H=new(HeapNode);
H->Data[0].Weight=-1;
return H;
}
TreeNode *DeleteMin(HeapNode *H)//从堆中删除一个结点
{
int Parent=0,Child=0;
TreeNode temp;
TreeNode *MinItem=new(TreeNode);
*MinItem=H->Data[1];
temp=(H->Data[(H->Size)--]);
for (Parent = 1; Parent*2 <= H->Size ; Parent=Child)//寻找删除结点前堆中最后一个结点在新堆中的插入位置
{
Child=Parent*2;
if ((Child!=H->Size)&&((H->Data[Child].Weight)>(H->Data[Child+1].Weight)))
{
Child++;
}
if ((temp.Weight)<=(H->Data[Child].Weight))
{
break;
}else
{
H->Data[Parent]=H->Data[Child];
}
}
H->Data[Parent]=temp;
return MinItem;
}
void Insert(HeapNode *H,TreeNode *item)//插入新结点到堆中
{
int i=0;
i=++(H->Size);
for (;H->Data[i/2].Weight>item->Weight; i/=2)
{
H->Data[i]=H->Data[i/2];
}
H->Data[i]=*item;
}
HeapNode *ReadData(int N,HeapNode *H,int A[])//读取各个节点的权值输入数据
{
char s='\0';
int value=0;
for (int i=0; i>s;
cin>>value;
A[i]=value;
TreeNode *T=new(TreeNode);
T->Weight=value;
Insert(H, T);
}
return H;
}
TreeNode *Huffman(HeapNode *H)//构建Huffman树
{
TreeNode *T=nullptr;
int num=H->Size;
for (int i=0; iLeft=DeleteMin(H);
T->Right=DeleteMin(H);
T->Weight=T->Left->Weight+T->Right->Weight;
Insert(H, T);
}
T=DeleteMin(H);
return T;
}
int WPL(TreeNode *T,int Depth)//计算Huffman树的编码长度
{
if ((T->Left==nullptr)&&(T->Right==nullptr))
{
return Depth*(T->Weight);
}else
{
return (WPL(T->Left,Depth+1)+WPL(T->Right,Depth+1));
}
}
struct JNode
{
int Flag=0;
JNode *Left=nullptr;
JNode *Right=nullptr;
};
bool Judge(string S,JNode *J)//判断该次编码能否符合前缀编码的要求
{
int i=0;
for (; iLeft==nullptr)
{
JNode *J_1=new(JNode);
J->Left=J_1;
}else
{
if (J->Left->Flag==1)
{
return false;
}
}
J=J->Left;
}else
{
if (J->Right==nullptr)
{
JNode *J_1=new(JNode);
J->Right=J_1;
}else
{
if (J->Right->Flag==1)
{
return false;
}
}
J=J->Right;
}
}
J->Flag=1;
if (J->Left==nullptr&&J->Right==nullptr)
{
return true;
}else
{
return false;
}
}
int main(int argc, char const *argv[])
{
int N=0,n=0;
cin>>N;
HeapNode *H=CreateHeap(N);
int Value[MaxNum]={};
H=ReadData(N,H,Value);
TreeNode *T=Huffman(H);
int CodeLen=WPL(T,0);
cin>>n;
string temp="\0";
char c='\0';
bool result=false;
for (int i=0; i>c>>temp;
count+=temp.length()*Value[k];
if (!flag)
{
result=Judge(temp,J);
if (!result)
{
flag=1;
}
}
}
delete J;
if (result&&(count==CodeLen))//前缀编码且编码长度之和与Huffman编码相同
{
cout<<"Yes"<