In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
思路:
1、利用最小堆建立哈夫曼树,算出WPL
2、在建树过程中判断每个编码是否满足无歧义编码(数据都在叶子结点)和最优编码(WPL等于步骤1中所算的)
注意点:
1、最小堆中的数据的类型应该是哈夫曼树结点的类型(方便建立哈夫曼树)
2、分配的空间,不用的随时释放(虽然不释放也能过)
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MINDATA -10001
#define ERROR NULL
// Huffman tree node. HuffmanTree is a pointer to such a node.
struct TreeNode;
typedef struct TreeNode *HuffmanTree;
struct TreeNode{
    // Node weight: Huffman() stores the sum of the children's weights here;
    // Judge() stores 1 in the node that terminates a code and 0 elsewhere.
    int Weight;
    HuffmanTree Left;   // child followed for a '0' bit in Judge()
    HuffmanTree Right;  // child followed for a '1' bit in Judge()
};
// Min-heap of Huffman tree nodes, stored as a 1-based array.
typedef struct HNode *Heap;
typedef Heap MaxHeap;
typedef Heap MinHeap;
// Each heap slot holds a Huffman tree node by value. The struct keyword is
// required: in C the tag TreeNode alone is not a type name.
typedef struct TreeNode ElementType;
struct HNode {
    ElementType *Data;  // element array; index 0 is reserved for a sentinel
    int Size;           // number of elements currently stored
    int Capacity;       // maximum number of elements
};
// Allocates an empty min-heap that can hold up to MaxSize elements.
//
// Slot 0 of Data is a sentinel whose weight is MINDATA, so one extra slot is
// allocated; Insert() relies on it to stop sifting up.
//
// Returns the new heap, or NULL when MaxSize is negative or an allocation
// fails. The caller owns the result and must free both H->Data and H.
MinHeap CreateHeap( int MaxSize ) {
    if ( MaxSize < 0 )
        return NULL;

    MinHeap H = (MinHeap)malloc(sizeof(struct HNode));
    if ( !H )
        return NULL;

    // MaxSize + 1 because index 0 is the sentinel.
    H->Data = (ElementType *)malloc(((size_t)MaxSize + 1) * sizeof(ElementType));
    if ( !H->Data ) {
        free(H);
        return NULL;
    }

    H->Size = 0;
    H->Capacity = MaxSize;
    H->Data[0].Weight = MINDATA;
    H->Data[0].Left = H->Data[0].Right = NULL;
    return H;
}
// Reports whether the heap already stores Capacity elements.
bool IsFull( MinHeap H ){
    if ( H->Size == H->Capacity )
        return true;
    return false;
}
// Copies node *T into min-heap H, ordered by Weight.
// Prints a message and returns false when the heap is full; returns true
// otherwise. H->Data[0] must already hold the sentinel.
bool Insert( MinHeap H, HuffmanTree T ) {
    if( IsFull(H) ){
        printf("最小堆已满");
        return false;
    }

    // Start with a hole at the new last position and move it up while the
    // parent is heavier than the new node. The sentinel at index 0 ends the
    // walk at the root.
    int hole = ++H->Size;
    while( H->Data[hole / 2].Weight > T->Weight ) {
        H->Data[hole] = H->Data[hole / 2];
        hole /= 2;
    }
    H->Data[hole] = *T;
    return true;
}
// Reports whether the heap stores no elements.
bool IsEmpty( MinHeap H ) {
    return !H->Size;
}
// Removes the minimum-weight element from min-heap H and returns a freshly
// allocated copy of it.
//
// Returns ERROR (NULL) after printing a message when the heap is empty, and
// ERROR when the copy cannot be allocated; the heap is left untouched in both
// cases. The caller owns the returned node and must free it.
HuffmanTree DeleteMin( MinHeap H ) {
    // Check for emptiness before allocating so nothing leaks on this path.
    if( IsEmpty(H) ){
        printf("最小堆已空");
        return ERROR;
    }

    HuffmanTree MinItem = (HuffmanTree)malloc(sizeof(struct TreeNode));
    if( !MinItem )
        return ERROR;

    *MinItem = H->Data[1]; // the root holds the minimum

    // Take the last element and sift it down from the root.
    ElementType X = H->Data[H->Size--];
    int Parent, Child;
    for( Parent = 1; 2 * Parent <= H->Size; Parent = Child){
        // Child indexes the lighter of the two children.
        Child = 2 * Parent;
        if( Child != H->Size && H->Data[Child].Weight > H->Data[Child + 1].Weight )
            Child++;
        if( X.Weight <= H->Data[Child].Weight )
            break; // X fits here
        H->Data[Parent] = H->Data[Child]; // move the child up
    }
    H->Data[Parent] = X;
    return MinItem;
}
// Allocates a tree node with the given weight and no children.
// The caller owns the returned node.
HuffmanTree CreateHuffmanNode(int weight){
    HuffmanTree node = (HuffmanTree)malloc(sizeof *node);
    node->Left = NULL;
    node->Right = NULL;
    node->Weight = weight;
    return node;
}
// Builds a Huffman tree from the weights already stored in min-heap H and
// returns its root. H is left empty.
//
// The number of merges is taken from the element count on entry
// (H->Size - 1), so the heap does not have to be filled to Capacity. The
// count is captured once because Size changes during the loop.
//
// Every node of the returned tree is individually heap-allocated by
// DeleteMin(); the caller owns the tree. Returns ERROR (NULL) if the heap is
// empty on entry.
HuffmanTree Huffman( MinHeap H ){
    int Merges = H->Size - 1;
    // Insert() copies the node by value, so a stack scratch node is enough
    // and nothing has to be freed per merge.
    struct TreeNode Parent;

    for( int i = 0; i < Merges; i++ ) {
        Parent.Left = DeleteMin( H );
        Parent.Right = DeleteMin( H );
        Parent.Weight = Parent.Left->Weight + Parent.Right->Weight;
        Insert( H, &Parent );
    }

    // The last remaining element is the root; DeleteMin() hands back a heap
    // copy whose child links point at the nodes detached above.
    return DeleteMin( H );
}
// Returns the weighted path length of the tree rooted at T: the sum over all
// leaves of (leaf weight * leaf depth), where T itself is at depth Depth.
// T must be non-NULL, and every internal node must have both children.
int WPL( HuffmanTree T, int Depth ) {
    if( T->Left || T->Right )
        return WPL(T->Left, Depth + 1) + WPL(T->Right, Depth + 1);
    return Depth * T->Weight;
}
MinHeap ReadData ( int n , int *f) {
MinHeap H = CreateHeap(n);
HuffmanTree T = (HuffmanTree)malloc( sizeof(struct TreeNode) );
int data, i = 0;
while ( n-- ) {
getchar(); //第一次接收第一行的换行符,之后接收c[i]
getchar(); //接收空格
scanf("%d", &data);
f[i++] = data;
T->Weight = data;
T->Left = T->Right = NULL;
Insert( H, T );
}
free(T);
return H;
}
// Debug helper: prints the weights of the tree in in-order (left, node,
// right), each followed by a space.
void traverseHuffman ( HuffmanTree T ) {
    if( !T )
        return;
    traverseHuffman( T->Left );
    printf("%d ", T->Weight);
    traverseHuffman( T->Right );
}
// Debug helper: prints the weights stored in the heap array in index order
// (1..Size), each followed by a space.
void traverseHeap ( MinHeap H ) {
    int pos = 1;
    while ( pos <= H->Size ) {
        printf("%d ", H->Data[pos].Weight);
        pos++;
    }
}
// Frees every node of the tree rooted at T (children first, then the node).
// A NULL tree is accepted and ignored.
void DestroyTree ( HuffmanTree T ) {
    if ( !T )
        return;
    DestroyTree( T->Left );
    DestroyTree( T->Right );
    free(T);
}
// Adds one code (a string of Len characters; '1' goes right, anything else
// goes left) to the code trie rooted at Root.
//
// Nodes that terminate a code carry Weight 1, inner nodes carry Weight 0.
// Returns false when the code conflicts with one already in the trie: an
// earlier code is a prefix of it, it is a prefix of an earlier code, or it is
// a duplicate. Nodes created before a conflict is found stay in the trie.
static bool InsertCode( HuffmanTree Root, const char *Code, int Len ) {
    HuffmanTree Cur = Root;
    for ( int k = 0; k < Len; k++ ) {
        HuffmanTree *Next = ( Code[k] == '1' ) ? &Cur->Right : &Cur->Left;
        if ( k == Len - 1 ) {
            // Last bit: the slot must be free, otherwise this code is a
            // duplicate or a prefix of an earlier code.
            if ( *Next )
                return false;
            *Next = CreateHuffmanNode(1);
        }
        else if ( !*Next ) {
            *Next = CreateHuffmanNode(0);
        }
        else if ( (*Next)->Weight == 1 ) {
            // An earlier code ends here, so it is a prefix of this one.
            return false;
        }
        Cur = *Next;
    }
    return true;
}

// Reads one student submission (N lines of "character code") from stdin and
// decides whether it is a correct optimal prefix code.
//
// N        number of characters in the submission
// f        frequencies; f[i] is applied to the i-th line read, so the
//          submission is assumed to list characters in the same order as the
//          frequency line (the character read is not checked)
// CodeLen  optimal weighted path length to compare against
//
// Returns true only if no code is longer than N - 1 bits, no code is a prefix
// of (or equal to) another, and sum(len(code[i]) * f[i]) equals CodeLen.
// All N lines are consumed even after the submission is known to be wrong, so
// the next submission starts at the right place. Reading stops early only if
// the input ends or is malformed, in which case the result is false.
bool Judge ( int N, int *f, int CodeLen ) {
    HuffmanTree Root = CreateHuffmanNode(0);
    char code[64];
    bool valid = true;
    int wpl = 0;

    for ( int i = 0; i < N; i++ ) {
        char symbol;
        // The field width keeps the read inside code[]; the leading blank
        // skips the newline left by the previous line.
        if ( scanf(" %c %63s", &symbol, code) != 2 ) {
            valid = false;
            break;
        }
        int len = (int)strlen( code );
        wpl += len * f[i]; // weighted path length of this submission
        if ( !valid )
            continue;
        // A code longer than N - 1 bits cannot belong to an optimal code.
        if ( len > N - 1 || !InsertCode( Root, code, len ) )
            valid = false;
    }

    DestroyTree( Root ); // release the trie built for this submission
    return valid && wpl == CodeLen;
}
int main(){
int N, M, CodeLen, *f;
scanf("%d", &N);
f = (int*)malloc( N * sizeof(int) );
//建立堆和哈夫曼树
MinHeap H;
H = ReadData( N, f );
HuffmanTree T = Huffman( H );
CodeLen = WPL( T, 0 );
//判断
scanf("%d", &M);
getchar();
while ( M-- ) {
if ( Judge( N, f, CodeLen ) ) {
printf("Yes\n");
}
else {
printf("No\n");
}
}
//traverseHeap(H);
//traverseHuffman(T);
//printf("%d", CodeLen);
system("pause");
return 0;
}