In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i]
is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i]
is the frequency of c[i]
and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i]
is the i
-th character and code[i]
is an non-empty string of no more than 63 '0's and '1's.
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
依据给出字母的权重进行哈夫曼编码,再判断每个学生对每个字母的编码是否正确
分为两部分
(一)实现哈夫曼编码
(1)建立一个小顶堆,注意小顶堆里放的是指向结构体数组的指针
(2)然后依据每个字母的权重构造小顶堆,构造过程其实是插入,插入过程是上滤
(3)接着每次从小顶堆里弹出最小和次小,注意弹出的过程不止是删除,还要重构小顶堆,重构过程是下滤,构成一棵二叉树,再把这棵二叉树放进小顶堆,如此循环直到小顶堆里只有一个指针,那个指针即是哈夫曼树根节点
(二)检查学生答案
这里要注意学生可以不用哈夫曼编码得到与哈夫曼编码一样的答案
检查分两部分
(1)WPL是否相同
哈夫曼的WPL计算:递归
学生给的答案的WPL的计算:利用权重 * 每个编码长度再求和
(2)是否会出现前缀码
就用学生给的答案建树,如果是0就向左边建树;如果是1就向右边建树,每个字母编码建树完毕将当前current指针指向位置的权重设置为-1,这样在以后建树过程中如果发现下一层的权重是-1,说明之前一个字母的编码是当前字母编码的前缀,就不满足;另外如果建完树发现current不是叶子结点,也不满足
(1)建立小顶堆时的插入操作要注意比较的是weight,不是直接比较data
(2)检查学生输入时,检测的是左/右孩子的weight,不是current的weight
(3)每申请一个新节点,都要将指针初始化为NULL,否则会出现空指针错误
(4)检测学生作业时,如果检查到有编码不对,还要继续往下读取,不然会影响之后的判断,只是可以不用在这轮循环里判断了
/*
建立哈夫曼树:
(1)建立小顶堆,小顶堆按权值存放
(2)由小顶堆构建哈夫曼树
①从小顶堆里弹出最小和次小,组成一棵新的二叉树
②将这棵二叉树重新插入小顶堆
*/
#include
#include
#include
#define MAX_SIZE 1005
#define EMPTYCHARACTER '-'
typedef struct HuffmanNode HuffmanNode;
typedef HuffmanNode* Position;
typedef Position HuffmanTree;
struct HuffmanNode {
char Character;
int Weight;
Position LeftChild;
Position RightChild;
};
/*小顶堆数据结构*/
typedef struct MinHeap MinHeap;
struct MinHeap {
Position Data[MAX_SIZE];
int Size;
};
MinHeap Heap;
/*初始化小顶堆*/
void Init(int nodeNum) {
Heap.Data[0] = (Position)malloc(sizeof(HuffmanNode));
Heap.Data[0]->Character = EMPTYCHARACTER;
Heap.Data[0]->LeftChild = NULL;
Heap.Data[0]->RightChild = NULL;
Heap.Data[0]->Weight = 0;
for(int i = 1; i <= nodeNum; i++) { //小顶堆下标有效数据范围是1~nodeNum,0是哨兵
Heap.Data[i] = NULL;
}
Heap.Size = 0;
}
/*小顶堆插入,插入标准是看权值,插入的节点是指针*/
void Insert(Position newNode) {
int i = ++Heap.Size;
for(; Heap.Data[i/2]->Weight > newNode->Weight; i /= 2) {
Heap.Data[i] = Heap.Data[i/2];
}
Heap.Data[i] = newNode;
return;
}
/*从最小堆中弹出最小的节点,弹出后还要重新调整最小堆
最小堆删除操作:
返回的是存放在下标为1的最小值
然后指向末尾最后一个节点值,并且把堆的大小减1
利用最后一个节点下滤,首先选择左右孩子中较小的
然后将末尾最后一个节点值与较小值比较
如果小,说明找到了合适位置,break出循环,将这个值赋给parent所指位置
如果大,说明还要下滤*/
Position PopMin() {
Position min = Heap.Data[1];
Position x = Heap.Data[Heap.Size--];
int parent = 1;
int child = parent * 2;
for(; parent * 2 <= Heap.Size; parent = child) {
child = parent * 2;
//注意,比较的是权值,不是直接比较,赋值是直接赋值
if(child != Heap.Size && Heap.Data[child]->Weight > Heap.Data[child + 1]->Weight) {
child++;
}
if(Heap.Data[child]->Weight >= x->Weight) {
break;
}
else {
Heap.Data[parent] = Heap.Data[child];
}
}
Heap.Data[parent] = x;
/*自己额外加的,不加也可以,不过debug时好看一点*/
Heap.Data[Heap.Size + 1] = NULL;
return min;
}
/*建立哈夫曼树*/
/*先用另一个全局数组保存每次读入的权值*/
int Weight[MAX_SIZE];
HuffmanTree BuildHuffmanTree(int N) {
/*先搞个小顶堆出来*/
/*一开始问题出在这里,申请一个新节点内存时没有把leftChild和rightChild置为NULL*/
Init(N);
for(int i = 1; i <= N; i++) {
Position newNode = (Position)malloc(sizeof(HuffmanNode));
scanf("%c ", &(newNode->Character));
scanf("%d", &(newNode->Weight));
getchar();
Weight[i] = newNode->Weight;
newNode->LeftChild = NULL;
newNode->RightChild = NULL;
Insert(newNode);
}
/*利用小顶堆建立哈夫曼树
每次从小顶堆中弹出最小值和次小值,然后计算他们的权值组成一棵新的二叉树再插入小顶堆中
最后返回小顶堆下标1的指针所指既是哈夫曼树*/
for(int i = 1; i < N; i++) { //合并只需要N - 1次
HuffmanTree newNode = (HuffmanTree)malloc(sizeof(HuffmanNode));
newNode->Character = EMPTYCHARACTER;
newNode->LeftChild = PopMin();
newNode->RightChild = PopMin();
newNode->Weight = newNode->LeftChild->Weight + newNode->RightChild->Weight;
Insert(newNode);
}
return PopMin();
}
/*删除树*/
void DeleteTree(HuffmanTree root) {
if(root == NULL) {
return;
}
DeleteTree(root->LeftChild);
DeleteTree(root->RightChild);
free(root);
return;
}
int countWPL(HuffmanTree root, int depth) {
if(root->LeftChild == NULL && root->RightChild == NULL) {
return root->Weight * depth;
}
int leftSubtree = countWPL(root->LeftChild, depth + 1);
int rightSubtree = countWPL(root->RightChild, depth + 1);
return leftSubtree + rightSubtree;
}
/*按照学生写的建立二叉树*/
Position CreateNode() {
Position newNode = (Position)malloc(sizeof(HuffmanNode));
newNode->Weight = 0;
newNode->LeftChild = NULL;
newNode->RightChild = NULL;
return newNode;
}
int checkSubmit(Position current, char* code, int stringLen) {
for(int i = 0; i < stringLen; i++) {
if(code[i] == '0') {
if(current->LeftChild == NULL) {
current->LeftChild = CreateNode();
}
if(current->LeftChild->Weight == -1) {
return 0;
}
current = current->LeftChild;
}
else if(code[i] == '1') {
if(current->RightChild == NULL) {
current->RightChild = CreateNode();
}
if(current->RightChild->Weight == -1) {
return 0;
}
current = current->RightChild;
}
}
if(current->LeftChild == NULL && current->RightChild == NULL) {
current->Weight = -1;
return 1;
}
else {
return 0;
}
}
int main() {
int characterNum;
scanf("%d", &characterNum);
getchar();
HuffmanTree tree = BuildHuffmanTree(characterNum);
int WPL = countWPL(tree, 0);
int studentNum;
scanf("%d", &studentNum);
getchar();
for(int i = 1; i <= studentNum; i++) {
char c;
char code[MAX_SIZE];
int flag = 1;
int result = 1;
int codeLen = 0;
HuffmanTree studentTree = CreateNode();
for(int j = 1; j <= characterNum; j++) {
Position current = studentTree;
scanf("%c ", &c);
scanf("%s", code);
getchar();
codeLen += strlen(code) * Weight[j];
/*一开始设计判断正误的有问题,虽然只要找到一个字符编码有误就行了,但依然
要继续往下读,不然会影响之后的判断,只是不用进行检测了*/
if(flag == 1) {
result = checkSubmit(current, code, strlen(code));
if(result == 0) flag = 0;
}
}
if(result == 1 && (codeLen == WPL)) {
printf("Yes\n");
}
else {
printf("No\n");
}
DeleteTree(studentTree);
}
DeleteTree(tree);
return 0;
}