In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Input Specification:
Each input file contains one test case. For each case, the first line gives an integer N (2 <= N <= 63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (<=1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a string of '0's and '1's.
Output Specification:
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Sample Input:
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Sample Output:
Yes
Yes
No
No
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Sift the element stored at index `parent` downwards until the min-heap
// order holds for the subtree rooted there.
// Layout: heap[0] holds the element count, elements live in heap[1..heap[0]].
void percolateDown(int *heap, int parent) {
    const int moving = heap[parent];   // value being pushed down
    int hole = parent;                 // slot currently waiting for a value
    for (;;) {
        int next = 2 * hole;           // left child of the hole
        // Prefer the right child when it exists and is strictly smaller.
        if (next + 1 <= heap[0] && heap[next + 1] < heap[next])
            ++next;
        // Stop once there is no child, or the smaller child is not below `moving`.
        if (next > heap[0] || heap[next] >= moving)
            break;
        heap[hole] = heap[next];       // pull the smaller child up
        hole = next;
    }
    heap[hole] = moving;
}
// Turn heap[1..heap[0]] (already a complete binary tree by layout) into a
// min-heap by sifting down every non-leaf node, from the last one to the root.
void buildMinHeap(int *heap) {
    int node = heap[0] / 2;            // last node that has at least one child
    while (node > 0) {
        percolateDown(heap, node);
        --node;
    }
}
// Remove and return the smallest element (the root) of the min-heap.
// The last element is moved to the root, the count in heap[0] is decremented,
// and the new root is sifted down to restore the heap order.
// The heap must be non-empty.
int deleteMin(int *heap) {
    const int smallest = heap[1];
    const int last = heap[0];          // index of the last element
    heap[1] = heap[last];              // last element takes the root slot
    heap[0] = last - 1;                // one element fewer
    percolateDown(heap, 1);            // only the root may violate the order
    return smallest;
}
// Insert `weight` into the min-heap.
// Layout: heap[0] holds the element count, elements live in heap[1..heap[0]].
// The caller must guarantee that the array has room for one more element.
//
// Implemented as a direct sift-up: starting from the new last slot, every
// ancestor larger than `weight` is moved one level down, and `weight` is
// stored in the slot that is left free.
void insertMinHeap(int *heap, int weight) {
    int hole = ++heap[0];              // new slot at the end of the heap
    while (hole > 1 && heap[hole / 2] > weight) {
        heap[hole] = heap[hole / 2];   // larger parent moves down
        hole /= 2;
    }
    heap[hole] = weight;
}
// Compute the weighted path length (WPL) of a Huffman tree built from the
// non-zero entries of freq[0..255], without materialising the tree.
//
// Each round removes the two lightest subtrees from a min-heap, merges them
// and re-inserts the merged weight. Merging pushes every leaf of both
// subtrees one level deeper, which adds exactly (weight1 + weight2) to the
// total WPL, so summing the merged weights yields the final WPL.
//
// Returns 0 when fewer than two frequencies are non-zero.
int calWPL(int *freq) {
    enum { SYMBOL_SLOTS = 256 };
    // Slot 0 stores the element count, so the heap needs one extra entry.
    // Sized for every slot that is scanned below, so no input can overflow it.
    int heap[SYMBOL_SLOTS + 1] = {0};
    int size = 0;
    for (int i = 0; i < SYMBOL_SLOTS; ++i) { // collect all non-zero weights
        if (freq[i]) {
            heap[++size] = freq[i];
        }
    }
    heap[0] = size;
    buildMinHeap(heap);

    int wpl = 0;
    for (int i = 1; i < size; ++i) {         // size - 1 merges in total
        int weight1 = deleteMin(heap);
        int weight2 = deleteMin(heap);
        wpl += weight1 + weight2;
        insertMinHeap(heap, weight1 + weight2);
    }
    return wpl;
}
// Return 1 if one of the two strings is a prefix of the other (identical
// strings included), 0 otherwise.
int isPrefix(const char *s1, const char *s2) {
    // Advance over the common leading part. The terminators must be tested
    // explicitly: otherwise two identical strings would compare '\0' == '\0'
    // and keep reading past the end of both buffers.
    while (*s1 && *s2 && *s1 == *s2)
        ++s1, ++s2;
    // If either string ended here, it is entirely contained at the start of
    // the other one.
    if (*s1 == '\0' || *s2 == '\0')
        return 1;
    else
        return 0;
}
// Return 1 if, among the first n strings of s, some string is a prefix of
// another one (i.e. the set is NOT prefix-free); return 0 otherwise.
int hasPrefixCode(char s[][200], int n) {
    int first = 0;
    while (first < n) {
        // Compare with every later string; earlier pairs were already checked.
        int second = first + 1;
        while (second < n) {
            if (isPrefix(s[first], s[second]))
                return 1;
            ++second;
        }
        ++first;
    }
    return 0;
}
// Read the character frequencies, compute the optimal (Huffman) weighted path
// length once, then judge every student submission: a submission is accepted
// when its total encoded length equals the optimal WPL and no code is a prefix
// of another code.
// Returns 0 on success and 1 when the input is malformed.
int main() {
#ifdef LOCAL_TEST
    // Debugging aid only: read from a local file instead of standard input.
    // Must stay disabled for normal runs, which receive the data on stdin.
    if (!freopen("test.txt", "r", stdin))
        return 1;
#endif
    int n;
    // The per-submission arrays below have 256 rows, so n must fit in them.
    if (scanf("%d", &n) != 1 || n < 1 || n > 256)
        return 1;
    int freq[256] = {0};
    for (int i = 0; i < n; ++i) {
        char ch;
        int num;
        // The leading blank skips any whitespace/newline before the character.
        if (scanf(" %c %d", &ch, &num) != 2)
            return 1;
        // Index through unsigned char: plain char may be signed.
        freq[(unsigned char)ch] = num;
    }
    int wpl = calWPL(freq); // optimal WPL obtained by simulating Huffman merging
    int k;                  // number of student submissions
    if (scanf("%d", &k) != 1)
        return 1;
    while (k-- > 0) {
        char ch[256];
        char s[256][200];
        int thisWPL = 0;
        for (int i = 0; i < n; ++i) {
            // Bound the code length to the buffer size (199 chars + terminator).
            if (scanf(" %c %199s", &ch[i], s[i]) != 2)
                return 1;
            // Accumulate frequency * code length for this submission.
            thisWPL += freq[(unsigned char)ch[i]] * (int)strlen(s[i]);
        }
        // Optimal length and prefix-free together mean a valid Huffman code.
        if (thisWPL == wpl && !hasPrefixCode(s, n))
            printf("Yes\n");
        else
            printf("No\n");
    }
    return 0;
}
题目链接:http://www.patest.cn/contests/mooc-ds/04-%E6%A0%916