In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
(有人(我)纠结测试数据的字符的顺序会不会变。经检验,测试数据里都是按顺序来的。解决这个不按顺序的问题也很简单,frequencies数组开到300,用字符的ascii码作为key(下标),存值。)
判断两件事:是否最优(总编码长度等于最小代价);是否为合法的前缀编码(即,任何一个编码都不是另一个编码的前缀)
根据贪心算法(造一个小顶堆)算出最优的数值min cost,然后再拿每组数据算一个current cost,相等即为最优。
判断编码合法需要建树,然后在树上真实的走一遍。路过叶子节点或者一路上都是走已经开好的路,则代表了蕴含前缀关系,即不合法。否则,合法。
当既最优且合法,输出Yes。
# include <stdio.h>
# include <stdlib.h>
# include <string.h>
/* Marking of a slot in the code trie while one submission is being checked. */
typedef enum {
    isLeaf,      /* a code word ends exactly at this node */
    notLeaf,     /* node was created or passed through while walking a code word */
    undiscover   /* state every slot is reset to before a submission is read */
} Vstatus;
/*
 * One node of the code trie. Children are referenced by their index in the
 * owning tree's node array rather than by pointer; -1 means "no child".
 */
struct Node {
    Vstatus status;  /* how this node has been used so far */
    int left;        /* index of the left child, or -1 if absent */
    int right;       /* index of the right child, or -1 if absent */
};
/*
 * Fixed-capacity binary min-heap of ints.
 *
 * Values live in heapList[1..n] (slot 0 is unused) so that the parent of
 * slot i is i / 2 and its children are 2 * i and 2 * i + 1.
 *
 * The storage is embedded in the object instead of being allocated with
 * new[]: nothing is leaked, and copying a heap yields an independent heap
 * rather than two objects sharing one buffer.
 */
struct MinHeap {
    enum { CAPACITY = 70 };     /* number of slots, including unused slot 0 */

    int n;                      /* number of values currently stored */
    int heapList[CAPACITY];     /* heap contents, 1-based */

    MinHeap() : n(0) {}

    /* Number of values currently stored. */
    int size() const { return n; }

    /*
     * Adds val to the heap. If the heap already holds CAPACITY - 1 values
     * the call is ignored instead of writing past the end of the storage.
     */
    void insert(int val)
    {
        if (n >= CAPACITY - 1) return;

        /* Sift up: move larger ancestors down until val fits in the hole. */
        int hole = ++n;
        while (hole > 1 && val < heapList[hole / 2])
        {
            heapList[hole] = heapList[hole / 2];
            hole /= 2;
        }
        heapList[hole] = val;
    }

    /*
     * Removes and returns the smallest value. Calling it on an empty heap
     * leaves the heap untouched and returns 0.
     */
    int remove()
    {
        if (n <= 0) return 0;

        int smallest = heapList[1];
        int last = heapList[n--];

        /* Sift down: re-seat the former last element starting at the root. */
        int hole = 1;
        while (hole * 2 <= n)
        {
            int child = hole * 2;
            if (child + 1 <= n && heapList[child + 1] < heapList[child]) ++child;
            if (!(heapList[child] < last)) break;
            heapList[hole] = heapList[child];
            hole = child;
        }
        heapList[hole] = last;
        return smallest;
    }
};
struct Tree {
int n;
Node * treeList;
int root;
Tree():n(1)
{
treeList = new Node[1010];
root = 0;
}
void clear()
{
n = 1;
for (int i = 0; i <= 1000; ++i)
{
treeList[i].left = treeList[i].right = -1;
treeList[i].status = undiscover;
}
}
bool huffman(int N, int min_cost, int * f)
{
bool ans = true;
int current_cost = 0;
for (int j=0; jleft == -1)
{
p->left = n;
p = &treeList[n];
p->status = notLeaf;
n++;
}
else
{
p = &treeList[p->left];
if (p->status == isLeaf) { ans = false; break; }
p->status = notLeaf;
}
}
else
{
if (p->right == -1)
{
p->right = n;
p = &treeList[n];
p->status = notLeaf;
n++;
}
else
{
p = &treeList[p->right];
if (p->status == isLeaf) { ans = false; break; }
p->status = notLeaf;
}
}
}
if (p->left != -1 || p->right != -1) { ans = false; }
p->status = isLeaf;
current_cost += strlen(code) * f[j] ;
}
return ans && current_cost == min_cost;
}
};
/*
 * Reads the symbol frequencies, computes the optimal total encoded length,
 * then prints "Yes" or "No" for each student submission.
 *
 * The optimal length is obtained greedily: repeatedly merge the two smallest
 * weights; every merge adds one bit to each symbol below it, so the sum of
 * all merged weights is the minimum cost.
 *
 * Malformed or out-of-range input ends the program without further output.
 */
int main(void)
{
    MinHeap h;
    int n;
    int frequencies[70];

    /* The statement guarantees 2 <= N <= 63; reject anything that would not
       fit the fixed-size buffers. */
    if (scanf("%d", &n) != 1 || n < 1 || n > 63) return 0;

    for (int i = 0; i < n; ++i)
    {
        char symbol;    /* symbols are matched to codes by position only */
        if (scanf(" %c %d", &symbol, &frequencies[i]) != 2) return 0;
        h.insert(frequencies[i]);
    }

    int mincost = 0;
    while (h.size() > 1)
    {
        int smallest = h.remove();
        int second = h.remove();
        int tmp = smallest + second;
        mincost += tmp;
        h.insert(tmp);
    }

    Tree t;
    int k;
    if (scanf("%d", &k) != 1) return 0;
    while (k-- > 0)
    {
        t.clear();
        if (t.huffman(n, mincost, frequencies)) printf("Yes\n");
        else printf("No\n");
    }
    return 0;
}