In 1953, David A. Huffman published his paper “A Method for the Construction of Minimum-Redundancy Codes”, and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string “aaaxuaxz”, we can observe that the frequencies of the characters ‘a’, ‘x’, ‘u’ and ‘z’ are 4, 2, 1 and 1, respectively. We may either encode the symbols as {‘a’=0, ‘x’=10, ‘u’=110, ‘z’=111}, or in another way as {‘a’=1, ‘x’=01, ‘u’=001, ‘z’=000}, both compress the string into 14 bits. Another set of code can be given as {‘a’=0, ‘x’=11, ‘u’=100, ‘z’=101}, but {‘a’=0, ‘x’=01, ‘u’=011, ‘z’=001} is NOT correct since “aaaxuaxz” and “aazuaxax” can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {‘0’ - ‘9’, ‘a’ - ‘z’, ‘A’ - ‘Z’, ‘_’}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 ‘0’s and ‘1’s.
For each test case, print in each line either “Yes” if the student’s submission is correct, or “No” if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
本题分两步:第一步判断提交编码的总权重是否等于最低总权重;第二步判断提交的编码是否为前缀码,即任何一个字符的编码都不是另一个字符编码的前缀,否则解码会产生歧义
为了求出最低总权重,我们需要构造一棵Huffman树,并据此计算其带权路径长度
// Huffman tree node. `Huffman` is the pointer alias the rest of the file uses
// to pass nodes around.
struct HuffNode;
using Huffman = HuffNode*;

struct HuffNode
{
    int weight;     // frequency of a leaf, or the sum of both children for a merged node
    Huffman left;   // left child; null for a leaf
    Huffman right;  // right child; null for a leaf
};
而为了构造一棵Huffman树,我们还需要一个最小堆
// Binary min-heap of Huffman node pointers. `Heap` is the pointer alias used
// by every heap routine in this file.
struct HeapNode;
using Heap = HeapNode*;

struct HeapNode
{
    Huffman* elements;  // slot 0 holds a sentinel node; heap entries live in 1..size
    int size;           // number of entries currently stored
    int capacity;       // limit recorded by CreateHeap (2 * n - 1)
};
堆的操作定义如下
// Allocates an empty min-heap sized for building a Huffman tree over n symbols.
//
// The element array has 2 * n slots: slot 0 holds a sentinel node of weight 0
// and slots 1..capacity (capacity == 2 * n - 1) are available for heap entries.
// For n <= 0 a single slot is still allocated so that the sentinel write stays
// inside the array (capacity is then 0).
//
// Returns the new heap; the caller owns it. Allocation failure is not checked,
// matching the rest of this file.
Heap CreateHeap(int n)
{
    const int slots = n > 0 ? 2 * n : 1;

    Heap H = static_cast<Heap>(malloc(sizeof(HeapNode)));
    H->elements = static_cast<Huffman*>(malloc(slots * sizeof(Huffman)));
    H->capacity = slots - 1;
    H->size = 0;

    // The sentinel is a Huffman node, so it must be sized as a HuffNode.
    // Sizing it as a HeapNode under-allocates wherever HuffNode is the larger
    // struct, and the writes to left/right below then run past the allocation.
    H->elements[0] = static_cast<Huffman>(malloc(sizeof(HuffNode)));
    H->elements[0]->weight = 0;
    H->elements[0]->left = nullptr;
    H->elements[0]->right = nullptr;
    return H;
}
// Sifts the entry at index `node` down until neither child has a smaller
// weight, restoring min-heap order for the subtree rooted at `node`.
// Entries are 1-based: the children of index i are 2 * i and 2 * i + 1.
void PreDown(Heap H, int node)
{
    Huffman moving = H->elements[node];
    int hole = node;

    for (;;)
    {
        int smallest = hole * 2;
        if (smallest > H->size)
            break;  // the hole has no children

        // Prefer the right child only when it exists and is strictly lighter.
        if (smallest < H->size &&
            H->elements[smallest + 1]->weight < H->elements[smallest]->weight)
            ++smallest;

        if (H->elements[smallest]->weight >= moving->weight)
            break;  // heap order already holds at this position

        H->elements[hole] = H->elements[smallest];
        hole = smallest;
    }

    H->elements[hole] = moving;
}
// Turns the unordered entries in slots 1..size into a min-heap by sifting
// down every index from size / 2 back to 1.
void Build(Heap H)
{
    int parent = H->size / 2;
    while (parent >= 1)
    {
        PreDown(H, parent);
        --parent;
    }
}
// Removes and returns the entry with the smallest weight, or a null pointer
// when the heap is empty.
Huffman Delet(Heap H)
{
    if (H->size == 0)
        return nullptr;

    Huffman smallest = H->elements[1];

    // Move the last entry to the root, shrink the heap, then sift it down.
    H->elements[1] = H->elements[H->size];
    H->size -= 1;
    PreDown(H, 1);

    return smallest;
}
// Adds `elem` to the min-heap and sifts it up to its ordered position.
//
// The climb is bounded explicitly at the root (index 1), so termination does
// not depend on the weight stored in the slot-0 sentinel. Without that bound
// an element lighter than the sentinel would reach index 0 and loop forever.
//
// The caller must ensure the heap has room for one more entry; capacity is
// not checked here.
void Insert(Heap H, Huffman elem)
{
    int node = ++H->size;
    while (node > 1 && elem->weight < H->elements[node / 2]->weight)
    {
        // Pull the heavier parent down into the hole and continue from its slot.
        H->elements[node] = H->elements[node / 2];
        node /= 2;
    }
    H->elements[node] = elem;
}
对Huffman树的操作定义如下
// Builds a Huffman tree from a heap holding n leaf nodes and returns its root.
// Performs n - 1 merges: each one removes the two lightest nodes, joins them
// under a new parent whose weight is their sum, and puts the parent back.
Huffman CreateHuffman(Heap H,int n)
{
    for (int merges = n - 1; merges > 0; --merges)
    {
        Huffman parent = static_cast<Huffman>(malloc(sizeof(HuffNode)));
        parent->left = Delet(H);
        parent->right = Delet(H);
        parent->weight = parent->left->weight + parent->right->weight;
        Insert(H, parent);
    }

    // Whatever is left at the top of the heap is the root of the tree.
    return Delet(H);
}
// Returns the weighted path length of the tree rooted at T: the sum over all
// leaves of (leaf weight * leaf depth), with the root at depth 0.
// An empty tree (null T) yields 0.
//
// The tree is walked breadth-first. Each node's depth travels in a second
// queue kept in lock-step with the node queue, so there is no fixed limit on
// the number of nodes.
int ComputeTotalWeight(Huffman T)
{
    if (T == nullptr)
        return 0;

    int totalweight = 0;
    std::queue<Huffman> pending;
    std::queue<int> depths;  // depths.front() is always the depth of pending.front()
    pending.push(T);
    depths.push(0);

    while (!pending.empty())
    {
        Huffman current = pending.front();
        pending.pop();
        const int depth = depths.front();
        depths.pop();

        // Only leaves carry a symbol, so only leaves contribute to the cost.
        if (current->left == nullptr && current->right == nullptr)
            totalweight += current->weight * depth;

        if (current->left)
        {
            pending.push(current->left);
            depths.push(depth + 1);
        }
        if (current->right)
        {
            pending.push(current->right);
            depths.push(depth + 1);
        }
    }
    return totalweight;
}
主函数如下
int main()
{
int n;
cin >> n;
map<char, int> mweight;
char code;
int weight;
for (int i = 0; i < n; ++i)
{
cin >> code >> weight;
mweight[code] = weight;
}
Heap H;
H = CreateHeap(n);
Huffman T;
for (auto it = mweight.begin(); it != mweight.end(); it++)
{
T = (Huffman)malloc(sizeof(HuffNode));
T->left = NULL;
T->right = NULL;
T->weight = it->second;
H->elements[++H->size] = T;
}
Build(H);
T = CreateHuffman(H, n); //利用堆生成了一颗Huffman树
int totalweight;
totalweight = ComputeTotalWeight(T); //计算最小总权重
int jn, testtotalweight;
string decode;
cin >> jn;
map<char, string> mdecode;
for (int i = 0; i < jn; i++)//依次判断
{
for (int i = 0; i < n; ++i)
{
cin >> code >> decode;
mdecode[code] = decode;
}
testtotalweight = 0;
for (auto it = mdecode.begin(); it != mdecode.end(); it++)
testtotalweight = testtotalweight + mweight[it->first] * it->second.length();
if (testtotalweight > totalweight)//总权重大于最小总权重
cout << "No" << endl;
else
{
bool pre = false;//判断是否有前缀编码
for (auto it = mdecode.begin(); it != (mdecode.end()); it++)
{
for (auto it2 = it; it2 != mdecode.end(); it2++)
{
if (it2 == it)
continue;
if (it->second == it2->second)
{
pre = true;
break;
}
else if (it->second.length() < it2->second.length())
{
if (it->second == it2->second.substr(0, it->second.length()))
{
pre = true;
break;
}
}
else if (it->second.length() > it2->second.length())
{
if (it2->second == it->second.substr(0, it2->second.length()))
{
pre = true;
break;
}
}
}
if (pre)
break;
}
if (pre)
cout << "No" << endl;
else
cout << "Yes" << endl;
}
}
return 0;
}