In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}, both compress the string into 14 bits. Another set of code can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.
Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:
c[1] f[1] c[2] f[2] ... c[N] f[N]
where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:
c[i] code[i]
where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.
For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.
Note: The optimal solution is not necessarily generated by Huffman algorithm. Any prefix code with code length being optimal is considered correct.
7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11
Yes
Yes
No
No
#include
#include
#include
#include
#include
// Status codes returned by the tree-building helpers in this file.
#define OK 1
#define ERROR 0
// Boolean results returned by the submission checks.
#define TRUE 1
#define FALSE 0
// NOTE(review): INFEASIBLE is not referenced anywhere in the visible code.
#define INFEASIBLE -1
//#define OVERFLOW -2
using namespace std;
// Common return type of the helpers below. It carries a status/boolean code,
// except for TreeWLength, which returns the computed sum through it.
typedef int status;
// One node of the Huffman tree. Nodes live in a single array and refer to
// each other by array index; a link value of 0 means "no such node", which
// is why slot 0 of the array never holds a real node.
struct HTNode
{
    int weight;  // frequency of a leaf, or the combined weight of a merged node
    int parent;  // index of the parent node, 0 while the node is still a root
    int lchild;  // index of the left child, 0 for a leaf
    int rchild;  // index of the right child, 0 for a leaf
};
typedef HTNode *HuffmanTree; // Huffman tree: pointer to the node array
// One submitted binary code word: the NUL-terminated string of '0'/'1'
// characters together with its cached length.
struct HuffmanCode
{
    int length;     // number of characters stored in `code`
    char code[65];  // code text plus terminating NUL
};
// Reads n "<character> <frequency>" pairs from standard input into w[1..n].
status CreatWeight(int *w, int n);
// Allocates HT and builds a Huffman tree over the n weights w[1..n].
status CreatHuffmanTree(HuffmanTree &HT, int n, int * w);
status Select(HuffmanTree &HT, int n, int &s1, int &s2);// selects the two smallest weights among the parentless nodes HT[1..n]
// Returns the weighted path length of the n leaves of HT.
status TreeWLength(HuffmanTree HT, int n);
// Reads one submission (n code lines) from standard input and judges it
// against the reference cost `sum`.
status IsTrue(int sum, int n, HuffmanTree HT);
// Compares the total encoded length of the submitted codes with `sum`.
status isShortLength(int n, HuffmanCode *cd, HuffmanTree HT, int sum);
// Prefix check over the n submitted codes.
status isPreCode(int n, HuffmanCode *cd);
int main()
{
int n;
cin >> n;
//把权值读入数组
int *w = new int[n + 1];
CreatWeight(w, n);
//构建霍夫曼树
HuffmanTree HT;
CreatHuffmanTree(HT, n, w);
//求最短路径,并赋值给sum
int sum;
sum = TreeWLength(HT, n);
//判断各个同学的例子
int m;
cin >> m;
for (int i = 0; i < m; i++)
{
int flag = 0;
flag = IsTrue(sum, n, HT);
if (flag) cout << "Yes" << endl;
else cout << "No" << endl;
}
return 0;
}
// Reads n "<character> <frequency>" pairs from standard input.
//
// The frequency of the i-th pair is stored in w[i] (1-based; w[0] is left
// untouched). The characters are discarded because submissions list the
// symbols in the same order as the frequency line.
//
// Returns OK when all n pairs were read and ERROR otherwise. Entries that
// could not be read are set to 0, so the array never holds indeterminate
// values.
status CreatWeight(int *w, int n)
{
    status result = OK;
    for (int i = 1; i <= n; i++)
    {
        char symbol;
        w[i] = 0;
        if (result == OK && !(cin >> symbol >> w[i]))
        {
            w[i] = 0;
            result = ERROR;
        }
    }
    return result;
}
// Builds a Huffman tree for the n weights w[1..n].
//
// On success HT points to a freshly allocated array of 2n nodes: slot 0 is
// unused, slots 1..n are the leaves in input order and slots n+1..2n-1 are the
// merged nodes, the last of which is the root. Returns OK.
// When n <= 1 nothing is allocated, HT is left unchanged and ERROR is returned.
status CreatHuffmanTree(HuffmanTree &HT, int n, int * w)
{
    if (n <= 1)
        return ERROR;

    const int total = 2 * n - 1;
    HT = new HTNode[total + 1];

    // Leaves take their weight from the input; nodes still to be merged start at 0.
    for (int idx = 1; idx <= total; ++idx)
    {
        HTNode &node = HT[idx];
        node.weight = (idx <= n) ? w[idx] : 0;
        node.parent = 0;
        node.lchild = 0;
        node.rchild = 0;
    }

    // Repeatedly merge the two lightest parentless nodes into a new node.
    for (int idx = n + 1; idx <= total; ++idx)
    {
        int first, second;
        Select(HT, idx - 1, first, second);

        HTNode &merged = HT[idx];
        merged.lchild = first;
        merged.rchild = second;
        merged.weight = HT[first].weight + HT[second].weight;

        HT[first].parent = idx;
        HT[second].parent = idx;
    }
    return OK;
}
// Finds the two lightest nodes among HT[1..n] that have no parent yet.
//
// On return s1 is the index of the lightest such node and s2 the index of the
// second lightest. Ties are broken in favour of the lower index. An index of 0
// means no such node exists.
//
// Returns OK when two nodes were found and ERROR otherwise.
status Select(HuffmanTree &HT, int n, int &s1, int &s2)
{
    // Index 0 is never a real node, so it doubles as the "nothing chosen yet"
    // marker. This avoids both a magic upper bound on the weights and reading
    // whatever value the caller happened to pass in.
    s1 = 0;
    s2 = 0;
    for (int i = 1; i <= n; i++)
    {
        if (HT[i].parent != 0)
            continue;  // already merged into another node

        if (s1 == 0 || HT[i].weight < HT[s1].weight)
        {
            s2 = s1;
            s1 = i;
        }
        else if (s2 == 0 || HT[i].weight < HT[s2].weight)
        {
            s2 = i;
        }
    }
    return (s1 != 0 && s2 != 0) ? OK : ERROR;
}
// Computes the weighted path length of the tree: for each leaf HT[1..n], the
// number of parent links between it and the root, multiplied by its weight,
// summed over all leaves.
status TreeWLength(HuffmanTree HT, int n)
{
    int total = 0;
    for (int leaf = 1; leaf <= n; ++leaf)
    {
        // Walk up to the root, counting the edges on the way.
        int depth = 0;
        for (int node = leaf; HT[node].parent != 0; node = HT[node].parent)
            ++depth;

        total += depth * HT[leaf].weight;
    }
    return total;
}
// Reads one submission (n lines of "<character> <code>") from standard input
// and judges it.
//
// sum - optimal total encoded length to match
// n   - number of symbols
// HT  - Huffman tree whose leaves HT[1..n] hold the symbol weights
//
// Returns TRUE when the submitted codes have total length `sum` and pass the
// prefix check, FALSE otherwise. A code too long for the buffer is consumed in
// full, so the following submission stays aligned, and makes the result FALSE.
status IsTrue(int sum, int n, HuffmanTree HT)
{
    HuffmanCode* cd = new HuffmanCode[n + 1];
    bool truncated = false;

    for (int i = 1; i <= n; i++)
    {
        char symbol;
        cd[i].code[0] = '\0';  // keeps strlen well-defined if the read fails
        cin >> symbol;
        cin.width(sizeof(cd[i].code));  // never write past the buffer
        cin >> cd[i].code;
        cd[i].length = static_cast<int>(strlen(cd[i].code));

        // A completely filled buffer may mean the token was cut short: discard
        // the rest of it up to the next whitespace character.
        if (cd[i].length == static_cast<int>(sizeof(cd[i].code)) - 1)
        {
            char next;
            while (cin.get(next) &&
                   !(next == ' ' || next == '\t' || next == '\n' ||
                     next == '\r' || next == '\v' || next == '\f'))
            {
                truncated = true;
            }
        }
    }

    status flag = FALSE;
    if (!truncated && isShortLength(n, cd, HT, sum) && isPreCode(n, cd))
        flag = TRUE;

    delete[] cd;  // released on every call; one call is made per submission
    return flag;
}
// Returns TRUE when the total encoded length of the submission, i.e. the sum
// over i in 1..n of code length times HT[i].weight, equals `sum`; FALSE
// otherwise.
status isShortLength(int n, HuffmanCode *cd, HuffmanTree HT, int sum)
{
    int cost = 0;
    int idx = 1;
    while (idx <= n)
    {
        cost += HT[idx].weight * cd[idx].length;
        ++idx;
    }
    return (cost == sum) ? TRUE : FALSE;
}
// Checks that the codes cd[1..n] form a prefix code: no code may be a prefix
// of (or equal to) any other code, whichever of the two comes first.
//
// Returns TRUE when the set is prefix-free and FALSE otherwise.
status isPreCode(int n, HuffmanCode *cd)
{
    for (int i = 1; i <= n; ++i)
    {
        const size_t lenI = strlen(cd[i].code);
        for (int j = i + 1; j <= n; ++j)
        {
            const size_t lenJ = strlen(cd[j].code);
            // Compare only the length of the shorter code. If those characters
            // all match, the shorter code is a prefix of the longer one. This
            // covers both orderings and never reads past a terminator.
            const size_t shorter = (lenI < lenJ) ? lenI : lenJ;
            if (strncmp(cd[i].code, cd[j].code, shorter) == 0)
                return FALSE;
        }
    }
    return TRUE;
}