Huffman Codes

In 1953, David A. Huffman published his paper "A Method for the Construction of Minimum-Redundancy Codes", and hence printed his name in the history of computer science. As a professor who gives the final exam problem on Huffman codes, I am encountering a big problem: the Huffman codes are NOT unique. For example, given a string "aaaxuaxz", we can observe that the frequencies of the characters 'a', 'x', 'u' and 'z' are 4, 2, 1 and 1, respectively. We may either encode the symbols as {'a'=0, 'x'=10, 'u'=110, 'z'=111}, or in another way as {'a'=1, 'x'=01, 'u'=001, 'z'=000}; both compress the string into 14 bits. Another set of codes can be given as {'a'=0, 'x'=11, 'u'=100, 'z'=101}, but {'a'=0, 'x'=01, 'u'=011, 'z'=001} is NOT correct since "aaaxuaxz" and "aazuaxax" can both be decoded from the code 00001011001001. The students are submitting all kinds of codes, and I need a computer program to help me determine which ones are correct and which ones are not.

Input Specification:

Each input file contains one test case. For each case, the first line gives an integer N (2≤N≤63), then followed by a line that contains all the N distinct characters and their frequencies in the following format:

 

c[1] f[1] c[2] f[2] ... c[N] f[N]

where c[i] is a character chosen from {'0' - '9', 'a' - 'z', 'A' - 'Z', '_'}, and f[i] is the frequency of c[i] and is an integer no more than 1000. The next line gives a positive integer M (≤1000), then followed by M student submissions. Each student submission consists of N lines, each in the format:

c[i] code[i]

where c[i] is the i-th character and code[i] is a non-empty string of no more than 63 '0's and '1's.

Output Specification:

For each test case, print in each line either "Yes" if the student's submission is correct, or "No" if not.

Note: The optimal solution is not necessarily generated by the Huffman algorithm. Any prefix code whose total encoded length is optimal is considered correct.

Sample Input:

7
A 1 B 1 C 1 D 3 E 3 F 6 G 6
4
A 00000
B 00001
C 0001
D 001
E 01
F 10
G 11
A 01010
B 01011
C 0100
D 011
E 10
F 11
G 00
A 000
B 001
C 010
D 011
E 100
F 101
G 110
A 00000
B 00001
C 0001
D 001
E 00
F 10
G 11

Sample Output:

Yes
Yes
No
No
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>

// Status / boolean constants used as return values by the functions below.
#define OK 1
#define ERROR 0
#define TRUE 1
#define FALSE 0
// INFEASIBLE is not referenced anywhere in the visible code.
#define INFEASIBLE -1
// NOTE(review): OVERFLOW is left disabled; presumably it clashed with a
// macro of the same name from a system header -- confirm before re-enabling.
//#define OVERFLOW -2

using namespace std;
// Return type shared by every helper; carries either one of the constants
// above or, for TreeWLength, a computed integer.
typedef int status;

// One node of the Huffman tree, stored in a flat array and linked by index.
// Index 0 means "none": a node whose parent is 0 has not been merged yet,
// and a node whose children are 0 is a leaf.
typedef struct
{
	int weight;	// frequency of a leaf, or sum of both children for an inner node
	int parent;	// array index of the parent, 0 if none
	int lchild;	// array index of the left child, 0 if none
	int rchild;	// array index of the right child, 0 if none
} HTNode;

// A Huffman tree is handled as a pointer to its node array.
typedef HTNode *HuffmanTree;

// One submitted code word for a single character.
typedef struct HuffmanCode
{
	int length;	// number of characters in `code`; IsTrue sets it via strlen
	char code[65];	// NUL-terminated string read from input
}HuffmanCode;// holds the binary code string

// Reads n (character, frequency) pairs from cin into w[1..n].
status CreatWeight(int *w, int n);
// Allocates HT and builds the Huffman tree for the weights w[1..n].
status CreatHuffmanTree(HuffmanTree &HT, int n, int * w);
status Select(HuffmanTree &HT, int n, int &s1, int &s2);// pick the two parentless nodes with the smallest weights
// Returns the weighted path length of the tree's n leaves.
status TreeWLength(HuffmanTree HT, int n);
// Reads one submission from cin and returns TRUE when it is an optimal prefix code.
status IsTrue(int sum, int n, HuffmanTree HT);
// TRUE when the submission's total weighted code length equals sum.
status isShortLength(int n, HuffmanCode *cd, HuffmanTree HT, int  sum);
// TRUE when no submitted code is a prefix of another.
status isPreCode(int n, HuffmanCode *cd);

int main()
{
	int n;
	cin >> n;
	//把权值读入数组
	int *w = new int[n + 1];
	CreatWeight(w, n);
	//构建霍夫曼树
	HuffmanTree HT;
	CreatHuffmanTree(HT, n, w);
	//求最短路径,并赋值给sum
	int sum;
	sum = TreeWLength(HT, n);
	//判断各个同学的例子
	int m;
	cin >> m;
	for (int i = 0; i < m; i++)
	{
		int flag = 0;
		flag = IsTrue(sum, n, HT);
		if (flag) cout << "Yes" << endl;
		else cout << "No" << endl;
	}
	return 0;

}
// Reads n "character frequency" pairs from cin.
//
// The character of each pair is read and discarded; the frequency is stored
// in w[1..n] (w[0] is left untouched). Always returns 0.
status CreatWeight(int *w, int n)
{
	int *slot = w + 1;
	for (int remaining = n; remaining > 0; --remaining, ++slot)
	{
		char symbol;
		cin >> symbol;
		cin >> *slot;
	}
	return 0;
}
// Builds a Huffman tree over the weights w[1..n].
//
// HT receives a freshly allocated array of 2n nodes; slot 0 is unused,
// slots 1..n are the leaves and slots n+1..2n-1 are the inner nodes, the
// last one being the root. Returns ERROR without touching HT when n <= 1,
// OK otherwise.
status CreatHuffmanTree(HuffmanTree &HT, int n, int * w)
{
	if (n <= 1)
		return ERROR;

	const int total = 2 * n - 1;
	HT = new HTNode[total + 1];

	// Leaves take their weight from w; inner nodes start at weight 0.
	// Every node starts with no parent and no children.
	for (int idx = 1; idx <= total; ++idx)
	{
		HTNode &node = HT[idx];
		node.weight = (idx <= n) ? w[idx] : 0;
		node.parent = 0;
		node.rchild = 0;
		node.lchild = 0;
	}

	// Repeatedly merge the two lightest parentless nodes into the next
	// free inner slot.
	for (int next = n + 1; next <= total; ++next)
	{
		int first, second;
		Select(HT, next - 1, first, second);
		HT[first].parent = next;
		HT[second].parent = next;
		HT[next].weight = HT[first].weight + HT[second].weight;
		HT[next].lchild = first;
		HT[next].rchild = second;
	}
	return OK;
}
// Picks the two parentless nodes with the smallest weights among HT[1..n].
//
// s1 receives the index of the lightest such node and s2 the index of the
// second lightest; on equal weights the node seen first wins. An output is
// set to 0 when no corresponding node exists. Always returns OK.
status Select(HuffmanTree &HT, int n, int &s1, int &s2)
{
	// The scan starts at index 1, so 0 can never be a real result and
	// doubles as "nothing found yet". Initialising here also avoids reading
	// whatever the caller left in s1/s2.
	s1 = 0;
	s2 = 0;

	for (int i = 1; i <= n; i++)
	{
		if (HT[i].parent != 0) continue;	// already merged into a subtree

		if (s1 == 0 || HT[i].weight < HT[s1].weight)
		{
			// New minimum: the previous minimum becomes the runner-up.
			s2 = s1;
			s1 = i;
		}
		else if (s2 == 0 || HT[i].weight < HT[s2].weight)
		{
			s2 = i;
		}
	}
	return OK;
}
// Returns the weighted path length of the tree: for each leaf 1..n, its
// weight times the number of parent links between it and the root.
status TreeWLength(HuffmanTree HT, int n)
{
	int total = 0;
	for (int leaf = 1; leaf <= n; ++leaf)
	{
		// Count the hops up to the root (the node whose parent is 0).
		int depth = 0;
		for (int node = HT[leaf].parent; node != 0; node = HT[node].parent)
			++depth;

		total += HT[leaf].weight * depth;
	}
	return total;
}
// Reads one student submission (n lines of "character code") from cin and
// judges it.
//
// sum is the optimal total encoded length and HT supplies the weights of
// leaves 1..n. Returns TRUE when the submission has exactly that total
// length and no code is a prefix of another, 0 otherwise.
status IsTrue(int sum, int n, HuffmanTree HT)
{
	int flag = 0;
	HuffmanCode* cd = new HuffmanCode[n + 1];

	// Read the student's binary codes into cd[1..n].
	for (int i = 1; i <= n; i++)
	{
		char temp_ch;
		cd[i].code[0] = '\0';	// stays a valid empty string if the read fails
		cin >> temp_ch;
		// Cap the extraction at the buffer size so an over-long token
		// cannot write past the end of code[].
		cin.width(sizeof cd[i].code);
		cin >> cd[i].code;
		cd[i].length = strlen(cd[i].code);
	}

	if (isShortLength(n, cd, HT, sum) && isPreCode(n, cd))
		flag = TRUE;

	// Called once per submission, so the buffer must be released each time.
	delete[] cd;
	return flag;
}
// Checks whether the submission's total encoded length is optimal.
//
// Adds up cd[i].length * HT[i].weight for i in 1..n and returns TRUE when
// the result equals sum, FALSE otherwise.
status isShortLength(int n, HuffmanCode *cd, HuffmanTree HT, int sum)
{
	int cost = 0;
	for (int idx = 1; idx <= n; ++idx)
		cost += cd[idx].length * HT[idx].weight;

	return (cost == sum) ? TRUE : FALSE;
}
// Checks that the codes cd[1..n] form a prefix code.
//
// Returns FALSE as soon as one code is a prefix of another (equal codes
// included), TRUE when no such pair exists.
status isPreCode(int n, HuffmanCode *cd)
{
	for (int i = 1; i <= n; ++i)
	{
		const size_t len_i = strlen(cd[i].code);
		for (int j = i + 1; j <= n; ++j)
		{
			const size_t len_j = strlen(cd[j].code);
			// Compare only as many characters as the shorter code has.
			// This catches a prefix relation in either direction and never
			// reads past either string's terminator.
			const size_t shorter = (len_i < len_j) ? len_i : len_j;
			if (strncmp(cd[i].code, cd[j].code, shorter) == 0)
				return FALSE;
		}
	}
	return TRUE;
}


 

 

你可能感兴趣的:(C语言)