用优先队列实现指令系统下的哈夫曼编码

哈夫曼编码

哈夫曼编码(Huffman Coding),又称霍夫曼编码,是一种编码方式,哈夫曼编码是可变字长编码(VLC)的一种。Huffman于1952年提出一种编码方法,该方法完全依据字符出现概率来构造异字头的平均长度最短的码字,有时称之为最佳编码,一般就叫做Huffman编码(有时也称为霍夫曼编码)。

模拟实现

输入:指令以及每一条指令出现的次数(程序据此计算各指令的出现概率)
输出:每条指令的哈夫曼编码,理论最短平均编码长度 、实际最短平均编码长度
实际平均编码长度:每条指令的哈夫曼编码长度 $l_i$ 乘以该指令的出现概率 $p_i$(该指令次数 / 总次数),再对所有指令求和,即 $L=\sum_i p_i\,l_i$
理论最短平均编码长度(信息熵):每条指令的出现概率 $p_i$ 乘以该概率以 2 为底的对数,对所有指令求和后取负值,即 $H=-\sum_i p_i\log_2 p_i$

代码

#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <queue>
#include <string>
#include <vector>
using namespace std;
long long sum=0;  // running total of the occurrence counts of all instructions read so far
int ordersum;     // number of distinct instructions (how many entries of node[] are in use)


// One node of the Huffman tree.
// Leaves carry an instruction mnemonic and its occurrence count; internal
// nodes carry only the summed count of their two children.
struct Huffman_node{
	string order;         		// instruction mnemonic, e.g. "mov" (empty for internal nodes)
	long long count;	  		// number of occurrences (sum of children for internal nodes)
	string code;          		// Huffman code of this node as a string of '0'/'1'
	Huffman_node  *left,  		// left child, NULL for a leaf
                  *right,		// right child, NULL for a leaf
                  *parent; 		// parent node, NULL for the root

	// Guarantees a well-defined empty node no matter how it is created
	// (`Huffman_node n;`, `new Huffman_node`, or `new Huffman_node()`), so a
	// forgotten assignment can never leave a wild pointer in the tree.
	Huffman_node() : count(0), left(NULL), right(NULL), parent(NULL) {}
};
// Noder: pointer alias used for every tree-node handle in this file.
typedef Huffman_node* Noder;
// root: top of the most recently built tree (assigned by create_huffman_tree).
// node: the leaf nodes in input order; the fixed size limits the program to 1000 instructions.
Noder root,node[1000];

// Ordering predicate for the node priority queue.
// std::priority_queue keeps the element that compares "largest" on top, so
// the comparison is inverted (a ranks below b when a's count is greater) to
// make the node with the SMALLEST count surface first, i.e. a min-heap.
// The call operator is const so the predicate also works through a const
// comparator object, as standard algorithms and containers may require.
struct cmp {
    bool operator() (Noder a, Noder b) const {
        return a->count > b->count;
    }
};
 
// Queue of nodes still waiting to be merged, ordered by cmp; filled by creat_array and drained by create_huffman_tree.
priority_queue< Noder, vector<Noder>, cmp> pq; 

//建立哈夫曼树 
void create_huffman_tree()
{
    root = NULL;

    while( !pq.empty() )
    {       
        Noder first = pq.top();
        pq.pop();
        if( pq.empty() )
        {
            root = first;
            break;
        }
        Noder second = pq.top();
        pq.pop();
        Noder new_node = new Huffman_node();
        new_node->count = first->count + second->count;

        if(first->count <= second->count)
        {
            new_node->left = first;
            new_node->right = second;
        }
        else
        {
            new_node->left = second;
            new_node->right = first;
        }
        first->parent = new_node;
        second->parent = new_node;

        pq.push(new_node);   
    }
} 

//输入、存储指令及指令的个数,并将其压入优先队列中 
int creat_array(int x,int y)
{
	for(int i=x;i<y;i++)
	{
		Noder node1 = new Huffman_node ();
        cin>>node1->order>>node1->count;
        sum+=node1->count; 
        node1->code = "";
        node1->left = NULL;
        node1->right = NULL;
        node1->parent = NULL;
        node [i] = node1; 
	}  
	for(int i=0;i<y;i++)
	{
		pq.push(node [i]);
	 } 
}

//实现哈夫曼编码 
void  create_map_1(const Noder node, bool left)
{
    if(left)
        node->code = node->parent->code + "0";
    else
        node->code = node->parent->code + "1"; 
    if(node->left == NULL && node->right == NULL)
        return;
    else
    {
        if(node->left != NULL)
            create_map_1(node->left, true);
        if(node->right != NULL)
            create_map_1(node->right, false);
    }
}

// Fills in the `code` string of every node below `root`.
// Prints an error message and terminates with exit(1) when no tree exists.
void  calculate_huffman_codes()
{
    if(root == NULL)
    {
        printf("建哈夫曼树失败\n");
        exit(1);
    }

    // A tree made of a single instruction has no edges, which would leave that
    // instruction with an empty, unusable code. Give it the one-bit code "0".
    if(root->left == NULL && root->right == NULL)
    {
        root->code = "0";
        return;
    }

    // The root itself keeps the empty code; its children start the prefixes.
    if(root->left != NULL)
        create_map_1(root->left, true);
    if(root->right != NULL)
        create_map_1(root->right, false);
}
 
 //输出每条指令的哈夫曼编码,分别输出理论和实际上最短平均编码长度 
// Prints one line "<mnemonic> <count> <code>" per instruction, followed by
// two numbers on separate lines:
//   1. the actual average code length   sum(p_i * len(code_i))
//   2. the theoretical minimum (entropy) -sum(p_i * log2(p_i))
// where p_i = count_i / sum.
void output()
{
	const double log_of_two = log(2.0);
	double result1=0,result2=0;
	for(int i=0;i<ordersum;i++)
	{
		// With no occurrences at all every probability is taken as 0
		// instead of dividing by zero.
		const double unit = sum > 0
			? static_cast<double>(node[i]->count)/static_cast<double>(sum)
			: 0.0;
		result1+=(node[i]->code).size()*unit; // actual average code length
		// p*log(p) tends to 0 as p tends to 0; evaluating log(0) directly
		// would turn the whole entropy into NaN, so such terms are skipped.
		if(unit > 0)
			result2-=unit*log(unit)/log_of_two; // theoretical minimum average code length
		cout<<node[i]->order<<" "<<node[i]->count<<" "<<node[i]->code<<endl;
	}
	cout<<result1 <<endl;
	cout<<result2<<endl;
}

int main()
{
	puts("How many instructions do you have? Plese input.");
	cin>>ordersum;  
	puts("Please enter each instruction and the number of times it appears.");
	creat_array(0,ordersum);
	create_huffman_tree();
	calculate_huffman_codes();
	output();
	while(1)
	{
		puts("If you want to add some instructions, please input 1."); 
		puts("If you want to exit ,please input input 2.")	;
		int judge;	cin>>judge;
		if(judge==1){
			int addsum;cin>>addsum;
			creat_array(ordersum,ordersum+addsum);
			ordersum+=addsum;
			create_huffman_tree();
			calculate_huffman_codes();
			output();
		} 	
		else{
			return 0;
		}
	}
	return 0;
 } 

示例输入输出

 /*
示例输入与输出: 
How many instructions do you have? Plese input.
5
Please enter each instruction and the number of times it appears.
aaa 23
fff 56
eee 45
sss 54
ttt 15
aaa 23 001
fff 56 11
eee 45 01
sss 54 10
ttt 15 000
实际平均编码长度:2.19689
理论平均编码长度:2.17404
If you want to add some instructions, please input 1.
If you want to exit ,please input input 2.
1
How many instructions do you add? Plese input.
2
Please enter each instruction and the number of times it appears.
bbb 5
ddd 46
aaa 23 1101
fff 56 10
eee 45 111
sss 54 01
ttt 15 11001
bbb 5 11000
ddd 46 00
实际平均编码长度:2.61885
理论平均编码长度:2.55595
If you want to add some instructions, please input 1.
If you want to exit ,please input input 2.
2
 */ 

你可能感兴趣的:(数据结构,计算机系统结构)