算法导论——哈夫曼树

一、问题

针对大量的数据集,设计算法高效求出部分字符的哈夫曼编码

说明:本文为哈夫曼编码的应用,基本哈夫曼编码实现见博客:http://blog.csdn.net/xiaowang627/article/details/51483267

二、算法分析

用一个map(记为addr)存储每个字符及其在构造好的哈夫曼树中对应叶子节点的地址。任意给出一个字符,检索该map即可得到该字符的节点地址,然后从该地址开始自底向上搜索:若当前节点是其父结点的左孩子则把0压栈,否则把1压栈,直到根节点。最后依次输出栈中元素,即为该字符的编码。

基于题目一基本哈夫曼算法改造数据结构如下,其中加入了父结点指针。

// A character paired with an integer count (its frequency).
struct character{
       char ch;    // the character itself
       int freq;   // frequency associated with ch
};
// Huffman tree node. Every node carries a frequency and links to both
// children and to its parent; the parent link is what allows a code to be
// read by walking from a leaf up to the root.
struct Node{
       // Start every field in a defined state. Internal nodes never get a
       // character assigned, so leaving ch uninitialized would make any later
       // read of it indeterminate.
       Node() : ch('\0'), freq(0), left(nullptr), right(nullptr), parent(nullptr) {}

       char ch;        // character stored in the node ('\0' until assigned)
       int freq;       // frequency of this node
       Node* left;     // left child, or null
       Node* right;    // right child, or null
       Node* parent;   // parent node, or null for a node with no parent yet
};

三、算法实现

// A character paired with an integer count (its frequency).
struct character{
	char ch;    // the character itself
	int freq;   // frequency associated with ch
};
// Huffman tree node. Every node carries a frequency and links to both
// children and to its parent; the parent link is what allows a code to be
// read by walking from a leaf up to the root.
struct Node{
	// Start every field in a defined state. Internal nodes never get a
	// character assigned, so leaving ch uninitialized would make any later
	// read of it indeterminate.
	Node() : ch('\0'), freq(0), left(nullptr), right(nullptr), parent(nullptr) {}

	char ch;        // character stored in the node ('\0' until assigned)
	int freq;       // frequency of this node
	Node* left;     // left child, or null
	Node* right;    // right child, or null
	Node* parent;   // parent node, or null for a node with no parent yet
};
//用于最小优先队列的比较函数
struct cmp {
	bool operator () (Node *a, Node *b) {
		return a->freq> b->freq;
	}
};

#include "HuffMan.h"
#include <fstream>
#include <iostream>
#include <map>
#include <queue>
#include <stack>
#include <string>
#include <vector>
#define MaxSize 6
using namespace std;
//Node *HUFFMAN(character characters[]);
Node* HUFFMAN2(map fre, map& addr);
void print_code(Node* tree, string cur_path);
void printCharCode(Node* tree, char ch, map& addr);

int main() {
	//初始字符集合
	char ch;
	//aracter characters[MaxSize] = { {'a',45},{'b',13},{'c',12},{'d',16},{'e',9},{'f',5} };
	map  fre;
	map::iterator map_it;
	map addr;

	//读入所有字符
	ifstream infile("D:\\huf_data.txt");
	infile >> noskipws;
	while (infile >> ch) {
		fre[ch]++;
	}
	//生成哈夫曼树
	Node* tree;
	tree = HUFFMAN2(fre,addr);
	//tree = HUFFMAN(characters);
	//print(tree);
	//cout << "输出所有字符编码:" << endl;
	//print_code(tree,"");//输出所有字符编码
	//cout << endl;
	cout << "快速取一串字符的编码:" << endl;
	printCharCode(tree, 'f', addr);
	printCharCode(tree, 'e', addr);
	printCharCode(tree, 'g', addr);
	return 0;
}

void printCharCode(Node* tree, char ch, map& addr) {
	stack path;
	Node *cur_node;
	Node *temp;
	Node *p;

	if (addr.find(ch) == addr.end())
		cout << "找不到该字符" << endl;
	else {
		cur_node = addr[ch];
		cout << cur_node->ch << ":";
		temp = cur_node; 
		p = temp->parent;
		while (p!=0) {
			if (temp == p->left) {
				path.push(0);
			}
			else{
				path.push(1);
			}
			temp = p; 
			p = temp->parent;
		}//while
	}
	while (!path.empty()) {
		cout << path.top();
		path.pop();
	}
	cout << endl;
}

// Builds a Huffman tree from a frequency table.
//
// fre   character -> frequency.
// addr  output: for every character in fre, addr[character] is set to the
//       leaf node created for it. Existing entries for other keys are kept.
//
// Returns the root of the tree, or a null pointer when fre is empty. With a
// single character the root is that character's leaf. Nodes are allocated
// with new and are not freed by this function.
Node* HUFFMAN2(map<char, int> fre, map<char, Node*>& addr) {
	priority_queue<Node*, vector<Node*>, cmp> Q;

	// Create one leaf per character. The leaf is registered in addr right
	// here, where it is known to be a leaf, rather than by inspecting the ch
	// field of nodes popped later (internal nodes have no meaningful ch).
	for (const auto& entry : fre) {
		Node* leaf = new Node();
		leaf->ch = entry.first;
		leaf->freq = entry.second;
		addr[leaf->ch] = leaf;
		Q.push(leaf);
	}

	if (Q.empty()) {
		return nullptr;  // top() on an empty queue would be undefined
	}

	// Repeatedly merge the two least frequent nodes until one root remains.
	while (Q.size() > 1) {
		Node* x = Q.top(); Q.pop();
		Node* y = Q.top(); Q.pop();

		Node* z = new Node();
		z->left = x;
		z->right = y;
		z->freq = x->freq + y->freq;
		x->parent = z;
		y->parent = z;
		Q.push(z);
	}
	return Q.top();
}
/*
Node *HUFFMAN(character characters[]) {
	Node* tree = new Node();
	priority_queue, cmp> Q;

	//初始化最小优先队列
	for (int i = 0; i < MaxSize; i++) {
		Node* node = new Node();
		node->ch = characters[i].ch;
		node->freq = characters[i].freq;
		Q.push(node);
	}

	for (int i = 1; i < MaxSize; i++){
		Node* x = Q.top(); Q.pop();
		Node* y = Q.top(); Q.pop();
		Node* z = new Node();
		z->left = x;
		z->right = y;
		z->freq = x->freq + y->freq;
		//cout << "z:" << z->ch << ":" << z->freq << endl;
		Q.push(z);
	}

	return Q.top();
}
*/
// Prints "<char>:<code>" on its own line for every leaf in the subtree rooted
// at tree, visiting the left branch before the right one.
//
// tree      subtree root; a null pointer prints nothing.
// cur_path  code bits accumulated on the way down to tree ("" for the root).
//
// Going left extends the path with "0", going right with "1".
void print_code(Node* tree, string cur_path) {
	if (tree == nullptr) {
		return;
	}
	if (tree->left == nullptr && tree->right == nullptr) {
		cout << tree->ch << ":" << cur_path << endl;
		return;
	}
	if (tree->left != nullptr) {
		print_code(tree->left, cur_path + "0");
	}
	if (tree->right != nullptr) {
		print_code(tree->right, cur_path + "1");
	}
}


四、算法结果分析

采用如下数据集进行测试:

aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbccccccccccccddddddddddddddddeeeeeeeeefffffaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbccccccccccccddddddddddddddddeeeeeeeeefffffdddddddddddddeeeeeeeeefffffaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbccccccccccccddddddddddddddddeeeeeeeeefffffdddddddddddddeeeeeeeeefffffaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaabbbbbbbbbbbbbccccccccccccddddddddddddddddeeeeeeeeefffffdfdsfdsgddfdsfdsfdsfsdfsdffkdfjsdlfkldskfldskfldfdgfgfg

查询字符f和e的哈夫曼编码,输出如下:

算法导论——哈夫曼树_第1张图片

查询过程为:先根据map找出字符对应的叶子节点地址,再沿父指针依次向上遍历到根节点,途中将0或1压入栈中,最后依次弹出并输出栈中元素,即为该字符的哈夫曼编码。求一个字符的哈夫曼编码所需时间为map查找的O(log n)加上与编码长度(即叶子深度)成正比的回溯开销;树较平衡时总体约为O(log n),树极端偏斜时回溯部分最坏为O(n)。

五、总结

该实验的改进算法改造了原始算法的数据结构,通过map完成字符到节点地址的映射,提高了检索单个字符编码的效率。求一个字符的哈夫曼编码不再需要遍历整棵哈夫曼树,只需一次map查找加一次从叶子到根的回溯,在树较平衡时时间复杂度约为O(log n)。





你可能感兴趣的:(数据结构)