Huffman Coding 哈夫曼编码

作者:jostree 转载请注明出处 http://www.cnblogs.com/jostree/p/4096079.html

使用优先队列实现,需要注意以下几点:

1.在使用priority_queue时,内部需要存储哈夫曼树节点的指针,而不能是节点本身。因为构建哈夫曼树时,需要把父节点的左右指针指向孩子,而如果队列中储存的是节点,那么节点在队列内部调整位置时会被拷贝、移动,孩子的地址是会改变的。同理,节点应当使用new在堆内存中开辟,而不能直接存放在vector中,原因是vector在元素个数超过当前容量时会扩容(通常按倍数增长),开辟新数组并把老数组的元素copy过去,从而也会导致地址变化。

2.优先队列对指针的排列,需要额外写一个比较函数对象来比较指针指向的节点的大小:bool operator () (wcnode * node1, wcnode * node2) { return node1->lessthan(node2); } 并在定义优先队列时使用这种方法:priority_queue <wcnode*, vector<wcnode*>, compare>。第一个参数是元素类型,第二个参数是优先队列的底层储存容器,第三个参数是比较函数对象的类型。

3.C++在写入文件时,由于只能按字节写入,因此需要把每8个bit位转化为一个字节,最后不足8位的部分用0补齐,并记录文件总bit数,便于解码,然后写入文件。另外,写入二进制文件可以使用ofstream out("output.txt",std::ofstream::binary);

4.哈夫曼编码信息包括每种字符的映射,和该文件的总bit数。

其代码如下:

  1 #include <cstdio>

  2 #include <cstdlib>

  3 #include <iostream>

  4 #include <cstring>

  5 #include  <fstream>

  6 #include  <queue>

  7 #include  <map>

  8 #include  <vector>

  9 using namespace std;

 10 class compare;

 11 

 12 class wcnode

 13 {

 14     public:

 15         friend class compare;

 16         char word;

 17         int count;

 18         wcnode* left;

 19         wcnode* right;

 20         bool lessthan (const wcnode *w)const

 21         {

 22             return count > w->count;

 23         }

 24         wcnode(char w='\0', int c=0, wcnode* l=NULL, wcnode * r=NULL)

 25         {

 26             word = w; count = c; left = l; right = r;

 27         }

 28 };

 29 

 30 class compare

 31 {

 32     public:

 33         bool operator () (wcnode * node1, wcnode * node2)

 34         {

 35             return node1->lessthan(node2);

 36         }

 37 };

 38 

 39 void preorder(wcnode *head, vector<bool> rec, map<char, vector<bool> > & res)

 40 {

 41     if( head->left == NULL && head->right == NULL )

 42     {

 43         res[head->word] = rec;

 44         return;

 45     }

 46     vector<bool> l = rec;

 47     l.push_back(0);

 48     vector<bool> r = rec;

 49     r.push_back(1);

 50     if(head->left != NULL) preorder(head->left, l, res);

 51     if(head->right != NULL) preorder(head->right, r, res);

 52 }

 53 map<char, vector<bool> > encode(map<char, int> &wordcount)

 54 {

 55     map<char, vector<bool> > res;

 56     priority_queue <wcnode*, vector<wcnode*>, compare> pq;

 57     map<char, int>::iterator t;

 58     wcnode *tmp;

 59     wcnode *t1, *t2, *t3;

 60 

 61     for( t = wordcount.begin() ; t != wordcount.end() ; t++ )

 62     {

 63         tmp = new wcnode();

 64         tmp->word = t->first;

 65         tmp->count = t->second;

 66         pq.push(tmp);

 67     }

 68     while( pq.size() > 1 )

 69     {

 70         t1 = pq.top();

 71         pq.pop();

 72         t2 = pq.top();

 73         pq.pop();

 74         t3 = new wcnode();

 75         t3->count = t1->count + t2->count;

 76         t3->left = t1;

 77         t3->right = t2;

 78         pq.push(t3);

 79     }

 80     wcnode *huffmanhead = pq.top();

 81     vector<bool> rec;

 82     preorder(huffmanhead, rec, res);

 83     map<char, vector<bool>  >::iterator it;

 84     for( it = res.begin() ; it != res.end() ; it++ )

 85     {

 86         cout<<it->first<<":";

 87         for( int i = 0; i < it->second.size() ; i++ )

 88         {

 89             cout<<it->second[i];

 90         }

 91         cout<<", ";

 92     }

 93     return res;

 94 }

 95 

 96 void output(string s, string passage, map<char, vector<bool> > res)

 97 {

 98     ofstream out(s.c_str());

 99     vector<bool> bit;

100     for( int i = 0 ; i < passage.size() ; i++ )

101     {

102         vector<bool> tmp = res[passage[i]];

103         for( int i = 0 ; i < tmp.size(); i++ )

104         {

105             bit.push_back(tmp[i]);

106         }

107     }

108     char outputchar = 0;

109     for( int i = 0 ; i < bit.size() ; i++ )

110     {

111         if( i % 8 == 7 )

112         {

113            out.write(&outputchar, sizeof(outputchar));

114            outputchar = 0;

115         }

116         outputchar = outputchar + bit[i];

117         outputchar = outputchar * 2;

118     }

119     if( outputchar != 0 )

120     {

121         out.write(&outputchar, sizeof(outputchar));

122     }

123     out.close();

124 }

125 int main(int argc, char *argv[])

126 {

127     char tmp;

128     ifstream in("Aesop_Fables.txt");

129     map <char, int> wordcount;

130     map <char, vector<bool> > res;

131     string passage;

132     while( in.get(tmp) )

133     {

134         passage += tmp;

135         if( wordcount.count(tmp) == 0  )

136         {

137             wordcount[tmp] = 1;

138         }

139         else

140         {

141             wordcount[tmp]++;

142         }

143     }

144     res = encode(wordcount);

145     output("outAesop.txt", passage, res);

146     in.close();

147 }
View Code

 

 

你可能感兴趣的:(Huffman)