Huffman树的概念
Huffman树是由n个带权叶子节点构成的所有二叉树中带权路径长度最短的二叉树。
节点的带权路径长度
树根到某一节点的路径长度与该节点的权的乘积。
树的带权路径长度
树的带权路径长度为树中从根节点到所有叶子节点的各个带权路径长度之和。
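Huffman树的构造步骤:
1. 将n个权值各自构造成只有根节点的二叉树,全部放入一个小堆(最小优先级队列)。
2. 从堆中取出权值最小的两棵树,分别作为左、右子树构造一棵新树,新树根节点的权值为两棵子树根节点权值之和。
3. 将新树放回堆中。
4. 重复步骤2、3,直到堆中只剩一棵树,这棵树就是Huffman树。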
文件解压缩的步骤:
压缩和解压缩过程中常见的问题:
代码如下:
HuffmanTree.hpp
#pragma once
#include <iostream>
using namespace std;
#include <cassert>
#include <fstream>
#include <queue>
#include <string>
#include <vector>
// One node of a Huffman tree.
//
// W is the weight type stored in the node. The three link pointers start
// out as NULL; whoever assembles the tree is responsible for wiring them.
template<class W>
struct HuffmanTreeNode
{
    HuffmanTreeNode* _root;   // extra link, NULL after construction
    HuffmanTreeNode* _left;   // left child, NULL for a leaf
    HuffmanTreeNode* _right;  // right child, NULL for a leaf
    W _w;                     // weight carried by this node

    // Builds a detached node that holds a copy of `weight`.
    HuffmanTreeNode(const W& weight)
        : _root(NULL),
          _left(NULL),
          _right(NULL),
          _w(weight)
    {
    }
};
template<class W>
class HuffmanTree
{
typedef HuffmanTreeNode Node;
public:
HuffmanTree()
:_root(NULL)
{}
struct NodeCompare
{
bool operator()(const Node* left, const Node* right)
{
return left->_w > right->_w;
}
};
HuffmanTree(W* w, size_t n,const W& invalid)
{
//构建Huffman树优先级队列
priority_queuevector, NodeCompare> minheap;
for (size_t i = 0; i < n; ++i)
{
if (w[i] != invalid)
{
minheap.push(new Node(w[i]));
}
}
while (minheap.size()>1)
{
Node* left = minheap.top();
minheap.pop();
Node* right = minheap.top();
minheap.pop();
Node* parent = new Node(left->_w + right->_w);
parent->_left = left;
parent->_right = right;
minheap.push(parent);
}
_root = minheap.top();
}
~HuffmanTree()
{
Destory(_root);
_root = NULL;
}
void Destory(Node* root)
{
if (root == NULL)
{
return;
}
Destory(root->_left);
Destory(root->_right);
delete root;
}
Node* GetRoot()
{
return _root;
}
private://防拷贝
HuffmanTree(const HuffmanTree& w)
{}
HuffmanTree& operator=(const HuffmanTree& w)
{}
private:
Node* _root;
};
FileCompress.hpp
#pragma once
#include"HuffmanTree.hpp"
typedef long long LongType;

// Per-byte statistics used as the weight type of the Huffman tree.
//
// All comparisons and the addition look at `_count` only; `_ch` and
// `_code` never take part in them.
struct CharInfo
{
    char _ch;           // the byte value this entry describes
    LongType _count;    // number of occurrences of `_ch`
    std::string _code;  // Huffman code as a string of '0'/'1' characters

    // True when the two entries have different occurrence counts.
    bool operator!=(const CharInfo& info) const
    {
        return _count != info._count;
    }

    // Returns an entry whose count is the sum of both counts.
    // The result has an empty code and `_ch` set to 0.
    CharInfo operator+(const CharInfo& info) const
    {
        CharInfo ret;
        ret._ch = 0;  // give the merged entry a defined character value
        ret._count = _count + info._count;
        return ret;
    }

    // True when this entry occurs more often than `info`.
    bool operator>(const CharInfo& info) const
    {
        return _count > info._count;
    }
};
class FileCompress
{
typedef HuffmanTreeNode Node;
public:
struct ConfigInfo
{
char _ch;
LongType _count;
};
FileCompress()
{
for (int i = 0; i < 256; ++i)
{
_hashInfos[i]._ch = i;
_hashInfos[i]._count = 0;
}
}
void Compress(const char* file)
{
//1.统计字符个数
ifstream ifs(file, ios_base::in | ios_base::binary);
char ch;
while (ifs.get(ch))
{
_hashInfos[(unsigned char)ch]._count++;
}
//2.生成Huffman树
CharInfo invalid;
invalid._count = 0;
HuffmanTree tree(_hashInfos, 256, invalid);
//3.生成Huffman code
GenerateHuffmanCode(tree.GetRoot());
//4.压缩
string compressfile = file;
compressfile += ".compress";
ofstream ofs(compressfile.c_str(), ios_base::out | ios_base::binary);
//4.1压缩前写入字符次数,方便解压缩时构建Huffman树
for (int i = 0; i < 256; ++i)
{
if (_hashInfos[i]._count > 0)
{
ConfigInfo info;
info._ch = _hashInfos[i]._ch;
info._count = _hashInfos[i]._count;
ofs.write((char*)&info, sizeof(ConfigInfo));
}
}
ConfigInfo end;
end._count = 0;
ofs.write((char*)&end, sizeof(ConfigInfo));
//4.2进行压缩
char value = 0;
int pos = 0;
ifs.clear();
ifs.seekg(0);
while (ifs.get(ch))
{
string& code = _hashInfos[(unsigned char)ch]._code;
for (size_t i = 0; i < code.size(); ++i)
{
if (code[i] == '0')
{
value &= (~(1 << pos));
}
else if (code[i] == '1')
{
value |= (1 << pos);
}
else
{
assert(false);
}
++pos;
if (pos == 8)
{
ofs.put(value);
//printf("%x ", value);
pos = 0;
value = 0;
}
}
}
if (pos > 0)
{
//printf("%x ", value);
ofs.put(value);
}
}
void GenerateHuffmanCode(Node* root)
{
if (root == NULL)
return;
if (root->_left == NULL&&root->_right == NULL)
{
_hashInfos[(unsigned char)root->_w._ch]._code = root->_w._code;
}
if (root->_left != NULL)
{
root->_left->_w._code = root->_w._code + '0';
GenerateHuffmanCode(root->_left);
}
if (root->_right != NULL)
{
root->_right->_w._code = root->_w._code + '1';
GenerateHuffmanCode(root->_right);
}
}
void UnCompress(const char* file)
{
//1.打开压缩文件
ifstream ifs(file, ios_base::in | ios_base::binary);
string uncompressfile = file;
size_t pos = uncompressfile.rfind('.');
assert(pos != string::npos);
uncompressfile.erase(pos);//缺省值为npos
#ifdef _DEBUG
uncompressfile += ".uncompress";
#endif
ofstream ofs(uncompressfile.c_str(), ios_base::out | ios_base::binary);
//2.重建Huffman树
while (1)
{
ConfigInfo info;
ifs.read((char*)&info, sizeof(ConfigInfo));
if (info._count > 0)
{
_hashInfos[(unsigned char)info._ch]._count = info._count;
}
else
{
break;
}
}
CharInfo invalid;
invalid._count = 0;
HuffmanTree tree(_hashInfos, 256, invalid);
//3.解压缩
Node* root = tree.GetRoot();
LongType filecount = root->_w._count;
Node* cur = root;
char ch;
while (ifs.get(ch))
{
for (size_t pos = 0; pos < 8; ++pos)
{
if (ch&(1 << pos))//1
{
cur = cur->_right;
}
else//0
{
cur = cur->_left;
}
if (cur->_left == NULL&&cur->_right == NULL)
{
ofs.put(cur->_w._ch);
cur = root;
if (--filecount == 0)
{
break;
}
}
}
}
}
private:
CharInfo _hashInfos[256];
};
// Manual test: compresses "input.txt" with a fresh FileCompress object.
// Declared inline because it is defined in a header; without it, including
// the header from two translation units causes a multiple-definition error.
inline void TestCompress()
{
    FileCompress fc;
    fc.Compress("input.txt");
}
// Manual test: decompresses "input.txt.compress" with a fresh FileCompress
// object. Declared inline because it is defined in a header; without it,
// including the header from two translation units causes a
// multiple-definition error.
inline void TestUncompress()
{
    FileCompress fc;
    fc.UnCompress("input.txt.compress");
}
test.cpp
#include"FileCompress.hpp"
// Entry point of the demo: runs the compression test first and the
// decompression test afterwards, then reports success to the caller.
int main()
{
    TestCompress();
    TestUncompress();

    return 0;
}
最后可以使用UltraCompare工具对压缩前的原文件与解压缩后的文件进行比较,验证二者是否一致。