定义哈夫曼树之前先说明几个与哈夫曼树有关的概念:
路径: 树中一个结点到另一个结点之间的分支构成这两个结点之间的路径。
路径长度:路径上的分枝数目称作路径长度。
树的路径长度:从树根到每一个结点的路径长度之和。
结点的带权路径长度:在一棵树中,如果其结点上附带有一个权值,通常把该结点的路径长度与该结点上的权值
之积称为该结点的带权路径长度
树的带权路径长度:如果树中每个叶子上都带有一个权值,则把树中所有叶子的带权路径长度之和称为树的带
权路径长度。
3.根据配置文件读取相关信息重建Huffman树,对压缩后的文件进行译码。
#pragma once
#pragma once
#include
#include
#include
#include
using namespace std;
template
struct Less
{
bool operator() (const T & l, const T & r)
{
return l < r;
}
};
template
struct Greater
{
bool operator() (const T & l, const T & r)
{
return l > r;
}
};
template>
class Heap {
private:
vector _array;
protected:
void Adjustdown(int root)
{
size_t child = root * 2 + 1;
while (child < _array.size())
{
if (child + 1 < _array.size() && Compare()(_array[child + 1], _array[child]))
{
child++;
}
if (Compare()(_array[child], _array[root]))
{
swap(_array[child], _array[root]);
root = child;
child = root * 2 + 1;
}
else
{
break;
}
}
}
void Adjustup(int child)
{
int root = (child - 1) / 2;
while (root >= 0)
{
if (Compare() (_array[child], _array[root]))
{
swap(_array[child], _array[root]);
child = root;
root = (child - 1) / 2;
}
else
{
break;
}
}
}
public:
Heap()
{}
Heap(T *array, int size)
{
int i;
for (i = 0; i < size; i++)
{
_array.push_back(array[i]);
}
for (i = (_array.size() - 2) / 2; i >= 0; i--)
{
Adjustdown(i);
}
}
void push(T data)
{
_array.push_back(data);
Adjustup(_array.size() - 1);
}
void pop()
{
swap(_array[0], _array[_array.size() - 1]);
_array.pop_back();
Adjustdown(0);
}
T & top()
{
assert(_array.size() > 0);
return _array[0];
}
size_t size()
{
return _array.size();
}
};
#pragma once
#pragma once
#include
#include"heap.hpp"
using namespace std;
template
struct HuffmanTreeNode {
HuffmanTreeNode *_left;
HuffmanTreeNode *_right;
HuffmanTreeNode *_parent;
T _weight; //weight代表自定义类型的对象实体
HuffmanTreeNode(const T weight)
:_left(NULL)
, _right(NULL)
, _weight(weight)
, _parent(NULL)
{}
};
template
struct NodeCompare
{
bool operator()(HuffmanTreeNode *l, HuffmanTreeNode *r)
{
return l->_weight < r->_weight;
}
};
zemplate
class HuffmanTree {
typedef HuffmanTreeNode Node;
public:
HuffmanTree()
:_root(NULL)
{}
void Create(const T *a, size_t size, T invalid)
{
assert(a);
Heap> minHeap;
for (size_t i = 0; i < size; i++)
{
if (a[i] != invalid)
{
Node *newnode = new Node(a[i]);
minHeap.push(newnode);
}
}
while (minHeap.size() >= 2)
{
Node *left = minHeap.top();
minHeap.pop();
Node *right = minHeap.top();
minHeap.pop();
Node *parent = new Node(left->_weight + right->_weight);
parent->_left = left;
parent->_right = right;
left->_parent = parent;
right->_parent = parent;
minHeap.push(parent);
}
_root = minHeap.top();
minHeap.pop();
}
Node *GetRoot()
{
return _root;
}
private:
Node *_root;
};#pragma once
#include"HuffmanTree.hpp"
#include
#include
/*
a 4次
b 3次
……
共有 256 个字符,开辟256数组
*/
typedef long LongType;
//标识每个字符
struct FileInfo {
char _ch;
LongType _count;
string _code;
FileInfo(unsigned char ch = 0)
:_ch(ch)
, _count(0)
{}
bool operator < (const FileInfo &info)
{
return this->_count < info._count;
}
FileInfo operator + (const FileInfo &info)
{
FileInfo tmp;
tmp._count = this->_count + info._count;
return tmp;
}
bool operator != (const FileInfo &info)const
{
return this->_count != info._count;
}
};
class FileCompress {
public:
FileCompress()
{
for (int i = 0; i < 256; ++i)
_infos[i]._ch = i;
}
public:
bool Compress(const char* filename)
{
//1.打开文件,统计每个字符出现次数
assert(filename);
FILE* fOut = fopen(filename,"rb");
assert(fOut);
long long chSize = 0;
char ch = fgetc(fOut);
while (ch != EOF)
{
++chSize;
_infos[(unsigned char)ch]._count++;
ch = fgetc(fOut);
}
//2.生成对应的Huffman编码
HuffmanTree> tree;
FileInfo invalid; //指定非法值,_count为0
tree.Create(_infos, 256, invalid);
string Ins;
Ins.clear();
_GenerateHuffmanCode(tree.GetRoot());
//3.写入压缩文件
string compressfile = filename;
compressfile += ".huffman";
FILE *fInCompress = fopen(compressfile.c_str(), "wb");
assert(fInCompress);
fseek(fOut, 0, SEEK_SET);
ch = fgetc(fOut);
int index = 0;
unsigned char Inch = 0;
while (ch != EOF)
{
string &code = _infos[(unsigned char)ch]._code;
for (size_t i = 0; i < code.size(); ++i)
{
Inch <<= 1;
if (code[i] == '1')
{
Inch |= 1;
}
if (++index == 8)
{
fputc(Inch, fInCompress);
index = 0;
Inch = 0;
}
}
ch = fgetc(fOut);
}
//处理最后一个不满1个Byte的单元
if (index != 0)
{
Inch <<= (8 - index);
fputc(Inch, fInCompress);
}
//4.写配置文件
string configfile = filename;
configfile += ".cfig";
FILE *fInConfig = fopen(configfile.c_str(), "wb");
assert(fInConfig);
string str;
//配置文件的第一行:> 文本总的chSize
_itoa(chSize, (char*)str.c_str(), 10);
fputs(str.c_str(), fInConfig);
fputc('\n', fInConfig);
for (size_t i = 0; i < 256; ++i)
{
string Inconfig;
if (_infos[i]._count > 0)
{
Inconfig += _infos[i]._ch;
Inconfig += ',';
Inconfig += _itoa(_infos[i]._count,(char*)str.c_str(), 10);
Inconfig += '\n';
}
fputs(Inconfig.c_str(), fInConfig);
str.clear();
}
fclose(fOut);
fclose(fInCompress);
fclose(fInConfig);
return true;
}
bool UnCompress(const char *filename)
{
//1.读取配置文件信息
string configfile = filename;
configfile += ".cfig";
FILE *fOutConfig = fopen(configfile.c_str(), "rb");
assert(fOutConfig);
string line;
long long chSize = 0;
//配置文件的第一行:> 文本总的chSize
ReadLine(fOutConfig, line);
chSize += atoi(line.c_str());
line.clear();
while (ReadLine(fOutConfig,line))
{
if (!line.empty())
{
unsigned char ch = line[0];
_infos[ch]._count = atoi(line.substr(2).c_str());
line.clear();
}
else
{
line = '\n';
}
}
//2.构造哈夫曼树
HuffmanTree> tree;
FileInfo invalid; //指定非法值,_count为0
tree.Create(_infos, 256, invalid);
_GenerateHuffmanCode(tree.GetRoot());
//3.解压文件
string compressfile = filename; //读压缩文件
compressfile += ".huffman";
FILE *fOutCompress = fopen(compressfile.c_str(), "rb");
assert(fOutCompress);
string uncompressfile = filename; //写解压缩文件
uncompressfile += ".uncompress";
FILE *fInUncompress = fopen(uncompressfile.c_str(), "wb");
assert(fInUncompress);
char ch = fgetc(fOutCompress);
HuffmanTreeNode* cur = tree.GetRoot();
int pos = 8;
while (1)
{
//叶子节点
if (cur->_left == NULL && cur->_right == NULL)
{
fputc(cur->_weight._ch,fInUncompress);
cur = tree.GetRoot();
if(--chSize==0)
break;
}
--pos;
if (ch & (1 << pos)) //pos的最高位1则 右孩子 1<_right;
else
cur = cur->_left;
if (pos == 0)
{
ch = fgetc(fOutCompress);
pos = 8;
}
}
fclose(fOutConfig);
fclose(fOutCompress);
fclose(fInUncompress);
return true;
}
protected:
void _GenerateHuffmanCode(HuffmanTreeNode* root)
{
if (root == NULL)
return;
_GenerateHuffmanCode(root->_left);
_GenerateHuffmanCode(root->_right);
if (root->_left == NULL && root->_right == NULL)
{
HuffmanTreeNode* cur = root;
HuffmanTreeNode* parent = cur->_parent;
// *cur类型为huffman类型的node 其中的weight成员代表元素类型info
// info._ch 代表相应的字符char
string& code = _infos[(unsigned char)cur->_weight._ch]._code;
while (parent)
{
if (parent->_left == cur)
code += '0';
else
code += '1';
cur = parent;
parent = cur->_parent;
}
reverse(code.begin(),code.end());
}
}
bool ReadLine(FILE *fOut, string &str)
{
char ch = fgetc(fOut);
if (ch == EOF)
return false;
while (ch != EOF && ch != '\n')
{
str += ch;
ch = fgetc(fOut);
}
return false;
}
private:
FileInfo _infos[256];
};