C++数据结构之文件压缩(哈夫曼树)实例详解
概要:
项目简介:利用哈夫曼编码的方式对文件进行压缩,并且对压缩文件可以解压
开发环境:windows vs2013
项目概述:
1.压缩
a.读取文件,将每个字符,该字符出现的次数和权值构成哈夫曼树
b.哈夫曼树是利用小堆构成,字符出现次数少的节点指针存在堆顶,出现次数多的在堆底
c.每次取堆顶的两个数,再将两个数相加进堆,直到堆被取完,这时哈夫曼树也建成
d.从哈夫曼树中获取哈夫曼编码,然后再根据整个字符数组来获取出现了得字符的编码
e.获取编码后每次凑满8位就将编码串写入到压缩文件(value处理编码1与它即可,0只移动位)
f.写好配置文件,统计每个字符及其出现次数,并以“字符+','+次数”的形式保存到配置文件中
2.解压
a.读取配置文件,统计所有字符的个数
b.构建哈夫曼树,读解压缩文件,将所读到的编码字符的这个节点所所含的字符写入到解压缩文件中,知道将压缩文件读完
c.压缩解压缩完全完成,进行小文件大文件的测试
实例代码:
#pragma once #includetemplate struct Less { bool operator()(const T& l, const T& r) const { return l < r; } }; template struct Greater { bool operator()(const T& l, const T& r) const { return l > r; } }; template class Heap { public: Heap() {} Heap(T* array, size_t n) //建堆 { _array.reserve(n); for (size_t i = 0; i < n; i++) { _array.push_back(array[i]); } for (int i = (_array.size() - 2) >> 1; i >= 0; --i) { _AdjustDown(i); } } const T& Top()const { return _array[0]; } void Push(const T& x) { _array.push_back(x); _AdjustUp(_array.size() - 1); } size_t Size() { return _array.size(); } void Pop() { assert(_array.size() > 0); swap(_array[0], _array[_array.size() - 1]); _array.pop_back(); _AdjustDown(0); } bool Empty() { return _array.size() == 0; } void Print() { for (size_t i = 0; i < _array.size(); ++i) { cout << _array[i] << " "; } cout << endl; } protected: void _AdjustUp(int child) //上调 { Compare ComFunc; int parent = (child - 1) >> 1; while (child) { if (ComFunc(_array[child], _array[parent])) { swap(_array[child], _array[parent]); child = parent; parent = (child - 1) >> 1; } else { break; } } } void _AdjustDown(int root) //下调 { Compare ComFunc; int parent = root; int child = root * 2 + 1; while (child < _array.size()) { if (child + 1 < _array.size() && ComFunc(_array[child + 1], _array[child])) { ++child; } if (ComFunc(_array[child], _array[parent])) { swap(_array[child], _array[parent]); parent = child; child = parent * 2 + 1; } else { break; } } } protected: vector _array; }; void TestHeap() { int a[] = { 10, 11, 13, 12, 16, 18, 15, 17, 14, 19 }; //Heap heap(a, sizeof(a) / sizeof(a[0])); //Heap > heap(a, sizeof(a) / sizeof(a[0])); Heap > heap(a, sizeof(a) / sizeof(a[0])); heap.Print(); heap.Push(25); heap.Print(); heap.Pop(); heap.Print(); }
#pragma once #include"Heap.h" templatestruct HuffmanTreeNode { HuffmanTreeNode * _left; HuffmanTreeNode * _right; HuffmanTreeNode * _parent; T _w; //权值 HuffmanTreeNode(const T& x) :_w(x) , _left(NULL) , _right(NULL) , _parent(NULL) {} }; template class HuffmanTree { typedef HuffmanTreeNode Node; public: HuffmanTree() :_root(NULL) {} HuffmanTree(T* a, size_t n, const T& invalid = T()) //构建哈夫曼树 { struct Compare { bool operator()(Node* l, Node* r) const { return l->_w < r->_w; } }; Heap minHeap; for (size_t i = 0; i < n; ++i) { if (a[i] != invalid) { minHeap.Push(new Node(a[i])); } } while (minHeap.Size() > 1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_w + right->_w); parent->_left = left; parent->_right = right; left->_parent = parent; right->_parent = parent; minHeap.Push(parent); } _root = minHeap.Top(); } Node* GetRoot() { return _root; } ~HuffmanTree() { _Destory(_root); } protected: void _Destory(Node* root) { if (root == NULL) { return; } _Destory(root->_left); _Destory(root->_right); delete root; } HuffmanTree(const HuffmanTree & tree); HuffmanTree& operator=(const HuffmanTree & tree); protected: Node* _root; }; void TestHuffmanTree() { #pragma once #includeusing namespace std; #include #include"HuffmanTree.h" typedef long long LongType; struct CharInfo { unsigned char _ch; //字符 LongType _count; //字符出现的次数 string _code; //huffman编码 CharInfo(LongType count = 0) :_count(count) , _ch(0) , _code("") {} bool operator<(const CharInfo& info) const { return _count < info._count; } CharInfo operator+(const CharInfo& info) { return CharInfo(_count + info._count); } bool operator!=(const CharInfo& info)const { return _count != info._count; } }; struct CountInfo { unsigned char _ch; //字符 LongType _count; //字符出现的次数 }; class FileCompress { public: FileCompress() { for (size_t i = 0; i < 256; ++i) { _info[i]._ch = i; } } void Compress(const char* filename) { assert(filename); //统计字符出现的次数,均已二进制方式读写 FILE* fout = fopen(filename, "rb"); assert(fout); int ch = fgetc(fout); string CompressFile = filename; CompressFile += ".huffman"; FILE* fin = fopen(CompressFile.c_str(), "wb"); assert(fin); CountInfo info; while (ch != EOF) { _info[(unsigned char)ch]._count++; ch = fgetc(fout); } //构建huffman tree CharInfo invalid; invalid._count = 0; HuffmanTree tree(_info, 256, invalid); //生成huffman code GetHuffmanCode(tree.GetRoot()); //压缩 //写配置文件 for (size_t i = 0; i < 256; ++i) { if (_info[i]._count) { info._ch = _info[i]._ch; info._count = _info[i]._count; fwrite(&info, sizeof(info), 1, fin); } } info._count = -1; fwrite(&info, sizeof(info), 1, fin); fseek(fout, 0, SEEK_SET); //返回到文件开始 ch = fgetc(fout); char value = 0; //二进制 int pos = 0; //左移位数 while (ch != EOF) { string& code = _info[(unsigned char)ch]._code; size_t i = 0; for (i = 0; i < code.size(); ++i) { value <<= 1; ++pos; if (code[i] == '1') { value |= 1; } if (pos == 8) //满8位写进文件中 { fputc(value, fin); value = 0; pos = 0; } } ch = fgetc(fout); } if (pos) { value <<= (8 - pos); fputc(value, fin); } fclose(fin); fclose(fout); } void GetHuffmanCode(HuffmanTreeNode * root) //huffman编码 { if (root == NULL) { return; } if (root->_left == NULL && root->_right == NULL) { HuffmanTreeNode *parent, *cur; cur = root; parent = cur->_parent; string& code = _info[(unsigned char)root->_w._ch]._code; while (parent) { if (cur == parent->_left) { code += '0'; } else { code += '1'; } cur = parent; parent = cur->_parent; } reverse(code.begin(), code.end()); } GetHuffmanCode(root->_left); GetHuffmanCode(root->_right); } //解压 void UnCompress(const char* filename) { assert(filename); string UnCompressFile = filename; size_t index = UnCompressFile.rfind('.'); assert(index != string::npos); UnCompressFile = UnCompressFile.substr(0, index); UnCompressFile += ".unhuffman"; FILE* fout = fopen(filename, "rb"); assert(fout); FILE* fin = fopen(UnCompressFile.c_str(), "wb"); assert(fin); CountInfo info; fread(&info, sizeof(CountInfo), 1, fout); //读配置信息 while (1) { fread(&info, sizeof(CountInfo), 1, fout); if (info._count == -1) { break; } _info[(unsigned char)info._ch]._ch = info._ch; _info[(unsigned char)info._ch]._count = info._count; } //重建huffman树 CharInfo invalid; invalid._count = 0; HuffmanTree tree(_info, 256, invalid); HuffmanTreeNode * root = tree.GetRoot(); HuffmanTreeNode * cur = root; LongType count = root->_w._count; int value = fgetc(fout); if (value == EOF) //只有一种字符 { if (info._ch != 0) { while (cur->_w._count--) { fputc(cur->_w._ch, fin); } } } else { while (value != EOF) { int pos = 7; char test = 1; while (pos >= 0) { if (value & (test << pos)) { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left == NULL && cur->_right == NULL) { fputc(cur->_w._ch, fin); cur = root; if (--count == 0) { break; } } --pos; } value = fgetc(fout); } } fclose(fout); fclose(fin); } protected: CharInfo _info[256]; //所有字符信息 }; void TestFileCompress() { FileCompress fc; //fc.Compress("实验.doc"); //fc.UnCompress("实验.doc.huffman"); //fc.Compress("qq.JPG"); //fc.UnCompress("qq.JPG.huffman"); //fc.Compress("www.MP3"); //fc.UnCompress("www.MP3.huffman"); fc.Compress("yppppp.txt"); fc.UnCompress("yppppp.txt.huffman"); }
int array[10] = { 2, 4, 6, 9, 7, 8, 5, 0, 1, 3 };HuffmanTreet(array, 10);} #include#include #include using namespace std; #include"Heap.h" #include"HuffmanTree.h" #include"FileCompress.h" int main() { //TestHeap(); TestHuffmanTree(); TestFileCompress(); system("pause"); return 0; }
以上就是哈夫曼树的实例详解,如有疑问请留言或者到本站社区交流,感谢阅读,希望能帮助到大家,谢谢大家对本站的支持!