项目描述:实现文件的压缩及解压缩。
开发平台:VS2013
开发技术:堆,Huffman树,文件输入输出函数
项目特点:1. 统计文件中字符出现的次数,利用数据结构堆建造Huffman树,出现次数多的编码短,出现次数少的编码长。
2. 根据建造好的Huffman树形成编码,以对文件进行压缩。
3. 将文件中出现的字符以及它们出现的次数写入配置文件,以便后续的解压缩。
4. 根据配置文件读取相关信息重建Huffman树,对压缩后的文件进行译码。
FileCompress.h:
#pragma once #include "HuffmanTree.h" #include<algorithm> #include<windows.h> typedef long long LongType; struct FileInfo { unsigned char _ch; LongType _count; string _code; FileInfo(unsigned char ch = 0) :_ch(ch) , _count(0) {} FileInfo operator+(FileInfo& fi) { FileInfo tmp; tmp._count = this->_count + fi._count; return tmp; } bool operator < (FileInfo& fi) { return this->_count < fi._count; } bool operator != (const FileInfo& fi)const { return this->_count != fi._count; } }; template<class T> class FileCompress { public: FileCompress() { for (int i = 0; i < 256; ++i) { _infos[i]._ch = i; } } public: bool Compress(const char* filename) { //1.打开文件,统计文件字符出现的次数 long long Charcount = 0; assert(filename); FILE* fOut = fopen(filename, "rb"); assert(fOut); char ch = fgetc(fOut); while (ch != EOF) { _infos[(unsigned char)ch]._count++; ch = fgetc(fOut); Charcount++; } //2.生成对应的huffman编码 GenerateHuffmanCode(); //3.压缩文件 string compressFile = filename; compressFile += ".compress"; FILE* fwCompress = fopen(compressFile.c_str(), "wb"); assert(fwCompress); fseek(fOut, 0, SEEK_SET); ch = fgetc(fOut); char inch = 0; int index = 0; while (ch != EOF) { string& code = _infos[(unsigned char)ch]._code; for (size_t i = 0; i < code.size(); ++i) { inch = inch << 1; if (code[i] == '1') { inch |= 1; } if (++index == 8) { fputc(inch, fwCompress); inch = 0; index = 0; } } ch = fgetc(fOut); } if (index) { inch = inch << (8 - index); fputc(inch, fwCompress); } //4.配置文件,方便后续的解压缩 string configFile = filename; configFile += ".config"; FILE *fconfig = fopen(configFile.c_str(), "wb"); assert(fconfig); char CountStr[128]; _itoa(Charcount >> 32, CountStr, 10); fputs(CountStr, fconfig); fputc('\n', fconfig); _itoa(Charcount & 0xffffffff, CountStr, 10); fputs(CountStr, fconfig); fputc('\n', fconfig); FileInfo invalid; for (int i = 0; i < 256; i++) { if (_infos[i] != invalid) { fputc(_infos[i]._ch, fconfig); fputc(',', fconfig); fputc(_infos[i]._count + '0', fconfig); fputc('\n', fconfig); } } 
fclose(fOut); fclose(fwCompress); fclose(fconfig); return true; } bool UnCompresss(const char* filename) { string configfile = filename; configfile += ".config"; FILE* outConfig = fopen(configfile.c_str(), "rb"); assert(outConfig); char ch; long long Charcount = 0; string line = ReadLine(outConfig); Charcount = atoi(line.c_str()); Charcount <<= 32; line.clear(); line = ReadLine(outConfig); Charcount += atoi(line.c_str()); line.clear(); while (feof(outConfig)) { line = ReadLine(outConfig); if (!line.empty()) { ch = line[0]; _infos[(unsigned char)ch]._count = atoi(line.substr(2).c_str()); line.clear(); } else { line = '\n'; } } HuffmanTree<FileInfo> ht; FileInfo invalid; ht.CreatTree(_infos, 256, invalid); HuffmanTreeNode<FileInfo>* root = ht.GetRootNode(); string UnCompressFile = filename; UnCompressFile += ".uncompress"; FILE* fOut = fopen(UnCompressFile.c_str(), "wb"); string CompressFile = filename; CompressFile += ".compress"; FILE* fIn = fopen(CompressFile.c_str(), "rb"); int pos = 8; HuffmanTreeNode<FileInfo>* cur = root; ch = fgetc(fIn); while ((unsigned char)ch != EOF) { --pos; if ((unsigned char)ch &(1 << pos)) { cur = cur->_right; } else { cur = cur->_left; } if (cur->_left == NULL && cur->_right == NULL) { fputc(cur->_weight._ch, fOut); cur = root; Charcount--; } if (pos == 0) { ch = fgetc(fIn); pos = 8; } if (Charcount == 0) { break; } } fclose(outConfig); fclose(fIn); fclose(fOut); return true; } protected: string ReadLine(FILE* fConfig) { char ch = fgetc(fConfig); if(ch == EOF) { return ""; } string line; while (ch != '\n' && ch != EOF) { line += ch; ch = fgetc(fConfig); } return line; } void GenerateHuffmanCode() { HuffmanTree<FileInfo> hft; FileInfo invalid; hft.CreatTree(_infos, 256, invalid); _GenerateHuffmanCode(hft.GetRootNode()); } void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root) { if (root == NULL) { return; } _GenerateHuffmanCode(root->_left); _GenerateHuffmanCode(root->_right); if (root->_left == NULL && root->_right == NULL) { 
HuffmanTreeNode<FileInfo>* cur = root; HuffmanTreeNode<FileInfo>* parent = cur->_parent; string& code = _infos[cur->_weight._ch]._code; while (parent) { if (parent->_left == cur) { code += '0'; } else if (parent->_right == cur) { code += '1'; } cur = parent; parent = cur->_parent; } reverse(code.begin(), code.end()); } } private: FileInfo _infos[256]; }; void TestFileCompress() { FileCompress<FileInfo> fc; int begin1 = GetTickCount(); fc.Compress("test.txt"); int end1 = GetTickCount(); cout << end1 - begin1 << endl; int begin2 = GetTickCount(); fc.UnCompresss("test.txt"); int end2 = GetTickCount(); cout << end2 - begin2 << endl; }
HuffmanTree.h:
#pragma once #include "Heap.h" #include<assert.h> template<class T> struct HuffmanTreeNode { HuffmanTreeNode<T>* _left; HuffmanTreeNode<T>* _right; HuffmanTreeNode<T>* _parent; T _weight; HuffmanTreeNode(const T& x) :_weight(x) , _left(NULL) , _right(NULL) , _parent(NULL) {} }; template<class T> class HuffmanTree { typedef HuffmanTreeNode<T> Node; public: HuffmanTree() :_root(NULL) {} ~HuffmanTree() { Destory(_root); } template <class T> struct NodeCompare { bool operator()(Node *l, Node *r) { return l->_weight < r->_weight; } }; public: void CreatTree(const T* a, size_t size, const T& invalid) { assert(a); Heap<Node*, NodeCompare<T>> minHeap; for (size_t i = 0; i < size; ++i) { if (a[i] != invalid) { Node* node = new Node(a[i]); minHeap.Push(node); } } while (minHeap.Size() > 1) { Node* left = minHeap.Top(); minHeap.Pop(); Node* right = minHeap.Top(); minHeap.Pop(); Node* parent = new Node(left->_weight + right->_weight); parent->_left = left; parent->_right = right; left->_parent = parent; right->_parent = parent; minHeap.Push(parent); } _root = minHeap.Top(); } Node* GetRootNode() { return _root; } void Destory(Node* root) { if (root) { Destory(root->_left); Destory(root->_right); delete root; root = NULL; } } private: HuffmanTreeNode<T>* _root; }; void TestHuffmanTree() { int a[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }; HuffmanTree<int> hf; hf.CreatTree(a, 10, -1); }
Heap.h:
#pragma once
#include <assert.h>
#include <stddef.h>
#include <iostream>
#include <list>
#include <utility>
#include <vector>

/// Comparator based on operator<; with Heap it yields a min-heap.
template<class T>
struct Less
{
    bool operator()(const T& l, const T& r) const
    {
        return l < r;
    }
};

/// Comparator based on operator>; with Heap it yields a max-heap.
template<class T>
struct Greater
{
    bool operator()(const T& l, const T& r) const
    {
        return l > r;
    }
};

/// Binary heap stored in a vector. Compare(a, b) returning true means that
/// `a` must be closer to the top than `b`.
template<class T, class Compare = Less<T>>
class Heap
{
public:
    Heap()
    {}

    /// Builds a heap from the `size` elements starting at `a`.
    Heap(const T* a, size_t size)
    {
        if (size != 0)
        {
            _arrays.assign(a, a + size);
        }

        // Sift down every internal node, last one first. The index is computed
        // in int so that sizes 0 and 1 do not wrap around in size_t.
        for (int i = (static_cast<int>(_arrays.size()) - 2) / 2; i >= 0; --i)
        {
            AdjustDown(i);
        }
    }

    void Push(const T& x)
    {
        _arrays.push_back(x);
        AdjustUp(static_cast<int>(_arrays.size()) - 1);
    }

    /// Removes the top element. The heap must not be empty.
    void Pop()
    {
        assert(_arrays.size() > 0);
        std::swap(_arrays.front(), _arrays.back());
        _arrays.pop_back();
        AdjustDown(0);
    }

    /// Reference to the top element. The heap must not be empty.
    T& Top()
    {
        assert(_arrays.size() > 0);
        return _arrays[0];
    }

    bool Empty() const
    {
        return _arrays.empty();
    }

    int Size() const
    {
        return static_cast<int>(_arrays.size());
    }

    /// Moves the element at index `root` down until neither child has to be
    /// above it. A negative or out-of-range index is a no-op.
    void AdjustDown(int root)
    {
        if (root < 0)
        {
            return;
        }

        Compare com; // non-const: comparators with a non-const operator() still work
        const size_t count = _arrays.size();
        size_t parent = static_cast<size_t>(root);
        size_t child = parent * 2 + 1;
        while (child < count)
        {
            // Pick whichever child has to be closer to the top.
            if (child + 1 < count && com(_arrays[child + 1], _arrays[child]))
            {
                ++child;
            }
            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }
            std::swap(_arrays[child], _arrays[parent]);
            parent = child;
            child = parent * 2 + 1;
        }
    }

    /// Moves the element at index `child` up while it has to be above its parent.
    void AdjustUp(int child)
    {
        Compare com;
        while (child > 0)
        {
            const int parent = (child - 1) / 2;
            if (!com(_arrays[child], _arrays[parent]))
            {
                break;
            }
            std::swap(_arrays[parent], _arrays[child]);
            child = parent;
        }
    }

    /// Prints the elements in storage order, separated by spaces.
    void Print() const
    {
        for (size_t i = 0; i < _arrays.size(); ++i)
        {
            std::cout << _arrays[i] << " ";
        }
        std::cout << std::endl;
    }

public:
    std::vector<T> _arrays;
};

/// Thin queue facade over a default (Less) heap.
template<class T>
class PriorityQueue
{
public:
    void Push(const T& x)
    {
        _hp.Push(x);
    }

    void Pop()
    {
        _hp.Pop();
    }

public:
    Heap<T> _hp;
};

/// Builds a Greater heap and a Less heap from the same data, pushes 1 and prints both.
inline void Test1()
{
    int a[10] = {10, 11, 13, 12, 16, 18, 15, 17, 14, 19};
    Heap<int, Greater<int> > hp1(a, 10);
    hp1.Push(1);
    hp1.Print();

    Heap<int> hp2(a, 10);
    hp2.Push(1);
    hp2.Print();
}

/// Iterates a vector and a list over the half-open range [begin, end) and prints them.
inline void Test2()
{
    std::vector<int> v1;
    v1.push_back(1);
    v1.push_back(2);
    v1.push_back(3);
    v1.push_back(4);

    std::vector<int>::iterator it = v1.begin();
    while (it != v1.end())
    {
        std::cout << *it << " ";
        ++it;
    }
    std::cout << std::endl;

    std::list<int> l1;
    l1.push_back(1);
    l1.push_back(2);
    l1.push_back(3);
    l1.push_back(4);

    std::list<int>::iterator listIt = l1.begin();
    while (listIt != l1.end())
    {
        std::cout << *listIt << " ";
        ++listIt;
    }
    std::cout << std::endl;
}

/// Max-heap sift-down on a[0..size): moves a[root] down while a child is larger.
/// A negative or out-of-range `root` is a no-op.
inline void AdjustDown(int* a, size_t size, int root)
{
    if (root < 0)
    {
        return;
    }

    size_t parent = static_cast<size_t>(root);
    size_t child = parent * 2 + 1;
    while (child < size)
    {
        if (child + 1 < size && a[child + 1] > a[child])
        {
            ++child;
        }
        if (a[child] <= a[parent])
        {
            break;
        }
        std::swap(a[child], a[parent]);
        parent = child;
        child = parent * 2 + 1;
    }
}

/// Sorts a[0..size) in ascending order in place using a max-heap.
/// Arrays with fewer than two elements are left untouched.
inline void HeapSort(int* a, size_t size)
{
    if (a == nullptr || size < 2)
    {
        return; // also keeps the index arithmetic below from wrapping in size_t
    }

    // Build the heap: sift down every internal node, last one first.
    for (size_t i = size / 2; i-- > 0;)
    {
        AdjustDown(a, size, static_cast<int>(i));
    }

    // Move the current maximum to the end, then restore the shrunken heap.
    for (size_t end = size - 1; end > 0; --end)
    {
        std::swap(a[0], a[end]);
        AdjustDown(a, end, 0);
    }
}

/// Sorts a fixed sample array with HeapSort.
inline void TestHeapSort()
{
    int a[10] = {5, 9, 2, 3, 0, 1, 7, 8, 4, 6};
    HeapSort(a, 10);
}
Test.cpp:
#define _CRT_SECURE_NO_WARNINGS #include <iostream> using namespace std; //#include "HuffmanTree.h" #include "FileCompress.h" int main() { // TestHuffmanTree(); TestFileCompress(); return 0; }