项目之文件的压缩及解压缩

项目描述:实现文件的压缩及解压缩。
开发平台:VS2013
开发技术:堆,Huffman树,文件输入输出函数
项目特点:1. 统计文件中字符出现的次数,利用数据结构堆建造Huffman树,出现次数多的编码短,出现次数少的编码长。
                  2. 根据建造好的Huffman树形成编码,以对文件进行压缩。

                  3. 将文件中出现的字符以及它们出现的次数写入配置文件,以便后续的解压缩。
                  4. 根据配置文件读取相关信息重建Huffman树,对压缩后的文件进行译码。

FileCompress.h:

#pragma once

#include "HuffmanTree.h"

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <string>

#include <windows.h>

typedef long long LongType;

// Per-byte record used as the weight type of the Huffman tree.
// All comparisons and the sum look at _count only; _ch and _code are ignored.
struct FileInfo
{
	unsigned char _ch;      // byte value this record describes
	LongType      _count;   // occurrence counter, starts at 0
	std::string   _code;    // code text for _ch, filled in by the compressor

	// Creates a record for byte `ch` with a zero count and an empty code.
	// Intentionally not explicit: existing callers may rely on the implicit conversion.
	FileInfo(unsigned char ch = 0)
		: _ch(ch)
		, _count(0)
	{}

	// Returns a record whose _count is the sum of both operands' counts.
	// The result's _ch is 0 and its _code is empty.
	// Const-correct so that it also works on const objects and temporaries (a + b + c).
	FileInfo operator+(const FileInfo& fi) const
	{
		FileInfo sum;
		sum._count = _count + fi._count;
		return sum;
	}

	// Orders records by occurrence count.
	bool operator<(const FileInfo& fi) const
	{
		return _count < fi._count;
	}

	// Two records differ when their occurrence counts differ.
	bool operator!=(const FileInfo& fi) const
	{
		return _count != fi._count;
	}
};

template<class T>
class FileCompress
{
public:
	FileCompress()
	{
		for (int i = 0; i < 256; ++i)
		{
			_infos[i]._ch = i;
		}
	}

public:

	bool Compress(const char* filename)
	{
		//1.打开文件,统计文件字符出现的次数
		long long Charcount = 0;
		assert(filename);
		FILE* fOut = fopen(filename, "rb");
		assert(fOut);

		char ch = fgetc(fOut);

		while (ch != EOF)
		{
			_infos[(unsigned char)ch]._count++;
			ch = fgetc(fOut);
			Charcount++;
		}		

		//2.生成对应的huffman编码
		GenerateHuffmanCode();

		//3.压缩文件
		string compressFile = filename;
		compressFile += ".compress";
		FILE* fwCompress = fopen(compressFile.c_str(), "wb");
		assert(fwCompress);

		fseek(fOut, 0, SEEK_SET);
		ch = fgetc(fOut);
		char inch = 0;
		int index = 0;
		while (ch != EOF)
		{
			string& code = _infos[(unsigned char)ch]._code;
			for (size_t i = 0; i < code.size(); ++i)
			{
				inch = inch << 1;
				if (code[i] == '1')
				{
					inch |= 1;
				}
				if (++index == 8)
				{
					fputc(inch, fwCompress);
					inch = 0;
					index = 0;
				}
			}
			ch = fgetc(fOut);
		}

		if (index)
		{
			inch = inch << (8 - index);
			fputc(inch, fwCompress);
		}

		//4.配置文件,方便后续的解压缩
		string configFile = filename;
		configFile += ".config";
		FILE *fconfig = fopen(configFile.c_str(), "wb");
		assert(fconfig);

		char CountStr[128];
		_itoa(Charcount >> 32, CountStr, 10);
		fputs(CountStr, fconfig);
		fputc('\n', fconfig);
		_itoa(Charcount & 0xffffffff, CountStr, 10);
		fputs(CountStr, fconfig);
		fputc('\n', fconfig);

		FileInfo invalid;
		for (int i = 0; i < 256; i++)
		{
			if (_infos[i] != invalid)
			{
				fputc(_infos[i]._ch, fconfig);
				fputc(',', fconfig);
				fputc(_infos[i]._count + '0', fconfig);
				fputc('\n', fconfig);
			}
		}

		fclose(fOut);
		fclose(fwCompress);
		fclose(fconfig);

		return true;
	}

	bool UnCompresss(const char* filename)
	{
		string configfile = filename;
		configfile += ".config";
		FILE* outConfig = fopen(configfile.c_str(), "rb");
		assert(outConfig);
		char ch;
		long long Charcount = 0;
		string line = ReadLine(outConfig);
		Charcount = atoi(line.c_str());
		Charcount <<= 32;
		line.clear();
		line = ReadLine(outConfig);
		Charcount += atoi(line.c_str());
		line.clear();

		while (feof(outConfig))
		{
			line = ReadLine(outConfig);
			if (!line.empty())
			{
				ch = line[0];
				_infos[(unsigned char)ch]._count = atoi(line.substr(2).c_str());
				line.clear();
			}
			else
			{
				line = '\n';
			}
		}

		HuffmanTree<FileInfo> ht;
		FileInfo invalid;
		ht.CreatTree(_infos, 256, invalid);

		HuffmanTreeNode<FileInfo>* root = ht.GetRootNode();

		string	UnCompressFile = filename;
		UnCompressFile += ".uncompress";
		FILE* fOut = fopen(UnCompressFile.c_str(), "wb");

		string CompressFile = filename;
		CompressFile += ".compress";
		FILE* fIn = fopen(CompressFile.c_str(), "rb");

		int pos = 8;
		HuffmanTreeNode<FileInfo>* cur = root;
		ch = fgetc(fIn);

		while ((unsigned char)ch != EOF)
		{
			--pos;
			if ((unsigned char)ch &(1 << pos))
			{
				cur = cur->_right;
			}
			else
			{
				cur = cur->_left;
			}
			if (cur->_left == NULL && cur->_right == NULL)
			{
				fputc(cur->_weight._ch, fOut);
				cur = root;
				Charcount--;
			}
			if (pos == 0)
			{
				ch = fgetc(fIn);
				pos = 8;
			}
			if (Charcount == 0)
			{
				break;
			}
		}

		fclose(outConfig);
		fclose(fIn);
		fclose(fOut);
		return true;
	}

protected:

	string ReadLine(FILE* fConfig)
	{
		char ch = fgetc(fConfig);
		if(ch == EOF)
		{
			return "";
		}
		string line;
		while (ch != '\n' && ch != EOF)
		{
			line += ch;
			ch = fgetc(fConfig);
		}
		return line;
	}

	void GenerateHuffmanCode()
	{
		HuffmanTree<FileInfo> hft;
		FileInfo invalid;
		hft.CreatTree(_infos, 256, invalid);
		_GenerateHuffmanCode(hft.GetRootNode());
	}

	void _GenerateHuffmanCode(HuffmanTreeNode<FileInfo>* root)
	{
		if (root == NULL)
		{
			return;
		}

		_GenerateHuffmanCode(root->_left);
		_GenerateHuffmanCode(root->_right);

		if (root->_left == NULL && root->_right == NULL)
		{
			HuffmanTreeNode<FileInfo>* cur = root;
			HuffmanTreeNode<FileInfo>* parent = cur->_parent;
			string& code = _infos[cur->_weight._ch]._code;

			while (parent)
			{
				if (parent->_left == cur)
				{
					code += '0';
				}
				else if (parent->_right == cur)
				{
					code += '1';
				}
				cur = parent;
				parent = cur->_parent;
			}

			reverse(code.begin(), code.end());
		}
	}

private:
	FileInfo _infos[256];
};

void TestFileCompress()
{

	FileCompress<FileInfo> fc;

	int begin1 = GetTickCount();
	fc.Compress("test.txt");
	int end1 = GetTickCount();
	cout << end1 - begin1 << endl;

	int begin2 = GetTickCount();
	fc.UnCompresss("test.txt");
	int end2 = GetTickCount();
	cout << end2 - begin2 << endl;

}

HuffmanTree.h:

#pragma once

#include "Heap.h"
#include<assert.h>


// Node of a Huffman tree: two child links, a back-link to the parent and the
// weight payload. A freshly constructed node is not linked to anything.
template<class T>
struct HuffmanTreeNode
{
	HuffmanTreeNode* _left;		// left child, NULL when absent
	HuffmanTreeNode* _right;	// right child, NULL when absent
	HuffmanTreeNode* _parent;	// parent node, NULL when absent
	T _weight;					// payload copied from the constructor argument

	// Stores a copy of `x` as the weight; all three links start out as NULL.
	HuffmanTreeNode(const T& x)
		: _left(NULL)
		, _right(NULL)
		, _parent(NULL)
		, _weight(x)
	{}
};

// Huffman tree over weights of type T.
// T must support operator!= (to filter out the "invalid" value), operator<
// (ordering inside the heap) and operator+ (weight of a merged node).
// The tree owns all of its nodes and deletes them in the destructor.
template<class T>
class HuffmanTree
{
	typedef HuffmanTreeNode<T> Node;

public:

	HuffmanTree()
		:_root(NULL)
	{}

	~HuffmanTree()
	{
		Destory(_root);
	}

	// Copying would make two trees delete the same nodes, so it is disabled.
	HuffmanTree(const HuffmanTree&) = delete;
	HuffmanTree& operator=(const HuffmanTree&) = delete;

	// Orders node pointers by the weight they point to.
	// The template parameter is unused; it is named U (not T) because re-using the
	// enclosing class's parameter name is ill-formed, and it is kept so that
	// the existing spelling NodeCompare<T> stays valid.
	template <class U>
	struct NodeCompare
	{
		bool operator()(Node *l, Node *r) const
		{
			return l->_weight < r->_weight;
		}
	};

public:
	// Builds the tree from a[0..size). Elements equal to `invalid` (per operator!=)
	// are skipped. Any tree built by an earlier call is released first.
	// When no valid element exists the tree is empty and GetRootNode() returns NULL.
	void CreatTree(const T* a, size_t size, const T& invalid)
	{
		assert(a);

		Destory(_root);
		_root = NULL;

		// The node with the smallest weight is kept at the top of the heap.
		Heap<Node*, NodeCompare<T> > minHeap;
		for (size_t i = 0; i < size; ++i)
		{
			if (a[i] != invalid)
			{
				Node* node = new Node(a[i]);
				minHeap.Push(node);
			}
		}

		// Repeatedly merge the two lightest nodes under a new parent.
		while (minHeap.Size() > 1)
		{
			Node* left = minHeap.Top();
			minHeap.Pop();
			Node* right = minHeap.Top();
			minHeap.Pop();

			Node* parent = new Node(left->_weight + right->_weight);
			parent->_left = left;
			parent->_right = right;
			left->_parent = parent;
			right->_parent = parent;

			minHeap.Push(parent);
		}

		// Top() must not be called on an empty heap.
		if (!minHeap.Empty())
		{
			_root = minHeap.Top();
		}
	}

	// Returns the root, or NULL for an empty tree. The tree keeps ownership.
	Node* GetRootNode()
	{
		return _root;
	}

	// Deletes `root` and everything below it (post-order).
	// The caller's pointer is not modified.
	void Destory(Node* root)
	{
		if (root)
		{
			Destory(root->_left);
			Destory(root->_right);
			delete root;
		}
	}

private:
	HuffmanTreeNode<T>* _root;
};

void TestHuffmanTree()
{
	int a[10] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 };
	HuffmanTree<int> hf;
	hf.CreatTree(a, 10, -1);
}

Heap.h:

#pragma once
#include <vector>
#include<assert.h>

// "Less than" comparator. Used as the Heap comparator it keeps the smallest
// element at the top (min-heap).
template<class T>
struct Less
{
	// Returns true when l < r (uses T's operator<).
	// Const so the comparator can also be invoked through a const object.
	bool operator() (const T& l, const T& r) const
	{
		return l < r;
	}
};

// "Greater than" comparator. Used as the Heap comparator it keeps the largest
// element at the top (max-heap).
template<class T>
struct Greater
{
	// Returns true when l > r (uses T's operator>).
	// Const so the comparator can also be invoked through a const object.
	bool operator() (const T& l, const T& r) const
	{
		return l > r;
	}
};

// Binary heap stored in a vector. The element for which Compare()(x, other) holds
// against all others sits at the top: Less<T> (default) gives a min-heap,
// Greater<T> a max-heap.
template<class T, class Compare = Less<T> >
class Heap
{
public:
	Heap()
	{}

	// Builds a heap from a[0..size). An empty range (size == 0 or a == NULL)
	// yields an empty heap.
	Heap(const T* a, size_t size)
	{
		if (a != NULL && size > 0)
		{
			_arrays.assign(a, a + size);
		}

		// Sift down every node that has a child, last parent first. Counting down
		// from size/2 avoids the unsigned wrap-around of (size - 2) / 2 that
		// occurs for fewer than two elements.
		for (size_t i = _arrays.size() / 2; i > 0; --i)
		{
			AdjustDown(static_cast<int>(i - 1));
		}
	}

	// Adds a copy of x.
	void Push(const T& x)
	{
		_arrays.push_back(x);
		AdjustUp(static_cast<int>(_arrays.size() - 1));
	}

	// Removes the top element. Calling it on an empty heap is a programming error:
	// it asserts in debug builds and does nothing when asserts are disabled.
	void Pop()
	{
		assert(_arrays.size() > 0);
		if (_arrays.empty())
		{
			return;
		}

		using std::swap;
		swap(_arrays.front(), _arrays.back());
		_arrays.pop_back();

		AdjustDown(0);
	}

	// Returns the top element; the heap must not be empty.
	T& Top()
	{
		assert(_arrays.size() > 0);
		return _arrays[0];
	}

	const T& Top() const
	{
		assert(_arrays.size() > 0);
		return _arrays[0];
	}

	bool Empty() const
	{
		return _arrays.empty();
	}

	int Size() const
	{
		return static_cast<int>(_arrays.size());
	}

	// Sifts the element at index `root` down until neither child should be above it.
	// Negative indices are ignored.
	void AdjustDown(int root)
	{
		if (root < 0)
		{
			return;
		}

		Compare com;
		const size_t count = _arrays.size();
		size_t parent = static_cast<size_t>(root);
		size_t child = parent * 2 + 1;
		while (child < count)
		{
			// Pick whichever child should be closer to the top.
			if (child + 1 < count && com(_arrays[child + 1], _arrays[child]))
			{
				++child;
			}

			if (!com(_arrays[child], _arrays[parent]))
			{
				break;
			}

			using std::swap;
			swap(_arrays[child], _arrays[parent]);
			parent = child;
			child = parent * 2 + 1;
		}
	}

	// Sifts the element at index `child` up while it should be above its parent.
	// Indices outside the heap are ignored.
	void AdjustUp(int child)
	{
		if (child < 0 || static_cast<size_t>(child) >= _arrays.size())
		{
			return;
		}

		Compare com;
		size_t cur = static_cast<size_t>(child);
		while (cur > 0)
		{
			const size_t parent = (cur - 1) / 2;
			if (!com(_arrays[cur], _arrays[parent]))
			{
				break;
			}

			using std::swap;
			swap(_arrays[parent], _arrays[cur]);
			cur = parent;
		}
	}

	// Prints the elements in storage order, each followed by a space, then a newline.
	void Print() const
	{
		for (size_t i = 0; i < _arrays.size(); ++i)
		{
			std::cout<<_arrays[i]<<" ";
		}
		std::cout<<std::endl;
	}

public:
	std::vector<T> _arrays;	// heap storage; index 0 is the top
};

// Thin wrapper that forwards Push/Pop to a Heap<T> using the heap's default comparator.
template<class T>
class PriorityQueue
{
public:
	// Inserts a copy of x into the underlying heap.
	void Push(const T& x) { _hp.Push(x); }

	// Removes the element currently at the top of the underlying heap.
	void Pop() { _hp.Pop(); }

public:
	Heap<T> _hp;	// backing heap, publicly accessible
};

void Test1()
{
	int a[10] = {10, 11, 13, 12, 16, 18, 15, 17, 14, 19};
	Heap<int, Greater<int> > hp1(a, 10);
	hp1.Push(1);
	hp1.Print();

	Heap<int> hp2(a, 10);
	hp2.Push(1);
	hp2.Print();


	//Less<int> less;
	//cout<<less(1, 2)<<endl;

	//Greater<int> greater;
	//cout<<greater(1, 2)<<endl;
}

#include <list>

// Fills a vector and a list with 1..4 and prints each one through its iterators,
// every element followed by a space and each container terminated by a newline.
void Test2()
{
	vector<int> numbers;
	for (int value = 1; value <= 4; ++value)
	{
		numbers.push_back(value);
	}

	// Iterators cover the half-open range [begin, end).
	for (vector<int>::iterator pos = numbers.begin(); pos != numbers.end(); ++pos)
	{
		cout<<*pos<<" ";
	}
	cout<<endl;

	list<int> linked;
	for (int value = 1; value <= 4; ++value)
	{
		linked.push_back(value);
	}

	for (list<int>::iterator pos = linked.begin(); pos != linked.end(); ++pos)
	{
		cout<<*pos<<" ";
	}
	cout<<endl;
}

// Max-heap sift-down on a[0..size): moves the value at index `root` downwards,
// swapping it with its larger child, until it is not smaller than that child.
void AdjustDown(int* a, size_t size, int root)
{
	for (int child = root*2+1; child < size; child = 2*root+1)
	{
		// Prefer the right sibling when it exists and holds the larger value.
		const int sibling = child+1;
		if (sibling < size && a[sibling] > a[child])
		{
			child = sibling;
		}

		// Heap property already holds at this position: done.
		if (!(a[child] > a[root]))
		{
			return;
		}

		swap(a[child], a[root]);
		root = child;
	}
}


// Sorts a[0..size) into ascending order with an in-place heap sort.
// Ranges with fewer than two elements (or a NULL array) are left untouched.
void HeapSort(int* a, size_t size)
{
	// Nothing to sort. This also avoids the unsigned wrap-around of
	// (size - 2) / 2 that the heap construction would hit for size 0 or 1.
	if (a == NULL || size < 2)
	{
		return;
	}

	// Build a max-heap: sift down every node that has a child, last parent first.
	for (size_t i = size / 2; i > 0; --i)
	{
		AdjustDown(a, size, static_cast<int>(i - 1));
	}

	// Move the current maximum behind the shrinking heap, then restore the heap
	// property on the remaining prefix.
	for (size_t end = size - 1; end > 0; --end)
	{
		std::swap(a[0], a[end]);
		AdjustDown(a, end, 0);
	}
}

// Runs HeapSort over a fixed ten-element array; the result is not checked or printed.
void TestHeapSort()
{
	int data[] = {5, 9, 2, 3, 0, 1, 7, 8, 4, 6};
	const size_t dataCount = sizeof(data) / sizeof(data[0]);
	HeapSort(data, dataCount);
}


Test.cpp:

#define _CRT_SECURE_NO_WARNINGS

#include <iostream>
using namespace std;

//#include "HuffmanTree.h"
#include "FileCompress.h"

// Program entry point: runs the file compression round-trip test.
int main()
{
	// The Huffman-tree-only smoke test is currently disabled:
	// TestHuffmanTree();

	TestFileCompress();

	return 0;
}



你可能感兴趣的:(数据结构,项目)