利用哈夫曼树进行文件压缩

项目描述:


项目简介:利用哈夫曼编码的方式对文件进行压缩,并且对压缩文件可以解压

开发环境:windows vs2013

项目概述:

        1.压缩

            a.读取文件,将每个字符,该字符出现的次数和权值构成哈夫曼树

            b.哈夫曼树是利用小堆构成,字符出现次数少的节点指针存在堆顶,出现次数多的在堆底

            c.每次取堆顶的两个数,再将两个数相加进堆,直到堆被取完,这时哈夫曼树也建成

            d.从哈夫曼树中获取哈夫曼编码,然后再根据整个字符数组来获取出现了的字符的编码

            e.获取编码后每次凑满8位就将编码串写入到压缩文件(value每次先左移一位;编码为1时再与1做按位或,编码为0时只移位)

             f.写好配置文件,统计每个字符及其出现次数,并以“字符+','+次数”的形式保存到配置文件中

         2.解压

             a.读取配置文件,统计所有字符的个数

             b.构建哈夫曼树,读压缩文件,将所读到的编码对应节点所含的字符写入到解压缩文件中,直到将压缩文件读完

             c.压缩解压缩完全完成,进行小文件大文件的测试


代码如下:

#pragma once 
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string>

#include"HuffManTree.h"

struct CharInfo
{
	CharInfo(int count=0)
	:_count(count)
	{
	}

	bool operator<(const CharInfo info)
	{
		return _count < info._count;
	}

	bool operator>(const CharInfo info)
	{
		return _count>info._count;
	}

	bool operator!=(const CharInfo info)
	{
		return _count != info._count;
	}

	CharInfo operator+(const CharInfo Info)
	{
		return CharInfo(_count + Info._count);
	}

	char _ch;//字符
	int _count;//字符出现的次数
	string _code;//字符对应的编码
};

class FileCompress
{
public:
	FileCompress()
	{
		for (int i = 0; i < 256; i++)
		{
			_info[i]._ch = i;
			_info[i]._count = 0;
		}
	}

public:
	void  Compress(const char* FileName)//压缩
	{ 
		FILE* fout = fopen(FileName, "rb");
		assert(fout);
		
		//统计字符出现的次数
		int ch = fgetc(fout);
		printf("%c\n", ch);
		int count = 0;
		while (ch!= EOF)
		{
			_info[unsigned char(ch)]._count++;
			ch = fgetc(fout);
			count++;
		}

		//构建哈夫曼树
		CharInfo invalid;
		HuffManTree h(_info, 256, invalid);

		//生成哈夫曼编码
		string code;
		_GetHuffManCode(h._GetRoot(), code);

		string CompressFileName = FileName;
		CompressFileName += ".compress";
		FILE* fin = fopen(CompressFileName.c_str(), "wb");
		assert(fin);
		fseek(fout, 0, SEEK_SET);//从文件开头

		ch =(unsigned char)fgetc(fout);

		char value = 0;
		int size = 0;
		while (ch != EOF)
		{
			string _ccode = _info[(unsigned char)ch]._code;
			for (int i = 0; i < _ccode.size(); ++i)
			{
				value <<= 1;
				if (_ccode[i] =='1')
				{
					value |=1;
				}
				size++;
				if (size == 8)
				{
					fputc(value, fin);
					value = 0;
					size = 0;
				}
				
			}
			ch = fgetc(fout);
		}
		//补位
		if (size!=0)
		{
			value <<= ( 8- size);
			fputc(value, fin);
		}

		//写配置文件
		string configFileName = FileName;
		configFileName += ".config.txt";
		FILE* finConfig = fopen(configFileName.c_str(), "wb");
		assert(finConfig);
	
		string str;
		char buf[128];
		for (int i = 0; i < 256; i++)
		{
			if (_info[i]._count>0)
			{
				str += _info[i]._ch;
				str += ',';
				_itoa(_info[i]._count, buf, 10);
				str += buf;
				str += '\n';

				fputs(str.c_str(), finConfig);
				str.clear();
			}
		}
		
	

		fclose(fin);
		fclose(fout);
		fclose(finConfig);
	}

	void unCompress(const char* FileName)//解压缩
	{
		//读配置文件
		string configFileNane = FileName;
		configFileNane += ".config.txt";
		FILE* foutConfig = fopen(configFileNane.c_str(), "rb");
		assert(foutConfig);
		int count = 0;
		string str;
		while (Read_a_Line(foutConfig,str))
		{
			if (str.empty())
			{
				str += '\n';
				count += 1;
				str.clear();
			}
			//else
			//{
			//	//_info[(unsigned char)str[0]] = atoi(str.substr(2).c_str());
			//	count += _info[(unsigned char)str[0]]._count;
			//	str.clear();
			//}	
		//	_info[((unsigned char)str[0])]._count = atoi(str.substr(2).c_str());
			//count += _info[(unsigned char)str[0]]._count;

			else
			{
				unsigned char ch = str[0];
				_info[ch]._count = atoi(str.substr(2).c_str());
				count += _info[ch]._count;
				str.clear();
			}
		
		}

		CharInfo invaild;
		HuffManTree tree(_info, 256,invaild);

		string unCompressFileName = FileName;
		unCompressFileName += ".unCompress";//解压缩文件
		string CompressFileName = FileName;
		CompressFileName += ".compress";

		FILE* fout = fopen(CompressFileName.c_str(), "rb");
		assert(fout);
		FILE* fin = fopen(unCompressFileName.c_str(), "wb");
		assert(fin);

		HuffManTreeNode* root = tree._GetRoot();
		HuffManTreeNode* cur = root;
		int ch = fgetc(fout);
		int size =7;

		while (ch != EOF)
		{
			if (ch & (1 << size))
			{
				cur = cur->_right;
			}
			else
			{
				cur = cur->_left;
			}
			if (cur->_left==NULL&&cur->_right==NULL)
			{
				fputc(cur->_weight._ch, fin);
				cur = root;
				//count--;
				//if (count == 0)
				//	break;
			}
			size--;
			if (size<0)
			{
				ch=fgetc(fout);
				size = 7;
			}
		
		}

		fclose(fin);
		fclose(fout);
		fclose(foutConfig);
	}

protected:
	bool Read_a_Line(FILE*& fout,string& str)
	{
		int ch = fgetc(fout);
		if (ch == EOF)
			return false;
		
		while (ch != EOF&&ch!='\n')
		{
			str += ch;
			ch = fgetc(fout);
		}
		return true;
	}

	void _GetHuffManCode(const HuffManTreeNode* root,string code)//生成哈夫曼编码
	{
		if (root == NULL)
		{
			return;
		}
		if (root->_left == NULL&&root->_right == NULL)
		{
			_info[unsigned char((root->_weight)._ch)]._code = code;
			return;
		}
		if (root->_left)
			_GetHuffManCode(root->_left, code + '0');//左路为0
		if (root->_right)
			_GetHuffManCode(root->_right, code + '1');//右路为1
		
	}

private:
	CharInfo _info[256];
};


#pragma once 
#include
#include"Heap.h"
using namespace std;


template
struct HuffManTreeNode
{
	HuffManTreeNode(const T& weight)
	:_left(NULL)
	,_right(NULL)
	,_weight(weight)
	{
	}
	HuffManTreeNode* _left;
	HuffManTreeNode* _right;
	T _weight;
};


template
class HuffManTree
{
public:
	typedef HuffManTreeNode Node;
public:

	HuffManTree(T* arr, int size, T& invalid)//建立一个小堆
	{
		struct CompareNode
		{
			bool operator()(Node*& L,Node*& R)
			{
				return L->_weight < R->_weight;
			}
		};
		Heap MinHeap;

		for (int i = 0; i < size; i++)
		{
			if (arr[i]!=invalid)
				MinHeap.Push(new Node(arr[i]));
		}
						
		while (MinHeap.Size()>1)
		{
			Node* left = MinHeap.Top();
			MinHeap.Pop();
			Node* right = MinHeap.Top();
			MinHeap.Pop();
			Node* parent = new Node(left->_weight + right->_weight);
			parent->_left = left;
			parent->_right = right;
			MinHeap.Push(parent);
		}

		_root = MinHeap.Top();
		MinHeap.Pop();
	}

	Node* _GetRoot()
	{
		return _root;
	}

private:
	Node* _root;
};			


#pragma once 
#include

#include
using namespace std;
#include


template
struct Small
{
public:
	bool operator()(const T& l, const T& r)
	{
		return l < r;
	}
};
//
//template//可用来建大堆
//struct Big
//{
//	bool operator()(const T& l, const T& r)
//	{
//		return l > r;
//	}
//};

template>//建立小堆
class Heap
{
public:
	
	Heap()
	{
	}

	Heap(const T* arr,int size)
	{
		for (int i = 0; i < size; i++)
		{
			_v.push_back(arr[i]);
		}

		for (int i = _v.size() / 2-1; i>=0; i--)
		{
			_AdjustDown(i);
		}
	}

	~Heap()
	{}

	void Push(const T& d)
	{
		_v.push_back(d);
		_AdjustUp(_v.size()-1);
	}

	int Size()
	{
		return _v.size();
	}

	T& Top()
	{
		return *(_v.begin());
	}

	void Pop()//用交换法
	{
		swap(_v[0], _v[_v.size()-1]);
		_v.pop_back();
		_AdjustDown(0);
	}
protected:
	void _AdjustDown(int parent)//向下调整
	{
		CompareNode compareNode;
		int child = 2 * parent + 1;

		while (child < _v.size())
		{
			if (child + 1 < _v.size() && compareNode(_v[child + 1], _v[child]))//找较小的child
			{
				child++;
			}
			if (compareNode(_v[child], _v[parent]))
			{
				swap(_v[parent], _v[child]);
				parent = child;
				child = 2 * parent + 1;
			}
			else
				break;
		}
	}

	void _AdjustUp(int child)//向上调整
	{
		CompareNode compareNode;
		int parent = (child-1)/2;
		while (child>0)
		{
			/*if (child + 1 < _v.size() && compareNode(_v[child + 1], _v[child]))
			{
				child++;
			}
*/
			if (compareNode(_v[child], _v[parent]))
			{
				swap(_v[parent], _v[child]);
				child = parent;
				parent = (child - 1) / 2;
			}
			else
				break;
		}
	}
private:
	vector _v;
};


#include"FileCompress.h"

void test()
{
	FileCompress f;
	f.Compress("input.txt");

	f.unCompress("input.txt");
}


int main()
{
	test();
	system("pause");
	return 0;
}



你可能感兴趣的:(数据结构和算法)