基于Huffman编码的压缩和解压缩小软件(附C++源码)

压缩前:

Alt

将pic.png拖到.exe文件上,可得到.zLzip压缩文件:

Alt

编码过程:

基于Huffman编码的压缩和解压缩小软件(附C++源码)_第1张图片

压缩过程:

基于Huffman编码的压缩和解压缩小软件(附C++源码)_第2张图片

将.zLzip压缩文件拖回可解压缩得到原文件:

Alt
基于Huffman编码的压缩和解压缩小软件(附C++源码)_第3张图片
基于Huffman编码的压缩和解压缩小软件(附C++源码)_第4张图片
基于Huffman编码的压缩和解压缩小软件(附C++源码)_第5张图片

顺便一提,当原文件内字符分布均衡时,其信息熵很低,压缩效果不太好。

代码如下(编译器是TDM-GCC 4.9.2 64-bit Release, C++11标准)

main.cpp文件:

#include "HuffmanEncoderCompress.h"

void test();
void test2(int argc, char** argv);

int main(int argc, char** argv) {
	

//	test();					// 本地测试
	
	test2(argc, argv);		// 界面拖动 
	
	return 0;
}


// 本地测试 
void test() {
	
	HuffmanEncoderCompress* hecp = new HuffmanEncoderCompress("test\\pic.png");
	
	hecp->run();
//	hecp->printInfo();

	int n = 0;
	for (int i = 0; i < n; ++i) {
		HuffmanEncoderCompress* hecp2 = new HuffmanEncoderCompress(hecp->getOutputFileName(), true);
		hecp2->run();
		delete hecp;
		hecp = hecp2;
	}
	
	delete hecp;
}


// 读取输入(可在界面中拖动) 
void test2(int argc, char** argv) {
	if (argc < 2) exit(-1);
	
	HuffmanEncoderCompress* hecp = new HuffmanEncoderCompress(argv[1]);

	hecp->run();
	
	delete hecp;
	getchar();		// 防止运行界面闪退 
}

HuffmanEncoderCompress.h头文件:

#ifndef __HUFFMAN_ENCODER_COMPRESS_H__
#define __HUFFMAN_ENCODER_COMPRESS_H__

#include <string>
using std::string;


constexpr int _CODE_NUM = 256;			// 码数 
constexpr int _ZIP_NAME_LEN = 16;		// 压缩识别符长度 
constexpr int _FILE_NAME_LEN = 256;		//  文件名长度 
constexpr int _FILE_SIZE_LEN = 8; 		// 文件大小值长度 
constexpr int _CODE_FREQUENCY_LEN = 8;	//  码频值长度 
constexpr int _ZLZIP_HEAD_LEN = _ZIP_NAME_LEN + _FILE_NAME_LEN + _FILE_SIZE_LEN 	// 头文件信息长度 
		+ _CODE_FREQUENCY_LEN * _CODE_NUM;	
static const char _ZIP_NAME[_ZIP_NAME_LEN] = "zLimbo zLzip";	// 压缩 识别符 
		
		
// 编码类 
struct Code {
	unsigned char oldCode;				// 旧码 
	unsigned long long frequency;		// 频率 
	unsigned long long newCode;			// 新码 
	string newCodeStr;					// 新码字符串表示 debug 
	int length;							// 新码长度 
	
	Code(): oldCode(0), frequency(0), newCode(0), length(0) { }
};


// Huffman 节点 
struct HuffmanTreeNode {
	
	unsigned long long weight;			// 权重 
	Code* codePtr; 						// 指向码(叶子节点才有指向) 
	HuffmanTreeNode* left;				// 左分支 
	HuffmanTreeNode* right;				// 右分支 

	
	HuffmanTreeNode(unsigned long long w = 0, Code* cp = nullptr, 
			HuffmanTreeNode* l = nullptr, HuffmanTreeNode* r = nullptr):
		weight(w), codePtr(cp), left(l), right(r)
	{ }
};


// 自定义比较器,用于优先队列 
class CmparatorOfHuffmanTreeNode {
	public:
		 bool operator() (HuffmanTreeNode*& lhs, HuffmanTreeNode*& rhs) const {
	        return lhs->weight > rhs->weight;
	    }
};


class HuffmanEncoderCompress {
	
	private:
		
		Code _codes[_CODE_NUM];		// 码 
		
		bool _isCompress; 				// 是否压缩(用于多次压缩 ) 
		
		string _inputFileName;			// 输入文件名 
		string _outputFileName;			// 输出文件名 
		
		unsigned long long _inputFileSize;		// 输入文件大小 
		unsigned long long _outputFileSize;		// 输出文件大小 
		
		HuffmanTreeNode* _huffmanTreeRoot;		// Huffman树根节点 
	
	
	private:
		// 释放节点 
		freeNode(HuffmanTreeNode* np);
	
	public:
		// 构造 
		HuffmanEncoderCompress(const string& inputFileName, bool isCompress = false);
		
		// 析构 
		~HuffmanEncoderCompress();
		
		// 无法拷贝构造 
		HuffmanEncoderCompress(const HuffmanEncoderCompress& hec) = delete;
		
		// 运行 
		void run();
		
		// 统计频率 
		void statisticalFrequency();
		
		// 构建 Huffman 树 
		HuffmanTreeNode* buildHuffmanTree();
		
		// 获得 Huffman 编码 
		void getNewCodes(HuffmanTreeNode* np, unsigned long long newCode, string newCodeStr, int length);
		
		// 压缩文件 
		void compress();
		
		// 搜索节点,辅助于解码 
		bool findNode(HuffmanTreeNode*& np, unsigned char inputByte, int& pos);
		
		// 解压缩 
		void uncompress();
		
		// 打印哈夫曼编码信息 
		void printHuffmanEncodeInfo();
		
		// 打印压缩或解压缩信息 
		void printInfo(const char* type);
		
		// 获取输出文件名 
		string getOutputFileName() const { return _outputFileName; }
		
		// 判断两个文件是否相等 
		bool compare2File(const string& fileName1, const string& fileName2);
};


#endif

HuffmanEncoderCompress.cpp 文件

#include "HuffmanEncoderCompress.h"

#include <cstdio>
#include <cstring>
#include <queue>
using namespace std;


HuffmanEncoderCompress::HuffmanEncoderCompress(const string& inputFileName, bool isCompress):
		_isCompress(isCompress),
		_inputFileName(inputFileName), _outputFileName(inputFileName+".zLzip"), 
		_inputFileSize(0), _outputFileSize(0), _huffmanTreeRoot(nullptr)
{	
	for (int i = 0; i < _CODE_NUM; ++i) _codes[i].oldCode = i;
}


// 释放节点及其子树 
HuffmanEncoderCompress::freeNode(HuffmanTreeNode* np) {
	if (np) {
		freeNode(np->left);
		freeNode(np->right);
		delete np; 
	}
} 
	

// 析构 
HuffmanEncoderCompress::~HuffmanEncoderCompress() {
	freeNode(_huffmanTreeRoot); 
}


// 运行 
void HuffmanEncoderCompress::run() {
	
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	
	char zipName[_ZIP_NAME_LEN];
	fread(zipName, _ZIP_NAME_LEN, 1, inputFp);

	if (_isCompress || strcmp(zipName, _ZIP_NAME)) {		// 无识别符,非压缩文件 
		
		fclose(inputFp); 
		printf("开始压缩文件%s......\n", _inputFileName.c_str()); 
		printf("正在统计频率......\n");
		statisticalFrequency();			// 统计频率 
		printf("正在构建哈夫曼树......\n");
		_huffmanTreeRoot = buildHuffmanTree();		//  构建哈夫曼树 
		printf("正在产生新编码......\n");
		getNewCodes(_huffmanTreeRoot, 0, string(), 0);	// 获得新编码 
		printHuffmanEncodeInfo();
		printf("正在压缩......\n");
		compress();		// 压缩 
		printInfo("压缩"); 
		printf("压缩成功\n");
		
	} else {		// 有识别符,是压缩文件 
		
		printf("开始解压缩文件%s......\n", _inputFileName.c_str());
		printf("读取原始文件信息......\n"); 
		char outputFileName[_FILE_NAME_LEN];
		fread(outputFileName, _FILE_NAME_LEN, 1, inputFp);	// 读入原文件名 
		printf("原始文件名为%s\n", outputFileName); 
		_outputFileName = string(outputFileName); 
		fread(&_outputFileSize, _FILE_SIZE_LEN, 1, inputFp); 	// 读入原文件大小
		
		for (int i = 0; i < _CODE_NUM; ++i) 		// 读入字符频率表 
			fread(&_codes[i].frequency, _CODE_FREQUENCY_LEN, 1, inputFp);
			
		fclose(inputFp);
		printf("正在构建哈夫曼树......\n");
		_huffmanTreeRoot = buildHuffmanTree();		//  构建哈夫曼树 
		printf("正在产生新编码......\n");
		getNewCodes(_huffmanTreeRoot, 0, string(), 0);	// 获得新编码
		printHuffmanEncodeInfo();
		printf("正在解压缩......\n");
		uncompress();	// 解压缩 
		printInfo("解压缩");
		printf("解压成功\n");
	} 
}

// 统计频率 
void HuffmanEncoderCompress::statisticalFrequency() {
	// 打开文件 
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	// 统计频率 
	while (!feof(inputFp)) {
		unsigned char inputByte;
		fread(&inputByte, 1, 1, inputFp);
		if (feof(inputFp)) break;
		++_codes[inputByte].frequency;	
		++_inputFileSize;
	}
	fclose(inputFp);	
}


// 构建 Huffman 树 
HuffmanTreeNode* HuffmanEncoderCompress::buildHuffmanTree() {
	// 使用优先队列,自定义比较器 
	priority_queue<HuffmanTreeNode*, vector<HuffmanTreeNode*>, CmparatorOfHuffmanTreeNode> nps;
	// 初始化叶子节点 
	for (int i = 0; i < _CODE_NUM; ++i)
		nps.push(new HuffmanTreeNode(_codes[i].frequency, &_codes[i]));
	// 取最小两个节点合并 
	while (true) {
		HuffmanTreeNode* np1 = nps.top(); nps.pop();
		HuffmanTreeNode* np2 = nps.top(); nps.pop();
		HuffmanTreeNode* np3 = new HuffmanTreeNode(np1->weight + np2->weight, nullptr, np1, np2);
		if (!nps.empty()) nps.push(np3);	
		else return np3;	// 优先队列为空则构成 Huffman 树 
	}
}


// 获得 Huffman 编码 
void HuffmanEncoderCompress::getNewCodes(HuffmanTreeNode* np, unsigned long long newCode, string newCodeStr, int length) {
	// 是叶子节点则结束,记录编码 
	if (np->codePtr) {
		np->codePtr->newCode = newCode;
		np->codePtr->newCodeStr = newCodeStr;
		np->codePtr->length = length;
		return;
	}
	// 否则向左右分支探索 
	newCode <<= 1;
	++length; 
	if (np->left) getNewCodes(np->left, newCode, newCodeStr+"0", length);
	if (np->right) getNewCodes(np->right, newCode+1, newCodeStr+"1", length); 	
} 


// 压缩文件 
void HuffmanEncoderCompress::compress() {
	
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	
	FILE *outputFp = NULL;
	if ((outputFp = fopen(_outputFileName.c_str(), "wb")) == NULL) {
		printf("open file %s failed!\n", _outputFileName.c_str());
		exit(-1);
	}
	
	// 写入压缩文件头信息 
	fwrite(_ZIP_NAME, _ZIP_NAME_LEN, 1, outputFp);				// 识别符		 
	fwrite(_inputFileName.c_str(), _FILE_NAME_LEN, 1, outputFp);	// 文件名 
	fwrite(&_inputFileSize, _FILE_SIZE_LEN, 1, outputFp); 			// 文件大小 
	for (int i = 0; i < _CODE_NUM; ++i) 				// 字符频率,用以构建Huffman树 
		fwrite(&_codes[i].frequency, _CODE_FREQUENCY_LEN, 1, outputFp);
	
	// 压缩所需临时辅助变量 
	unsigned char inputByte;
	unsigned char outputByte = 0;
	unsigned long long newCode;
	int length;
	int cnt = 0;
	unsigned long long currentInputSize = 0;
	unsigned long long currentOutputSize = 0;
	double currRate = 0.0;
//	string s;		//debug
	
	while (!feof(inputFp)) {
		
		fread(&inputByte, 1, 1, inputFp);
		if (feof(inputFp)) break;
		
		// 记录速率 
		double rate = (double)(++currentInputSize) / _inputFileSize * 100;
		if (rate - currRate >= 10) {
			currRate = rate;
		//	system("cls");
			printf("已压缩:%.1f%%\t压缩率:%.2f%%\n", currRate, (double)_outputFileSize/currentInputSize*100);
		} 
		
		newCode = _codes[(int)inputByte].newCode;
		length = _codes[(int)inputByte].length;
//		printf("# %d(%s) ", newCode, _codes[(int)inputByte].newCodeStr.c_str());	//debug
		
		while (length--) {
			
			outputByte <<= 1;
			outputByte += (newCode>>length) & 1;
			
//			s += (newCode >> length) & 1 ? "1" : "0"; 		//debug
			if (++cnt == 8) {
				
//				printf("\t %d(%s)\n", (int)outputByte, s.c_str());								//debug
//				s.clear();																		//debug
//				system("pause");																//debug
//				if (length > 0) printf("# %s ", _codes[(int)inputByte].newCodeStr.c_str());		//debug
				fwrite(&outputByte, 1, 1, outputFp);
				outputByte = 0;
				cnt = 0;
				++_outputFileSize;
			}
		} 
	}
	
	if (cnt < 8) {	 // 最后一个不足8比特填充 0 
		outputByte <<= 8-cnt;
		fwrite(&outputByte, 1, 1, outputFp);
		++_outputFileSize;
	}
	
	//system("cls");
	// 打印压缩信息 
	printf("已压缩:%.1f%%\t压缩率:%.2f%%\n", 100.0, (double)_outputFileSize/currentInputSize*100);	
	
	fclose(inputFp);
	fclose(outputFp);
}


// 搜索节点,辅助于解码 
bool HuffmanEncoderCompress::findNode(HuffmanTreeNode*& np, unsigned char inputByte, int& pos) {
	// 叶子节点则搜索成功 
	if (np->codePtr) return true;
	if (pos < 0) return false;
	int val = (inputByte >> pos) & 1;
	--pos;
	if (val == 0) { 
		np = np->left;
		return findNode(np, inputByte, pos);
	} else {
		np = np->right;
		return findNode(np, inputByte, pos);
	}
}


// 解压缩 
void HuffmanEncoderCompress::uncompress() {
	
	FILE *inputFp = NULL;
	if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", _inputFileName.c_str());
		exit(-1);
	}
	
	FILE *outputFp = NULL;
	if ((outputFp = fopen(_outputFileName.c_str(), "wb")) == NULL) {
		printf("open file %s failed!\n", _outputFileName.c_str());
		exit(-1);
	}
	
	fseek(inputFp, _ZLZIP_HEAD_LEN, SEEK_SET);
	_inputFileSize = _ZLZIP_HEAD_LEN;
	
	// 解压缩所需临时变量 
	unsigned char inputByte;
	unsigned char outputByte;
	HuffmanTreeNode* np = _huffmanTreeRoot;
	unsigned long long currentOutputSize = 0;
	int pos;
	double currRate = 0.0;
	
	while (!feof(inputFp)) {
		
		fread(&inputByte, 1, 1, inputFp);
		if (feof(inputFp)) break;
		++_inputFileSize; 
		pos = 7;
		
//		printf("%d(", (int)inputByte);				//debug
//		for (int i = 7; i >= 0; --i) {				//debug
//			printf("%d", (inputByte >> i) & 1);		//debug
//		}											//debug
//		printf(") ");								//debug
		
		while (findNode(np, inputByte, pos)) {
			
//			if (!np) printf("np = nullptr\n");						//debug
//			if (!np->codePtr) printf("np->codePtr = nullptr\n");	//debug
			
			outputByte = np->codePtr->oldCode;
//			printf("\tfind: %d(%s) ", (int)outputByte, np->codePtr->newCodeStr.c_str());	//debug
				
			fwrite(&outputByte, 1, 1, outputFp);
			
			// 记录速率 
			double rate = (double)(++currentOutputSize) / _outputFileSize * 100;
			if (rate - currRate >= 10) {
				currRate = rate;
			//	system("cls");
				printf("已解压缩:%.1f%%\t解压缩率:%.2f%%\n", currRate, (double)currentOutputSize/_inputFileSize*100);
			} 
			
			if (currentOutputSize == _outputFileSize) {
				printf("已解压缩:%.1f%%\t解压缩率:%.2f%%\n", 100.0, (double)currentOutputSize/_inputFileSize*100);
				break;
			}
			
			np = _huffmanTreeRoot;
		} 
		
//		if (!findNode(np, inputByte, pos)) {		//debug
//			printf("\n");							//debug
//			system("pause");						//debug
//		}
	}
	fclose(inputFp);
	fclose(outputFp);
}


// 打印哈夫曼编码信息 
void HuffmanEncoderCompress::printHuffmanEncodeInfo() {
	//printf("file size: %d B, %f KB, %f MB\n", _inputFileSize, _inputFileSize/1024.0, _inputFileSize/1024.0/1024.0); 
	printf("%-10s %-10s %-20s %-5s %-10s\n", "原码", "频率", "哈夫曼编码", "长度", "十进制");
	
	for (int i = 0; i < _CODE_NUM; ++i) {
		Code &code = _codes[i];
		printf("%-10d %-10llu %-20s %-5d %-10llu\n", (int)code.oldCode, code.frequency, 
				code.newCodeStr.c_str(), code.length, code.newCode);
	}
}


// 打印压缩或解压缩信息 
void HuffmanEncoderCompress::printInfo(const char* type) {
	
	double compressRate = (double)_outputFileSize / _inputFileSize * 100;
	printf("%s率:%.2f%%\n", type, compressRate); 
	double inputFileSize = _inputFileSize, outputFileSize = _outputFileSize;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fB, 输出文件大小:%.2fB\n", inputFileSize, outputFileSize);
		return;
	} 
	
	inputFileSize /= 1024; outputFileSize /= 1024;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fKB, 输出文件大小:%.2fKB\n", inputFileSize, outputFileSize);
		return;
	} 
	
	inputFileSize /= 1024; outputFileSize /= 1024;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fMB, 输出文件大小:%.2fMB\n", inputFileSize, outputFileSize);
		return;
	} 
	
	inputFileSize /= 1024; outputFileSize /= 1024;
	if (inputFileSize < 1024) {
		printf("输入文件大小:%.2fGB, 输出文件大小:%.2fGB\n", inputFileSize, outputFileSize);
		return;
	} 
}


// 判断两个文件是否相等 
bool HuffmanEncoderCompress::compare2File(const string& fileName1, const string& fileName2) {
	
	FILE *fp1 = NULL;
	if ((fp1 = fopen(fileName1.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", fileName1.c_str());
		exit(-1);
	}
	
	FILE *fp2 = NULL;
	if ((fp2 = fopen(fileName2.c_str(), "rb")) == NULL) {
		printf("open file %s failed!\n", fileName2.c_str());
		exit(-1);
	}
	
	while (!feof(fp1) && !feof(fp2)) {
		unsigned char uch1, uch2;
		
		fread(&uch1, 1, 1, fp1);
		fread(&uch2, 1, 1, fp2);
		
		if (uch1 != uch2) return false;
	}
	
	if (!feof(fp1) || !feof(fp2)) return false;
	
	return true;
}

你可能感兴趣的:(基于Huffman编码的压缩和解压缩小软件(附C++源码))