压缩前:
将pic.png拖到.exe文件上,可得到.zLzip压缩文件:
编码过程:
压缩过程:
将.zLzip压缩文件拖回可解压缩得到原文件:
顺便一提,当原文件内各字节取值的分布比较均衡时(例如 png 这类本身已经压缩过的文件),其信息熵很高,哈夫曼编码的压缩效果就不太好。
代码如下(编译器是TDM-GCC 4.9.2 64-bit Release, C++11标准)
main.cpp文件:
#include "HuffmanEncoderCompress.h"
// Forward declarations of the two drivers defined further down in this file.
void test();
void test2(int argc, char** argv);

/**
 * Program entry point.
 *
 * Hands the command line straight to test2(); test() is declared but is not
 * invoked from here.
 *
 * @return always 0.
 */
int main(int argc, char** argv)
{
    test2(argc, argv);
    return 0;
}
void test() {
HuffmanEncoderCompress* hecp = new HuffmanEncoderCompress("test\\pic.png");
hecp->run();
int n = 0;
for (int i = 0; i < n; ++i) {
HuffmanEncoderCompress* hecp2 = new HuffmanEncoderCompress(hecp->getOutputFileName(), true);
hecp2->run();
delete hecp;
hecp = hecp2;
}
delete hecp;
}
/**
 * Command-line driver: runs the compressor/decompressor on argv[1] and then
 * waits for a key press so the console output stays visible.
 *
 * @param argc argument count as received by main().
 * @param argv argument vector as received by main(); argv[1] is the file to
 *             process.
 *
 * Exits the process with status -1 (after printing a usage line) when no
 * file argument was supplied.
 */
void test2(int argc, char** argv) {
    if (argc < 2) {
        printf("usage: %s <file>\n", argc > 0 ? argv[0] : "zLzip");
        exit(-1);
    }
    {
        // Automatic storage instead of new/delete: the object is destroyed
        // at the end of this scope on every path, before we block on input.
        HuffmanEncoderCompress hecp(argv[1]);
        hecp.run();
    }
    getchar();
}
HuffmanEncoderCompress.h头文件:
#ifndef __HUFFMAN_ENCODER_COMPRESS_H__
#define __HUFFMAN_ENCODER_COMPRESS_H__
#include <string>
using std::string;
// Number of distinct input symbols: one per possible byte value.
constexpr int _CODE_NUM = 256;
// Size in bytes of the signature field at the very start of an archive.
constexpr int _ZIP_NAME_LEN = 16;
// Size in bytes of the archive field that stores the original file name.
constexpr int _FILE_NAME_LEN = 256;
// Size in bytes of the archive field that stores the original file size.
constexpr int _FILE_SIZE_LEN = 8;
// Size in bytes of each per-symbol frequency entry stored in the archive.
constexpr int _CODE_FREQUENCY_LEN = 8;
// Total archive header size: signature + file name + file size + one
// frequency entry per symbol. The encoded payload starts at this offset.
constexpr int _ZLZIP_HEAD_LEN = _ZIP_NAME_LEN + _FILE_NAME_LEN + _FILE_SIZE_LEN
+ _CODE_FREQUENCY_LEN * _CODE_NUM;
// Signature written at the start of every archive; the unused tail of the
// array is zero-filled by the string-literal initialization.
static const char _ZIP_NAME[_ZIP_NAME_LEN] = "zLimbo zLzip";
/**
 * Per-symbol record: the original byte value, how often it occurs, and the
 * Huffman code assigned to it.
 */
struct Code {
    unsigned char oldCode = 0;         // original byte value
    unsigned long long frequency = 0;  // occurrence count of the byte
    unsigned long long newCode = 0;    // Huffman code bits as an integer
    string newCodeStr;                 // the same code as a '0'/'1' string
    int length = 0;                    // number of bits in the code

    // Every numeric field starts at zero; the code string starts empty.
    Code() {}
};
/**
 * Node of the Huffman tree.
 *
 * A node whose codePtr is non-null is treated as a leaf by the traversal
 * code; a node created for a merge carries a null codePtr and two children.
 */
struct HuffmanTreeNode {
    unsigned long long weight;  // frequency of the leaf, or sum of the children
    Code* codePtr;              // symbol record for a leaf, nullptr otherwise
    HuffmanTreeNode* left;      // child reached by a 0 bit
    HuffmanTreeNode* right;     // child reached by a 1 bit

    // All arguments are optional; omitted ones default to 0 / nullptr.
    HuffmanTreeNode(unsigned long long nodeWeight = 0,
                    Code* symbol = nullptr,
                    HuffmanTreeNode* leftChild = nullptr,
                    HuffmanTreeNode* rightChild = nullptr)
        : weight(nodeWeight), codePtr(symbol), left(leftChild), right(rightChild)
    {
    }
};
class CmparatorOfHuffmanTreeNode {
public:
bool operator() (HuffmanTreeNode*& lhs, HuffmanTreeNode*& rhs) const {
return lhs->weight > rhs->weight;
}
};
/**
 * Huffman-coding file compressor / decompressor.
 *
 * run() inspects the input file: if it starts with the archive signature
 * (and compression was not forced) the file is decompressed, otherwise it is
 * compressed into "<input name>.zLzip".
 *
 * The object owns the Huffman tree it builds and releases it in the
 * destructor, so it is neither copy-constructible nor copy-assignable.
 */
class HuffmanEncoderCompress {
private:
    Code _codes[_CODE_NUM];             // one record per byte value
    bool _isCompress;                   // true: compress even if the input carries the archive signature
    string _inputFileName;              // path of the file to read
    string _outputFileName;             // path of the file to write
    unsigned long long _inputFileSize;  // bytes consumed from the input
    unsigned long long _outputFileSize; // bytes produced (or, when decompressing, expected)
    HuffmanTreeNode* _huffmanTreeRoot;  // owned tree root, nullptr until built
private:
    // Recursively deletes the subtree rooted at np (no-op for nullptr).
    // The return type was missing in the original declaration, which is
    // ill-formed in ISO C++.
    void freeNode(HuffmanTreeNode* np);
public:
    // inputFileName: file to process; isCompress: force compression.
    HuffmanEncoderCompress(const string& inputFileName, bool isCompress = false);
    ~HuffmanEncoderCompress();
    // Copying would lead to a double delete of the tree.
    HuffmanEncoderCompress(const HuffmanEncoderCompress& hec) = delete;
    HuffmanEncoderCompress& operator=(const HuffmanEncoderCompress& hec) = delete;

    // Detects the mode from the input file and performs the whole job.
    void run();
    // Counts byte frequencies of the input file and its size.
    void statisticalFrequency();
    // Builds the Huffman tree from the current frequencies; caller owns the result.
    HuffmanTreeNode* buildHuffmanTree();
    // Assigns codes to all leaves below np; the last three arguments describe the path so far.
    void getNewCodes(HuffmanTreeNode* np, unsigned long long newCode, string newCodeStr, int length);
    // Writes the archive header followed by the encoded payload.
    void compress();
    // Advances np through the tree using the bits of inputByte from bit pos
    // downwards; returns true once np is a leaf, false when the bits run out.
    bool findNode(HuffmanTreeNode*& np, unsigned char inputByte, int& pos);
    // Decodes the payload of the archive into the output file.
    void uncompress();
    // Prints the per-symbol frequency / code table.
    void printHuffmanEncodeInfo();
    // Prints the size ratio and both file sizes, labelled with `type`.
    void printInfo(const char* type);
    string getOutputFileName() const { return _outputFileName; }
    // Returns true when both files have identical contents.
    bool compare2File(const string& fileName1, const string& fileName2);
};
#endif
HuffmanEncoderCompress.cpp文件:
#include "HuffmanEncoderCompress.h"
#include <cstdio>
#include <cstring>
#include <queue>
using namespace std;
/**
 * Prepares a job for the given input file.
 *
 * @param inputFileName path of the file to process; the output name
 *                      defaults to this path with ".zLzip" appended.
 * @param isCompress    when true, run() compresses the file even if it
 *                      already starts with the archive signature.
 */
HuffmanEncoderCompress::HuffmanEncoderCompress(const string& inputFileName, bool isCompress)
    : _isCompress(isCompress),
      _inputFileName(inputFileName),
      _outputFileName(inputFileName + ".zLzip"),
      _inputFileSize(0),
      _outputFileSize(0),
      _huffmanTreeRoot(nullptr)
{
    // Record k describes byte value k.
    int symbol = 0;
    for (Code& entry : _codes) {
        entry.oldCode = symbol;
        ++symbol;
    }
}
HuffmanEncoderCompress::freeNode(HuffmanTreeNode* np) {
if (np) {
freeNode(np->left);
freeNode(np->right);
delete np;
}
}
/** Releases the Huffman tree held by this object, if one was built. */
HuffmanEncoderCompress::~HuffmanEncoderCompress()
{
    HuffmanTreeNode* const root = _huffmanTreeRoot;
    this->freeNode(root);
}
/**
 * Performs the whole job for the configured input file.
 *
 * The first _ZIP_NAME_LEN bytes of the input are compared with the archive
 * signature. If compression is forced or the signature does not match, the
 * file is compressed; otherwise the archive header is read and the file is
 * decompressed under the file name stored in that header.
 *
 * Exits the process with status -1 when the input cannot be opened or when
 * the archive header is incomplete.
 */
void HuffmanEncoderCompress::run() {
    FILE* inputFp = fopen(_inputFileName.c_str(), "rb");
    if (inputFp == NULL) {
        printf("open file %s failed!\n", _inputFileName.c_str());
        exit(-1);
    }

    // Zero-filled so that an input shorter than the signature field never
    // leaves indeterminate bytes behind; a short read simply means
    // "not an archive".
    char zipName[_ZIP_NAME_LEN] = {0};
    const bool hasSignature =
        fread(zipName, _ZIP_NAME_LEN, 1, inputFp) == 1 &&
        memcmp(zipName, _ZIP_NAME, _ZIP_NAME_LEN) == 0;

    if (_isCompress || !hasSignature) {
        fclose(inputFp);
        printf("开始压缩文件%s......\n", _inputFileName.c_str());
        printf("正在统计频率......\n");
        statisticalFrequency();
        printf("正在构建哈夫曼树......\n");
        _huffmanTreeRoot = buildHuffmanTree();
        printf("正在产生新编码......\n");
        getNewCodes(_huffmanTreeRoot, 0, string(), 0);
        printHuffmanEncodeInfo();
        printf("正在压缩......\n");
        compress();
        printInfo("压缩");
        printf("压缩成功\n");
        return;
    }

    printf("开始解压缩文件%s......\n", _inputFileName.c_str());
    printf("读取原始文件信息......\n");

    char outputFileName[_FILE_NAME_LEN];
    bool headerOk = fread(outputFileName, _FILE_NAME_LEN, 1, inputFp) == 1;
    // The field is not guaranteed to contain a terminator; force one so the
    // buffer is always a valid C string.
    outputFileName[_FILE_NAME_LEN - 1] = '\0';
    headerOk = headerOk && fread(&_outputFileSize, _FILE_SIZE_LEN, 1, inputFp) == 1;
    for (int i = 0; headerOk && i < _CODE_NUM; ++i)
        headerOk = fread(&_codes[i].frequency, _CODE_FREQUENCY_LEN, 1, inputFp) == 1;
    fclose(inputFp);
    if (!headerOk) {
        printf("read header of file %s failed!\n", _inputFileName.c_str());
        exit(-1);
    }

    printf("原始文件名为%s\n", outputFileName);
    // NOTE(review): the output path is taken verbatim from the archive; an
    // archive from an untrusted source can therefore choose which file gets
    // overwritten. Left unchanged here -- confirm whether this is acceptable.
    _outputFileName = string(outputFileName);

    printf("正在构建哈夫曼树......\n");
    _huffmanTreeRoot = buildHuffmanTree();
    printf("正在产生新编码......\n");
    getNewCodes(_huffmanTreeRoot, 0, string(), 0);
    printHuffmanEncodeInfo();
    printf("正在解压缩......\n");
    uncompress();
    printInfo("解压缩");
    printf("解压成功\n");
}
/**
 * Reads the whole input file and, for every byte, increments that byte's
 * frequency counter and the input size counter.
 *
 * Counters are added to, not reset, so they reflect every call made on this
 * object.
 *
 * Exits the process with status -1 when the input cannot be opened.
 */
void HuffmanEncoderCompress::statisticalFrequency() {
    FILE* inputFp = fopen(_inputFileName.c_str(), "rb");
    if (inputFp == NULL) {
        printf("open file %s failed!\n", _inputFileName.c_str());
        exit(-1);
    }
    // fgetc() reports both end-of-file and read errors as EOF, so the loop
    // ends in either case (testing feof() alone never terminates after a
    // read error).
    int inputChar;
    while ((inputChar = fgetc(inputFp)) != EOF) {
        ++_codes[static_cast<unsigned char>(inputChar)].frequency;
        ++_inputFileSize;
    }
    fclose(inputFp);
}
/**
 * Builds a Huffman tree over all _CODE_NUM symbols using their current
 * frequencies (symbols with frequency 0 get a leaf as well).
 *
 * @return root of the newly allocated tree.
 */
HuffmanTreeNode* HuffmanEncoderCompress::buildHuffmanTree() {
    priority_queue<HuffmanTreeNode*, vector<HuffmanTreeNode*>, CmparatorOfHuffmanTreeNode> minHeap;

    // One leaf per symbol, pushed in symbol order.
    for (Code& symbol : _codes) {
        minHeap.push(new HuffmanTreeNode(symbol.frequency, &symbol));
    }

    // Repeatedly merge the two lightest nodes; the merge that empties the
    // heap produces the root.
    for (;;) {
        HuffmanTreeNode* const first = minHeap.top();
        minHeap.pop();
        HuffmanTreeNode* const second = minHeap.top();
        minHeap.pop();

        HuffmanTreeNode* const merged =
            new HuffmanTreeNode(first->weight + second->weight, nullptr, first, second);
        if (minHeap.empty()) {
            return merged;
        }
        minHeap.push(merged);
    }
}
/**
 * Walks the subtree rooted at np and stores, in every leaf's symbol record,
 * the code describing the path from the root to that leaf.
 *
 * @param np         current node (must not be null).
 * @param newCode    bits of the path so far, as an integer.
 * @param newCodeStr the same path as a string of '0' and '1'.
 * @param length     number of bits in the path so far.
 *
 * Going left appends a 0 bit, going right appends a 1 bit.
 */
void HuffmanEncoderCompress::getNewCodes(HuffmanTreeNode* np, unsigned long long newCode, string newCodeStr, int length) {
    Code* const symbol = np->codePtr;
    if (symbol != nullptr) {
        // Leaf: the accumulated path is this symbol's code.
        symbol->newCode = newCode;
        symbol->newCodeStr = newCodeStr;
        symbol->length = length;
        return;
    }

    const unsigned long long shifted = newCode << 1;
    const int childLength = length + 1;

    if (np->left != nullptr) {
        getNewCodes(np->left, shifted, newCodeStr + "0", childLength);
    }
    if (np->right != nullptr) {
        getNewCodes(np->right, shifted + 1, newCodeStr + "1", childLength);
    }
}
void HuffmanEncoderCompress::compress() {
FILE *inputFp = NULL;
if ((inputFp = fopen(_inputFileName.c_str(), "rb")) == NULL) {
printf("open file %s failed!\n", _inputFileName.c_str());
exit(-1);
}
FILE *outputFp = NULL;
if ((outputFp = fopen(_outputFileName.c_str(), "wb")) == NULL) {
printf("open file %s failed!\n", _outputFileName.c_str());
exit(-1);
}
fwrite(_ZIP_NAME, _ZIP_NAME_LEN, 1, outputFp);
fwrite(_inputFileName.c_str(), _FILE_NAME_LEN, 1, outputFp);
fwrite(&_inputFileSize, _FILE_SIZE_LEN, 1, outputFp);
for (int i = 0; i < _CODE_NUM; ++i)
fwrite(&_codes[i].frequency, _CODE_FREQUENCY_LEN, 1, outputFp);
unsigned char inputByte;
unsigned char outputByte = 0;
unsigned long long newCode;
int length;
int cnt = 0;
unsigned long long currentInputSize = 0;
unsigned long long currentOutputSize = 0;
double currRate = 0.0;
while (!feof(inputFp)) {
fread(&inputByte, 1, 1, inputFp);
if (feof(inputFp)) break;
double rate = (double)(++currentInputSize) / _inputFileSize * 100;
if (rate - currRate >= 10) {
currRate = rate;
printf("已压缩:%.1f%%\t压缩率:%.2f%%\n", currRate, (double)_outputFileSize/currentInputSize*100);
}
newCode = _codes[(int)inputByte].newCode;
length = _codes[(int)inputByte].length;
while (length--) {
outputByte <<= 1;
outputByte += (newCode>>length) & 1;
if (++cnt == 8) {
fwrite(&outputByte, 1, 1, outputFp);
outputByte = 0;
cnt = 0;
++_outputFileSize;
}
}
}
if (cnt < 8) {
outputByte <<= 8-cnt;
fwrite(&outputByte, 1, 1, outputFp);
++_outputFileSize;
}
printf("已压缩:%.1f%%\t压缩率:%.2f%%\n", 100.0, (double)_outputFileSize/currentInputSize*100);
fclose(inputFp);
fclose(outputFp);
}
/**
 * Moves np down the tree using the bits of inputByte, starting at bit index
 * pos and working towards bit 0.
 *
 * @param np        in/out: current tree position.
 * @param inputByte byte supplying the bits.
 * @param pos       in/out: index of the next bit to use; it is decremented
 *                  for every bit consumed and is negative once the byte is
 *                  exhausted.
 * @return true as soon as np is a leaf (codePtr set); false when the byte
 *         runs out of bits first, in which case np keeps the position
 *         reached so far.
 */
bool HuffmanEncoderCompress::findNode(HuffmanTreeNode*& np, unsigned char inputByte, int& pos) {
    while (np->codePtr == nullptr) {
        if (pos < 0) {
            return false;
        }
        const int bit = (inputByte >> pos) & 1;
        --pos;
        // 0 selects the left child, 1 the right child.
        np = (bit == 0) ? np->left : np->right;
    }
    return true;
}
/**
 * Decodes the payload of the archive (everything after the header) into the
 * output file by walking the Huffman tree bit by bit, and stops as soon as
 * _outputFileSize bytes have been produced.
 *
 * _inputFileSize is set to the header size plus the payload bytes consumed.
 *
 * Fixes relative to the original implementation:
 *  - decoding stops completely once the expected size is reached. The
 *    original only left the inner loop and kept the tree position on a
 *    leaf, so any payload byte after the final symbol (for example the
 *    padding byte of an archive whose bit stream ends on a byte boundary)
 *    produced extra garbage output;
 *  - an expected size of 0 produces an empty file;
 *  - a payload that ends before the expected size is reached is reported.
 *
 * Exits the process with status -1 when a file cannot be opened or when the
 * payload is too short.
 */
void HuffmanEncoderCompress::uncompress() {
    FILE* inputFp = fopen(_inputFileName.c_str(), "rb");
    if (inputFp == NULL) {
        printf("open file %s failed!\n", _inputFileName.c_str());
        exit(-1);
    }
    FILE* outputFp = fopen(_outputFileName.c_str(), "wb");
    if (outputFp == NULL) {
        printf("open file %s failed!\n", _outputFileName.c_str());
        fclose(inputFp);
        exit(-1);
    }

    // Skip the header; the caller has already parsed it.
    fseek(inputFp, _ZLZIP_HEAD_LEN, SEEK_SET);
    _inputFileSize = _ZLZIP_HEAD_LEN;

    HuffmanTreeNode* np = _huffmanTreeRoot;  // persists across bytes: a code may span several
    unsigned long long currentOutputSize = 0;
    double currRate = 0.0;                   // progress value at the last progress line
    int inputChar = EOF;
    while (currentOutputSize < _outputFileSize && (inputChar = fgetc(inputFp)) != EOF) {
        const unsigned char inputByte = static_cast<unsigned char>(inputChar);
        ++_inputFileSize;
        int pos = 7;
        // The size check comes first so that padding bits after the final
        // symbol are never interpreted.
        while (currentOutputSize < _outputFileSize && findNode(np, inputByte, pos)) {
            const unsigned char outputByte = np->codePtr->oldCode;
            fwrite(&outputByte, 1, 1, outputFp);
            const double rate = static_cast<double>(++currentOutputSize) / _outputFileSize * 100;
            if (rate - currRate >= 10) {
                currRate = rate;
                printf("已解压缩:%.1f%%\t解压缩率:%.2f%%\n", currRate,
                       static_cast<double>(currentOutputSize) / _inputFileSize * 100);
            }
            if (currentOutputSize == _outputFileSize) {
                printf("已解压缩:%.1f%%\t解压缩率:%.2f%%\n", 100.0,
                       static_cast<double>(currentOutputSize) / _inputFileSize * 100);
            }
            np = _huffmanTreeRoot;
        }
    }

    const bool truncated = currentOutputSize < _outputFileSize;
    fclose(inputFp);
    fclose(outputFp);
    if (truncated) {
        printf("file %s is truncated: decoded %llu of %llu bytes\n",
               _inputFileName.c_str(), currentOutputSize, _outputFileSize);
        exit(-1);
    }
}
void HuffmanEncoderCompress::printHuffmanEncodeInfo() {
printf("%-10s %-10s %-20s %-5s %-10s\n", "原码", "频率", "哈夫曼编码", "长度", "十进制");
for (int i = 0; i < _CODE_NUM; ++i) {
Code &code = _codes[i];
printf("%-10d %-10llu %-20s %-5d %-10llu\n", (int)code.oldCode, code.frequency,
code.newCodeStr.c_str(), code.length, code.newCode);
}
}
/**
 * Prints the output/input size ratio as a percentage, followed by both file
 * sizes expressed in the largest unit (B, KB, MB, GB) for which the input
 * size is below 1024.
 *
 * @param type label placed in front of the ratio line.
 *
 * Inputs of 1024 GB or more are printed in GB (the original printed no size
 * line for them), and an input size of 0 yields a ratio of 0 instead of a
 * division by zero.
 */
void HuffmanEncoderCompress::printInfo(const char* type) {
    const double compressRate = _inputFileSize == 0
        ? 0.0
        : static_cast<double>(_outputFileSize) / _inputFileSize * 100;
    printf("%s率:%.2f%%\n", type, compressRate);

    static const char* const units[] = { "B", "KB", "MB", "GB" };
    const int lastUnit = static_cast<int>(sizeof(units) / sizeof(units[0])) - 1;

    double inputFileSize = static_cast<double>(_inputFileSize);
    double outputFileSize = static_cast<double>(_outputFileSize);
    int unit = 0;
    // Both sizes share the unit chosen for the input size.
    while (inputFileSize >= 1024 && unit < lastUnit) {
        inputFileSize /= 1024;
        outputFileSize /= 1024;
        ++unit;
    }
    printf("输入文件大小:%.2f%s, 输出文件大小:%.2f%s\n",
           inputFileSize, units[unit], outputFileSize, units[unit]);
}
/**
 * Compares two files byte by byte.
 *
 * @param fileName1 path of the first file.
 * @param fileName2 path of the second file.
 * @return true when both files have the same length and contents.
 *
 * Both files are closed on every path (the original never closed them), and
 * bytes are compared only after a successful read (the original compared
 * indeterminate values once a read had failed, e.g. for two empty files).
 *
 * Exits the process with status -1 when either file cannot be opened.
 */
bool HuffmanEncoderCompress::compare2File(const string& fileName1, const string& fileName2) {
    FILE* fp1 = fopen(fileName1.c_str(), "rb");
    if (fp1 == NULL) {
        printf("open file %s failed!\n", fileName1.c_str());
        exit(-1);
    }
    FILE* fp2 = fopen(fileName2.c_str(), "rb");
    if (fp2 == NULL) {
        printf("open file %s failed!\n", fileName2.c_str());
        fclose(fp1);
        exit(-1);
    }

    bool same = true;
    for (;;) {
        const int ch1 = fgetc(fp1);
        const int ch2 = fgetc(fp2);
        if (ch1 != ch2) {
            // Different bytes, or one file ended before the other.
            same = false;
            break;
        }
        if (ch1 == EOF) {
            break;  // both files ended at the same position
        }
    }

    fclose(fp1);
    fclose(fp2);
    return same;
}