利用哈夫曼树实现文件压缩和解压缩

利用标准库中的优先级队列(priority_queue)构建哈夫曼树,再基于哈夫曼树实现文件的压缩和解压缩。
描述:
1.统计文件中字符出现的次数,利用优先级队列构建Huffman树,生成Huffman编码。
构造过程可以使用priority_queue辅助,每次pq.top()都可以取出权值(频数)最小的节点。每取出两个最小权值的节点,就new出一个新的节点,左右孩子分别指向它们。然后把这个新节点push进优先队列。
2.压缩:利用Huffman编码对文件进行压缩,即在压缩文件中按顺序存入每个字符的Huffman编码。
3.将文件中出现的字符以及它们出现的次数写入配置文件中,以便后续解压缩使用。
4.解压缩:利用配置文件重构Huffman树,对文件进行解压缩。


构建哈夫曼树

#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include <cassert>
#include <cstdio>
#include <iostream>
#include <queue>
#include <string>
#include <vector>
using namespace std;

// A single node of a Huffman tree: one weight plus links to its two children
// and to its parent. W is the weight type and is copy-constructed into the node.
template<class W>
struct HuffmanTreeNode
{
    // Creates a detached node: every link starts out NULL and the node stores
    // its own copy of `weight`.
    HuffmanTreeNode(const W &weight)
        : _pLeft(NULL), _pRight(NULL), _pParent(NULL), _weight(weight)
    {
    }

    HuffmanTreeNode<W> *_pLeft;    // left child (NULL until linked)
    HuffmanTreeNode<W> *_pRight;   // right child (NULL until linked)
    HuffmanTreeNode<W> *_pParent;  // parent (NULL until linked)
    W _weight;                     // payload / weight carried by this node
};

template<class W>
class HuffmanTree
{
    typedef HuffmanTreeNode*PNode;
public:
        HuffmanTree()
        : _pRoot(NULL)
    {}
    HuffmanTree(W*array, size_t size, const W&invalid)
    {
        _CreateHuffmantree(array,  size, invalid);

    }
    void _Destroy(PNode&pRoot)
    {
        //后序
        if (pRoot)
        {
            _Destroy(pRoot->_pLeft);
            _Destroy(pRoot->_pRight);
            delete pRoot;
            pRoot = NULL;
        }
    }
    ~HuffmanTree()
    {
        _Destroy(_pRoot);
    }
    PNode GetRoot()
    {
        return  _pRoot;
    }
private:    
    //构建哈夫曼树
    void _CreateHuffmantree(W*array, size_t size, const W&invalid)
    {

        struct PtrNodeCompare
        {
            bool operator()(PNode n1, PNode n2)//重载“()”
            {
                //return (n1->_weight)._count > (n1->_weight)._count;
                return n1->_weight <  n2->_weight;
            }
        };
        priority_queuevector, PtrNodeCompare>hp;

        for (size_t i = 0; i < size; ++i)
        {
            if (array[i] != invalid)
            {

                //PNode p = new HuffmanTreeNode(array[i]);
                hp.push(new HuffmanTreeNode(array[i]));
            }
        }
        //空堆
        if (hp.empty())
            _pRoot = NULL;
        while (hp.size()>1)
        {
            PNode pLeft = hp.top();
            hp.pop();
            PNode pRight = hp.top();
            hp.pop();
            PNode pParent = new HuffmanTreeNode(pLeft->_weight + pRight->_weight);//左加右的权值,作为新节点
            pParent->_pLeft = pLeft;
            pLeft->_pParent = pParent;

            pParent->_pRight = pRight;
            pRight->_pParent = pParent;
            hp.push(pParent);
        }
        _pRoot = hp.top();
    }

public:
    PNode _pRoot;
};

文件压缩和解压缩

#define _CRT_SECURE_NO_WARNINGS 1
#pragma once
#include"haffman.hpp"
#include
#include
using namespace std;
#include

typedef long long LongType;

// Bookkeeping record for one byte value: the byte itself, how often it
// occurs, and the Huffman code assigned to it.
struct CharInfo
{
    char _ch;           // the byte value
    LongType _count;    // number of occurrences
    std::string _code;  // Huffman code as a string of '0' / '1' characters

    // Two records differ when their counts differ; _ch and _code are ignored.
    bool operator!=(const CharInfo &info) const
    {
        return _count != info._count;
    }

    // Weight of a merged (internal) tree node: the sum of both counts.
    // The result represents no particular byte, so _ch is set to 0 and
    // _code is left empty.
    CharInfo operator+(const CharInfo &info) const
    {
        CharInfo ret;
        ret._ch = 0;
        ret._count = _count + info._count;
        return ret;
    }

    // Deliberately reversed: the record with the LARGER count compares as
    // "less". HuffmanTree feeds operator< to std::priority_queue, whose top()
    // is the "greatest" element, so this reversal makes the smallest count
    // come out first.
    bool operator<(const CharInfo &info) const
    {
        return _count > info._count;
    }
};

class FileCompress
{
    typedef HuffmanTreeNode Node;
    struct TmpInfo
    {
        char _ch;//字符
        LongType _count;//次数
    };
public:
    //构造函数
    FileCompress()
    {
        for (size_t i = 0; i < 256; ++i)
        {
            _infos[i]._ch = i;
            _infos[i]._count = 0;
        }
    }
    //获取哈夫曼编码
    void GenerateHuffmanCode(Node*root,string code)//code不能传引用??
    {
        if (root == NULL)
            return;
        //前序遍历生成编码
        if (root->_pLeft == NULL&&root->_pRight == NULL)
        {
            _infos[(unsigned char )root->_weight._ch]._code = code;
            return;
        }
        GenerateHuffmanCode(root->_pLeft, code+'0');
        GenerateHuffmanCode(root->_pRight, code + '1');
    }
    void Compress(const char *file)//file:源文件
    {
        //1.统计字符出现的次数
        FILE*fout = fopen(file, "rb");
        assert(fout);
        char ch = fgetc(fout);
        while (ch != EOF||feof(fout)==0)//如文件结束,则返回值为1,否则为0
        {
            _infos[(unsigned char)ch]._count++;
            ch = fgetc(fout);
        }
        //2.生成Huffmantree 及code
        CharInfo invalid;
        invalid._count = 0;
        HuffmanTreetree(_infos, 256, invalid);//参数:数组,256个,无效值(出现0次)

        string compressfile = file;//
        compressfile += ".huffman";//?
        FILE*fin = fopen(compressfile.c_str(),"wb");//打开压缩文件
        assert(fin);


        string code;
        GenerateHuffmanCode(tree.GetRoot(), code);

        //3.0写入字符出现的信息
        //fwrite(_infos, sizeof(CharInfo), 256, fin);
        int writeNum = 0;
        int objSize = sizeof(TmpInfo);
        for (rsize_t i = 0; i < 256; ++i)
        {
            if (_infos[i]._count>0)
            {
                TmpInfo info;
                info._ch = _infos[i]._ch;
                info._count = _infos[i]._count;
                fwrite(&info, objSize, 1, fin);
                writeNum++;
            }
        }
        TmpInfo info;
        info._count = -1;
        fwrite(&info, objSize, 1, fin);//把info._count = -1写进去作为结束标志位

        //3.压缩
        fseek(fout, 0, SEEK_SET);//文件指针、偏移量、参照位置
            ch = fgetc(fout);
            char value = 0;
            size_t pos = 0;
            while (ch != EOF)
            {
                string &code = _infos[(unsigned char)ch]._code;
                for (size_t i = 0; i < code.size(); ++i)
                {
                    if (code[i] == '1')
                        value |= (1<else if (code[i] == '0')
                    {
                        value &= ~(1<else
                    {
                        assert(false);
                    }
                    ++pos; 
                    if (pos == 8)
                    {
                        fputc(value, fin);
                        value = 0;
                        pos = 0;
                    }

                }
                ch = fgetc(fout);
            }
            if (pos > 0)
            {
                fputc(value, fin);//写入压缩文件(fin)
            }
            fclose(fout);
            fclose(fin);
    }
    void uncompress(const char *file)
    {
        string uncompressfile = file;//file:Input.txt.huffman
        size_t pos = uncompressfile.rfind('.');//找到倒数第一个'.'
        assert(pos != string::npos);
        uncompressfile.erase(pos);//删除掉'.'后面字符串
        uncompressfile += ".unhuffman";//Input.txt+'.unhuffman'
        FILE*fin = fopen(uncompressfile.c_str(), "wb");//打开解压缩文件
        assert(fin);
        FILE*fout = fopen(file, "rb");//打开压缩文件
        assert(fout);
        //fread(_infos, sizeof(CharInfo), 256, fout);
        //3.0读入字符出现的信息
        TmpInfo info;
        int cycleNum = 1;
        int objSize = sizeof(TmpInfo);
        fread(&info, objSize, 1, fout);

        while (info._count != -1)//-1为结束标志
        {
            _infos[(unsigned char)info._ch]._ch = info._ch;
            _infos[(unsigned char)info._ch]._count= info._count;

            fread(&info, objSize, 1, fout);
            cycleNum++;
        }

        int aaa = 0;
        //重建huaffman树
        CharInfo invalid;
        invalid._count = 0;
        HuffmanTreetree(_infos, 256, invalid);//参数:数组,256个,无效值(出现0次)
        Node *root = tree.GetRoot();
        Node*cur = root;
        LongType n = root->_weight._count;//所有叶子节点的和(源文件字符的个数)
        char ch = fgetc(fout);//从fout(压缩文件)读字符
        while (ch != EOF||n>0)
        {
            for (size_t i = 0; i < 8; ++i)
            {

                if ((ch&(1 << i)) == 0)
                    cur = cur->_pLeft;
                else
                    cur = cur->_pRight;
                if (cur->_pLeft == NULL&&cur->_pRight == NULL)
                {
                    //cout << cur->_weight._ch;
                    fputc(cur->_weight._ch, fin);//fin解压缩文件
                    cur = root;
                    if (--n == 0)
                        break;
                }

            }
            ch = fgetc(fout);
        }
        fclose(fin);
        fclose(fout);

    }
protected:
    CharInfo _infos[256];
};
void TestFileCompress()
{

    FileCompress fc;
    FileCompress fc1;
    //fc.Compress("s.txt");
    //fc1.uncompress("s.txt.huffman");

    //fc.Compress("Input.txt");
    //fc1.uncompress("Input.txt.huffman");

    fc.Compress("1.txt");
    fc1.uncompress("1.txt.huffman");

    //fc.Compress("zhizhen.doc");
    //fc1.uncompress("zhizhen.doc.huffman");
}

结果

这里写图片描述

你可能感兴趣的:(c++)