实验名称: :Huffman编码(二叉树应用)

 

Huffman编码(二叉树应用)

二、实验的目的和要求:

1.要求对文件进行Huffman编码的算法,以及对乙编码文件进行解码的算法,为简单起见,可以假设文件是存放在一个字符向量;

2.熟练掌握二叉树的应用;

3.熟练掌握计算机系统的基本操作方法,了解如何编辑、编译、链接和运行一个C++程序及二叉树上的基本运算;

4.上机调试程序,掌握查错、排错使程序能正确运行。

三、实验的环境:指硬件和软件环境

1.硬件环境

内存1016M , 处理器:Intel(R) Core(TM)2 Duo CPU. ,

主频:2.20GHZ

2.软件环环境

操作系统:Windows XP   Microsoft visual C++ 6.0

四、算法描述:

可用特殊符号加自然语言或算法框图(程序流程图、PAD图等)或伪语言(like C++)。

 

 

 

:

 

五、源程序清单:

主文件名:Huffman.cpp

#include"Utility.h"

#include"Lk_stack.h"

#include"Huffman.h"

void main()

{

       HuffmanTree hf;

       char c=0;

       char answer;

       while(c!='3') 

       {

              cout<

              cout<

              cout<

              cout<

              cin>>c;

              switch(c)

              {

                     case '1':   hf.Code(); break;

                     case '2':  hf.UnCode(); break;

              }

         }

}

文件名:Huffman.h

const unsigned int n=256;                   //字符数

const unsigned int m=256*2-1;            //结点总数

struct HTNode{                                        //压缩用Huffman树结点

       unsigned long weight;                  //字符频度(权值)

    unsigned int parent,lchild,rchild;

};

struct Buffer{                                    //字节缓冲压缩用Huffman树 

       char ch;                                     //字节

       unsigned int bits;                         //实际比特数    

};

class HuffmanTree{                                  //Huffman树 

public:

  void Code();                                          //编码

  void UnCode();                               //译码

private: 

  HTNode HT[m+1];                                //树结点表(HT[1]到HT[m])

  char Leaf[n+1];                              //叶结点对应字符(leaf[1]到leaf[n])

  char *HuffmanCode[n+1];               //叶结点对应编码(*HuffmanCode[1]到*HuffmanCode[n])

  unsigned int count;                          //频度大于零的字符数    

  unsigned int char_index[n];             //字符对应在树结点表的下标(char_index[0]到char_index[n-1])

  unsigned long size;                         //被压缩文件长度

  FILE *infp,*outfp;                          //输入/出文件

  Buffer buf;                                     //字符缓冲

  void Stat();                                     //统计字符出现频度并过滤掉频度为零的字符

  //在HT[0]~HT[k]中选择parent为-1,树值最小的两个结点s1,s2

  void Select(unsigned int k, unsigned int &s1, unsigned int &s2);

  void Write(unsigned int bit);             //向outfp中写入一个比特

  void Write(unsigned int num,unsigned int k);//向outfp中写入k个比特

  void WriteToOutfp();                       //强行写入outfp

  void Read(unsigned int &bit);           //从infp中读出一个比特

  void Read(unsigned int &num,unsigned int k);//从infp中读出k个比特

  int  NToBits(unsigned int num);      //0~num之间的整数用二进位表示所需的最少位数

  void CreateFromCodeFile();             //由编码文件中存储的树结构建立Huffman树

  //由被压缩文件建立Huffman树,将树结构存入编码文件的文件头部中,并求每个字符的Huffman编码

  void CreateFromSourceFile();         

};

void HuffmanTree::Code()                   //编码

{

       char infName[256],outfName[256];

      

       cout<<"Please input source file name(size less than 4GB):";  //被压缩文件最多4GB

       cin>>infName;                 

       if((infp=fopen(infName,"rb"))==NULL){

              cout<<"Can not open file:"<

           exit(1);

       }

       if(feof(infp)!=0){

              cout<<"Empty source file:"<

           exit(1);

       }

cout<<"Please input code file name:";

       cin>>outfName;                 

       if((outfp=fopen(outfName,"wb"))==NULL){

              cout<<"Can not open file:"<

           exit(1);

       }

       cout<<"Pocessing..."<

unsigned char ch;

       unsigned int i,c;

       for(i=0;i<=n;i++)HuffmanCode[i]=NULL;

    CreateFromSourceFile(); 

rewind(infp);

    ch=fgetc(infp);

    while(feof(infp)==0){

              c=char_index[ch];

              for(i=0;i

                     if(HuffmanCode[c][i]=='0')Write(0);

                     else Write(1);

              }

              ch=fgetc(infp);

       }

       WriteToOutfp();

       fclose(infp);fclose(outfp);

cout<<"Process end."<

}

void HuffmanTree::UnCode()

{

       char infName[256],outfName[256];

       cout<<"Please input code file name:";

       cin>>infName;                 

       if((infp=fopen(infName,"rb"))==NULL){

              cout<<"Can not open file:"<

           exit(1);

       }

       if(feof(infp)!=0){

              cout<<"Empty code file:"<

           exit(1);

       }

cout<<"Please input target file name:";

       cin>>outfName;                 

       if((outfp=fopen(outfName,"wb"))==NULL){

              cout<<"Can not open file:"<

           exit(1);

       }

       cout<<"Pocessing..."<

unsigned int bit,c,i;

       CreateFromCodeFile();                               //建立Huffman树

       Read(bit);

       for(i=0;i

              c=2*count-1;                                     //2*count-1为根结点的下标

              while((HT[c].lchild!=0||HT[c].rchild!=0)&&(feof(infp)==0)){

                     if(bit==0)c=HT[c].lchild;

                     else c=HT[c].rchild;

                     Read(bit); 

              }

              fputc(Leaf[c],outfp);                          //将字符写入outfp中

       }

fclose(infp);fclose(outfp);

cout<<"Process end."<

}

void HuffmanTree::Stat()              //统计字符出现频度并过滤掉频度为零的字符

{

       unsigned int i,cha;

       for(i=1;i<=n;i++)HT[i].weight=0;

       size=0;

       rewind(infp);

       cha=fgetc(infp);

       while(feof(infp)==0)      //统计字符出现频度

       {

              HT[cha+1].weight++;

              size++;

              cha=fgetc(infp);

       }

count=0;

       for(cha=0;cha

              if(HT[cha+1].weight>0){

                     count++;

                     Leaf[count]=cha;

                     HT[count].weight=HT[cha+1].weight;

                     char_index[cha]=count;

              }

       }

}

void HuffmanTree::Select(unsigned int k, unsigned int &s1, unsigned int &s2)

{//s1,s2为权值最小的根,且s1的权值小于s2的权值

       unsigned int root_count=0;                  //根结点数;

       unsigned int root_index[n];                  //根结点下标;

    unsigned int tem,i,j;  

       for(i=1;i<=k;i++)

              if(HT[i].parent==0)

                     root_index[root_count++]=i;

    s1=root_index[0];s2=root_index[1];

       if(HT[s1].weight>HT[s2].weight){

              tem=s1;s1=s2;s2=tem;

       }

for(i=2;i

              j=root_index[i];

              if(HT[j].weight

                     s2=j;

                     if(HT[s1].weight>HT[s2].weight){

                            tem=s1;s1=s2;s2=tem;

                     }

              }

       }

}

void HuffmanTree::Write(unsigned int bit)                         //向outfp中写入一个比特

{

       buf.bits++;

       buf.ch=(buf.ch<<1)+bit;

       if(buf.bits==8){  //缓冲区已满,写入outfp

              fputc(buf.ch,outfp);

              buf.bits=0;

              buf.ch=0;

       }

}

void HuffmanTree::Write(unsigned int num,unsigned int k)         //向outfp中写入k个比特

{

       Stack s;

       unsigned int i,bit;

       for(i=1;i<=k;i++){

              s.push(num & 1);

              num=(num>>1);

       }

for(i=1;i<=k;i++){

              s.top(bit);

              Write(bit);

              s.pop();

       }

}

void HuffmanTree::WriteToOutfp()                     //强行写入outfp

{

       unsigned int l=buf.bits;

       if(l>0)

              for(unsigned int i=0;i<8-l;i++)Write(0);

}

void HuffmanTree::Read(unsigned int &bit)                       //从infp中读出一个比特

{

       if(buf.bits==0){

              buf.ch=fgetc(infp);

              buf.bits=8;

       }

       bit=(buf.ch & 128)>>7;   

       buf.ch=buf.ch<<1;

       buf.bits--;

}

void HuffmanTree::Read(unsigned int &num,unsigned int k)             //从infp中读出k个比特

{

       unsigned int bit;

       num=0;

       for(unsigned int i=0;i

              Read(bit);

              num=(num<<1)+bit;

       }

}

int HuffmanTree::NToBits(unsigned int num)       //0~num之间的整数用二进位表示所需的位数

{

       unsigned int l=0,power=1;

       while(power<=num){

              l++;power=power*2;

       }

       return l;

}

void HuffmanTree::CreateFromCodeFile()           //由编码文件中存储的树结构建立Huffman树

{

       buf.bits=0;                                                       //清空缓冲区

       buf.ch=0;

       unsigned int num,l,i;

       rewind(infp);

       fread(&size,sizeof(unsigned long),1,infp);

       Read(count,8);

    count=count+1;

       for(i=1;i<=count;i++)

              fread(&Leaf[i],sizeof(char),1,infp);

       l=NToBits(2*count-1);

       for(i=1;i<=count;i++){

              HT[i].lchild=0;

              HT[i].rchild=0;

       }

       for(i=count+1;i<=2*count-1;i++){

              HT[i].lchild=(Read(num,l),num);

              HT[i].rchild=(Read(num,l),num);

       }

}

void HuffmanTree::CreateFromSourceFile()       

//由被压缩文件建立Huffman树,将树结构存入编码文件的文件头部中,并求每个字符的Huffman编码

{

       Stat();//统计字符出现频度并过滤掉频度为零的字符

    //由被压缩文件建立Huffman树

       unsigned int i,s1,s2;

       for(i=1;i<=count;i++)HT[i].parent=HT[i].lchild=HT[i].rchild=0;

    for(i=count+1;i<=2*count-1;i++){//建立Huffman树

              Select(i-1,s1,s2);          //选择parent为0,权值最小的两个结点s1,s2

              HT[s1].parent=HT[s2].parent=i;

              HT[i].parent=0;HT[i].lchild=s1;HT[i].rchild=s2;

              HT[i].weight=HT[s1].weight+HT[s2].weight;

       }

//将树结构存入编码文件的文件头部中

       unsigned int l;

       buf.bits=0;                                                      //清空缓冲区

       buf.ch=0;

    rewind(outfp);

       fwrite(&size,sizeof(unsigned int),1,outfp);

       Write(count-1,8);

    for(i=1;i<=count;i++)

              fwrite(&Leaf[i],sizeof(char),1,outfp);

       l=NToBits(2*count-1);

       for(i=count+1;i<=2*count-1;i++){

              Write(HT[i].lchild,l);

              Write(HT[i].rchild,l);

       }

//求每个字符的Huffman编码

       unsigned int start,c,f;

       char *cd;                 //编码临时变量

       for(i=1;i<=n;i++)

              if(HuffmanCode[i]!=NULL){

                     delete []HuffmanCode[i];  //释放存储空间

                     HuffmanCode[i]=NULL;

              }

       cd=new char[count];       //分配求编码的工作空间

       cd[count-1]='\0';         //编码结束符

       for(i=1;i<=count;i++){    //逐位求Huffman编码

              start=count-1;        //编码结束符位置

              for(c=i,f=HT[i].parent;f!=0;c=f,f=HT[c].parent) //从叶到根求编码

                     if(HT[f].lchild==c)cd[--start]='0';

                     else cd[--start]='1';

        HuffmanCode[i]=new char[count-start];      //为第i个字符编码分配空间

              strcpy(HuffmanCode[i],&cd[start]);            //从cd复制编码到HuffmanCode

       }

       delete []cd;

 }

文件名:LK_stack.h

template

struct Node {

   Node_entry entry;              //数据成员

Node *next;

   Node();                       //构造

   Node(Node_entry item, Node *add_on = NULL);

};

template

class Stack {

public:                          //  标准栈声明方法

   Stack();

   bool empty() const;

   Error_code push(const Stack_entry &item);

   Error_code pop();

   Error_code top(Stack_entry &item) const;

   void clear();

   ~Stack();

   Stack(const Stack &original);

   void operator =(const Stack &original);

protected:

   Node *top_node;

};

template

Node::Node()

{

   next = NULL;

}

template

Node::Node(Node_entry item, Node *add_on)

{

   entry = item;

   next = add_on;

}

template

Stack::Stack()

{

   top_node=NULL;

}

template

bool Stack::empty() const

{

  if(top_node==NULL)

     return true;

  else

     return false;

}

template

Error_code Stack::push(const Stack_entry &item)

{

   Node *new_top = new Node(item, top_node);

   if (new_top == NULL) return overflow;

   top_node = new_top;

   return success;

}

template

Error_code Stack::pop()

{

   Node *old_top = top_node;

   if (top_node == NULL) return underflow;

   top_node = old_top->next;

   delete old_top;

   return success;

}

template

Error_code Stack::top(Stack_entry &item) const

{

   if(empty())

     return underflow;

   else{

     item=top_node->entry;

     return success;

   }

}

template

void Stack::clear()             //  清除栈

{

   while (!empty())

      pop();

}

template

Stack::~Stack()             //  析构函数 删除栈

       clear();

}

template

Stack::Stack(const Stack &original) //  copy constructor

{

   Node *new_copy, *original_node = original.top_node;

   if (original_node == NULL) top_node = NULL;

   else

   {                         // 复制连接结点

      top_node = new_copy = new Node(original_node->entry);

      while (original_node->next != NULL)

         {

                original_node = original_node->next;

             new_copy->next = new Node(original_node->entry);

             new_copy = new_copy->next;

      }

   }

}

template

void Stack::operator = (const Stack &original) //  Overload assignment

{

   Node *new_top, *new_copy, *original_node = original.top_node;

   if (original_node == NULL) new_top = NULL;

   else

   {                       

      new_copy = new_top = new Node(original_node->entry);

      while (original_node->next != NULL)

         {

                original_node = original_node->next;

             new_copy->next = new Node(original_node->entry);

             new_copy = new_copy->next;

      }

   }

   while (!empty())              

      pop();

   top_node = new_top;          

}

文件名;Utility.h

#include      //standard string operations

#include    //standard iostream operations

#include      //numeric limits

#include        //mathematical functions

#include     //file input and output

#include       //character classification

#include        //date and time function

#include       //con input and output

#include      //standard libray

#include       //standard I/O libray

enum Error_code{success,fail,underflow,overflow,range_error};

//enum bool{false,true};

六、运行结果:

  实验数据压缩前形态为:we.txt内容为:we are you 123495kfj/.';'  

压缩后实验数据用Microsoft Visual c++查看we.cpp:

还原后数据为ww.txt : we are you 123495kfj/.';'

由此可见程序完成了数据的无损压缩和解压。

 

 七、实验运行情况分析(包括算法、运行结果、运行环境等问题的讨论)。

程序虽然很精简,但可以可以实现数据的压缩和解压,把一个文件压缩后,可以解压为其他的格式,所以这也可以作为一些文件的格式转换器。但是本程序也存在着不足,不能够压缩整个文件夹,当然也不能解压那些被压缩了的文件。只能解压Huffman压缩的文件。否则会出现错误。

你可能感兴趣的:(数据结构)