Huffman编码(二叉树应用)
二、实验的目的和要求:
1.要求实现对文件进行Huffman编码的算法,以及对已编码文件进行解码的算法;为简单起见,可以假设文件存放在一个字符向量中;
2.熟练掌握二叉树的应用;
3.熟练掌握计算机系统的基本操作方法,了解如何编辑、编译、链接和运行一个C++程序及二叉树上的基本运算;
4.上机调试程序,掌握查错、排错使程序能正确运行。
三、实验的环境:指硬件和软件环境
1.硬件环境:
内存:1016MB,处理器:Intel(R) Core(TM)2 Duo CPU,
主频:2.20GHz
2.软件环境:
操作系统:Windows XP;开发环境:Microsoft Visual C++ 6.0
四、算法描述:
可用特殊符号加自然语言或算法框图(程序流程图、PAD图等)或伪语言(like C++)。
:
五、源程序清单:
主文件名:Huffman.cpp
#include "Utility.h"
#include "LK_stack.h"
#include "Huffman.h"

// Interactive driver: repeatedly shows a menu and runs the selected
// operation until the user enters '3' or standard input ends.
int main()
{
    HuffmanTree hf;
    char c = 0;
    while (c != '3') {
        // NOTE(review): the original menu wording is not recoverable from
        // this listing; only the meaning of the keys '1', '2' and '3' is.
        // The four prompts below are a reconstruction.
        cout << endl << "1. Code (compress a file)" << endl;
        cout << "2. UnCode (decompress a file)" << endl;
        cout << "3. Exit" << endl;
        cout << "Please select:";
        if (!(cin >> c))
            break;                      // EOF or stream error: leave instead of spinning
        switch (c) {
        case '1':
            hf.Code();
            break;
        case '2':
            hf.UnCode();
            break;
        }
    }
    return 0;
}

文件名:Huffman.h
#ifndef HUFFMAN_H
#define HUFFMAN_H

#include "Utility.h"
#include "LK_stack.h"

const unsigned int n = 256;             // number of distinct byte values
const unsigned int m = 256 * 2 - 1;     // maximum number of tree nodes

// The original length is stored in the code file as this many bytes,
// least-significant byte first (the layout the original program produced
// on 32-bit Windows).
const unsigned int size_field_bytes = 4;

// One node of the Huffman tree. Index 0 means "none" in parent/lchild/rchild.
struct HTNode {
    unsigned long weight;               // frequency (weight) of the subtree
    unsigned int parent, lchild, rchild;
};

// Bit buffer used to pack and unpack single bits into bytes.
struct Buffer {
    unsigned char ch;                   // byte being assembled or consumed
    unsigned int bits;                  // number of valid bits currently held
};

// Reports a malformed or truncated code file and terminates the program.
static void FailInvalidCodeFile()
{
    cout << "Invalid code file." << endl;
    exit(1);
}

// Returns true when fp has no bytes. feof() is only set after a read has
// hit the end of the file, so one byte is probed; the stream is rewound
// afterwards.
static bool FileIsEmpty(FILE *fp)
{
    const int first = fgetc(fp);
    rewind(fp);
    return first == EOF;
}

// Huffman coder/decoder working on whole files.
class HuffmanTree {
public:
    void Code();                        // prompt for file names and compress
    void UnCode();                      // prompt for file names and decompress
private:
    HTNode HT[m + 1];                   // node table, HT[1]..HT[m] are used
    char Leaf[n + 1];                   // byte of each leaf, Leaf[1]..Leaf[n]
    std::string HuffmanCode[n + 1];     // '0'/'1' code of each leaf, [1]..[n]
    unsigned int count;                 // number of bytes with non-zero frequency
    unsigned int char_index[n];         // byte value -> leaf index in HT
    unsigned long size;                 // length of the uncompressed file
    FILE *infp, *outfp;                 // input / output files
    Buffer buf;                         // bit buffer shared by Read and Write

    // Counts byte frequencies in infp and compacts the non-zero ones.
    void Stat();
    // Picks the two lowest-weight roots (parent == 0) among HT[1]..HT[k].
    void Select(unsigned int k, unsigned int &s1, unsigned int &s2);
    void Write(unsigned int bit);                   // write one bit to outfp
    void Write(unsigned int num, unsigned int k);   // write the low k bits of num
    void WriteToOutfp();                            // flush a partial byte to outfp
    void Read(unsigned int &bit);                   // read one bit from infp
    void Read(unsigned int &num, unsigned int k);   // read k bits from infp
    int NToBits(unsigned int num);      // bits needed to represent 0..num
    void CreateFromCodeFile();          // rebuild the tree from a code file header
    // Builds the tree from the source file, writes the header to outfp and
    // derives the code of every leaf.
    void CreateFromSourceFile();
};

// Compresses a file: asks for the source and code file names, writes the
// header (length, leaves, tree shape) and then the code of every source byte.
// Terminates the program if a file cannot be opened or the source is empty.
void HuffmanTree::Code()
{
    std::string infName, outfName;      // std::string: no fixed-size buffer to overrun
    cout << "Please input source file name(size less than 4GB):";  // the length field is 4 bytes
    cin >> infName;
    if ((infp = fopen(infName.c_str(), "rb")) == NULL) {
        cout << "Can not open file:" << infName << endl;
        exit(1);
    }
    if (FileIsEmpty(infp)) {
        cout << "Empty source file:" << infName << endl;
        exit(1);
    }
    cout << "Please input code file name:";
    cin >> outfName;
    if ((outfp = fopen(outfName.c_str(), "wb")) == NULL) {
        cout << "Can not open file:" << outfName << endl;
        exit(1);
    }
    cout << "Pocessing..." << endl;

    CreateFromSourceFile();
    rewind(infp);
    int ch;                             // int, so EOF is distinguishable from byte 0xFF
    while ((ch = fgetc(infp)) != EOF) {
        const std::string &code = HuffmanCode[char_index[ch]];
        for (std::string::size_type i = 0; i < code.size(); i++)
            Write(code[i] == '0' ? 0u : 1u);
    }
    WriteToOutfp();                     // pad the last byte with zero bits
    fclose(infp);
    fclose(outfp);
    cout << "Process end." << endl;
}

// Decompresses a file: asks for the code and target file names, rebuilds the
// tree from the header and decodes `size` bytes. Terminates the program if a
// file cannot be opened or the code file is empty, malformed or truncated.
void HuffmanTree::UnCode()
{
    std::string infName, outfName;
    cout << "Please input code file name:";
    cin >> infName;
    if ((infp = fopen(infName.c_str(), "rb")) == NULL) {
        cout << "Can not open file:" << infName << endl;
        exit(1);
    }
    if (FileIsEmpty(infp)) {
        cout << "Empty code file:" << infName << endl;
        exit(1);
    }
    cout << "Please input target file name:";
    cin >> outfName;
    if ((outfp = fopen(outfName.c_str(), "wb")) == NULL) {
        cout << "Can not open file:" << outfName << endl;
        exit(1);
    }
    cout << "Pocessing..." << endl;

    CreateFromCodeFile();
    const unsigned int root = 2 * count - 1;    // the root is the last node created
    unsigned int bit, c;
    for (unsigned long i = 0; i < size; i++) {
        c = root;
        // Walk from the root to a leaf, one bit per edge. With a single
        // distinct byte the root is itself a leaf and no bits are read.
        while (HT[c].lchild != 0 || HT[c].rchild != 0) {
            Read(bit);
            if (feof(infp) != 0)
                FailInvalidCodeFile();  // ran out of data in the middle of a code
            c = (bit == 0) ? HT[c].lchild : HT[c].rchild;
        }
        fputc(Leaf[c], outfp);
    }
    fclose(infp);
    fclose(outfp);
    cout << "Process end." << endl;
}

// Counts how often every byte value occurs in infp, sets `size` to the file
// length, and compacts the bytes that do occur into leaves 1..count
// (Leaf, HT[].weight and char_index are filled accordingly).
void HuffmanTree::Stat()
{
    unsigned int i;
    for (i = 1; i <= n; i++)
        HT[i].weight = 0;
    size = 0;
    rewind(infp);
    int cha;
    while ((cha = fgetc(infp)) != EOF) {
        HT[cha + 1].weight++;           // slot cha+1 temporarily counts byte cha
        size++;
    }
    count = 0;
    for (i = 0; i < n; i++) {
        if (HT[i + 1].weight > 0) {
            count++;                    // count <= i+1, so compacting in place is safe
            Leaf[count] = static_cast<char>(i);
            HT[count].weight = HT[i + 1].weight;
            char_index[i] = count;
        }
    }
}

// Finds, among HT[1]..HT[k], the two roots (parent == 0) of lowest weight.
// On return s1 is the lighter and s2 the second lightest.
// Precondition: at least two roots exist in that range.
void HuffmanTree::Select(unsigned int k, unsigned int &s1, unsigned int &s2)
{
    unsigned int root_count = 0;        // number of roots found
    unsigned int root_index[n];         // their indices; at most `count` <= n roots
    unsigned int tem, i, j;
    for (i = 1; i <= k; i++)
        if (HT[i].parent == 0)
            root_index[root_count++] = i;
    s1 = root_index[0];
    s2 = root_index[1];
    if (HT[s1].weight > HT[s2].weight) {
        tem = s1; s1 = s2; s2 = tem;
    }
    for (i = 2; i < root_count; i++) {
        j = root_index[i];
        if (HT[j].weight < HT[s2].weight) {
            s2 = j;                     // j displaces the current second lightest
            if (HT[s1].weight > HT[s2].weight) {
                tem = s1; s1 = s2; s2 = tem;
            }
        }
    }
}

// Appends one bit to the buffer; once 8 bits are collected the byte is
// written to outfp and the buffer is reset.
void HuffmanTree::Write(unsigned int bit)
{
    buf.bits++;
    buf.ch = static_cast<unsigned char>((buf.ch << 1) + bit);
    if (buf.bits == 8) {
        fputc(buf.ch, outfp);
        buf.bits = 0;
        buf.ch = 0;
    }
}

// Writes the low k bits of num to outfp, most significant bit first.
// The bits are peeled off low-to-high, so a stack reverses their order.
void HuffmanTree::Write(unsigned int num, unsigned int k)
{
    Stack<unsigned int> s;
    unsigned int i, bit;
    for (i = 1; i <= k; i++) {
        s.push(num & 1);
        num = (num >> 1);
    }
    for (i = 1; i <= k; i++) {
        s.top(bit);
        Write(bit);
        s.pop();
    }
}

// Flushes a partially filled buffer by padding it with zero bits.
void HuffmanTree::WriteToOutfp()
{
    unsigned int l = buf.bits;
    if (l > 0)
        for (unsigned int i = 0; i < 8 - l; i++)
            Write(0);
}

// Reads one bit from infp, most significant bit of each byte first.
// If infp is exhausted the bit is meaningless; callers check feof(infp).
void HuffmanTree::Read(unsigned int &bit)
{
    if (buf.bits == 0) {
        buf.ch = static_cast<unsigned char>(fgetc(infp));
        buf.bits = 8;
    }
    bit = (buf.ch & 128) >> 7;
    buf.ch = static_cast<unsigned char>(buf.ch << 1);
    buf.bits--;
}

// Reads k bits from infp into num, most significant bit first.
void HuffmanTree::Read(unsigned int &num, unsigned int k)
{
    unsigned int bit;
    num = 0;
    for (unsigned int i = 0; i < k; i++) {
        Read(bit);
        num = (num << 1) + bit;
    }
}

// Returns the number of binary digits needed to represent every integer
// in 0..num (0 for num == 0).
int HuffmanTree::NToBits(unsigned int num)
{
    unsigned int l = 0, power = 1;
    while (power <= num) {
        l++;
        power = power * 2;
    }
    return l;
}

// Rebuilds the tree from the header of the code file: the original length,
// the number of leaves minus one (8 bits), the leaf bytes, then the two child
// indices of every internal node. Terminates the program on a truncated or
// inconsistent header.
void HuffmanTree::CreateFromCodeFile()
{
    buf.bits = 0;                       // start with an empty bit buffer
    buf.ch = 0;
    unsigned int num, l, i;
    rewind(infp);

    size = 0;
    for (i = 0; i < size_field_bytes; i++) {
        const int byte = fgetc(infp);
        if (byte == EOF)
            FailInvalidCodeFile();
        size |= static_cast<unsigned long>(byte) << (8 * i);
    }

    Read(count, 8);
    if (feof(infp) != 0)
        FailInvalidCodeFile();
    count = count + 1;                  // stored as count-1 so that 256 fits in 8 bits
    if (fread(&Leaf[1], sizeof(char), count, infp) != count)
        FailInvalidCodeFile();

    l = NToBits(2 * count - 1);
    for (i = 1; i <= count; i++) {
        HT[i].lchild = 0;
        HT[i].rchild = 0;
    }
    for (i = count + 1; i <= 2 * count - 1; i++) {
        // The encoder always joins two nodes created earlier, so a valid
        // child index lies in 1..i-1. Anything else would index outside the
        // tree or create a cycle.
        Read(num, l);
        if (feof(infp) != 0 || num < 1 || num >= i)
            FailInvalidCodeFile();
        HT[i].lchild = num;
        Read(num, l);
        if (feof(infp) != 0 || num < 1 || num >= i)
            FailInvalidCodeFile();
        HT[i].rchild = num;
    }
}

// Builds the Huffman tree from the source file, writes the header to outfp
// and derives the code of every leaf.
// Precondition: the source file is not empty (Code() checks this), so
// count >= 1 after Stat().
void HuffmanTree::CreateFromSourceFile()
{
    Stat();

    // Build the tree: leaves are 1..count, internal nodes count+1..2*count-1.
    unsigned int i, s1, s2;
    for (i = 1; i <= count; i++)
        HT[i].parent = HT[i].lchild = HT[i].rchild = 0;
    for (i = count + 1; i <= 2 * count - 1; i++) {
        Select(i - 1, s1, s2);          // two lightest roots so far
        HT[s1].parent = HT[s2].parent = i;
        HT[i].parent = 0;
        HT[i].lchild = s1;
        HT[i].rchild = s2;
        HT[i].weight = HT[s1].weight + HT[s2].weight;
    }

    // Write the header.
    unsigned int l;
    buf.bits = 0;                       // start with an empty bit buffer
    buf.ch = 0;
    rewind(outfp);
    for (i = 0; i < size_field_bytes; i++)
        fputc(static_cast<int>((size >> (8 * i)) & 0xFF), outfp);
    Write(count - 1, 8);
    for (i = 1; i <= count; i++)
        fwrite(&Leaf[i], sizeof(char), 1, outfp);
    l = NToBits(2 * count - 1);
    for (i = count + 1; i <= 2 * count - 1; i++) {
        Write(HT[i].lchild, l);
        Write(HT[i].rchild, l);
    }

    // Derive the code of every leaf by climbing to the root; the bits come
    // out leaf-to-root, so they are reversed when stored.
    unsigned int c, f;
    for (i = 0; i <= n; i++)
        HuffmanCode[i].clear();         // drop codes left over from an earlier run
    for (i = 1; i <= count; i++) {
        std::string reversed;
        for (c = i, f = HT[i].parent; f != 0; c = f, f = HT[c].parent)
            reversed += (HT[f].lchild == c) ? '0' : '1';
        HuffmanCode[i].assign(reversed.rbegin(), reversed.rend());
    }
}

#endif // HUFFMAN_H

文件名:LK_stack.h
#ifndef LK_STACK_H
#define LK_STACK_H

#include "Utility.h"

// Singly linked node holding one stack entry.
template <class Node_entry>
struct Node {
    Node_entry entry;                   // stored value
    Node<Node_entry> *next;             // node below this one, or NULL
    Node();
    Node(Node_entry item, Node<Node_entry> *add_on = NULL);
};

// Linked stack with value semantics (deep copy on copy and assignment).
template <class Stack_entry>
class Stack {
public:
    Stack();
    bool empty() const;
    Error_code push(const Stack_entry &item);
    Error_code pop();
    Error_code top(Stack_entry &item) const;
    void clear();
    ~Stack();
    Stack(const Stack<Stack_entry> &original);
    void operator =(const Stack<Stack_entry> &original);
protected:
    Node<Stack_entry> *top_node;        // top of the stack, NULL when empty
};

// Creates an unlinked node; entry is left default-initialized.
template <class Node_entry>
Node<Node_entry>::Node()
{
    next = NULL;
}

// Creates a node holding item and linked in front of add_on.
template <class Node_entry>
Node<Node_entry>::Node(Node_entry item, Node<Node_entry> *add_on)
{
    entry = item;
    next = add_on;
}

// Creates an empty stack.
template <class Stack_entry>
Stack<Stack_entry>::Stack()
{
    top_node = NULL;
}

// Returns true when the stack holds no entries.
template <class Stack_entry>
bool Stack<Stack_entry>::empty() const
{
    return top_node == NULL;
}

// Pushes item; returns overflow if no memory is available, else success.
template <class Stack_entry>
Error_code Stack<Stack_entry>::push(const Stack_entry &item)
{
    // nothrow, so that allocation failure is reported through the NULL check
    // instead of an exception that bypasses it
    Node<Stack_entry> *new_top = new (std::nothrow) Node<Stack_entry>(item, top_node);
    if (new_top == NULL)
        return overflow;
    top_node = new_top;
    return success;
}

// Removes the top entry; returns underflow on an empty stack, else success.
template <class Stack_entry>
Error_code Stack<Stack_entry>::pop()
{
    Node<Stack_entry> *old_top = top_node;
    if (top_node == NULL)
        return underflow;
    top_node = old_top->next;
    delete old_top;
    return success;
}

// Copies the top entry into item; returns underflow on an empty stack
// (item is then left untouched), else success.
template <class Stack_entry>
Error_code Stack<Stack_entry>::top(Stack_entry &item) const
{
    if (empty())
        return underflow;
    item = top_node->entry;
    return success;
}

// Removes every entry.
template <class Stack_entry>
void Stack<Stack_entry>::clear()
{
    while (!empty())
        pop();
}

// Releases all nodes.
template <class Stack_entry>
Stack<Stack_entry>::~Stack()
{
    clear();
}

// Copy constructor: duplicates the chain of nodes of original.
template <class Stack_entry>
Stack<Stack_entry>::Stack(const Stack<Stack_entry> &original)
{
    Node<Stack_entry> *new_copy, *original_node = original.top_node;
    if (original_node == NULL)
        top_node = NULL;
    else {
        // copy the linked nodes one by one
        top_node = new_copy = new Node<Stack_entry>(original_node->entry);
        while (original_node->next != NULL) {
            original_node = original_node->next;
            new_copy->next = new Node<Stack_entry>(original_node->entry);
            new_copy = new_copy->next;
        }
    }
}

// Assignment: builds a full copy of original first and only then releases
// the old contents, which also makes self-assignment safe.
template <class Stack_entry>
void Stack<Stack_entry>::operator =(const Stack<Stack_entry> &original)
{
    Node<Stack_entry> *new_top, *new_copy, *original_node = original.top_node;
    if (original_node == NULL)
        new_top = NULL;
    else {
        new_copy = new_top = new Node<Stack_entry>(original_node->entry);
        while (original_node->next != NULL) {
            original_node = original_node->next;
            new_copy->next = new Node<Stack_entry>(original_node->entry);
            new_copy = new_copy->next;
        }
    }
    while (!empty())
        pop();
    top_node = new_top;
}

#endif // LK_STACK_H

文件名:Utility.h
#ifndef UTILITY_H
#define UTILITY_H

// NOTE(review): the header names of the original include list are not
// recoverable from this listing; these are the standard headers the code in
// this report actually uses.
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <iostream>
#include <new>
#include <string>

using std::cin;
using std::cout;
using std::endl;

// Status codes returned by the container operations.
enum Error_code { success, fail, underflow, overflow, range_error };

#endif // UTILITY_H

六、运行结果:
实验数据压缩前形态为:we.txt内容为:we are you 123495kfj/.';'
压缩后实验数据用Microsoft Visual c++查看we.cpp:
还原后数据为ww.txt : we are you 123495kfj/.';'
由此可见程序完成了数据的无损压缩和解压。
七、实验运行情况分析(包括算法、运行结果、运行环境等问题的讨论)。
程序虽然很精简,但可以实现数据的压缩和解压,把一个文件压缩后,可以解压为其他的格式,所以这也可以作为一些文件的格式转换器。但是本程序也存在着不足,不能够压缩整个文件夹,当然也不能解压那些被压缩了的文件。只能解压Huffman压缩的文件。否则会出现错误。