Huffman编码(二叉树应用)
二、实验的目的和要求:
1.要求对文件进行Huffman编码的算法,以及对乙编码文件进行解码的算法,为简单起见,可以假设文件是存放在一个字符向量;
2.熟练掌握二叉树的应用;
3.熟练掌握计算机系统的基本操作方法,了解如何编辑、编译、链接和运行一个C++程序及二叉树上的基本运算;
4.上机调试程序,掌握查错、排错使程序能正确运行。
三、实验的环境:指硬件和软件环境
1.硬件环境:
内存1016M , 处理器:Intel(R) Core(TM)2 Duo CPU. ,
主频:2.20GHZ
2.软件环环境:
操作系统:Windows XP Microsoft visual C++ 6.0
四、算法描述:
可用特殊符号加自然语言或算法框图(程序流程图、PAD图等)或伪语言(like C++)。
:
五、源程序清单:
主文件名:Huffman.cpp
#include"Utility.h"
#include"Lk_stack.h"
#include"Huffman.h"
void main()
{
HuffmanTree hf;
char c=0;
char answer;
while(c!='3')
{
cout<<endl<<"1.Huffman 方式 压缩.";
cout<<endl<<"2.Huffman 方式 解压.";
cout<<endl<<"3.退出.";
cout<<endl<<"请选择1/2/3:";
cin>>c;
switch(c)
{
case '1': hf.Code(); break;
case '2': hf.UnCode(); break;
}
}
}
文件名:Huffman.h
const unsigned int n=256; //字符数
const unsigned int m=256*2-1; //结点总数
struct HTNode{ //压缩用Huffman树结点
unsigned long weight; //字符频度(权值)
unsigned int parent,lchild,rchild;
};
struct Buffer{ //字节缓冲压缩用Huffman树
char ch; //字节
unsigned int bits; //实际比特数
};
class HuffmanTree{ //Huffman树
public:
void Code(); //编码
void UnCode(); //译码
private:
HTNode HT[m+1]; //树结点表(HT[1]到HT[m])
char Leaf[n+1]; //叶结点对应字符(leaf[1]到leaf[n])
char *HuffmanCode[n+1]; //叶结点对应编码(*HuffmanCode[1]到*HuffmanCode[n])
unsigned int count; //频度大于零的字符数
unsigned int char_index[n]; //字符对应在树结点表的下标(char_index[0]到char_index[n-1])
unsigned long size; //被压缩文件长度
FILE *infp,*outfp; //输入/出文件
Buffer buf; //字符缓冲
void Stat(); //统计字符出现频度并过滤掉频度为零的字符
//在HT[0]~HT[k]中选择parent为-1,树值最小的两个结点s1,s2
void Select(unsigned int k, unsigned int &s1, unsigned int &s2);
void Write(unsigned int bit); //向outfp中写入一个比特
void Write(unsigned int num,unsigned int k);//向outfp中写入k个比特
void WriteToOutfp(); //强行写入outfp
void Read(unsigned int &bit); //从infp中读出一个比特
void Read(unsigned int &num,unsigned int k);//从infp中读出k个比特
int NToBits(unsigned int num); //0~num之间的整数用二进位表示所需的最少位数
void CreateFromCodeFile(); //由编码文件中存储的树结构建立Huffman树
//由被压缩文件建立Huffman树,将树结构存入编码文件的文件头部中,并求每个字符的Huffman编码
void CreateFromSourceFile();
};
void HuffmanTree::Code() //编码
{
char infName[256],outfName[256];
cout<<"Please input source file name(size less than 4GB):"; //被压缩文件最多4GB
cin>>infName;
if((infp=fopen(infName,"rb"))==NULL){
cout<<"Can not open file:"<<infName<<endl;
exit(1);
}
if(feof(infp)!=0){
cout<<"Empty source file:"<<infName<<endl;
exit(1);
}
cout<<"Please input code file name:";
cin>>outfName;
if((outfp=fopen(outfName,"wb"))==NULL){
cout<<"Can not open file:"<<outfName<<endl;
exit(1);
}
cout<<"Pocessing..."<<endl;
unsigned char ch;
unsigned int i,c;
for(i=0;i<=n;i++)HuffmanCode[i]=NULL;
CreateFromSourceFile();
rewind(infp);
ch=fgetc(infp);
while(feof(infp)==0){
c=char_index[ch];
for(i=0;i<strlen(HuffmanCode[c]);i++){
if(HuffmanCode[c][i]=='0')Write(0);
else Write(1);
}
ch=fgetc(infp);
}
WriteToOutfp();
fclose(infp);fclose(outfp);
cout<<"Process end."<<endl<<endl;
}
void HuffmanTree::UnCode()
{
char infName[256],outfName[256];
cout<<"Please input code file name:";
cin>>infName;
if((infp=fopen(infName,"rb"))==NULL){
cout<<"Can not open file:"<<infName<<endl;
exit(1);
}
if(feof(infp)!=0){
cout<<"Empty code file:"<<infName<<endl;
exit(1);
}
cout<<"Please input target file name:";
cin>>outfName;
if((outfp=fopen(outfName,"wb"))==NULL){
cout<<"Can not open file:"<<outfName<<endl;
exit(1);
}
cout<<"Pocessing..."<<endl;
unsigned int bit,c,i;
CreateFromCodeFile(); //建立Huffman树
Read(bit);
for(i=0;i<size;i++){
c=2*count-1; //2*count-1为根结点的下标
while((HT[c].lchild!=0||HT[c].rchild!=0)&&(feof(infp)==0)){
if(bit==0)c=HT[c].lchild;
else c=HT[c].rchild;
Read(bit);
}
fputc(Leaf[c],outfp); //将字符写入outfp中
}
fclose(infp);fclose(outfp);
cout<<"Process end."<<endl<<endl;
}
void HuffmanTree::Stat() //统计字符出现频度并过滤掉频度为零的字符
{
unsigned int i,cha;
for(i=1;i<=n;i++)HT[i].weight=0;
size=0;
rewind(infp);
cha=fgetc(infp);
while(feof(infp)==0) //统计字符出现频度
{
HT[cha+1].weight++;
size++;
cha=fgetc(infp);
}
count=0;
for(cha=0;cha<n;cha++){ //过滤掉频度为零的字符
if(HT[cha+1].weight>0){
count++;
Leaf[count]=cha;
HT[count].weight=HT[cha+1].weight;
char_index[cha]=count;
}
}
}
void HuffmanTree::Select(unsigned int k, unsigned int &s1, unsigned int &s2)
{//s1,s2为权值最小的根,且s1的权值小于s2的权值
unsigned int root_count=0; //根结点数;
unsigned int root_index[n]; //根结点下标;
unsigned int tem,i,j;
for(i=1;i<=k;i++)
if(HT[i].parent==0)
root_index[root_count++]=i;
s1=root_index[0];s2=root_index[1];
if(HT[s1].weight>HT[s2].weight){
tem=s1;s1=s2;s2=tem;
}
for(i=2;i<root_count;i++){
j=root_index[i];
if(HT[j].weight<HT[s2].weight){
s2=j;
if(HT[s1].weight>HT[s2].weight){
tem=s1;s1=s2;s2=tem;
}
}
}
}
void HuffmanTree::Write(unsigned int bit) //向outfp中写入一个比特
{
buf.bits++;
buf.ch=(buf.ch<<1)+bit;
if(buf.bits==8){ //缓冲区已满,写入outfp
fputc(buf.ch,outfp);
buf.bits=0;
buf.ch=0;
}
}
void HuffmanTree::Write(unsigned int num,unsigned int k) //向outfp中写入k个比特
{
Stack<unsigned int> s;
unsigned int i,bit;
for(i=1;i<=k;i++){
s.push(num & 1);
num=(num>>1);
}
for(i=1;i<=k;i++){
s.top(bit);
Write(bit);
s.pop();
}
}
void HuffmanTree::WriteToOutfp() //强行写入outfp
{
unsigned int l=buf.bits;
if(l>0)
for(unsigned int i=0;i<8-l;i++)Write(0);
}
void HuffmanTree::Read(unsigned int &bit) //从infp中读出一个比特
{
if(buf.bits==0){
buf.ch=fgetc(infp);
buf.bits=8;
}
bit=(buf.ch & 128)>>7;
buf.ch=buf.ch<<1;
buf.bits--;
}
void HuffmanTree::Read(unsigned int &num,unsigned int k) //从infp中读出k个比特
{
unsigned int bit;
num=0;
for(unsigned int i=0;i<k;i++){
Read(bit);
num=(num<<1)+bit;
}
}
int HuffmanTree::NToBits(unsigned int num) //0~num之间的整数用二进位表示所需的位数
{
unsigned int l=0,power=1;
while(power<=num){
l++;power=power*2;
}
return l;
}
void HuffmanTree::CreateFromCodeFile() //由编码文件中存储的树结构建立Huffman树
{
buf.bits=0; //清空缓冲区
buf.ch=0;
unsigned int num,l,i;
rewind(infp);
fread(&size,sizeof(unsigned long),1,infp);
Read(count,8);
count=count+1;
for(i=1;i<=count;i++)
fread(&Leaf[i],sizeof(char),1,infp);
l=NToBits(2*count-1);
for(i=1;i<=count;i++){
HT[i].lchild=0;
HT[i].rchild=0;
}
for(i=count+1;i<=2*count-1;i++){
HT[i].lchild=(Read(num,l),num);
HT[i].rchild=(Read(num,l),num);
}
}
void HuffmanTree::CreateFromSourceFile()
//由被压缩文件建立Huffman树,将树结构存入编码文件的文件头部中,并求每个字符的Huffman编码
{
Stat();//统计字符出现频度并过滤掉频度为零的字符
//由被压缩文件建立Huffman树
unsigned int i,s1,s2;
for(i=1;i<=count;i++)HT[i].parent=HT[i].lchild=HT[i].rchild=0;
for(i=count+1;i<=2*count-1;i++){//建立Huffman树
Select(i-1,s1,s2); //选择parent为0,权值最小的两个结点s1,s2
HT[s1].parent=HT[s2].parent=i;
HT[i].parent=0;HT[i].lchild=s1;HT[i].rchild=s2;
HT[i].weight=HT[s1].weight+HT[s2].weight;
}
//将树结构存入编码文件的文件头部中
unsigned int l;
buf.bits=0; //清空缓冲区
buf.ch=0;
rewind(outfp);
fwrite(&size,sizeof(unsigned int),1,outfp);
Write(count-1,8);
for(i=1;i<=count;i++)
fwrite(&Leaf[i],sizeof(char),1,outfp);
l=NToBits(2*count-1);
for(i=count+1;i<=2*count-1;i++){
Write(HT[i].lchild,l);
Write(HT[i].rchild,l);
}
//求每个字符的Huffman编码
unsigned int start,c,f;
char *cd; //编码临时变量
for(i=1;i<=n;i++)
if(HuffmanCode[i]!=NULL){
delete []HuffmanCode[i]; //释放存储空间
HuffmanCode[i]=NULL;
}
cd=new char[count]; //分配求编码的工作空间
cd[count-1]='\0'; //编码结束符
for(i=1;i<=count;i++){ //逐位求Huffman编码
start=count-1; //编码结束符位置
for(c=i,f=HT[i].parent;f!=0;c=f,f=HT[c].parent) //从叶到根求编码
if(HT[f].lchild==c)cd[--start]='0';
else cd[--start]='1';
HuffmanCode[i]=new char[count-start]; //为第i个字符编码分配空间
strcpy(HuffmanCode[i],&cd[start]); //从cd复制编码到HuffmanCode
}
delete []cd;
}
文件名:LK_stack.h
template<class Node_entry>
struct Node {
Node_entry entry; //数据成员
Node<Node_entry> *next;
Node(); //构造
Node(Node_entry item, Node<Node_entry> *add_on = NULL);
};
template<class Stack_entry>
class Stack {
public: // 标准栈声明方法
Stack();
bool empty() const;
Error_code push(const Stack_entry &item);
Error_code pop();
Error_code top(Stack_entry &item) const;
void clear();
~Stack();
Stack(const Stack<Stack_entry> &original);
void operator =(const Stack<Stack_entry> &original);
protected:
Node<Stack_entry> *top_node;
};
template<class Node_entry>
Node<Node_entry>::Node()
{
next = NULL;
}
template<class Node_entry>
Node<Node_entry>::Node(Node_entry item, Node<Node_entry> *add_on)
{
entry = item;
next = add_on;
}
template<class Stack_entry>
Stack<Stack_entry>::Stack()
{
top_node=NULL;
}
template<class Stack_entry>
bool Stack<Stack_entry>::empty() const
{
if(top_node==NULL)
return true;
else
return false;
}
template<class Stack_entry>
Error_code Stack<Stack_entry>::push(const Stack_entry &item)
{
Node<Stack_entry> *new_top = new Node<Stack_entry>(item, top_node);
if (new_top == NULL) return overflow;
top_node = new_top;
return success;
}
template<class Stack_entry>
Error_code Stack<Stack_entry>::pop()
{
Node<Stack_entry> *old_top = top_node;
if (top_node == NULL) return underflow;
top_node = old_top->next;
delete old_top;
return success;
}
template<class Stack_entry>
Error_code Stack<Stack_entry>::top(Stack_entry &item) const
{
if(empty())
return underflow;
else{
item=top_node->entry;
return success;
}
}
template<class Stack_entry>
void Stack<Stack_entry>::clear() // 清除栈
{
while (!empty())
pop();
}
template<class Stack_entry>
Stack<Stack_entry>::~Stack() // 析构函数 删除栈
clear();
}
template<class Stack_entry>
Stack<Stack_entry>::Stack(const Stack<Stack_entry> &original) // copy constructor
{
Node<Stack_entry> *new_copy, *original_node = original.top_node;
if (original_node == NULL) top_node = NULL;
else
{ // 复制连接结点
top_node = new_copy = new Node<Stack_entry>(original_node->entry);
while (original_node->next != NULL)
{
original_node = original_node->next;
new_copy->next = new Node<Stack_entry>(original_node->entry);
new_copy = new_copy->next;
}
}
}
template<class Stack_entry>
void Stack<Stack_entry>::operator = (const Stack<Stack_entry> &original) // Overload assignment
{
Node<Stack_entry> *new_top, *new_copy, *original_node = original.top_node;
if (original_node == NULL) new_top = NULL;
else
{
new_copy = new_top = new Node<Stack_entry>(original_node->entry);
while (original_node->next != NULL)
{
original_node = original_node->next;
new_copy->next = new Node<Stack_entry>(original_node->entry);
new_copy = new_copy->next;
}
}
while (!empty())
pop();
top_node = new_top;
}
文件名;Utility.h
#include<string.h> //standard string operations
#include<iostream.h> //standard iostream operations
#include<limits.h> //numeric limits
#include<math.h> //mathematical functions
#include<fstream.h> //file input and output
#include<ctype.h> //character classification
#include<time.h> //date and time function
#include<conio.h> //con input and output
#include<stdlib.h> //standard libray
#include<stdio.h> //standard I/O libray
enum Error_code{success,fail,underflow,overflow,range_error};
//enum bool{false,true};
六、运行结果:
实验数据压缩前形态为:we.txt内容为:we are you 123495kfj/.';'
压缩后实验数据用Microsoft Visual c++查看we.cpp:
还原后数据为ww.txt : we are you 123495kfj/.';'
由此可见程序完成了数据的无损压缩和解压。
七、实验运行情况分析(包括算法、运行结果、运行环境等问题的讨论)。
程序虽然很精简,但可以可以实现数据的压缩和解压,把一个文件压缩后,可以解压为其他的格式,所以这也可以作为一些文件的格式转换器。但是本程序也存在着不足,不能够压缩整个文件夹,当然也不能解压那些被压缩了的文件。只能解压Huffman压缩的文件。否则会出现错误。