最简单的文件压缩程序:Huffman 编码

   正在恶补数据结构,今天看到了二叉树和 Huffman 编码,发现压缩程序很有意思,就按照 Huffman 编码的思想实现了一个。算法没有经过改进,也没有用第三方库,但确实能压缩一点空间,花了一天写完,编码效率还是很低。

 

只要调用 compressFile 和 decompressFile 就可以压缩、解压,当然比不上 zip 和 rar,离它们还差十万八千里啊!

 

 

#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

// Branch tags stored in huffmanTreetype::l_or_r when a node is attached to its
// parent. The same values are later copied into the leaf's Huffman code, so
// LEFT contributes a 0 bit and RIGHT a 1 bit.
#define LEFT 0
#define RIGHT 1

using namespace std;

// A node of the Huffman tree. The same object is also threaded into a doubly
// linked list through list_prev/list_next, which is used first as the work
// list while the tree is built and later as the list of leaves.
typedef struct huffmanTreetype{
    // Creates a detached leaf: no links, zero weight and frequency, no code
    // yet, and l_or_r set to 2 (neither LEFT nor RIGHT). Every member,
    // including ch, starts from a defined value.
    huffmanTreetype()
        : huffmancode(NULL),
          codelen(0),
          l_or_r(2),
          ch(0),
          isleafnode(true),
          weight(0),
          frequency(0),
          list_next(NULL),
          list_prev(NULL),
          parent(NULL),
          left_child(NULL),
          right_child(NULL)
    {
    }

    // Prints the node's byte followed by its code as a string of 0/1 digits.
    void printCode(){
        std::cout<<ch<<"  huffmancode:";
        for(int i = 0 ;i < codelen; i++){
            std::cout<<(int)huffmancode[i];
        }
        std::cout<<std::endl;
    }

    char *huffmancode;      // one entry per code bit (0 or 1), root-to-leaf order
    char codelen;           // number of entries in huffmancode
    char l_or_r;            // LEFT or RIGHT relative to parent; 2 while unattached
    unsigned char ch;       // byte value of a leaf ('#' is used for internal nodes)
    bool isleafnode;        // false for nodes created by merging two others
    float weight;           // relative frequency used to order the merges
    int frequency;          // absolute occurrence count (leaves only)
    struct huffmanTreetype *list_next,*list_prev,*parent,*left_child,*right_child;

}huffmanTree;

huffmanTree *leaflist_header = NULL; // head of the list of leaf nodes (filled by preOrderTree)
huffmanTree *listtree_header = NULL; // work list used to build the Huffman tree; holds the root once building is done
int compress_len = 0;  // total length of the compressed data, in bytes
int compresscount = 0; // number of source bytes huffCompress has processed
float compressprogress = 0; // compression progress as a fraction of the source length

// Appends `node` at the tail of the list that starts at `header`.
// `header` is dereferenced unconditionally, so it must not be NULL.
void appendList(huffmanTree *const header,huffmanTree *node){
    huffmanTree *tail = header;
    for(;;){
        if(tail->list_next == NULL){
            break;
        }
        tail = tail->list_next;
    }
    // Link the old tail and the new node to each other; the new node ends the list.
    tail->list_next = node;
    node->list_prev = tail;
    node->list_next = NULL;
}

// Unlinks `node` from the list starting at `header` without destroying it.
//
// The list is searched for `node`; if it is not a member nothing is changed.
// When found, its neighbours are joined together and the node's own
// list_prev/list_next are cleared.
//
// Returns the head of the list after removal. This is `header` unless the
// head itself was removed, in which case it is the former second element, or
// NULL when the removed node was the only element.
huffmanTree*  cutElement(huffmanTree *const header,huffmanTree *node){
    huffmanTree *newheader = header;
    for(huffmanTree *cursor = header; cursor != NULL; cursor = cursor->list_next){
        if(cursor != node){
            continue;
        }
        huffmanTree *before = node->list_prev;
        huffmanTree *after = node->list_next;
        if(before != NULL){
            before->list_next = after;
        }
        if(after != NULL){
            after->list_prev = before;
        }
        if(node == header){
            newheader = after;
            // `after` is NULL when the list had a single element; there is
            // then no new head whose back link needs clearing.
            if(newheader != NULL){
                newheader->list_prev = NULL;
            }
        }
        // Fully detach the removed node.
        node->list_prev = NULL;
        node->list_next = NULL;
        break;  // a node can be linked into the list only once
    }
    return newheader;
}

// Counts the nodes reachable from `header` through list_next (0 for NULL).
int listlength(huffmanTree *const header){
    int count = 0;
    for(huffmanTree *cur = header; cur != NULL; cur = cur->list_next){
        ++count;
    }
    return count;
}

// Writes every node of the list to stdout as "(byte weight)  ", then a newline.
void printlist(huffmanTree *const header){
    for(huffmanTree *cur = header; cur != NULL; cur = cur->list_next){
        std::cout<<"("<<cur->ch<<" "<<cur->weight<<")  ";
    }
    std::cout<<std::endl;
}

// Pre-order walk of the tree rooted at `rootnode` that collects every leaf
// into the global leaf list (leaflist_header), in visiting order.
void preOrderTree(huffmanTree *rootnode){
    if(rootnode == NULL){
        return;
    }
    if(rootnode->isleafnode){
        // The first leaf found becomes the list head; later ones go to the tail.
        if(leaflist_header != NULL){
            appendList(leaflist_header,rootnode);
        }else{
            leaflist_header = rootnode;
        }
    }
    preOrderTree(rootnode->left_child);
    preOrderTree(rootnode->right_child);
}

// Assigns a Huffman code to every leaf in `listheader` and stores the size of
// the compressed payload, in bytes, in the global compress_len.
//
// A leaf's code length is its depth (number of parent links up to the root).
// The code bits are the l_or_r tags met on the way up, stored root-first so
// that huffmancode[0] is the branch taken at the root.
//
// The function can be called repeatedly: each call recomputes codelen from
// scratch, releases a previously assigned code array, and overwrites
// compress_len instead of adding to it. The bit total is accumulated in a
// 64-bit integer so that frequency * codelen cannot overflow int.
void setfuffmanCode(huffmanTree *const listheader){
    long long total_bits = 0;
    for(huffmanTree *leaf = listheader; leaf != NULL; leaf = leaf->list_next){
        // Depth of the leaf == number of bits in its code.
        leaf->codelen = 0;
        for(huffmanTree *up = leaf; up->parent != NULL; up = up->parent){
            leaf->codelen++;
        }

        // Replace any code left over from an earlier call (delete[] NULL is a no-op).
        delete[] leaf->huffmancode;
        leaf->huffmancode = new char[leaf->codelen];

        // Fill from the last bit backwards while climbing towards the root.
        huffmanTree *up = leaf;
        for(int i = leaf->codelen - 1;i >= 0 ;i--){
            leaf->huffmancode[i] = up->l_or_r;
            up = up->parent;
        }

        total_bits += (long long)leaf->frequency * leaf->codelen;
    }
    // Round the bit count up to whole bytes.
    compress_len = (int)((total_bits + 7) / 8);
}

// Finds the two lowest-weight nodes of the list starting at `header`.
//
// lasttwo[0] receives the lightest node and lasttwo[1] the second lightest
// (on ties the node nearer the head wins). To find the runner-up, the
// lightest node is temporarily unlinked and then re-appended at the tail, so
// the list order changes and the head may change too.
//
// Returns the (possibly new) head of the list. The list must hold at least
// two nodes.
huffmanTree* findLasttwo(huffmanTree * header,huffmanTree **lasttwo){
    // Pass 1: overall minimum.
    huffmanTree *lightest = header;
    for(huffmanTree *cur = header->list_next; cur != NULL; cur = cur->list_next){
        if(cur->weight < lightest->weight){
            lightest = cur;
        }
    }
    lasttwo[0] = lightest;

    // Take the minimum out so the second pass cannot pick it again.
    header = cutElement(header,lightest);

    // Pass 2: minimum of what is left.
    huffmanTree *runnerUp = header;
    for(huffmanTree *cur = header->list_next; cur != NULL; cur = cur->list_next){
        if(cur->weight < runnerUp->weight){
            runnerUp = cur;
        }
    }
    lasttwo[1] = runnerUp;

    // Put the minimum back (at the tail) before returning.
    appendList(header,lightest);
    return header;
}

//根据统计数据创建huffman树 
void createHuffTree(int *statistics){
     //构造huffman树 
   int total_frequency = 0;
   for(int i = 0 ;i < 256 ;i++){
       total_frequency += statistics[i];
   }    
   for(int i = 0 ;i < 256 ;i++){
      if(statistics[i] != 0){ 
        //计算权值 
        huffmanTree *node = new huffmanTree;
        node->ch = i;
        node->frequency = statistics[i];     
        node->weight = statistics[i]/(float)total_frequency; 
        if(listtree_header == NULL){
            listtree_header = node;   
        }else{    
            appendList(listtree_header,node);
        }    
        //cout<<(char)i<<": weight  "<<node->weight<<endl;
      }    
   }    
   
   //直到链表中只有一个元素才停止构造树
   //选中剩下节点中两个权值非0最小的构造新节点 
   huffmanTree* lasttwo[2];
   //printlist(listtree_header);
   
   while(listlength(listtree_header) >= 2){//链表中至少有两个元素 
       listtree_header = findLasttwo(listtree_header,lasttwo);
       //cout<<"listlen:"<<listlength(list_header)<<endl;
       //cout<<"last two 0 :"<<lasttwo[0]->ch<<" 1:"<<lasttwo[1]->ch<<endl;
       //开始生成树结构 
       huffmanTree *node = new huffmanTree;
       lasttwo[0]->parent = node;
       lasttwo[0]->l_or_r = LEFT;
       lasttwo[1]->parent = node;
       lasttwo[1]->l_or_r = RIGHT;
       node->ch = '#';
       node->isleafnode = false;
       node->weight = lasttwo[0]->weight + lasttwo[1]->weight;
       node->left_child = lasttwo[0];
       node->right_child = lasttwo[1];
           
       //将链表重新链接好,可能把链表头都合并了 
       appendList(listtree_header,node);
       listtree_header = cutElement(listtree_header,lasttwo[0]);
       listtree_header = cutElement(listtree_header,lasttwo[1]);
       //printlist(listtree_header);
       
   }     
}    

// Looks up the Huffman code of `src_byte` in the leaf list `header`.
//
// On a match, *code is pointed at the leaf's code array and the code length
// is returned. When no leaf carries that byte, 0 is returned and *code is
// left untouched.
int findCode(huffmanTree *const header,unsigned char src_byte,char **code){
    for(huffmanTree *leaf = header; leaf != NULL; leaf = leaf->list_next){
        if(leaf->ch != src_byte){
            continue;
        }
        *code = leaf->huffmancode;
        return leaf->codelen;
    }
    return 0;
}

// Arguments and the whole expansion are parenthesised so the macros stay
// correct when given compound expressions or used inside larger expressions.
#define setbit(x,y) ((x)|=(1<<(y))) // set bit y of x to 1
#define getbit(x,y) (((x)&(1<<(y)))>>(y))        // read bit y of x (0 or 1)
// Encodes `src` with the Huffman codes stored in the leaf list `listheader`.
//
// listheader: leaves whose huffmancode/codelen have already been assigned.
// src, src_len: bytes to encode.
// dest, dest_len: output buffer; it must be zero-filled by the caller, because
//                 only 1 bits are written. Bits are packed most significant
//                 bit first, and a code may straddle a byte boundary.
//
// Progress is published through the globals compresscount/compressprogress
// and printed every 500000 source bytes. The counter restarts at zero on each
// call so the reported fraction is relative to this call's input.
//
// A byte with no leaf in the list contributes no bits. If a 1 bit would land
// beyond dest_len, an error is reported on stderr and encoding stops instead
// of writing past the buffer.
void huffCompress(huffmanTree *const listheader,unsigned char *src,int src_len,unsigned char *dest,int dest_len){
    // Index the leaves by byte value once, instead of searching the list for
    // every source byte. The first leaf wins if a byte value appears twice.
    huffmanTree *leafByByte[256];
    for(int i = 0 ;i < 256 ;i++){
        leafByByte[i] = NULL;
    }
    for(huffmanTree *leaf = listheader; leaf != NULL; leaf = leaf->list_next){
        if(leafByByte[leaf->ch] == NULL){
            leafByByte[leaf->ch] = leaf;
        }
    }

    compresscount = 0;
    long long destbitindex = 0;   // position of the next output bit (64-bit: may exceed INT_MAX)
    for(int i = 0 ;i < src_len ;i++){
        compresscount++;
        compressprogress = compresscount/(float)src_len;
        if(compresscount%500000 == 0){
            std::cout<<"progress:"<<compressprogress * 100<<"%"<<std::endl;
        }

        const huffmanTree *leaf = leafByByte[src[i]];
        if(leaf == NULL){
            continue;
        }
        for(int j = 0 ;j < leaf->codelen;j++){
            if(leaf->huffmancode[j] == 1){
                const long long destbyteindex = destbitindex / 8;
                if(destbyteindex >= dest_len){
                    std::cerr<<"huffCompress: output buffer too small"<<std::endl;
                    return;
                }
                const int bitoffset = (int)(destbitindex % 8);  // offset from the left (MSB)
                dest[destbyteindex] |= (unsigned char)(1 << (7 - bitoffset));
            }
            destbitindex++;
        }
    }
}

// Decodes a Huffman bit stream.
//
// treeheader: root of the Huffman tree.
// binary_src, src_len: encoded bytes, most significant bit first.
// dest, dest_len: output buffer; dest_len is the number of bytes to decode
//                 (the size of the original data).
//
// Each bit selects the left (0) or right (1) child; reaching a leaf emits its
// byte and restarts at the root. Special cases:
//  - NULL tree or dest_len <= 0: nothing is written.
//  - the root is itself a leaf (input had one distinct byte value): codes are
//    zero bits long, so dest is simply filled with that byte.
//  - the bit stream ends, or a missing child is reached, before dest_len
//    bytes are produced: an error is reported on stderr and decoding stops,
//    rather than reading past binary_src or dereferencing NULL.
void huffDecompress(huffmanTree *const treeheader,unsigned char *binary_src,int src_len,unsigned char *dest,int dest_len){
    if(treeheader == NULL || dest_len <= 0){
        return;
    }
    if(treeheader->isleafnode){
        for(int i = 0 ;i < dest_len ;i++){
            dest[i] = treeheader->ch;
        }
        return;
    }

    const long long total_bits = (long long)src_len * 8;
    long long bitindex = 0;
    int destbyteindex = 0;
    huffmanTree *cursor = treeheader;
    while(destbyteindex < dest_len){
        if(bitindex >= total_bits){
            std::cerr<<"huffDecompress: compressed data ended early"<<std::endl;
            return;
        }
        const int bitoffset = (int)(bitindex % 8);  // offset from the left (MSB)
        const int srcbit = (binary_src[bitindex / 8] >> (7 - bitoffset)) & 1;
        bitindex++;

        cursor = (srcbit == 0) ? cursor->left_child : cursor->right_child;
        if(cursor == NULL){
            std::cerr<<"huffDecompress: malformed Huffman tree"<<std::endl;
            return;
        }
        if(cursor->isleafnode){
            dest[destbyteindex] = cursor->ch;
            destbyteindex++;
            cursor = treeheader;  // back to the root for the next symbol
        }
    }
}

//压缩文件
void compressFile(char *srcfilename){
    int srcfilenamelen = strlen(srcfilename);
    char compressname[srcfilenamelen + 15];
    compressname[0] = '\0';
    char suffix[] = ".huffman-YU";
    strcat(compressname,srcfilename);
    strcat(compressname,suffix);
    FILE *srcfile = fopen(srcfilename,"rb+");
    fseek(srcfile,0,SEEK_END);
    int file_len = ftell(srcfile);
    fseek(srcfile,0,SEEK_SET);
    unsigned char * filememory = new unsigned char[file_len];
    fread(filememory,1,file_len,srcfile);
    int statistics[256];
    for(int i = 0 ;i < 256 ;i++){
       statistics[i] = 0;
    }    
    for(int i = 0 ;i < file_len ;i++){
           statistics[filememory[i]]++;
    }    
    
     
    //用统计数据创建huffman树 
    createHuffTree(statistics);
         
    //给叶子节点编码 
    //先把叶子节点取出 
    preOrderTree(listtree_header);
    //cout<<endl<<"leaf:"<<endl;
    //cout<<"listlen:"<<listlength(leaflist_header); 
    //printlist(leaflist_header);
    //设置叶子节点的huffman编码 
    setfuffmanCode(leaflist_header);
    //根据huffman编码压缩数据 
    unsigned char * compress_dest = new unsigned char[compress_len];
    //清零
    for(int i = 0 ;i < compress_len;i++){
           compress_dest[i] = 0;
    }     
    
    huffCompress(leaflist_header,filememory,file_len,compress_dest,compress_len);
    cout<<"compress_len:"<<compress_len<<endl;
    //将压缩后的数据写入文件
    //先写文件名长度,文件名,256个统计数据
    FILE * compress_file = fopen(compressname,"wb+"); 
    fwrite(&srcfilenamelen,sizeof(int),1,compress_file);
    fwrite(srcfilename,sizeof(char),srcfilenamelen,compress_file);
    fwrite(statistics,sizeof(int),256,compress_file);
    //写入压缩数据长度和数据
    fwrite(&compress_len,sizeof(int),1,compress_file);
    fwrite(compress_dest,sizeof(char),compress_len,compress_file);
    
    fclose(srcfile); 
    fclose(compress_file);
    
}     

// Decompresses an archive written by compressFile.
//
// The archive holds, in order: an int name length, the original file name,
// 256 int byte counts, an int payload length and the payload. The decoded
// data is written to "copy-<original file name>" in the current directory.
//
// The archive is treated as untrusted: negative or oversized lengths,
// negative counts, a total size above INT_MAX and short reads are rejected
// with a message on stderr. All buffers and file handles are released on
// every path.
//
// NOTE(review): the stored file name is used as part of the output path
// without removing path separators; confirm whether archives from untrusted
// sources need the name sanitised.
void decompressFile(const char *srcfilename){
    std::cout<<std::endl<<"解压:";
    if(srcfilename == NULL){
        std::cerr<<"decompressFile: no file name given"<<std::endl;
        return;
    }
    FILE *srcfile = fopen(srcfilename,"rb");
    if(srcfile == NULL){
        std::cerr<<"decompressFile: cannot open "<<srcfilename<<std::endl;
        return;
    }

    const int max_name_len = 4096;  // sanity bound for the stored file name
    int filenamelen = 0;
    int statistics[256];
    int compressdatalen = 0;
    std::string filename;
    std::vector<unsigned char> compressdata;

    // Read the header fields in archive order, stopping at the first failure.
    bool ok = fread(&filenamelen,sizeof(int),1,srcfile) == 1
              && filenamelen >= 0 && filenamelen <= max_name_len;
    if(ok && filenamelen > 0){
        filename.resize(filenamelen);
        ok = fread(&filename[0],sizeof(char),filenamelen,srcfile) == (size_t)filenamelen;
    }
    ok = ok && fread(statistics,sizeof(int),256,srcfile) == 256;
    ok = ok && fread(&compressdatalen,sizeof(int),1,srcfile) == 1 && compressdatalen >= 0;
    if(ok && compressdatalen > 0){
        compressdata.resize(compressdatalen);
        ok = fread(&compressdata[0],sizeof(char),compressdatalen,srcfile) == (size_t)compressdatalen;
    }
    fclose(srcfile);

    // The original size is the sum of all byte counts.
    long long total = 0;
    if(ok){
        for(int i = 0 ;i < 256; i++){
            if(statistics[i] < 0){
                ok = false;
                break;
            }
            total += statistics[i];
        }
    }
    if(!ok || total > INT_MAX){
        std::cerr<<"decompressFile: "<<srcfilename<<" is not a valid archive"<<std::endl;
        return;
    }
    const int decompressdatalen = (int)total;
    std::cout<<"decompressdatalen: "<<decompressdatalen<<std::endl;

    // Rebuild the same tree the compressor used, then collect and code its leaves.
    createHuffTree(statistics);
    preOrderTree(listtree_header);
    setfuffmanCode(leaflist_header);

    // Zero-initialised so a decode that stops early leaves defined bytes.
    std::vector<unsigned char> decompressdata(decompressdatalen,0);
    if(decompressdatalen > 0){
        huffDecompress(listtree_header,
                       compressdata.empty() ? NULL : &compressdata[0],compressdatalen,
                       &decompressdata[0],decompressdatalen);
    }

    // c_str() cuts the stored name at an embedded NUL, like a C string would.
    std::string copyfilename("copy-");
    copyfilename += filename.c_str();
    std::cout<<"copyname:"<<copyfilename<<std::endl;

    FILE *destfile = fopen(copyfilename.c_str(),"wb");
    if(destfile == NULL){
        std::cerr<<"decompressFile: cannot create "<<copyfilename<<std::endl;
        return;
    }
    bool written = true;
    if(decompressdatalen > 0){
        written = fwrite(&decompressdata[0],sizeof(char),decompressdatalen,destfile) == (size_t)decompressdatalen;
    }
    if(fclose(destfile) != 0){
        written = false;
    }
    if(!written){
        std::cerr<<"decompressFile: error while writing "<<copyfilename<<std::endl;
    }
}

// Demo entry point: decompresses the hard-coded archive, then runs the shell
// command "PAUSE" (the Windows "press any key" prompt) before exiting.
int main(){

   // The name is kept in a writable array so the call is valid whether the
   // callee takes `char *` or `const char *`.
   char archive[] = "iphone.pdf.huffman-YU";

   //compressFile("iphone.pdf");
   decompressFile(archive);

   system("PAUSE");
   return 0;
}





你可能感兴趣的:(数据结构,算法,Blog,J#)