最近在恶补数据结构,今天学到了二叉树和 Huffman 编码,发现压缩程序很有意思,就按照 Huffman 编码的思想实现了一个。算法本身没有做任何改进,也没有使用第三方库,不过确实能压缩出一点空间;整个程序花了一天写完,编码效率还比较低。
只要调用 compressFile 和 decompressFile 就可以完成压缩和解压。当然它比不上 zip 和 rar,离它们还差十万八千里啊!
#include <climits>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <string>
#include <vector>

#define LEFT 0
#define RIGHT 1

using namespace std;

// One node of the Huffman tree.
//
// The same struct doubles as a doubly linked list element (list_prev /
// list_next): while the tree is being built the list holds the not-yet-merged
// subtrees, and afterwards preOrderTree() re-uses the links to chain the
// leaves together.
typedef struct huffmanTreetype {
    huffmanTreetype()
        : huffmancode(NULL), codelen(0), l_or_r(2), ch(0), isleafnode(true),
          weight(0), frequency(0), list_next(NULL), list_prev(NULL),
          parent(NULL), left_child(NULL), right_child(NULL) {}

    // Prints the symbol followed by its code as a string of 0/1 digits.
    void printCode() {
        cout << ch << " huffmancode:";
        for (int i = 0; i < codelen; i++) {
            cout << (int)huffmancode[i];
        }
        cout << endl;
    }

    char *huffmancode;   // code bits, one 0/1 value per element, root side first
    char codelen;        // number of elements in huffmancode
    char l_or_r;         // LEFT or RIGHT relative to parent; 2 while unattached
    unsigned char ch;    // byte value for leaves, '#' for internal nodes
    bool isleafnode;
    float weight;        // frequency / total frequency (sum of children for internal nodes)
    int frequency;       // occurrence count of ch (leaves only)
    struct huffmanTreetype *list_next, *list_prev, *parent, *left_child, *right_child;
} huffmanTree;

huffmanTree *leaflist_header = NULL;   // head of the leaf list filled by preOrderTree()
huffmanTree *listtree_header = NULL;   // work list while building; the tree root afterwards
int compress_len = 0;                  // compressed payload size in bytes, set by setfuffmanCode()
int compresscount = 0;                 // bytes encoded so far by huffCompress()
float compressprogress = 0;            // compresscount / src_len

// Frees a subtree together with the code arrays owned by its nodes.
static void destroyHuffSubtree(huffmanTree *node) {
    if (node == NULL) {
        return;
    }
    destroyHuffSubtree(node->left_child);
    destroyHuffSubtree(node->right_child);
    delete[] node->huffmancode;
    delete node;
}

// Appends node to the end of the list that starts at header.
// Does nothing when either pointer is NULL.
void appendList(huffmanTree *const header, huffmanTree *node) {
    if (header == NULL || node == NULL) {
        return;
    }
    huffmanTree *tail = header;
    while (tail->list_next != NULL) {
        tail = tail->list_next;
    }
    tail->list_next = node;
    node->list_prev = tail;
    node->list_next = NULL;
}

// Unlinks node from the list without destroying it and returns the (possibly
// new) list head. The list is returned unchanged when node is not a member;
// NULL is returned when node was the only element.
huffmanTree *cutElement(huffmanTree *const header, huffmanTree *node) {
    huffmanTree *cursor = header;
    while (cursor != NULL && cursor != node) {
        cursor = cursor->list_next;
    }
    if (cursor == NULL) {
        return header;   // not in this list: leave everything alone
    }

    huffmanTree *before = node->list_prev;
    huffmanTree *after = node->list_next;
    if (before != NULL) {
        before->list_next = after;
    }
    if (after != NULL) {
        after->list_prev = before;
    }

    huffmanTree *newheader = header;
    if (node == header) {
        newheader = after;
        if (newheader != NULL) {   // removing the sole element leaves an empty list
            newheader->list_prev = NULL;
        }
    }

    // Detach the removed node completely.
    node->list_prev = NULL;
    node->list_next = NULL;
    return newheader;
}

// Returns the number of elements in the list.
int listlength(huffmanTree *const header) {
    int len = 0;
    for (const huffmanTree *cursor = header; cursor != NULL; cursor = cursor->list_next) {
        len++;
    }
    return len;
}

// Prints every list element as "(symbol weight) " on a single line.
void printlist(huffmanTree *const header) {
    for (const huffmanTree *cursor = header; cursor != NULL; cursor = cursor->list_next) {
        cout << "(" << cursor->ch << " " << cursor->weight << ") ";
    }
    cout << endl;
}

// Walks the tree in pre-order and chains every leaf onto leaflist_header.
// Call it once per tree, right after createHuffTree(): the leaves are linked
// through their list_prev/list_next fields, so a second walk over the same
// tree would corrupt the leaf list.
void preOrderTree(huffmanTree *rootnode) {
    if (rootnode == NULL) {
        return;
    }
    if (rootnode->isleafnode) {
        if (leaflist_header == NULL) {
            leaflist_header = rootnode;
        } else {
            appendList(leaflist_header, rootnode);
        }
    }
    preOrderTree(rootnode->left_child);
    preOrderTree(rootnode->right_child);
}

// Assigns a Huffman code to every leaf in the list (left branch = 0, right
// branch = 1) and stores the resulting payload size, in whole bytes, in
// compress_len. A tree that consists of a single leaf yields a zero-length
// code and therefore compress_len == 0.
void setfuffmanCode(huffmanTree *const listheader) {
    long long total_bits = 0;   // wide accumulator: frequency * depth can exceed int
    for (huffmanTree *leaf = listheader; leaf != NULL; leaf = leaf->list_next) {
        // The code length equals the leaf's depth.
        int depth = 0;
        for (const huffmanTree *up = leaf; up->parent != NULL; up = up->parent) {
            ++depth;
        }

        delete[] leaf->huffmancode;   // drop a code left over from an earlier call
        leaf->huffmancode = new char[depth];
        leaf->codelen = static_cast<char>(depth);

        // Climb towards the root, filling the code from its last bit backwards.
        const huffmanTree *step = leaf;
        for (int i = depth - 1; i >= 0; --i) {
            leaf->huffmancode[i] = step->l_or_r;
            step = step->parent;
        }
        total_bits += static_cast<long long>(leaf->frequency) * depth;
    }
    compress_len = static_cast<int>((total_bits + 7) / 8);   // round up to whole bytes
}

// Returns the first node carrying the smallest weight, or NULL for an empty list.
static huffmanTree *lightestNode(huffmanTree *header) {
    huffmanTree *best = header;
    for (huffmanTree *cursor = header; cursor != NULL; cursor = cursor->list_next) {
        if (cursor->weight < best->weight) {
            best = cursor;
        }
    }
    return best;
}

// Finds the two lightest nodes of the list: lasttwo[0] receives the lightest,
// lasttwo[1] the second lightest. As a side effect the lightest node is moved
// to the end of the list; the (possibly new) head is returned. With fewer than
// two elements the list is left untouched and lasttwo[1] is set to NULL.
huffmanTree *findLasttwo(huffmanTree *header, huffmanTree **lasttwo) {
    if (header == NULL || header->list_next == NULL) {
        lasttwo[0] = header;
        lasttwo[1] = NULL;
        return header;
    }
    lasttwo[0] = lightestNode(header);
    // Take the lightest out so that the second search cannot pick it again,
    // then put it back at the tail.
    header = cutElement(header, lasttwo[0]);
    lasttwo[1] = lightestNode(header);
    appendList(header, lasttwo[0]);
    return header;
}

// Builds the Huffman tree for the 256 byte counts in statistics and leaves its
// root in listtree_header (NULL when every count is zero). Any tree left over
// from a previous call is freed first, and leaflist_header / compress_len are
// reset, so compression and decompression can run repeatedly in one process.
void createHuffTree(int *statistics) {
    destroyHuffSubtree(listtree_header);
    listtree_header = NULL;
    leaflist_header = NULL;
    compress_len = 0;
    if (statistics == NULL) {
        return;
    }

    long long total_frequency = 0;
    for (int i = 0; i < 256; i++) {
        if (statistics[i] > 0) {
            total_frequency += statistics[i];
        }
    }

    // One leaf per byte value that actually occurs, in ascending byte order.
    for (int i = 0; i < 256; i++) {
        if (statistics[i] <= 0) {
            continue;
        }
        huffmanTree *leaf = new huffmanTree;
        leaf->ch = static_cast<unsigned char>(i);
        leaf->frequency = statistics[i];
        leaf->weight = statistics[i] / (float)total_frequency;
        if (listtree_header == NULL) {
            listtree_header = leaf;
        } else {
            appendList(listtree_header, leaf);
        }
    }

    // Merge the two lightest subtrees until a single root remains. The order
    // of the list operations decides ties, so it must stay exactly like this
    // for compressor and decompressor to rebuild the same tree.
    huffmanTree *lasttwo[2];
    while (listtree_header != NULL && listtree_header->list_next != NULL) {
        listtree_header = findLasttwo(listtree_header, lasttwo);

        huffmanTree *merged = new huffmanTree;
        lasttwo[0]->parent = merged;
        lasttwo[0]->l_or_r = LEFT;
        lasttwo[1]->parent = merged;
        lasttwo[1]->l_or_r = RIGHT;
        merged->ch = '#';
        merged->isleafnode = false;
        merged->weight = lasttwo[0]->weight + lasttwo[1]->weight;
        merged->left_child = lasttwo[0];
        merged->right_child = lasttwo[1];

        // Append first: removing both children may remove the current head.
        appendList(listtree_header, merged);
        listtree_header = cutElement(listtree_header, lasttwo[0]);
        listtree_header = cutElement(listtree_header, lasttwo[1]);
    }
}

// Looks up the code of src_byte in the leaf list. On success *code points at
// the code bits and the code length is returned; 0 is returned (and *code is
// left untouched) when the byte has no leaf.
int findCode(huffmanTree *const header, unsigned char src_byte, char **code) {
    for (huffmanTree *leaf = header; leaf != NULL; leaf = leaf->list_next) {
        if (leaf->ch == src_byte) {
            *code = leaf->huffmancode;
            return leaf->codelen;
        }
    }
    return 0;
}

#define setbit(x, y) ((x) |= (1 << (y)))           // set bit y of x
#define getbit(x, y) (((x) & (1 << (y))) >> (y))   // read bit y of x

// Encodes src[0..src_len) with the codes stored in the leaf list and packs the
// bits into dest, most significant bit first. dest must be zero-filled by the
// caller because only 1-bits are written; nothing is written past dest_len
// bytes. Progress is reported through compresscount / compressprogress, which
// restart from zero on every call.
void huffCompress(huffmanTree *const listheader, unsigned char *src, int src_len,
                  unsigned char *dest, int dest_len) {
    // Index the leaves by byte value once instead of scanning the list per byte.
    const huffmanTree *code_of[256] = {NULL};
    for (const huffmanTree *leaf = listheader; leaf != NULL; leaf = leaf->list_next) {
        if (code_of[leaf->ch] == NULL) {
            code_of[leaf->ch] = leaf;
        }
    }

    const long long dest_bits = static_cast<long long>(dest_len) * 8;
    long long destbitindex = 0;   // running bit position in dest
    compresscount = 0;
    for (int i = 0; i < src_len; i++) {
        compresscount++;
        compressprogress = compresscount / (float)src_len;
        if (compresscount % 500000 == 0) {
            cout << "progress:" << compressprogress * 100 << "%" << endl;
        }

        const huffmanTree *leaf = code_of[src[i]];
        if (leaf == NULL) {
            continue;   // byte without a code contributes no bits
        }
        for (int j = 0; j < leaf->codelen; j++) {
            if (destbitindex >= dest_bits) {
                return;   // destination full
            }
            if (leaf->huffmancode[j] == 1) {
                setbit(dest[destbitindex / 8], 7 - static_cast<int>(destbitindex % 8));
            }
            destbitindex++;
        }
    }
}

// Decodes binary_src (src_len bytes, bits taken most significant first) by
// walking the tree rooted at treeheader, writing at most dest_len bytes to
// dest. Decoding stops early when the input runs out. A tree that is a single
// leaf carries zero-length codes, so dest is simply filled with that symbol.
void huffDecompress(huffmanTree *const treeheader, unsigned char *binary_src, int src_len,
                    unsigned char *dest, int dest_len) {
    if (treeheader == NULL || dest == NULL || dest_len <= 0) {
        return;
    }
    if (treeheader->isleafnode) {
        for (int i = 0; i < dest_len; i++) {
            dest[i] = treeheader->ch;
        }
        return;
    }

    const long long total_bits = static_cast<long long>(src_len) * 8;
    huffmanTree *cursor = treeheader;
    int written = 0;
    for (long long bitindex = 0; bitindex < total_bits && written < dest_len; bitindex++) {
        const int bitoffset = static_cast<int>(bitindex % 8);   // offset from the left
        const int srcbit = getbit(binary_src[bitindex / 8], 7 - bitoffset);
        cursor = (srcbit == 0) ? cursor->left_child : cursor->right_child;
        if (cursor == NULL) {
            return;   // malformed tree: give up instead of dereferencing NULL
        }
        if (cursor->isleafnode) {
            dest[written++] = cursor->ch;
            cursor = treeheader;   // restart at the root for the next symbol
        }
    }
}

// Prints a diagnostic for a failed file operation to standard error.
static void reportFailure(const char *action, const char *path) {
    cerr << "huffman: " << action << ": " << (path != NULL ? path : "(null)") << endl;
}

// Returns the size of an open file and rewinds it, or -1 on failure.
static long fileSize(FILE *file) {
    if (fseek(file, 0, SEEK_END) != 0) {
        return -1;
    }
    const long size = ftell(file);
    if (fseek(file, 0, SEEK_SET) != 0) {
        return -1;
    }
    return size;
}

// Reads exactly bytes bytes; a zero-length read always succeeds.
static bool readAll(FILE *file, void *buffer, size_t bytes) {
    return bytes == 0 || fread(buffer, 1, bytes, file) == bytes;
}

// Writes exactly bytes bytes; a zero-length write always succeeds.
static bool writeAll(FILE *file, const void *buffer, size_t bytes) {
    return bytes == 0 || fwrite(buffer, 1, bytes, file) == bytes;
}

// Returns a pointer to the first element, or NULL for an empty buffer.
static unsigned char *bufferStart(vector<unsigned char> &buffer) {
    return buffer.empty() ? NULL : &buffer[0];
}

// Compresses srcfilename into "<srcfilename>.huffman-YU".
//
// Archive layout (native int size and byte order): name length, name bytes,
// 256 byte counts, payload length, payload. Failures are reported on standard
// error and leave no further side effects.
void compressFile(const char *srcfilename) {
    if (srcfilename == NULL) {
        reportFailure("no input file given", "");
        return;
    }
    FILE *srcfile = fopen(srcfilename, "rb");
    if (srcfile == NULL) {
        reportFailure("cannot open input file", srcfilename);
        return;
    }
    const long file_size = fileSize(srcfile);
    if (file_size < 0 || file_size > INT_MAX) {
        fclose(srcfile);
        reportFailure("cannot determine size (or file too large)", srcfilename);
        return;
    }
    const int file_len = static_cast<int>(file_size);
    vector<unsigned char> filememory(file_len);
    const bool loaded = readAll(srcfile, bufferStart(filememory), filememory.size());
    fclose(srcfile);
    if (!loaded) {
        reportFailure("cannot read input file", srcfilename);
        return;
    }

    int statistics[256] = {0};
    for (int i = 0; i < file_len; i++) {
        statistics[filememory[i]]++;
    }

    // Build the tree, collect its leaves and give each one a code.
    createHuffTree(statistics);
    preOrderTree(listtree_header);
    setfuffmanCode(leaflist_header);

    // The encoder only sets 1-bits, so the buffer has to start zeroed.
    vector<unsigned char> compress_dest(compress_len, 0);
    huffCompress(leaflist_header, bufferStart(filememory), file_len,
                 bufferStart(compress_dest), compress_len);
    cout << "compress_len:" << compress_len << endl;

    const string compressname = string(srcfilename) + ".huffman-YU";
    FILE *compress_file = fopen(compressname.c_str(), "wb");
    if (compress_file == NULL) {
        reportFailure("cannot create archive", compressname.c_str());
        return;
    }
    const int srcfilenamelen = static_cast<int>(strlen(srcfilename));
    bool written = writeAll(compress_file, &srcfilenamelen, sizeof(int)) &&
                   writeAll(compress_file, srcfilename, srcfilenamelen) &&
                   writeAll(compress_file, statistics, sizeof(statistics)) &&
                   writeAll(compress_file, &compress_len, sizeof(int)) &&
                   writeAll(compress_file, bufferStart(compress_dest), compress_dest.size());
    if (fclose(compress_file) != 0) {
        written = false;
    }
    if (!written) {
        reportFailure("cannot write archive", compressname.c_str());
    }
}

// Expands an archive produced by compressFile() into "copy-<stored name>".
// Header fields are validated against the archive size before anything is
// allocated; failures are reported on standard error.
void decompressFile(const char *srcfilename) {
    cout << endl << "解压:";
    if (srcfilename == NULL) {
        reportFailure("no archive given", "");
        return;
    }
    FILE *srcfile = fopen(srcfilename, "rb");
    if (srcfile == NULL) {
        reportFailure("cannot open archive", srcfilename);
        return;
    }
    const long archive_size = fileSize(srcfile);

    int filenamelen = 0;
    int compressdatalen = 0;
    int statistics[256];
    vector<char> namebuf;
    vector<unsigned char> compressdata;

    // No field can be longer than the archive that contains it.
    bool ok = archive_size >= 0 && readAll(srcfile, &filenamelen, sizeof(int)) &&
              filenamelen >= 0 && filenamelen <= archive_size;
    if (ok) {
        namebuf.resize(filenamelen);
        ok = readAll(srcfile, namebuf.empty() ? NULL : &namebuf[0], namebuf.size());
    }
    ok = ok && readAll(srcfile, statistics, sizeof(statistics)) &&
         readAll(srcfile, &compressdatalen, sizeof(int)) &&
         compressdatalen >= 0 && compressdatalen <= archive_size;
    if (ok) {
        compressdata.resize(compressdatalen);
        ok = readAll(srcfile, bufferStart(compressdata), compressdata.size());
    }
    fclose(srcfile);

    // The original length is the sum of the byte counts.
    long long total = 0;
    for (int i = 0; ok && i < 256; i++) {
        if (statistics[i] < 0) {
            ok = false;
        } else {
            total += statistics[i];
        }
    }
    if (!ok || total > INT_MAX) {
        reportFailure("truncated or corrupt archive", srcfilename);
        return;
    }
    const int decompressdatalen = static_cast<int>(total);
    cout << "decompressdatalen: " << decompressdatalen << endl;

    // Rebuild exactly the tree the compressor used.
    createHuffTree(statistics);
    preOrderTree(listtree_header);
    setfuffmanCode(leaflist_header);

    vector<unsigned char> decompressdata(decompressdatalen, 0);
    huffDecompress(listtree_header, bufferStart(compressdata), compressdatalen,
                   bufferStart(decompressdata), decompressdatalen);

    const string copyfilename = "copy-" + string(namebuf.begin(), namebuf.end());
    cout << "copyname:" << copyfilename << endl;
    FILE *destfile = fopen(copyfilename.c_str(), "wb");
    if (destfile == NULL) {
        reportFailure("cannot create output file", copyfilename.c_str());
        return;
    }
    bool written = writeAll(destfile, bufferStart(decompressdata), decompressdata.size());
    if (fclose(destfile) != 0) {
        written = false;
    }
    if (!written) {
        reportFailure("cannot write output file", copyfilename.c_str());
    }
}

// Define HUFFMAN_NO_MAIN to link these routines into a program (or a test
// harness) that brings its own main().
#ifndef HUFFMAN_NO_MAIN
int main() {
    //compressFile("iphone.pdf");
    decompressFile("iphone.pdf.huffman-YU");
    system("PAUSE");
    return 0;
}
#endif