//*******************************controll.h**********************
//*****************Compress and uncompress commands**********
#ifndef CONTROLL_H
#define CONTROLL_H

/**
 * Front end of the Huffman file compressor: one command to compress a file
 * and one to restore it, plus the helpers those commands rely on.
 */
class Controll //:public HuffmanTree
{
public:
    /**
     * Compresses a file.
     * @param sourceFilename file to compress
     * @param codeFilename   file that receives the Huffman code table
     * @param geneFilename   file that receives the packed code bits
     */
    void compress_file(const char *sourceFilename,
                       const char *codeFilename,
                       const char *geneFilename);

    /**
     * Restores a file written by compress_file().
     * @param geneFilename compressed input file
     * @param codeFilename code table written during compression
     * @param backFilename file that receives the restored bytes
     */
    void uncompress_file(const char *geneFilename,
                         const char *codeFilename,
                         const char *backFilename);

    /**
     * Loads a code table file.
     * @param codeFilename code table to read
     * @param HC           receives one heap-allocated code string per symbol,
     *                     stored from index 1 upwards
     * @param value        receives the symbol belonging to each code, same indexing
     * @param res          res[0] receives the total number of encoded bits,
     *                     res[1] the number of symbols
     */
    void freFromFile(const char *codeFilename, char **HC, char *value, int *res);

    /**
     * Integer power by repeated multiplication.
     * @return a multiplied by itself b times (1 when b is 0)
     */
    int powmy(int a, int b);
};

#endif //controll.h end
//*******************************huffman.h**********************
//*****************A Huffman tree with n leaves has 2n-1 nodes in total**********
#ifndef HUFFMAN_H
#define HUFFMAN_H

/**
 * One node of the array-based Huffman tree. Links are indices into
 * HuffmanTree::arrayTree; -1 means "no such node".
 */
class HuffmanNode
{
public:
    char info;                  // symbol stored in the node
    double weight;              // weight (occurrence count) of the node
    int parent, lchild, rchild; // parent, left child and right child indices

    /** Creates an unlinked node with a zero symbol and zero weight. */
    HuffmanNode()
    {
        info=0;
        weight=0;
        parent=lchild=rchild=-1;
    }

    /** Creates a node with the given symbol, weight and (optional) links. */
    HuffmanNode(const char &data, const double &wt,
                const int &pa=-1, const int &lch=-1, const int &rch=-1)
    {
        info=data;
        weight=wt;
        parent=pa;
        lchild=lch;
        rchild=rch;
    }
}; //class HuffmanNode end

/*
 * Code used to be a private class of HuffmanTree; it was moved out so that
 * the Controll class can use it as well.
 *
 * Holds the code string ('0'/'1' characters, NUL-terminated) of one symbol.
 */
class Code
{
public:
    /*
     * A symbol is a char, so a tree has at most 256 leaves and a code is at
     * most 255 characters long; 256 bytes therefore always fit code + NUL.
     * (The previous size of 10 overflowed for codes longer than 9 bits.)
     */
    Code():length(256)
    {
        ptr=new char[length];
        ptr[0]='\0';
    }
    ~Code() { delete[] ptr; }

    char *ptr;          // owned buffer holding the code string
    const int length;   // size in bytes of the buffer allocated by the constructor

private:
    // Not copyable: a member-wise copy would free ptr twice.
    Code(const Code &);
    Code &operator=(const Code &);
};

/**
 * Array-based Huffman tree: counts the bytes of a file, builds the tree and
 * the per-symbol codes, and writes the code table to a file.
 */
class HuffmanTree
{
public:
    /**
     * @param s capacity of the node array; values below 100 are raised to 100
     */
    HuffmanTree(const int &s=100)
    {
        maxSize=(s>100?s:100);
        arrayTree=new HuffmanNode[maxSize];
        currentSize=0;
        codeArray=0;
        sum_bits=0;
    }
    ~HuffmanTree()
    {
        delete[] arrayTree;
        delete[] codeArray;     // deleting a null pointer is a no-op
    }

    void run(const char*, const char*);
    int getSumBytes();          // total number of bytes in the file to compress

    int currentSize;            // number of leaves currently stored
    HuffmanNode *arrayTree;     // Huffman node array
    Code *codeArray;            // one code per leaf; array size is currentSize
    int sum_bits;               // total number of bits in the compressed output

private:
    // Not copyable: a member-wise copy would free the owned arrays twice.
    HuffmanTree(const HuffmanTree &);
    HuffmanTree &operator=(const HuffmanTree &);

    int maxSize;                // capacity of arrayTree
    //int sum_bytes;
    void insert(const char&, const double&);    // insert a leaf
    void createHuffmanTree();                   // build the Huffman tree
    void createHuffmanCode();                   // build the Huffman codes
    void writeCodeToFile(const char *);         // write the Huffman codes to the code table file
    int findPosition(const char &) const;       // position of a symbol in arrayTree[]
    int isEqual(const char *s) const;           // position of code s in codeArray[], or -1 if absent
    void reverse(char arr[]);
}; //class HuffmanTree end

#endif //huffman.h end
//**************************controll.cpp********************** #include <iostream> #include <limits> #include <cstring> #include<cstdlib> #include <cstdio> #include "controll.h" #include "huffman.h" using namespace std; int Controll::powmy(int a,int b) { if(b==0) return 1; int i = 0; int result = 1; for(;i<b;i++) { result *=a; } return result; } void Controll::freFromFile(const char *codeFilename,char **HC,char value[],int res[]) { FILE *fe = fopen(codeFilename,"rb"); if(fe == NULL) { printf("打开文件失败!"); return; } int num[10]; int m,i; char * cd = (char *)malloc((100+1)*sizeof(char)); //临时保存的编码文件名 char c; for(i=1;;i++) { c = fgetc(fe); if(c=='#') break; int j = 0; while(c!=' ') { cd[j++] = c; c = fgetc(fe); } cd[j] = '\0'; HC[i] = (char *)malloc((j+1)*sizeof(char)); strcpy(HC[i],&cd[0]); /*以上完成了huffman编码的读出 下面将Huffman编码对应的字符写进codeArray*/ c = fgetc(fe); value[i] = c; fgetc(fe); } int k; c = fgetc(fe); //头一个读取#,后一个才开始读取数据 k = 0; while(c!='#') { num[k++] = c-'0'; c = fgetc(fe); } m = 0; res[0] = 0; for(k=k-1;k>=0;k--) { //printf("powmy(10,m)=%d ",powmy(10,m)); res[0]+=(num[k]*powmy(10,m)); //printf("%d sum_bit=%d m=%d ",num[k],sum_bit,m); m = m + 1; } printf("sum_bits=%d ",res[0]); c = fgetc(fe); c = fgetc(fe);//头一个读取#,后一个才开始读取数据 k = 0; while(c!='#') { num[k++] = c-'0'; c = fgetc(fe); } m = 0; res[1] = 0; for(k=k-1;k>=0;k--) { res[1]+=num[k]*powmy(10,m); m++; } printf("currentSize =%d\n",res[1]); fclose(fe); //果然是这个问题的原因导致的。 没有关闭文件会导致出错 /* fclose(fe); FILE *fww = fopen("C:\\out4.txt","wb");//打开词频表文件 for(i=0;i<currentSize;i++) { fprintf(fww,"%s %c\n",HC[i],value[i]); } printf("hehe"); fprintf(fww,"#%d#",sum_bit); fprintf(fww,"#%d#",currentSize);*/ } void Controll::compress_file(const char *sourceFilename,const char *codeFilename,const char *geneFilename) { HuffmanTree tree(620); tree.run(sourceFilename,codeFilename); FILE *fo = fopen(sourceFilename,"rb"); FILE *fw = fopen(geneFilename,"wb"); if(fo == NULL || fw == NULL) { printf("文件打开失败!"); return; } int aa = 0; int 
sum_bytes = tree.getSumBytes(); //得到文件的总字节数,用于计算压缩百分比 int sum = 0; //用于计算八位的值,从而写进压缩文件当中 int i,flag = 0,j,k=0; int temp[1000]; memset(temp,0,sizeof(temp)); printf("before compress sumbytes=%d after compress sumytes=%d\n",sum_bytes,tree.sum_bits/8); printf("The compress efficiency is %4.2f%%\n",(double)tree.sum_bits/8*1.0/(sum_bytes*1.0)*100); while(!feof(fo)) { sum = 0; char one_byte = fgetc(fo); aa++; for(i=0;i<tree.currentSize;i++) { if(one_byte == tree.arrayTree[i].info) { flag +=strlen(tree.codeArray[i].ptr); int len = strlen(tree.codeArray[i].ptr); if(flag<8) { for(j=0;j<len;j++) temp[k++] = tree.codeArray[i].ptr[j] - '0'; } else if(flag>=8) { for(j=0;k<8;j++) temp[k++] = tree.codeArray[i].ptr[j] - '0'; for(;j<len;j++) temp[k++] = tree.codeArray[i].ptr[j] - '0'; sum+=temp[0]*128+temp[1]*64+temp[2]*32+temp[3]*16+temp[4]*8 +temp[5]*4+temp[6]*2+temp[7]*1; for(j=0;j<8;j++) temp[j] = 0; for(j=8;j<k;j++) temp[j-8] = temp[j]; k = flag = j-8; char c = sum; fputc(c,fw); if(aa%1000==0) { printf("\r%4.2f%%",(double)aa/sum_bytes*100.0); } fflush(fw); i = tree.currentSize+1; } } } } aa = sum_bytes; printf("\r%4.2f%%",(double)aa/sum_bytes*100.0); //printf("压缩成功!"); /*考虑到最后可能没有凑够八位的情况*/ if(flag) { sum+=temp[0]*128+temp[1]*64+temp[2]*32+temp[3]*16+temp[4]*8 +temp[5]*4+temp[6]*2+temp[7]*1; char c = sum; fputc(c,fw); fflush(fw); } fclose(fw); fclose(fo); } void Controll::uncompress_file(const char *geneFilename,const char *codeFilename,const char *backFilename) { char **HC = (char**)malloc(260*sizeof(char*));//用于保存从文件当中读取的huffman编码 char value[270]; int res[2]; //果然,还是数组地址符比较好的 关于整形的值如何变化,我记得我也是实验过的,是可以的 freFromFile(codeFilename,HC,value,res); int sum_bits = res[0]; int currentSize = res[1]; FILE *fo = fopen(geneFilename,"rb"); FILE *fw = fopen(backFilename,"wb"); if(fo==NULL || fw==NULL) { printf("文件打开失败!"); return; } char str[1000]; int i,j,k,temp = 0; int index; int sum_bit2 = sum_bits; int num[10]; while(!feof(fo)) { if(sum_bit2<0) break; sum_bit2 -=8; int data = fgetc(fo); 
if(data == -1) break; if(sum_bit2<0) { index = 0-sum_bit2; } else { index = 0; } memset(num,0,sizeof(num)); /*这是可以综合出一个函数的*/ i = 0; while(data) { num[i++] = data%2; data = data/2; } i = temp; for(k=7;k>=index;i++,k--) { if(num[k]) str[i] = '1'; else str[i] = '0'; str[i+1] ='\0'; for(j=1;j<=currentSize;j++) { if(strcmp(str,HC[j])==0) { fputc(value[j],fw); if((sum_bits-sum_bit2)%8000==0) { //cout<<'\r'<<(double)(sum_bits-sum_bit2)/sum_bits*100.0<<'%%'; printf("\r%4.2f%%",(double)(sum_bits-sum_bit2)/sum_bits*100.0); } fflush(fw); j = currentSize+1; i = -1; } } } if(i) { temp = i; } else { temp = 0; } } sum_bit2 = 0; printf("\r%4.2f%%",(double)(sum_bits-sum_bit2)/sum_bits*100.0); fclose(fw); fclose(fo); }
//**************************huffman.cpp********************** #include <iostream> #include <fstream> //for ofstream ifstream #include <limits> //for numeric_limits<double>::max() #include <cstdlib> //for exit() #include <cstring> //for strlen(), strcpy(), strcmp() #include <cstdio> #include "huffman.h" using namespace std; void HuffmanTree::insert(const char &data, const double &wt) { //插入结点 if (2*currentSize-1 >= maxSize) //叶子结点为n的哈夫曼树共有2n-1个结点 return; arrayTree[currentSize].info=data; arrayTree[currentSize].weight=wt; currentSize++; } /*将词频表存入相应的文件当中*/ void HuffmanTree::writeCodeToFile(const char *outFilename) { int i; FILE *fw = fopen(outFilename,"wb");//打开词频表文件 for(i=0;i<currentSize;i++) { fprintf(fw,"%s %c\n",codeArray[i].ptr,arrayTree[i].info); } sum_bits = 0; for(i=0;i<currentSize;i++) { sum_bits += arrayTree[i].weight*strlen(codeArray[i].ptr); } fprintf(fw,"#%d#",sum_bits); fprintf(fw,"#%d#",currentSize); fclose(fw); } int HuffmanTree::getSumBytes() { int sum_bytes = 0; int i = 0; for(i=0;i<currentSize;i++) { sum_bytes+=arrayTree[i].weight; } return sum_bytes; } void HuffmanTree::reverse(char arr[]) { //反转字符串 const int len=strlen(arr); char *p; p=new char[len+1]; strcpy(p, arr); p[len]='\0'; int k=0; for (int i=len-1; i>=0; i--) arr[i]=p[k++]; arr[len]='\0'; delete[] p; } int HuffmanTree::findPosition(const char &ch) const { //返回字符ch在arrayTree[]中的位置 for (int i=0; i<currentSize; i++) if (arrayTree[i].info == ch) return i; return -1; } int HuffmanTree::isEqual(const char *s) const { //判断s的编码是否存在,若存在返回编码在数组codeArray[]中的位置,否则返回-1 for (int i=0; i<currentSize; i++) if (strlen(s) == strlen(codeArray[i].ptr)) //可以去掉此行 if (strcmp(s, codeArray[i].ptr) == 0) return i; return -1; } void HuffmanTree::createHuffmanTree() { //构造huffmanTree int i=currentSize; int k; double wt1, wt2; int lnode = 0, rnode = 0; while (i < 2*currentSize-1) { wt1=wt2=numeric_limits<double>::max(); k=0; while (k < i) { if (arrayTree[k].parent==-1) { if (arrayTree[k].weight<wt1) { wt2=wt1; 
rnode=lnode; wt1=arrayTree[k].weight; lnode=k; } else if (arrayTree[k].weight<wt2) { wt2=arrayTree[k].weight; rnode=k; } } k++; } arrayTree[i].weight = arrayTree[lnode].weight+arrayTree[rnode].weight; arrayTree[i].lchild=lnode; arrayTree[i].rchild=rnode; arrayTree[lnode].parent=arrayTree[rnode].parent=i; i++; } } void HuffmanTree::createHuffmanCode() { //构造huffmanCode,即哈夫曼编码 codeArray=new Code[currentSize]; int i=0; int k, n, m; while (i < currentSize) { k = arrayTree[i].parent; n=0; m=i; while (k!=-1 && k<currentSize*2-1) { if (arrayTree[k].lchild==m) codeArray[i].ptr[n++]='0'; else if (arrayTree[k].rchild==m) codeArray[i].ptr[n++]='1'; m=k; k=arrayTree[m].parent; } codeArray[i].ptr[n]='\0'; reverse(codeArray[i].ptr); //反转字符串,使之变成正确的编码 i++; } } void HuffmanTree::run(const char *inFilename,const char *outFilename) { //run函数的实现 FILE *fo = fopen(inFilename,"rb");//读入待压缩文件 if(fo==NULL) { cerr<<"\""<<inFilename<<"\" could not open."<<endl; exit(1); } char ch; int pos; // 从文件当中读入字符,并且统计字符个数 ch = fgetc(fo); while(!feof(fo)) { //printf("hehe "); pos = findPosition(ch); if (pos != -1) arrayTree[pos].weight++; else insert(ch, 1); ch = fgetc(fo); } cout<<endl; createHuffmanTree(); //构造huffman树 createHuffmanCode(); //对统计字符进行编码 writeCodeToFile(outFilename); //将编码表存入文件 fclose(fo); } //huffman.cpp end
//*****************************main.cpp************************* #include "huffman.h" #include "controll.h" #include<cstring> int main(int argc,char **argv) { const char *codeFileName ="C:\\out1.txt"; //词频表文件名 Controll controller; //执行压缩命令 if(strcmp("-c",argv[1])==0) { controller.compress_file(argv[2],codeFileName,argv[3]); } //执行解压命令 else if(strcmp("-u",argv[1])==0) { controller.uncompress_file(argv[2],codeFileName,argv[3]); } return 0; } //main.cpp end