本实验构建最优二叉树来实现哈夫曼编码
使用VS2017完成
关于哈夫曼编码的头文件huffman.h
//huffman.h
// Declarations for building a Huffman tree and deriving the per-symbol codes.
#ifndef HUFFMAN_H
#define HUFFMAN_H

#define OK 1      // success code returned by the coding/compression routines
#define SIZE 256  // number of distinct byte values, i.e. leaves of the tree

// One node of the Huffman tree. Nodes live in an array and refer to each
// other by index; index 0 is never used as a node, so 0 means "no link".
struct HTNode
{
	int weight; // weight (frequency) of the node
	int parent; // index of the parent node, 0 if none
	int lchild; // index of the left child, 0 if none
	int rchild; // index of the right child, 0 if none
};

typedef HTNode *HuffmanTree; // dynamically allocated array holding the tree
typedef char **HuffmanCode;  // dynamically allocated table of code strings

// Fills pHC with one '0'/'1' string per leaf of the tree pHT.
int HuffmanCoding(HuffmanCode &pHC, HuffmanTree &pHT);
// Returns the index (1..nSize) of the lightest node that has no parent yet.
int Select(HuffmanTree pHT, int nSize);
// Prints every node of the tree as a table.
void TestHufTree(HuffmanTree pHT);
// Prints the code of every leaf reachable from root.
void TestHufCode(int root, HuffmanTree pHT, HuffmanCode pHC);
// Prints the nodes reachable from root in pre-order.
void TestHufTreeN(int root, HuffmanTree pHT);
// Allocates pHT and builds a Huffman tree from the n weights in w.
int HfmTree(HuffmanTree &pHT, int *w, int n);

#endif
相关实现huffman.cpp
//Huffman.cpp #include#include #include"huffman.h" #pragma warning( disable : 4996) using namespace std; /* void PreorderTraverse(int root, HuffmanTree pHT) { cout << pHT[root].weight << " ";//访问节点 if (pHT[root].lchild)//左孩子 { PreorderTraverse(pHT[root].lchild, pHT); } if (pHT[root].rchild)//右孩子 { PreorderTraverse(pHT[root].rchild, pHT); } } */ int HuffmanCoding(HuffmanCode &pHC, HuffmanTree &pHT) { // pHC = (HuffmanCode)malloc((SIZE + 1) * sizeof(char*)); //无栈非递归遍历 char cd[SIZE] = { '\0' };//记录访问路径 int cdlen = 0;//记录当前路径长度 for (int i = 1; i < 512; i++) { pHT[i].weight = 0;//遍历 Huffman树时用作节点的状态标志 } int p = 2*SIZE-1;//根节点 while (p != 0) { if (pHT[p].weight == 0)//向左 { pHT[p].weight = 1; if (pHT[p].lchild != 0) { p = pHT[p].lchild; cd[cdlen++] = '0'; } else if (pHT[p].rchild == 0)//登记叶子节点的字符编码 { pHC[p] = (char*)malloc((cdlen+1) * sizeof(char)); cd[cdlen] = '\0'; strcpy(pHC[p], cd);//复制编码 } } else if (pHT[p].weight == 1)//向右 { pHT[p].weight = 2; if (pHT[p].rchild != 0)//右孩子为叶子节点 { p = pHT[p].rchild; cd[cdlen++] = '1'; } } else { //退回父节点,编码长度减1 pHT[p].weight = 0; p = pHT[p].parent; --cdlen; } // printf("*"); } return OK; } int Select(HuffmanTree pHT, int nSize) { int minValue = 0x7FFFFFFF;//最小值 int min = 0; //找到最小权值的元素序号 for (int i = 1; i <= nSize; i++) { if (pHT[i].parent == 0 && pHT[i].weight < minValue) { minValue = pHT[i].weight; min = i; } } return min; } void TestHufTree(HuffmanTree pHT) { for (int i = 1; i < 2*SIZE; i++) { printf("pHT[%d]\t%d\t%d\t%d\t%d\n", i, pHT[i].weight, pHT[i].parent,pHT[i].lchild,pHT[i].rchild); } } int HfmTree(HuffmanTree &pHT, int *w, int n) { int m = 2 * n - 1; pHT = (HuffmanTree)malloc((m + 1) * sizeof(HTNode)); if (!pHT) { cerr << "内存分配失败! " << endl; return -1; } //初始化树 HuffmanTree p = pHT + 1;//0号单元不使用 for (int i = 0; i < m; i++) { p->weight = (i < n) ? 
w[i] : 0; p->parent = 0; p->lchild = 0; p->rchild = 0; p++; } for (int i = n + 1; i <= m; i++) { //第一个最小元素 int s1 = Select(pHT, i - 1);//找出前i-1个中最小元素 pHT[s1].parent = i; //第二个最小元素 int s2 = Select(pHT, i - 1); pHT[s2].parent = i; pHT[i].weight = pHT[s1].weight + pHT[s2].weight; pHT[i].lchild = s1; pHT[i].rchild = s2; } return 0; } void TestHufCode(int root, HuffmanTree pHT, HuffmanCode pHC) { if (pHT[root].lchild == 0 && pHT[root].rchild == 0) { printf("0x%02X %s\n", root - 1, pHC[root]); } if (pHT[root].lchild)//访问左孩子 { TestHufCode(pHT[root].lchild, pHT, pHC); } if (pHT[root].rchild) { TestHufCode(pHT[root].rchild, pHT, pHC); } } void TestHufTreeN(int root, HuffmanTree pHT) { cout << pHT[root].weight << "\t"< "\t"< "\t"< "\n"; if (pHT[root].lchild != 0) { TestHufTreeN(pHT[root].lchild, pHT); } if (pHT[root].rchild != 0) { TestHufTreeN(pHT[root].rchild, pHT); } }
压缩相关操作的头文件Compress.h
//Compress.h #ifndef COMPRESS_H #define COMPRESS_H int Compress(const char *pFilename); char Str2byte(const char *pBinStr); int Encode(const char*pFilename, const HuffmanCode pHC, char *pBuffer, const int nSize); struct HEAD { char type[4];//文件类型 int length;//原文件长度 int weight[256];//权值数值 }; int WriteFile(const char*pFilename, const HEAD sHead, const char * pBuffer, const int nSize); int InitHead(const char *pFilename, HEAD &sHead); #endif
具体实现Compress.cpp
1 //Compress.cpp 2 3 #include"huffman.h" 4 #include"Compress.h" 5 #include6 #pragma warning( disable : 4996) 7 using namespace std; 8 //Compress 9 //InitHead 10 //Encode 11 //Str2byte 12 //WriteFile 13 char Str2byte(const char *pBinStr) 14 { 15 char b = 0x00; 16 for (int i = 0; i < 8; i++) 17 { 18 b = b << 1; 19 if (pBinStr[i] == '1') 20 { 21 b = b | 0x01; 22 } 23 } 24 return b; 25 } 26 27 int Compress(const char *pFilename) 28 { 29 int weight[256] = { 0 }; 30 //以二进制打开文件 31 FILE* in = fopen(pFilename, "rb"); 32 if (in == NULL) 33 { 34 cout << "Failed to open the file!" << endl; 35 exit(0); 36 } 37 cout << "成功打开文件 " << pFilename << endl; 38 int ch; 39 while ((ch = getc(in)) != EOF) 40 { 41 weight[ch]++; 42 } 43 fclose(in); 44 //cout << "Byte Weight" << endl; 45 //for (int i = 0; i < SIZE; i++) 46 //{ 47 // printf("0x%02X %d\n", i, weight[i]); 48 //} 49 50 HuffmanTree hfmt; 51 HfmTree(hfmt, weight, SIZE); 52 cout << "成功生成哈夫曼树" << endl; 53 // TestHufTree(hfmt); 54 // TestHufTreeN(511, hfmt); 55 HuffmanCode hfmc=(HuffmanCode)malloc((SIZE+1)*sizeof(char*)); 56 // for (int i = 1; i <= SIZE; i++) 57 // hfmt[i].weight = weight[i - 1] 58 //根据哈夫曼树进行编码 59 HuffmanCoding(hfmc, hfmt); 60 cout << "成功完成哈夫曼编码" << endl; 61 // cout << "先序遍历哈夫曼树输出编码信息:" << endl; 62 // TestHufCode(2 * SIZE - 1, hfmt, hfmc);//测试哈夫曼编码 63 // cout << "压缩后的文件编码:" << endl; 64 65 //计算编码缓冲区大小 66 int nSize = 0; 67 for (int i = 0; i < 256; i++) 68 { 69 nSize += weight[i] * strlen(hfmc[i+1]); 70 } 71 nSize = (nSize % 8) ? 
nSize / 8 + 1 : nSize / 8; 72 73 // cout <<"nSize = "< 74 75 //对原文件进行压缩编码 76 char* pBuffer = NULL; 77 pBuffer = (char *)malloc(nSize*sizeof(char)); 78 memset(pBuffer, 0, (nSize) * sizeof(char)); 79 // cout << "begin: " << strlen(pBuffer) << endl; 80 //// cout << "----"; 81 // int n; 82 // cout << "input n:"; 83 // cin >> n; 84 //将编码写入缓冲区 85 Encode(pFilename, hfmc, pBuffer, nSize); 86 // cout << "after: " << strlen(pBuffer) << endl; 87 // cout << "len of puf = " << strlen(pBuffer) << endl; 88 // cout << "!pBuffer = " << !pBuffer << endl; 89 if (!pBuffer) 90 { 91 cout << "!pBuffer = " << !pBuffer << endl; 92 return -1; 93 } 94 cout << "\n压缩完毕" << endl; 95 //for (int i = 1; i < strlen(pBuffer); i++) 96 //{ 97 // printf("%d", pBuffer[i]); 98 //} 99 100 HEAD sHead; 101 InitHead(pFilename, sHead); 102 cout <<"原文件"<< pFilename<<"大小为:" << sHead.length << "Byte" << endl; 103 int len_after = WriteFile(pFilename, sHead, pBuffer, nSize); 104 cout << "大小为:" << len_after << "Byte \n头文件sHead大小为:" << sizeof(sHead)<<"Byte"<<endl; 105 cout << "压缩比率:" << (double)len_after * 100 / sHead.length << "%" << endl; 106 free(hfmt); 107 free(hfmc); 108 free(pBuffer); 109 return OK; 110 } 111 112 113 int Encode(const char*pFilename, const HuffmanCode pHC, char *pBuffer, const int nSize) 114 { 115 //开辟缓冲区 116 // cout << "+++++"; 117 FILE* in = fopen(pFilename, "rb"); 118 if (in == NULL) 119 { 120 cout << "Failed to open the file!" 
<< endl; 121 exit(0); 122 } 123 pBuffer = (char*)malloc(nSize * sizeof(char)); 124 if (!pBuffer) 125 { 126 cerr << "开辟缓冲区失败" << endl; 127 return -1; 128 } 129 cout << "loading"; 130 int sign = 0;//用于控制小数点输出 131 char cd[SIZE] = { 0 };//工作区 132 int pos = 0;//缓冲区指针 133 int ch; 134 //扫描文件,根据huffmman编码表对其进行压缩,压缩结果暂存到缓冲区中 135 while ((ch = getc(in)) != EOF) 136 { 137 if (sign % 1000 == 1) 138 printf("."); 139 sign++; 140 strcat(cd, pHC[ch+1]);//从HC复制编码串到cd 141 142 143 //打印压缩后的文件编码 144 // printf("%s", pHC[ch + 1]); 145 146 147 //压缩编码 148 while (strlen(cd) >= 8) 149 { 150 //截取字符串左边的8个字符,编码成字节 151 pBuffer[pos++] = Str2byte(cd); 152 //字符串整体左移8个字节 153 for (int i = 0; i < SIZE - 8; i++) 154 { 155 cd[i] = cd[i + 8]; 156 } 157 } 158 } 159 if (strlen(cd) > 0) 160 { 161 pBuffer[pos++] = Str2byte(cd); 162 } 163 fclose(in); 164 //for (int i = 1; i < nSize; i++) 165 //{ 166 // printf("%d ", pBuffer[i]); 167 //} 168 // cout << endl<<"before: " << strlen(pBuffer) << endl; 169 return OK; 170 } 171 172 int InitHead(const char *pFilename, HEAD &sHead) 173 { 174 //初始化文件头 175 strcpy(sHead.type, "HUF");//文件类型 176 sHead.length = 0;//原文件长度 177 for (int i = 0; i < SIZE; i++) 178 { 179 sHead.weight[i] = 0; 180 } 181 FILE *in = fopen(pFilename, "rb"); 182 int ch; 183 while ((ch = fgetc(in)) != EOF) 184 { 185 sHead.weight[ch]++; 186 sHead.length++; 187 } 188 fclose(in); 189 in = NULL; 190 return OK; 191 } 192 193 int WriteFile(const char*pFilename, const HEAD sHead, const char * pBuffer, const int nSize) 194 { 195 //生成文件名 196 char filename[256] = { 0 }; 197 strcpy(filename, pFilename); 198 strcat(filename, ".huf"); 199 //以二进制流形式打开文件 200 FILE *out = fopen(filename, "wb"); 201 //写文件头 202 fwrite(&sHead, sizeof(char), 1, out); 203 //写压缩后的编码 204 fwrite(pBuffer, sizeof(char), nSize, out); 205 //关闭文件,释放文件指针 206 fclose(out); 207 out = NULL; 208 cout << "生成压缩文件:" << filename << endl; 209 int len = sizeof(HEAD) + strlen(pFilename) + 1 + nSize; 210 return len; 211 }
主函数Main.cpp
//Main.cpp #include"huffman.h" #include"Compress.h" #include#include using namespace std; #pragma warning( disable : 4996) int main() { cout << "= = = = = = = =Huffman 文件压缩= = = = = = = =" << endl; cout << "请输入文件名:"; char filename[256]; cin>>filename; Compress(filename); // system("pause"); return 0; }