这个周末实现了哈夫曼编码。本来打算完全自己写,结果发现自己能力还不够,全程参考了下面这篇博客,前后还是花了两天时间。写在这里主要是给自己留个记录,毕竟也花了不少时间。
参考链接: https://blog.csdn.net/qq_40738840/article/details/85406330?ops_request_misc=&request_id=&biz_id=102&utm_term=%E5%93%88%E5%BC%97%E6%9B%BC%E7%BC%96%E7%A0%81%E4%B8%8E%E8%A7%A3%E7%A0%81&utm_medium=distribute.pc_search_result.none-task-blog-2allsobaiduweb~default-7-85406330.142v10control,157v4control&spm=1018.2226.3001.4187
该博主的程序在读取二进制文件时会出错,我做了一些小改动,现在已经可以正常运行。
/*
功能:实现对文件的压缩
程序流程:
1、对文件进行IO加载;
2、对输入的字符进行统计,并计算每个字符的权重,保存在weight文件中;
3、进行构造哈夫曼树,且进行创建哈夫曼编码;
1、哈夫曼树的创建:
构造森林全是根,选用两小造新树;
删除两小添新人,重复2、3剩单根。
根据输入的权重,进行两小结合。
2、哈夫曼编码:
左分支标0,右分支标1;
把从根到每个叶子的标号连接起来,作为叶子代表的字符编码;
保存到二进制文件中;
3、解码:
加载二进制文件到哈弗曼结构中;
通过查找0、1寻找叶子节点;
*/
#include <climits>   // INT_MAX
#include <cstddef>   // std::size_t
#include <cstdlib>   // atoi(), exit()
#include <fstream>
#include <iostream>
#include <stack>
#include <string>    // getline()
#include <vector>
using namespace std;
// Storage layout of one Huffman-tree node. Nodes are kept in a flat array and
// refer to each other by array index; the rest of this file treats a link
// value of 0 as "no parent" / "no child" and never stores a node in slot 0.
struct HTNode
{
    int ascii;   // character code carried by the node
    int weight;  // weight of the node (occurrence count for leaves)
    int parent;  // array index of the parent node
    int Ich;     // array index of the left child
    int rch;     // array index of the right child
};

// Handle to an array of HTNode.
using HuffmanTree = HTNode*;
// Counts how often every byte value occurs in the text file `inputPath` and
// writes the resulting weight table to `outputPath`.
//
// Output format: the first line holds the number of distinct byte values,
// followed by one line "<byte> <count>" per byte value that occurs, in
// ascending byte order.
//
// Parameters:
//   inputPath  - file to analyse; the process exits with status 1 if it
//                cannot be opened.
//   outputPath - weight table to create.
//   n          - out: number of distinct byte values found.
//
// The file is read line by line, so line terminators are not counted.
void WeightCalculate(std::string inputPath, std::string outputPath, int &n) // n receives the number of distinct characters
{
    // Occurrence count per byte value; the byte value is the index.
    int cof[256] = {0};

    std::ifstream ifs(inputPath);
    if (!ifs)
    {
        std::cout << "文件打开失败!" << std::endl;
        std::exit(1);
    }

    std::string line;
    while (std::getline(ifs, line))
    {
        for (std::size_t i = 0; i < line.size(); ++i)
        {
            // Index through unsigned char: plain char may be signed, and a
            // byte >= 0x80 would otherwise become a negative array index.
            ++cof[static_cast<unsigned char>(line[i])];
        }
    }
    ifs.close();

    n = 0;
    for (int i = 0; i < 256; ++i)
    {
        if (cof[i] != 0) ++n;
    }
    std::cout << "字符总个数:" << n << std::endl;

    std::ofstream ofs(outputPath);
    if (!ofs)
    {
        std::cout << "文件打开失败!" << std::endl;
        return; // nothing can be written; n is already set for the caller
    }
    ofs << n << '\n';
    for (int i = 0; i < 256; ++i)
    {
        if (cof[i] != 0)
        {
            ofs << static_cast<char>(i) << " " << cof[i] << '\n';
        }
    }
}
// 构造哈弗曼树,依照规则,且左子树权重小于右子树权重
void CreateHuffman(HuffmanTree &HTree,int n,string weightFilePath,string huffmanTreePath,string huffmanCodeFilePath)
{
// 打开weight.txt文件
fstream ifs(weightFilePath, ios::in);
if (!ifs) cout << "weight文件加载失败" << endl;
HTree = new HTNode[2 * n]; // n表示结点数,构造2n-1个,其中0号不用
for (size_t i = 1; i < 2*n; i++) // 全部清零
{
HTree[i].ascii = HTree[i].weight = HTree[i].parent = HTree[i].Ich = HTree[i].rch = 0;
}
string s;
getline(ifs, s); // 获取第一行的总数
int count = 1;
while (getline(ifs,s)) // 从文件中读取ASCII码及相应权值
{
if (!ifs) break;
HTree[count].ascii = s[0];
string weight = s.substr(2, s.size()); // 截取字符串
HTree[count].weight = atoi(weight.c_str());
cout << "截取的字符串:" << weight << endl;
cout << "总行字符串" << s << endl;
cout << HTree[count].ascii << " " << HTree[count].weight << endl;
++count;
}
ifs.close();
for (size_t i = n+1; i < 2*n; i++)
{
// 寻找最小、此小值,记录下标
int min1 = INT_MAX,min2 = INT_MAX;
int index1 = 0,index2 = 0;
for (size_t j = 1; j < i; j++) // 找到最小值、次小值
{
if (HTree[j].parent == 0) // 双亲为0表示尚未操作
{
if (min1 > HTree[j].weight)
{
min2 = min1; // 先赋值给此小值
index2 = index1;
min1 = HTree[j].weight;
index1 = j;
}
else if (min2 > HTree[j].weight)
{
min2 = HTree[j].weight;
index2 = j;
}
}
}
// 更新数组表 --------->5处状态更新
HTree[i].weight = HTree[index1].weight + HTree[index2].weight; // 双亲权值更新
HTree[index1].parent = HTree[index2].parent = i; // 孩子的双亲结点更新
if (HTree[index1].weight < HTree[index2].weight) // 两个节点权值不同,左小右大;相同,下标小者在左
{
HTree[i].Ich = index1; // 下标赋值
HTree[i].rch = index2;
}
else if (HTree[index1].weight > HTree[index2].weight)
{
HTree[i].Ich = index2;
HTree[i].rch = index1;
}
else
{
if (index1 < index2)
{
HTree[i].Ich = index1; // 下标赋值
HTree[i].rch = index2;
}
else
{
HTree[i].Ich = index2;
HTree[i].rch = index1;
}
}
}
// 写入文件
fstream outFile(huffmanTreePath, ios::out);
if (!outFile) cout << "哈弗曼树文件打开失败!" << endl;
outFile << n << endl; // 结点个数
for (size_t i = 1; i < 2*n; i++)
{
outFile << " " << HTree[i].ascii << " " << HTree[i].weight << " " << HTree[i].parent << " " << HTree[i].Ich << " " << HTree[i].rch << endl;
}
outFile.close();
// 建立编码表,写入字符,权值,编码
outFile.open(huffmanCodeFilePath, ios::out);
if (!outFile) cout << "哈弗曼编码表.txt,打开失败!" << endl;
// 利用栈从叶子出发,读取每个字符的编码,再写入文件 ->这一步太绕了
stack<char> code;
for (size_t i = 1; i <= n; i++) // 对n个字符求编码
{
int j = i; // 从第i个开始
while (HTree[j].parent!=0) // 只要双亲结点!=0就继续
{
int p = HTree[j].parent; // 找双亲结点的索引
if (p!=0)
{
int l, r; // 定义左子树和右子树的索引值
l = HTree[p].Ich; // 根据双亲,找左子树的索引值
r = HTree[p].rch; // 根据双亲,找右子树的索引值
if (j == l) code.push('0'); // 如果根据双亲索引的值在左子树,则记为0
if (j == r) code.push('1'); // 如果根据双亲索引的值在右子树,则记为1
j = p; // 将双亲重新赋值给j,从叶子结点反向搜索
}
}
outFile << HTree[i].ascii << " " << HTree[i].weight << " "; // 写入字符,权值
while (!code.empty()) // 写入编码
{
outFile << code.top(); // 写入编码
code.pop();
}
outFile << endl;
}
outFile.close();
}
// Encodes a text file with a previously generated Huffman code table.
//
// Parameters:
//   resoureFilePath - text file to encode; it is read line by line, so line
//                     terminators are not encoded.
//   huffmanCodePath - code table with one "ascii weight code" entry per symbol.
//   binaryFilePath  - output file made of fixed 20-byte records. Each record
//                     holds up to 19 code characters ('0'/'1') followed by
//                     zero padding.
//
// Every input byte produces at least one record. A code longer than 19
// characters is continued in the following record(s) instead of overflowing
// the record buffer.
void Code(std::string resoureFilePath, std::string huffmanCodePath, std::string binaryFilePath)
{
    constexpr std::size_t kRecordSize = 20;                 // bytes per record in the output file
    constexpr std::size_t kBitsPerRecord = kRecordSize - 1; // keep room for the terminating NUL

    std::ifstream table(huffmanCodePath);
    if (!table)
    {
        std::cout << "哈弗曼文件打开错误!" << std::endl;
        return;
    }

    // Code lookup table indexed by byte value.
    std::string codeList[256];
    int ch = 0, w = 0;
    std::string bits;
    while (table >> ch >> w >> bits)
    {
        // A table built through signed char stores bytes >= 0x80 as negative
        // numbers; fold them back into 0..255.
        if (ch < 0 && ch >= -128) ch += 256;
        if (ch < 0 || ch > 255) continue; // not a byte value, ignore the entry
        codeList[ch] = bits;
    }
    table.close();

    std::ifstream src(resoureFilePath);
    if (!src)
    {
        std::cout << "打开源文件失败!" << std::endl;
        return;
    }
    std::ofstream ofs(binaryFilePath, std::ios::binary);
    if (!ofs)
    {
        std::cout << "二进制文件打开失败!" << std::endl;
        return;
    }

    std::string line;
    while (std::getline(src, line))
    {
        for (std::size_t i = 0; i < line.size(); ++i)
        {
            // unsigned char keeps the index inside 0..255 for every byte.
            const std::string &code = codeList[static_cast<unsigned char>(line[i])];
            std::size_t pos = 0;
            do
            {
                // Zero-filled so no uninitialised bytes end up in the file.
                char record[kRecordSize] = {0};
                const std::size_t remaining = code.size() - pos;
                const std::size_t len = remaining < kBitsPerRecord ? remaining : kBitsPerRecord;
                code.copy(record, len, pos);
                ofs.write(record, kRecordSize);
                pos += len;
            } while (pos < code.size());
        }
    }
}
// 再次进行解码
void Decode(string binaryFilePath,string huffmanTreePath, string decodePath)
{
char code[20];
fstream ifs(huffmanTreePath, ios::in); // 打开哈弗曼树文件
if (!ifs) cout << "无法打开哈弗曼树文件!" << endl;
int n; // 节点数
ifs >> n; // n个节点
HuffmanTree HTree; //创建哈弗曼树
HTree = new HTNode[2*n]; // 申请内存
for (size_t i = 1; i < 2*n; i++)
{
ifs >> HTree[i].ascii >> HTree[i].weight >> HTree[i].parent >> HTree[i].Ich >> HTree[i].rch;
}
ifs.close();
// 处理编码信息
ifs.open(binaryFilePath, ios::in|ios::binary);
if (!ifs) cout << "二进制文件打开错误!" << endl;
fstream ofs(decodePath, ios::out); // 将文件保存到该路径下
if (!ofs) cout << "打开decode.txt失败!" << endl;
// 解码开始
char ch;
int root = 2 * n - 1;
while (true)
{
ifs.read((char*)code, 20 * sizeof(char));
if (!ifs) break;
for (size_t i = 0; code[i]!='\0'; i++)
{
ch = code[i];
if (ch == '0') root = HTree[root].Ich; // 碰到0,向左走,走到叶子节点,那么这个值就是字符
else if (ch == '1') root = HTree[root].rch;
if (HTree[root].Ich == 0) // 不管左子树还是右子树,他们的孩子都是0,所以随便找一个就可以了。
{
char cht = HTree[root].ascii;
ofs << cht;
root = 2 * n - 1;
}
}
}
ofs.close();
ifs.close();
}
int main()
{
string inputPath = "data.txt"; // 加载源文件
string outputPath = "weight.txt"; // 输出权重文件
string huffmanTree = "HuffmanTree.txt"; // 哈弗曼树
string huffmanCodePath = "huffmanCodePath.txt"; // 输出哈弗曼编码文件
string binaryFilePath = "binaryFilePath.dat"; // 二进制文件
string decodePath = "decode.txt"; // 解码路径
int n = 0;
WeightCalculate(inputPath, outputPath,n);
HuffmanTree HT;
CreateHuffman(HT, n,outputPath,huffmanTree,huffmanCodePath);
Code(inputPath, huffmanCodePath, binaryFilePath);
Decode(binaryFilePath, huffmanTree, decodePath);
return 0;
}
加载的文件(data.txt)内容如下:
Life is picturesque, there are thick ink splashing, there are light pen light description; Life is like a song, there are light chanting and shallow singing, there are also Huang zhongdalu. You don’t see the life of Li Taibai and Du Gongbu. They have created the greatness of life; Pei Duofei, Shelley’s life, light pen light description, finally write the true meaning of life.