前言:个人实现的一个Huffman编码系统,因为被CSDN(bad table manners)恶心到了,所以无偿地分享给大家,该系统支持键盘、csv文件(即可以用Excel打开和编辑的逗号分隔文件)以及txt文本文件的输入。另外两个文件会放在后文中,请自己创建相应的文件并放在同一路径下。接下来就开始对我的代码进行讲解,个人建议有一定程序设计基础的读者先自行学习Huffman编码相关的知识再来看,这里就不做赘述了。
采用静态三叉链表存储Huffman二叉树,这样更便于记录结点之间的关系。其结点的结构体如下,因为该结构体同时存储了结点的双亲和左右孩子结点的下标信息,而且采用的是存储数组下标而非指针指向的方式,所以名为静态三叉链表。data域本来不想初始化的,但是Visual Studio 2019一直警告我初始化,所以还是随便初始化了一下…
// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index instead of by pointer; an index of -1 means the link
// is absent.
struct HuffmanNode
{
    char data{'\0'}; // character stored in this node
    int weigth{0};   // weight of the node
    int parent{-1};  // index of the parent node
    int lchild{-1};  // index of the left child
    int rchild{-1};  // index of the right child
};
Huffman树创建完成后,每个字符都会有自己的编码,所以需要另一个结构体来存储字符及其编码,其结构体如下。
// One entry of the Huffman code table: a character together with its code.
struct HuffmanCodeTable // Huffman code table entry
{
char data; // the character
char code[MAXLEN]; // Huffman code of that character, written as '0'/'1' characters
};
// Program-wide state shared by the loading, tree-building, printing and
// decoding routines.
int InputCharCnt = 0; // number of characters in the input character set
int HuffmanTreeNodeCnt; // number of nodes in the Huffman tree
HuffmanNode HN[MAXLEN]; // Huffman node array
HuffmanCodeTable HCT[MAXLEN]; // Huffman code table
由于在创建Huffman二叉树后,原字符集中的所有字符都会成为叶结点,并且与新生成的内部结点存放在同一个数组中,所以要用一个变量记录原字符集中的字符个数。同时,创建完的Huffman二叉树的结点个数也需要用一个新变量来存储。然后创建两个结构体数组分别存放所有字符的信息和编码即可。
整个系统的运行过程:
这是一整行的文本内容,分行的话,在代码中要做另外的处理。
A common thread is emerging from the impeachment bombshells, court fights and multiple scandals all coming to head this week inside the one-year mark to the next general election. It’s a picture of a President and his men who subscribe to a staggeringly broad interpretation of executive power and have no reservations about using it often for domestic political ends.The trend, which threatens to recast the conception of the presidency shared by America’s founders, shone through the first witness testimony released from the impeachment inquiry Monday.One former ambassador, Marie Yovanovitch, who apparently had been in the way of Trump’s plans to get dirt from Ukraine on former Vice President Joe Biden, was shocked when the President told his counterpart in Kiev on a phone call that the official US diplomatic representative to his country was “bad news.”
上面没太看懂的地方不要紧,注释写得很详细,仔细看看代码理解一下就好了。另外有心的读者可以关注一下csv读入后的处理,我一直觉得我的处理办法并不是一个很好的方法,因为我也是百度了一下才知道可以读取csv的。所以有经验的同学,欢迎在评论区给出更好用的代码并加以详细说明,大家互相学习才能进步。
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iomanip>
#include <iostream>
using namespace std;
// Shared capacity: size of the node array, the code table, each code string
// and the line/input buffers used by the loaders and the decoder.
#define MAXLEN 1024
// One node of the Huffman tree. Nodes live in a flat array and refer to each
// other by array index instead of by pointer; an index of -1 means the link
// is absent.
struct HuffmanNode
{
    char data{'\0'}; // character stored in this node
    int weigth{0};   // weight of the node
    int parent{-1};  // index of the parent node
    int lchild{-1};  // index of the left child
    int rchild{-1};  // index of the right child
};
// One entry of the Huffman code table: a character together with its code.
struct HuffmanCodeTable // Huffman code table entry
{
char data; // the character
char code[MAXLEN]; // Huffman code of that character, written as '0'/'1' characters
};
// Program-wide state shared by the loading, tree-building, printing and
// decoding routines.
int InputCharCnt = 0; // number of characters in the input character set
int HuffmanTreeNodeCnt; // number of nodes in the Huffman tree
HuffmanNode HN[MAXLEN]; // Huffman node array
HuffmanCodeTable HCT[MAXLEN]; // Huffman code table
// Reads "<character> <weight>" pairs from a single line of standard input and
// appends each pair to HN, advancing InputCharCnt.
// Example input line: i 1 l 2 o 3 v 4 e 5 u 6
//
// Reading stops at the end of the line, at end of input, at the first
// malformed pair (the rest of that line is discarded), or when HN is full.
// The terminating newline itself is left in the stream.
void InputByKeyoard()
{
    const int capacity = static_cast<int>(sizeof(HN) / sizeof(HN[0]));

    cout << "请在一行中按照“字符”,“权值”的格式输入数据,并以回车作为输入结束:" << endl;
    cin.get(); // drop the newline left over from the preceding menu input

    while (InputCharCnt < capacity)
    {
        // Skip blanks by hand: operator>> would also skip the terminating
        // newline and then block waiting for more input.
        while (cin.peek() == ' ' || cin.peek() == '\t')
            cin.get();
        if (!cin.good() || cin.peek() == '\n')
            break;

        char data;  // character being entered
        int weight; // weight of that character
        if (!(cin >> data >> weight))
        {
            // Malformed pair: recover the stream and drop the rest of the
            // line instead of spinning forever on a failed stream.
            cin.clear();
            while (cin.good() && cin.peek() != '\n')
                cin.get();
            break;
        }

        HN[InputCharCnt].data = data;
        HN[InputCharCnt].weigth = weight;
        ++InputCharCnt;
    }
}
// Loads the character set from "weight.csv" in the working directory.
//
// Each row is expected to hold the character in column 0, a one-character
// separator in column 1 and a non-negative decimal weight starting at
// column 2. Parsing of the weight stops at the first non-digit, so a trailing
// '\r' or extra columns do not corrupt the value. Rows that are too short or
// whose weight does not start with a digit are skipped. Accepted rows are
// appended to HN and counted in InputCharCnt; loading stops when HN is full.
void LoadByCSV()
{
    ifstream file("weight.csv", ios::in);
    if (!file.is_open())
    {
        cerr << "无法打开文件 weight.csv" << endl;
        return;
    }

    char line[MAXLEN];
    while (InputCharCnt < MAXLEN && file.getline(line, MAXLEN))
    {
        // The checks short-circuit, so line[2] is only read when the
        // terminator has not been reached yet.
        if (line[0] == '\0' || line[1] == '\0' || line[2] < '0' || line[2] > '9')
            continue;

        int weight = 0;
        for (int i = 2; line[i] >= '0' && line[i] <= '9'; ++i)
            weight = weight * 10 + (line[i] - '0');

        HN[InputCharCnt].data = line[0];
        HN[InputCharCnt].weigth = weight;
        ++InputCharCnt;
    }
    // The stream closes itself when it goes out of scope.
}
// Builds the character set from the first line of "huffman.txt" in the
// working directory: every distinct character of that line becomes one entry
// of HN (in order of first appearance) whose weight is the number of times
// the character occurs. Only the first line of the file is read.
void LoadByTXT()
{
    ifstream file("huffman.txt", ios::in);
    if (!file.is_open())
    {
        cerr << "无法打开文件 huffman.txt" << endl;
        return;
    }

    char line[MAXLEN] = {'\0'}; // stays an empty string if nothing can be read
    file.getline(line, MAXLEN);

    for (int i = 0; line[i] != '\0'; ++i)
    {
        // Look for an existing entry for this character.
        int slot = 0;
        while (slot < InputCharCnt && HN[slot].data != line[i])
            ++slot;

        if (slot == InputCharCnt)
        {
            // First occurrence: claim a new entry, unless the array is full.
            if (InputCharCnt >= MAXLEN)
                continue;
            HN[slot].data = line[i];
            ++InputCharCnt;
        }
        ++HN[slot].weigth; // one more occurrence of this character
    }
    // The stream closes itself when it goes out of scope.
}
// Runs the loader selected in the start-up menu:
// 1 = keyboard input, 2 = CSV file, 3 = text file.
// Any other value loads nothing.
void SwitchLoad(int choice)
{
    if (choice == 1)
        InputByKeyoard();
    else if (choice == 2)
        LoadByCSV();
    else if (choice == 3)
        LoadByTXT();
}
// Finds the lightest node among HN[0 .. HuffmanTreeNodeCnt) that has no
// parent yet, records newNode as its parent and returns its index. When
// several candidates share the smallest weight, the one with the lowest
// index wins.
//
// The minimum is seeded from the first candidate rather than from a fixed
// constant, so nodes of any weight can be selected.
//
// If no parentless node exists, nothing is modified and MAXLEN - 1 is
// returned (the value the search index was historically seeded with).
int SearchMinWeight(int newNode)
{
    int minNum = -1; // index of the lightest parentless node seen so far
    for (int i = 0; i < HuffmanTreeNodeCnt; ++i)
    {
        if (HN[i].parent != -1) // already attached to a parent
            continue;
        if (minNum == -1 || HN[i].weigth < HN[minNum].weigth)
            minNum = i;
    }

    if (minNum == -1)
        return MAXLEN - 1;

    HN[minNum].parent = newNode; // attach the chosen node to its new parent
    return minNum;
}
// Builds the Huffman tree on top of the InputCharCnt leaves already stored in
// HN[0 .. InputCharCnt). Each round picks the two lightest parentless nodes
// and joins them under a new node appended to HN, so a character set of n
// entries ends up with 2n - 1 nodes and the root in the last slot.
// HuffmanTreeNodeCnt is updated to the resulting node count; InputCharCnt is
// left untouched.
//
// If the finished tree would not fit into HN, an error is reported and no
// internal node is created.
void CreatHuffmanTree()
{
    const int capacity = static_cast<int>(sizeof(HN) / sizeof(HN[0]));
    const int totalNodes = 2 * InputCharCnt - 1;

    HuffmanTreeNodeCnt = InputCharCnt;
    if (totalNodes > capacity)
    {
        cerr << "字符个数过多,无法建立Huffman树" << endl;
        return;
    }

    for (int i = InputCharCnt; i < totalNodes; ++i)
    {
        // Each call also marks the chosen node as a child of node i, so the
        // second call cannot return the same node again.
        int lchild = SearchMinWeight(i);
        int rchild = SearchMinWeight(i);
        if (lchild > rchild) // the lower index becomes the left child
        {
            const int temp = rchild;
            rchild = lchild;
            lchild = temp;
        }
        HN[i].lchild = lchild;
        HN[i].rchild = rchild;
        HN[i].weigth = HN[lchild].weigth + HN[rchild].weigth;
        // Only now does node i become visible to the next search.
        ++HuffmanTreeNodeCnt;
    }
}
// Reverses the first `length` characters of `code` in place.
// Lengths below 2 leave the buffer unchanged.
void ReverseCode(char code[], int length)
{
    if (length < 2)
        return;

    char* head = code;
    char* tail = code + length - 1;
    while (head < tail)
    {
        const char held = *head;
        *head = *tail;
        *tail = held;
        ++head;
        --tail;
    }
}
// Fills HCT with the code of every input character.
//
// For each leaf the path is followed upwards through the parent links until
// a node without parent (the root) is reached, emitting '0' when the current
// node is its parent's left child and '1' when it is the right child. That
// yields the code back to front, so it is reversed afterwards. The code is
// always written as a terminated string and never grows beyond the code
// buffer.
void CreatHuffmanCodeTable()
{
    for (int i = 0; i < InputCharCnt; ++i)
    {
        // Keep one byte free for the terminator.
        const int capacity = static_cast<int>(sizeof(HCT[i].code));

        HCT[i].data = HN[i].data;

        int len = 0;
        int child = i;
        while (HN[child].parent != -1 && len < capacity - 1)
        {
            const int parent = HN[child].parent;
            if (HN[parent].lchild == child)
                HCT[i].code[len++] = '0';
            else if (HN[parent].rchild == child)
                HCT[i].code[len++] = '1';
            child = parent;
        }

        ReverseCode(HCT[i].code, len);
        HCT[i].code[len] = '\0'; // do not rely on the buffer being pre-zeroed
    }
}
// Prints two left-aligned tables to standard output: first the input
// characters (HN[0 .. InputCharCnt)) with their links and codes, then the
// remaining nodes up to HuffmanTreeNodeCnt, for which the data and code
// columns show "NULL".
void PrintHuffmanCodeTable()
{
    // Both tables use the same column header followed by an empty line.
    auto printHeader = []() {
        cout << setiosflags(ios::left)
             << setw(5) << "Num"
             << setw(10) << "Data"
             << setw(10) << "Weigth"
             << setw(10) << "Parent"
             << setw(10) << "Lchild"
             << setw(10) << "Rchild"
             << setw(15) << "Code"
             << "\n" << endl;
    };

    printHeader();
    for (int row = 0; row < InputCharCnt; row++)
    {
        const auto& node = HN[row];
        cout << setiosflags(ios::left)
             << setw(5) << row
             << setw(10) << node.data
             << setw(10) << node.weigth
             << setw(10) << node.parent
             << setw(10) << node.lchild
             << setw(10) << node.rchild
             << setw(15) << HCT[row].code
             << endl;
    }
    cout << endl;

    printHeader();
    for (int row = InputCharCnt; row < HuffmanTreeNodeCnt; row++)
    {
        const auto& node = HN[row];
        cout << setiosflags(ios::left)
             << setw(5) << row
             << setw(10) << "NULL"
             << setw(10) << node.weigth
             << setw(10) << node.parent
             << setw(10) << node.lchild
             << setw(10) << node.rchild
             << setw(15) << "NULL"
             << endl;
    }
}
// Reads a string of '0'/'1' characters from standard input and decodes it
// with the current Huffman tree, printing the decoded characters.
//
// Decoding starts at the root (the last node of the tree) and follows the
// left child for '0' and the right child for '1'; reaching a node without a
// left child emits its character and restarts at the root.
//
// - A tree that is empty or consists of a single node has no codes, so
//   nothing is read and a message is printed instead.
// - At most MAXLEN - 1 characters of input are accepted.
// - A character other than '0' or '1' stops decoding with a message.
// - If the input ends in the middle of a code, the incomplete-code message
//   is printed.
void HuffmanDecode()
{
    const int root = HuffmanTreeNodeCnt - 1;
    if (root < 0 || HN[root].lchild == -1 || HN[root].rchild == -1)
    {
        cout << "当前Huffman树为空或只有一个结点,无法解码!" << endl;
        return;
    }

    cout << "请输入要解码的二进制码:";
    char Binary[MAXLEN] = {'\0'};  // stays empty if the read fails
    cin >> setw(MAXLEN) >> Binary; // setw keeps the read inside the buffer
    cout << "解码后的数据为:";

    int track = root;
    bool invalidInput = false;
    for (const char* s = Binary; *s != '\0'; ++s)
    {
        if (*s == '0')
            track = HN[track].lchild;
        else if (*s == '1')
            track = HN[track].rchild;
        else
        {
            invalidInput = true;
            break;
        }

        // Checking one child is enough: a node without a left child is a
        // leaf, i.e. a character node.
        if (HN[track].lchild == -1)
        {
            cout << HN[track].data;
            track = root;
        }
    }

    if (invalidInput)
        cout << "\n输入中包含'0'和'1'以外的字符,解码终止!" << endl;
    else if (track != root) // stopped part-way down the tree
        cout << "\n部分解码成功!存在二进制码缺省!" << endl;
    cout << endl;
}
// Executes the operation picked in the main menu:
// 1 = print the code table, 2 = decode a bit string, 3 = terminate the
// program with exit status 0. Any other value does nothing.
void SwitchOperation(int choice)
{
    if (choice == 1)
    {
        PrintHuffmanCodeTable();
        return;
    }
    if (choice == 2)
    {
        HuffmanDecode();
        return;
    }
    if (choice == 3)
        exit(0);
}
// Program entry point: asks how to load the character set, builds the
// Huffman tree and the code table, then serves the operation menu until the
// user chooses to quit or standard input ends.
//
// Returns 1 when the initial menu choice cannot be read, 0 when standard
// input ends or becomes unusable while the operation menu is shown.
int main(void)
{
    int choice;
    cout << "欢迎使用本Huffman编码系统" << endl;
    cout << "请选择您字符集的加载方式:" << endl;
    cout << "1--键盘数据输入\n2--加载CSV文件\n3--加载txt文本文件" << endl;
    if (!(cin >> choice))
    {
        cerr << "输入无效,程序退出" << endl;
        return 1;
    }
    SwitchLoad(choice);
    CreatHuffmanTree();      // build the Huffman tree
    CreatHuffmanCodeTable(); // build the Huffman code table
    cout << "加载成功!请输入您接下来的操作:" << endl;
    while (1)
    {
        cout << "1--打印Huffman编码表" << endl;
        cout << "2--进行Huffman解码" << endl;
        cout << "3--不操作了,退出程序" << endl;
        if (!(cin >> choice))
        {
            // Without this the failed stream would make the menu loop spin
            // forever. End of input (or a broken stream) ends the program;
            // anything else is a bad token, so drop the rest of the line
            // and show the menu again.
            if (cin.eof() || cin.bad())
                return 0;
            cin.clear();
            char skipped;
            while (cin.get(skipped) && skipped != '\n')
            {
            }
            continue;
        }
        SwitchOperation(choice);
    }
}
希望本篇博客能对你起到一点帮助作用,也希望你能动动小手点个赞,这样我才能知道,我的付出没有白费啦~~。