/**********************************************************************
* Description : create huffmanTree and huffmanCode by input string
* and decode a 0、1 sequence by huffmanCode
* Author : wandugu
* DATE : 2020-05-02
*********************************************************************/
#include
#include
#include
#include
#include
using namespace std;
const int INPUT_LEN_MAX = 32; // 允许的输入的最大的节点个数,
const int CODE_NUM_MAX = 2 * INPUT_LEN_MAX - 1; // 最大哈夫曼编码长度
using HuffmanTree = struct {
float weight = 0.0f;
char data = '0';
int lChild = -1;
int rChild = -1;
int parent = -1;
int index = 0;
};
HuffmanTree huffmanTree[CODE_NUM_MAX]; // huffmanTree:构造的Huffman树
using codeType = struct {
char bits[INPUT_LEN_MAX] = ""; // 编码数组位串,其中N_MAX为运行的最大叶子数目
int bitLen = 0; // 每个节点的Huffman编码长度
char data = 127; // 结点值, 默认为ASCII最大值
};
codeType huffmanCode[INPUT_LEN_MAX]; // huffmanCode: 存放Huffman编码的数组
int inputStrLen, codeNum; // 全局变量, 用于存放实际叶子的数目和实际Huffman树的结点数
bool cmpByTreeWeight(const HuffmanTree &tree1, const HuffmanTree &tree2);
bool cmpByTreeIndex(const HuffmanTree &tree1, const HuffmanTree &tree2);
bool cmpByCodeData(const codeType &code1, const codeType &code2);
bool checkInputStr(const string &toCheckStr);
void reverse(codeType &toRevertCode);
void CreatHuffmanTree(HuffmanTree tree[]) {
char inputStr[INPUT_LEN_MAX] = "";
/* Initialization the huffmanTree */
for (int i = 0; i < CODE_NUM_MAX; i++) {
tree[i].index = i;
}
/* Input name and weights */
cout << "Please input a string:";
cin >> inputStr;
cout << "Your input string is: " << inputStr << endl;
inputStrLen = strlen(inputStr);
codeNum = 2 * inputStrLen - 1;
for (int i = 0; i < inputStrLen; i++) {
tree[i].data = inputStr[i];
}
cout << "Please input " << inputStrLen << " float number as the corresponding weights:" << endl;
for (int i = 0; i < inputStrLen; i++) {
cin >> tree[i].weight;
}
cout << endl << "The corresponding weights are: ";
for (int i = 0; i < inputStrLen; i++) {
cout.setf(ios::fixed);
cout << setprecision(2) << tree[i].weight;
if (i != inputStrLen - 1) {
cout << ", ";
} else {
cout << endl;
}
}
// 构造Huffman树
sort(tree, tree + inputStrLen, cmpByTreeWeight); // 先按权值sort一把
for (int i = 0; i < codeNum - 1; i+=2) {
// 每次抓2个最小的
tree[i].parent = tree[tree[inputStrLen + i / 2].index].index;
tree[i + 1].parent = tree[tree[inputStrLen + i / 2].index].index;
tree[tree[i].parent].lChild = tree[i].index;
tree[tree[i + 1].parent].rChild = tree[i + 1].index;
// 再抓1个空节点, 权重赋上去
tree[tree[inputStrLen + i / 2].index].weight = tree[i].weight + tree[i + 1].weight;
// 之后重新排序, 确保每次都是前2个最小
sort(tree, tree + inputStrLen + i / 2 + 1, cmpByTreeWeight); // +1是因为多抓了1个空节点
}
sort(tree, tree + codeNum, cmpByTreeIndex); // 最后按索引排一把, 方便展示
}
void ShowHuffmanTree(HuffmanTree tree[]) {
cout << endl << "================== The Huffman huffmanTree is as follows ==================" << endl;
cout << "Index\tlChild\tdata\tweight\trChild\tparent" << endl;
for (int i = 0; i < codeNum; i++) {
cout << internal << setw(3) << tree[i].index << "\t"
<< right << setw(4) << tree[i].lChild << "\t"
<< internal << setw(2) << tree[i].data << "\t"
<< setprecision(2) << tree[i].weight << "\t"
<< right << setw(4) << tree[i].rChild << "\t"
<< right << setw(3) << tree[i].parent << endl;
}
cout << endl;
}
// Huffman树编码
void HuffmanCode(codeType code[], HuffmanTree tree[]) {
for (int i = codeNum - 2; i >= 0; i--) {
// 先把data搞到手
if (tree[i].data != '0') {
code[i].data = tree[i].data;
}
// 从上到下进行Huffman编码, 所以此时的Huffman码是逆序的
for (int j = i, k = 0; tree[j].parent != -1; k++) {
if (j == tree[tree[j].parent].lChild) {
code[i].bits[k] = '0';
} else if (j == tree[tree[j].parent].rChild) {
code[i].bits[k] = '1';
}
j = tree[j].parent;
code[i].bitLen++;
}
}
// 逆序得到的Huffman编码
for (int i = 0; i < codeNum; i++) {
reverse(code[i]);
}
// 把带data的放在前面, 方便打印
sort(code, code + codeNum, cmpByCodeData);
}
void ShowHuffmanCode(const codeType code[]) {
cout << endl << "================== The Huffman codes are as follows ==================" << endl;
for (int i = 0; i < inputStrLen; i++) {
cout << internal << "Index: " << i << "\tdata: " << code[i].data << "\t\tbitLen: "
<< code[i].bitLen << "\tbits: ";
for (int j = 0; j < code[i].bitLen; j++) {
cout << code[i].bits[j];
}
cout << endl;
}
cout << endl;
}
// Huffman树译码
// 输入一串二进制代码, 根据上述编码译出相应字符, 直到译完整个代码, 如出错, 输出错误信息, 并返回
void HuffmanDeCode(codeType code[]) {
cout << "================== Decoding Start ==================" << endl;
string toDecodeStr; // 待解码的0、1序列
vector decodeStr; // 解码后的结果
do {
cout << "Please input the huffmanCode for Decoding !" << endl;
cout << "It should be noted that the huffmanCode should only be 0 or 1." << endl;
cout << "Other values are invalid!" << endl;
cin >> toDecodeStr;
} while (!checkInputStr(toDecodeStr)); // 如果输入不合法, 就重新输入
// 下面开始解码
int maxBitLen = code[0].bitLen; // 拿到当前最大的Huffman编码长度
bool isStrExistCode = false;
for (int i = 0; i < toDecodeStr.length(); i++) {
for (int j = 1; j <= maxBitLen; j++) {
char* tmpCode = const_cast(toDecodeStr.substr(i, j).c_str()); // 拿到当前需要解码的子串
for (int k = 0; k < inputStrLen; k++) {
if (strcmp(tmpCode, code[k].bits) == 0) {
decodeStr.push_back(code[k].data); // 如果找到了, 就扔到结果串中
isStrExistCode = true;
break;
}
}
if (isStrExistCode) { // 如果找到了, 就把i往前偏移j-1位, 再+1
i += j - 1;
isStrExistCode = false;
break;
}
if (j == maxBitLen) { // 如果循环完输入字符串了还没找到这个子串, 则直接退出
cout << "输入的序列无法被译码" << endl;
return;
} else { // 否则, 重置找到标志位, 进行下一轮循环
isStrExistCode = false;
continue;
}
}
}
// 解码完毕, 输出结果
cout << "decode result is: ";
for (char &outIter : decodeStr) { // 这个写法太方便了, 学习一下
cout << outIter;
}
cout << endl;
}
// 逆序code中的Huffman编码
void reverse(codeType &toRevertCode) {
for (int i = 0; i < toRevertCode.bitLen / 2; i++) {
swap(toRevertCode.bits[i], toRevertCode.bits[toRevertCode.bitLen - i - 1]);
}
}
// 权值排序函数, 如果权值一样, 按内容排序, 否则, 按权值排序
bool cmpByTreeWeight(const HuffmanTree &tree1, const HuffmanTree &tree2) {
if (tree1.weight == tree2.weight) {
return tree1.data > tree2.data;
} else {
return tree1.weight < tree2.weight;
}
}
// 索引排序函数, 按照从小到大索引顺序的排序
bool cmpByTreeIndex(const HuffmanTree &tree1, const HuffmanTree &tree2) {
return tree1.index < tree2.index;
}
// Huffman编码排序函数, 优先返回有data的节点, 如果都有data, 则按bitLen排序
bool cmpByCodeData(const codeType &code1, const codeType &code2) {
if (code1.data == 127) {
return false;
}
if (code2.data == 127) {
return true;
}
return code1.bitLen > code2.bitLen;
}
// 检查输入的待解码的哈夫曼编码是否全为0或1
bool checkInputStr(const string &toCheckStr) {
for (char i : toCheckStr) {
if (i != '0' && i != '1') {
return false;
}
}
return true;
}
int main() {
CreatHuffmanTree(huffmanTree);
ShowHuffmanTree(huffmanTree);
HuffmanCode(huffmanCode, huffmanTree);
ShowHuffmanCode(huffmanCode);
HuffmanDeCode(huffmanCode);
return 0;
}