【C++】根据输入的字符串生成哈夫曼树, 并进行哈夫曼编码和解码

/**********************************************************************
 * Description    : create huffmanTree and huffmanCode by input string 
 *                  and decode a 0、1 sequence by huffmanCode
 * Author         : wandugu
 * DATE           : 2020-05-02
 *********************************************************************/
#include 
#include 
#include 
#include 
#include 

using namespace std;

const int INPUT_LEN_MAX = 32;                    // 允许的输入的最大的节点个数,
const int CODE_NUM_MAX = 2 * INPUT_LEN_MAX - 1;  // 最大哈夫曼编码长度

using HuffmanTree = struct {
    float weight = 0.0f;
    char data = '0';
    int lChild = -1;
    int rChild = -1;
    int parent = -1;
    int index = 0;
};
HuffmanTree huffmanTree[CODE_NUM_MAX];      // huffmanTree:构造的Huffman树

using codeType = struct {
    char bits[INPUT_LEN_MAX] = "";          // 编码数组位串,其中N_MAX为运行的最大叶子数目
    int bitLen = 0;                         // 每个节点的Huffman编码长度
    char data = 127;                        // 结点值, 默认为ASCII最大值
};
codeType huffmanCode[INPUT_LEN_MAX];        // huffmanCode: 存放Huffman编码的数组

int inputStrLen, codeNum;                   // 全局变量, 用于存放实际叶子的数目和实际Huffman树的结点数
bool cmpByTreeWeight(const HuffmanTree &tree1, const HuffmanTree &tree2);
bool cmpByTreeIndex(const HuffmanTree &tree1, const HuffmanTree &tree2);
bool cmpByCodeData(const codeType &code1, const codeType &code2);
bool checkInputStr(const string &toCheckStr);
void reverse(codeType &toRevertCode);

void CreatHuffmanTree(HuffmanTree tree[]) {
    char inputStr[INPUT_LEN_MAX] = "";
    /* Initialization the huffmanTree */
    for (int i = 0; i < CODE_NUM_MAX; i++) {
        tree[i].index = i;
    }
    /* Input name and weights */
    cout << "Please input a string:";
    cin >> inputStr;
    cout << "Your input string is: " << inputStr << endl;

    inputStrLen = strlen(inputStr);
    codeNum = 2 * inputStrLen - 1;
    for (int i = 0; i < inputStrLen; i++) {
        tree[i].data = inputStr[i];
    }

    cout << "Please input " << inputStrLen << " float number as the corresponding weights:" << endl;
    for (int i = 0; i < inputStrLen; i++) {
        cin >> tree[i].weight;
    }

    cout << endl << "The corresponding weights are: ";
    for (int i = 0; i < inputStrLen; i++) {
        cout.setf(ios::fixed);
        cout << setprecision(2) << tree[i].weight;
        if (i != inputStrLen - 1) {
            cout << ", ";
        } else {
            cout << endl;
        }
    }
    // 构造Huffman树
    sort(tree, tree + inputStrLen, cmpByTreeWeight); // 先按权值sort一把
    for (int i = 0; i < codeNum - 1; i+=2) {
        // 每次抓2个最小的
        tree[i].parent = tree[tree[inputStrLen + i / 2].index].index;
        tree[i + 1].parent = tree[tree[inputStrLen + i / 2].index].index;
        tree[tree[i].parent].lChild = tree[i].index;
        tree[tree[i + 1].parent].rChild = tree[i + 1].index;
        // 再抓1个空节点, 权重赋上去
        tree[tree[inputStrLen + i / 2].index].weight = tree[i].weight + tree[i + 1].weight;
        // 之后重新排序, 确保每次都是前2个最小
        sort(tree, tree + inputStrLen + i / 2 + 1, cmpByTreeWeight); // +1是因为多抓了1个空节点
    }
    sort(tree, tree + codeNum, cmpByTreeIndex); // 最后按索引排一把, 方便展示
}

void ShowHuffmanTree(HuffmanTree tree[]) {
    cout << endl << "================== The Huffman huffmanTree is as follows ==================" << endl;
    cout << "Index\tlChild\tdata\tweight\trChild\tparent" << endl;
    for (int i = 0; i < codeNum; i++) {
        cout << internal << setw(3) << tree[i].index << "\t"
             << right << setw(4) << tree[i].lChild << "\t"
             << internal << setw(2) << tree[i].data << "\t"
             << setprecision(2) << tree[i].weight << "\t"
             << right << setw(4) << tree[i].rChild << "\t"
             << right << setw(3) << tree[i].parent << endl;
    }
    cout << endl;
}

// Huffman树编码
void HuffmanCode(codeType code[], HuffmanTree tree[]) {
    for (int i = codeNum - 2; i >= 0; i--) {
        // 先把data搞到手
        if (tree[i].data != '0') {
            code[i].data = tree[i].data;
        }
        // 从上到下进行Huffman编码, 所以此时的Huffman码是逆序的
        for (int j = i, k = 0; tree[j].parent != -1; k++) {
            if (j == tree[tree[j].parent].lChild) {
                code[i].bits[k] = '0';
            } else if (j == tree[tree[j].parent].rChild) {
                code[i].bits[k] = '1';
            }
            j = tree[j].parent;
            code[i].bitLen++;
        }
    }
    // 逆序得到的Huffman编码
    for (int i = 0; i < codeNum; i++) {
        reverse(code[i]);
    }
    // 把带data的放在前面, 方便打印
    sort(code, code + codeNum, cmpByCodeData);
}

void ShowHuffmanCode(const codeType code[]) {
    cout << endl << "================== The Huffman codes are as follows ==================" << endl;
    for (int i = 0; i < inputStrLen; i++) {
        cout << internal << "Index: " << i << "\tdata: " << code[i].data << "\t\tbitLen: "
             << code[i].bitLen << "\tbits: ";
        for (int j = 0; j < code[i].bitLen; j++) {
            cout << code[i].bits[j];
        }
        cout << endl;
    }
    cout << endl;
}

// Huffman树译码
// 输入一串二进制代码, 根据上述编码译出相应字符, 直到译完整个代码, 如出错, 输出错误信息, 并返回
void HuffmanDeCode(codeType code[]) {
    cout << "================== Decoding Start ==================" << endl;
    string toDecodeStr;         // 待解码的0、1序列
    vector decodeStr;     // 解码后的结果
    do {
        cout << "Please input the huffmanCode for Decoding !" << endl;
        cout << "It should be noted that the huffmanCode should only be 0 or 1." << endl;
        cout << "Other values are invalid!" << endl;
        cin >> toDecodeStr;
    } while (!checkInputStr(toDecodeStr));  // 如果输入不合法, 就重新输入
    // 下面开始解码
    int maxBitLen = code[0].bitLen;         // 拿到当前最大的Huffman编码长度
    bool isStrExistCode = false;
    for (int i = 0; i < toDecodeStr.length(); i++) {
        for (int j = 1; j <= maxBitLen; j++) {
            char* tmpCode = const_cast(toDecodeStr.substr(i, j).c_str()); // 拿到当前需要解码的子串
            for (int k = 0; k < inputStrLen; k++) {
                if (strcmp(tmpCode, code[k].bits) == 0) {
                    decodeStr.push_back(code[k].data); // 如果找到了, 就扔到结果串中
                    isStrExistCode = true;
                    break;
                }
            }
            if (isStrExistCode) {   // 如果找到了, 就把i往前偏移j-1位, 再+1
                i += j - 1;
                isStrExistCode = false;
                break;
            }
            if (j == maxBitLen) {   // 如果循环完输入字符串了还没找到这个子串, 则直接退出
                cout << "输入的序列无法被译码" << endl;
                return;
            } else {                // 否则, 重置找到标志位, 进行下一轮循环
                isStrExistCode = false;
                continue;
            }
        }
    }
    // 解码完毕, 输出结果
    cout << "decode result is: ";
    for (char &outIter : decodeStr) { // 这个写法太方便了, 学习一下
        cout << outIter;
    }
    cout << endl;
}

// 逆序code中的Huffman编码
void reverse(codeType &toRevertCode) {
    for (int i = 0; i < toRevertCode.bitLen / 2; i++) {
        swap(toRevertCode.bits[i], toRevertCode.bits[toRevertCode.bitLen - i - 1]);
    }
}

// 权值排序函数, 如果权值一样, 按内容排序, 否则, 按权值排序
bool cmpByTreeWeight(const HuffmanTree &tree1, const HuffmanTree &tree2) {
    if (tree1.weight == tree2.weight) {
        return tree1.data > tree2.data;
    } else {
        return tree1.weight < tree2.weight;
    }
}

// 索引排序函数, 按照从小到大索引顺序的排序
bool cmpByTreeIndex(const HuffmanTree &tree1, const HuffmanTree &tree2) {
    return tree1.index < tree2.index;
}

// Huffman编码排序函数, 优先返回有data的节点, 如果都有data, 则按bitLen排序
bool cmpByCodeData(const codeType &code1, const codeType &code2) {
    if (code1.data == 127) {
        return false;
    }
    if (code2.data == 127) {
        return true;
    }
    return code1.bitLen > code2.bitLen;
}

// 检查输入的待解码的哈夫曼编码是否全为0或1
bool checkInputStr(const string &toCheckStr) {
    for (char i : toCheckStr) {
        if (i != '0' && i != '1') {
            return false;
        }
    }
    return true;
}

int main() {
    CreatHuffmanTree(huffmanTree);
    ShowHuffmanTree(huffmanTree);
    HuffmanCode(huffmanCode, huffmanTree);
    ShowHuffmanCode(huffmanCode);
    HuffmanDeCode(huffmanCode);
    return 0;
}

 

你可能感兴趣的:(C++)