// =============================================================================
// Declarations (originally the header "huffuman.h")
// =============================================================================
#include <iostream>
#include <fstream>
#include <string>
#include <bitset>
#include <algorithm>
#include <cstddef>
using namespace std;

// Capacity of the symbol table. A symbol is one byte, so 256 slots are needed
// to hold every possible value without overflowing the table.
#define MAX_SIZE 256

// One node of the coding tree. Leaves carry a symbol; internal nodes only link
// two subtrees.
struct HuffumanNode
{
    string weight;              // not read anywhere in this file
    int Frequence;              // occurrence count of Word (-1 on internal nodes)
    char Word;                  // symbol stored in a leaf
    bool bRet;                  // set once PreOrders has assigned this leaf's code
    HuffumanNode *pLeftChild;   // branch taken on bit '0'
    HuffumanNode *pRightChild;  // branch taken on bit '1'
    HuffumanNode *next;         // not used by this file's logic; kept so the struct layout is unchanged

    HuffumanNode()
        : weight(), Frequence(-1), Word('\0'), bRet(false),
          pLeftChild(NULL), pRightChild(NULL), next(NULL)
    {
    }
};

// One row of the symbol table: symbol + frequency + its bit code ("0"/"1" text).
struct WordList
{
    char szWord;
    int nFrequence;
    string strWeight;

    WordList() : szWord('\0'), nFrequence(-1), strWeight()
    {
    }
};

// Compresses / decompresses one file using a prefix code derived from the
// byte frequencies of the input.
//
// Compressed file layout (ints are written in the host's native size and byte
// order, exactly as they sit in memory):
//   int  total number of source bytes
//   int  number of distinct symbols N
//   N x (char symbol, int frequency)
//   int  total number of code bits B
//   B/8  bytes, each packing 8 code bits (first bit = most significant)
//   B%8  bytes, each an ASCII '0' or '1' for the bits that did not fill a byte
class HuffumanTree
{
public:
    HuffumanTree();
    ~HuffumanTree();

    int Initialization();   // ask for file names, count byte frequencies
    int BuildATree();       // build the coding tree from the symbol table
    void EnCode();          // derive the bit code of every symbol
    void Decode();          // ask for file names, decompress
    void MemoryToFile();    // write the compressed file

private:
    // The object owns raw tree nodes, so copying is disabled (declared, never defined).
    HuffumanTree(const HuffumanTree &);
    HuffumanTree &operator=(const HuffumanTree &);

    struct HuffumanNode *m_pNode;           // root of the coding tree (owned)
    struct WordList m_WordList[MAX_SIZE];   // symbol table
    int m_nSumLeaf;                         // total number of source bytes
    int m_nDefSum;                          // number of distinct symbols
    int m_n8Bit;                            // total number of code bits
    string m_strSorFile;                    // source file name
    string m_strDesFile;                    // destination file name
};

// Interactive menu that drives compression / decompression.
void InterFace();

// Recursive tree walk that fills in the code of every symbol.
static int PreOrders(struct HuffumanNode *p,
                     struct WordList LstArray[], string strWord, int nDefSum);

// =============================================================================
// Definitions (originally a separate source file that included "huffuman.h";
// the declarations are inline above, so the include is not repeated here)
// =============================================================================

// Number of distinct values a byte can take.
static const int BYTE_VALUES = 256;

// Compile-time check: the symbol table must have room for every byte value.
typedef char MaxSizeCoversEveryByte[(MAX_SIZE >= 256) ? 1 : -1];

// Frees a whole subtree (post-order). Accepts NULL.
static void DestroyTree(HuffumanNode *p)
{
    if (NULL == p)
    {
        return;
    }
    DestroyTree(p->pLeftChild);
    DestroyTree(p->pRightChild);
    delete p;
}

// Allocates a leaf carrying the symbol and frequency of one table row.
static HuffumanNode *MakeLeaf(const WordList &entry)
{
    HuffumanNode *pLeaf = new HuffumanNode;
    pLeaf->Frequence = entry.nFrequence;
    pLeaf->Word = entry.szWord;
    return pLeaf;
}

// Ordering used to sort the symbol table: rarest symbols first.
static bool LessFrequent(const WordList &a, const WordList &b)
{
    return a.nFrequence < b.nFrequence;
}

// Writes one int in native representation.
static void WriteInt(ostream &out, int value)
{
    out.write(reinterpret_cast<const char *>(&value), sizeof(int));
}

// Reads one int in native representation; returns false on a short read.
static bool ReadInt(istream &in, int &value)
{
    return !in.read(reinterpret_cast<char *>(&value), sizeof(int)).fail();
}

// Prints which of the two files failed to open, in the program's usual format.
static void ReportOpenResult(const string &strSor, bool bSorFailed,
                             const string &strDes, bool bDesFailed)
{
    cout << "open " << strSor << (bSorFailed ? " false" : " OK") << endl;
    cout << "open " << strDes << (bDesFailed ? " false" : " OK") << endl;
}

HuffumanTree::HuffumanTree()
    : m_pNode(new HuffumanNode), m_nSumLeaf(0), m_nDefSum(0), m_n8Bit(0)
{
    // m_WordList rows are initialised by WordList's own constructor; they hold
    // std::string members and therefore must not be memset.
}

HuffumanTree::~HuffumanTree()
{
    DestroyTree(m_pNode);
}

// Prompts for the source and destination names, then counts how often each
// byte occurs in the source and sorts the symbol table by ascending frequency.
// Returns 1 on success, 0 when the source is empty or cannot be read.
int HuffumanTree::Initialization()
{
    cout << "Inputing The File Of Source And Destination's Name" << endl;
    if (!(cin >> m_strSorFile >> m_strDesFile))
    {
        return 0;
    }

    m_nSumLeaf = 0;
    m_nDefSum = 0;

    // Binary mode: the data must round-trip byte for byte.
    ifstream InputSouFile(m_strSorFile.c_str(), ios::in | ios::binary);

    // Maps a byte value to its row in m_WordList (-1 = not seen yet).
    int slotOf[BYTE_VALUES];
    fill(slotOf, slotOf + BYTE_VALUES, -1);

    char word;
    while (InputSouFile.get(word))
    {
        m_nSumLeaf++;

        const unsigned char byte = static_cast<unsigned char>(word);
        if (slotOf[byte] < 0)
        {
            // First occurrence: open a new row for this symbol.
            slotOf[byte] = m_nDefSum;
            m_WordList[m_nDefSum].szWord = word;
            m_WordList[m_nDefSum].nFrequence = 0;
            m_WordList[m_nDefSum].strWeight.clear();
            m_nDefSum++;
        }
        m_WordList[slotOf[byte]].nFrequence++;
    }

    if (0 == m_nSumLeaf)
    {
        return 0;
    }

    // Most frequent symbols go last.
    stable_sort(m_WordList, m_WordList + m_nDefSum, LessFrequent);
    return 1;
}

// Builds the coding tree from the symbol table, in table order.
//
// The first two symbols become the children of the initial root. After that,
// symbols are taken two at a time: each pair is put under a fresh node, and a
// new root is created with that node on the left and the previous root on the
// right. A final unpaired symbol becomes the left child of a new root directly.
//
// NOTE: this yields a valid prefix code but it is not the classic "merge the
// two lightest subtrees" Huffman construction, so the code is not guaranteed
// to be optimal. The shape is kept because the decoder rebuilds the very same
// tree from the stored table, and already-compressed files depend on it.
//
// With fewer than two symbols the root stays a lone leaf and every symbol gets
// the empty code. Always returns 0.
int HuffumanTree::BuildATree()
{
    // Start from a clean root so the function can be called more than once.
    DestroyTree(m_pNode);
    m_pNode = new HuffumanNode;

    if (m_nDefSum < 2)
    {
        return 0;
    }

    m_pNode->pLeftChild = MakeLeaf(m_WordList[0]);
    m_pNode->pRightChild = MakeLeaf(m_WordList[1]);

    for (int i = 2; i < m_nDefSum; i += 2)
    {
        HuffumanNode *pNewRoot = new HuffumanNode;

        if (i + 1 < m_nDefSum)
        {
            HuffumanNode *pPair = new HuffumanNode;
            pPair->pLeftChild = MakeLeaf(m_WordList[i]);
            pPair->pRightChild = MakeLeaf(m_WordList[i + 1]);
            pNewRoot->pLeftChild = pPair;
        }
        else
        {
            pNewRoot->pLeftChild = MakeLeaf(m_WordList[i]);
        }

        pNewRoot->pRightChild = m_pNode;
        m_pNode = pNewRoot;
    }
    return 0;
}

// Walks the tree rooted at p and stores, for every leaf, the path that leads to
// it ('0' = left, '1' = right) as the code of the matching row in LstArray.
//   p        subtree to visit (NULL is ignored)
//   LstArray symbol table to fill in
//   strWord  path from the root to p
//   nDefSum  number of valid rows in LstArray
// Always returns 0.
static int PreOrders(struct HuffumanNode *p,
                     struct WordList LstArray[], string strWord, int nDefSum)
{
    if (NULL == p)
    {
        return 0;
    }

    if (NULL == p->pLeftChild && NULL == p->pRightChild)
    {
        if (!p->bRet)
        {
            for (int i = 0; i < nDefSum; i++)
            {
                if (p->Word == LstArray[i].szWord)
                {
                    LstArray[i].strWeight = strWord;
                    p->bRet = true;     // this leaf is done
                    break;
                }
            }
        }
        return 0;
    }

    PreOrders(p->pLeftChild, LstArray, strWord + "0", nDefSum);
    PreOrders(p->pRightChild, LstArray, strWord + "1", nDefSum);
    return 0;
}

// Computes the code of every symbol in the table from the current tree.
void HuffumanTree::EnCode()
{
    PreOrders(m_pNode, m_WordList, string(), m_nDefSum);
}

// Re-reads the source file, translates it to code bits and writes the
// compressed file (see the layout described on the class).
void HuffumanTree::MemoryToFile()
{
    ifstream InputSouFile(m_strSorFile.c_str(), ios::in | ios::binary);
    ofstream OutDesFile(m_strDesFile.c_str(), ios::out | ios::binary);
    if (InputSouFile.fail() || OutDesFile.fail())
    {
        ReportOpenResult(m_strSorFile, InputSouFile.fail(),
                         m_strDesFile, OutDesFile.fail());
        return;
    }

    cout << "The File Size : " << m_nSumLeaf << " bytes" << endl;
    cout << "The codes numbers : " << m_nDefSum << endl;

    // Maps a byte value to its row in m_WordList (-1 = no code for this byte).
    int slotOf[BYTE_VALUES];
    fill(slotOf, slotOf + BYTE_VALUES, -1);
    for (int i = 0; i < m_nDefSum; i++)
    {
        slotOf[static_cast<unsigned char>(m_WordList[i].szWord)] = i;
    }

    // Encode first, so that nothing is written if the source turns out to
    // contain a byte that was not there when the frequencies were counted.
    string strSum;      // all code bits, one '0'/'1' character per bit
    char szTemp;
    while (InputSouFile.get(szTemp))
    {
        const int slot = slotOf[static_cast<unsigned char>(szTemp)];
        if (slot < 0)
        {
            cout << "The source file changed while it was being compressed" << endl;
            return;
        }
        strSum += m_WordList[slot].strWeight;
    }

    WriteInt(OutDesFile, m_nSumLeaf);   // original file length
    WriteInt(OutDesFile, m_nDefSum);    // symbol table length
    for (int i = 0; i < m_nDefSum; i++)
    {
        OutDesFile.write(&m_WordList[i].szWord, sizeof(char));
        WriteInt(OutDesFile, m_WordList[i].nFrequence);
    }

    m_n8Bit = static_cast<int>(strSum.size());
    cout << "The length of String :" << m_n8Bit << endl;
    WriteInt(OutDesFile, m_n8Bit);
    cout << "After Coding ,the length of string: " << m_n8Bit / 8 << " bytes" << endl;

    // Pack every full group of 8 bits into one byte, first bit most significant.
    const int nFullBytes = m_n8Bit / 8;
    for (int i = 0; i < nFullBytes; i++)
    {
        const bitset<8> bits(strSum, static_cast<string::size_type>(i) * 8, 8);
        OutDesFile.put(static_cast<char>(bits.to_ulong()));
    }

    // The bits that do not fill a byte are stored as plain '0'/'1' characters.
    OutDesFile.write(strSum.data() + nFullBytes * 8, m_n8Bit % 8);
}

// Prompts for the compressed file and the output file, rebuilds the coding
// tree from the stored symbol table and writes the decoded bytes.
void HuffumanTree::Decode()
{
    cout << "Inputing The File Of Source And Destination's Name" << endl;
    cin >> m_strSorFile >> m_strDesFile;

    ifstream InputSorFile(m_strSorFile.c_str(), ios::in | ios::binary);
    ofstream OutputDesFile(m_strDesFile.c_str(), ios::out | ios::binary);
    if (InputSorFile.fail() || OutputDesFile.fail())
    {
        ReportOpenResult(m_strSorFile, InputSorFile.fail(),
                         m_strDesFile, OutputDesFile.fail());
        return;
    }

    // The header comes from a file, so it is validated before it is trusted
    // as an array bound.
    if (!ReadInt(InputSorFile, m_nSumLeaf) || !ReadInt(InputSorFile, m_nDefSum)
        || m_nSumLeaf < 0 || m_nDefSum < 0 || m_nDefSum > MAX_SIZE)
    {
        cout << "The compressed file is damaged" << endl;
        return;
    }

    // Read back each symbol and its frequency.
    for (int i = 0; i < m_nDefSum; i++)
    {
        m_WordList[i].strWeight.clear();
        if (InputSorFile.read(&m_WordList[i].szWord, sizeof(char)).fail()
            || !ReadInt(InputSorFile, m_WordList[i].nFrequence))
        {
            cout << "The compressed file is damaged" << endl;
            return;
        }
    }

    // Same table order => same tree as the one used for compression.
    // Decoding follows the tree directly, so the code strings are not needed.
    BuildATree();

    if (!ReadInt(InputSorFile, m_n8Bit) || m_n8Bit < 0)
    {
        cout << "The compressed file is damaged" << endl;
        return;
    }
    cout << "The length of string :" << m_n8Bit << endl;

    // Expand the packed bytes back into one '0'/'1' character per bit.
    string strResult;
    char szTemp;
    for (int i = 0; i < m_n8Bit / 8; i++)
    {
        if (!InputSorFile.get(szTemp))
        {
            cout << "The compressed file is damaged" << endl;
            return;
        }
        const bitset<8> bits(static_cast<unsigned long>(static_cast<unsigned char>(szTemp)));
        strResult += bits.to_string();
    }

    // The trailing bits were stored as characters already.
    for (int i = 0; i < m_n8Bit % 8; i++)
    {
        if (!InputSorFile.get(szTemp))
        {
            cout << "The compressed file is damaged" << endl;
            return;
        }
        strResult += szTemp;
    }

    if (0 == m_nDefSum)
    {
        return;     // nothing to decode
    }

    if (1 == m_nDefSum)
    {
        // A single symbol has the empty code: the length alone describes the data.
        for (int i = 0; i < m_nSumLeaf; i++)
        {
            OutputDesFile.put(m_WordList[0].szWord);
        }
        return;
    }

    // Follow the bits from the root; every leaf reached is one decoded byte.
    string::size_type pos = 0;
    for (int i = 0; i < m_nSumLeaf; i++)
    {
        const HuffumanNode *p = m_pNode;
        while (NULL != p && (NULL != p->pLeftChild || NULL != p->pRightChild))
        {
            if (pos >= strResult.size())
            {
                p = NULL;               // ran out of bits
                break;
            }

            const char bit = strResult[pos++];
            if ('0' == bit)
            {
                p = p->pLeftChild;
            }
            else if ('1' == bit)
            {
                p = p->pRightChild;
            }
            else
            {
                p = NULL;               // not a bit at all
            }
        }

        if (NULL == p)
        {
            cout << "The compressed file is damaged" << endl;
            return;
        }
        OutputDesFile.put(p->Word);
    }
}

// Shows the menu and runs the chosen action until the user exits, enters an
// unknown choice, or the input ends.
void InterFace()
{
    bool bRunning = true;
    while (bRunning)
    {
        cout << "\t\t**************************" << endl;
        cout << "\t\t* Compressed : C*" << endl;
        cout << "\t\t* Decompressed : D*" << endl;
        cout << "\t\t* Exit: E*" << endl;
        cout << "\t\t**************************" << endl;

        char szChoose = '\0';
        if (!(cin >> szChoose))
        {
            break;      // end of input
        }

        HuffumanTree Ht;    // fresh state for every action
        switch (szChoose)
        {
        case 'C':
        case 'c':
            if (0 == Ht.Initialization())
            {
                cout << "The File Is NULL" << endl;
                break;
            }
            Ht.BuildATree();
            Ht.EnCode();
            Ht.MemoryToFile();
            break;

        case 'D':
        case 'd':
            Ht.Decode();
            break;

        case 'E':
        case 'e':
            bRunning = false;
            break;

        default:
            cout << "Inputing is Error" << endl;
            bRunning = false;
            break;
        }
    }
}

// =============================================================================
// Program entry point (originally a separate source file that included
// "huffuman.h")
// =============================================================================
int main()
{
    InterFace();
    return 0;
}