// fbd2XML: converts one dictionary-entry string marked up with FBD-style control codes into XML.

#include <algorithm>
#include <iostream>
#include <string>
using namespace std;
namespace {

const std::string::size_type kNotFound = std::string::npos;

// Returns the index of the n-th (1-based) occurrence of `pattern` in `text`,
// counting overlapping matches, or kNotFound when there are fewer than n.
std::string::size_type find_nth(const std::string& text, const std::string& pattern, int n) {
    std::string::size_type pos = text.find(pattern);
    while (pos != kNotFound && --n > 0) {
        pos = text.find(pattern, pos + 1);
    }
    return pos;
}

// Returns the index of the first occurrence of `pattern` at or after `from`,
// or kNotFound when `from` is kNotFound or no such occurrence exists.
std::string::size_type first_at_or_after(const std::string& text, const std::string& pattern,
                                         std::string::size_type from) {
    if (from == kNotFound) {
        return kNotFound;
    }
    return text.find(pattern, from);
}

// Returns the index of the last occurrence of `pattern` at or after `from`,
// or kNotFound when `from` is kNotFound or no such occurrence exists.
std::string::size_type last_at_or_after(const std::string& text, const std::string& pattern,
                                        std::string::size_type from) {
    if (from == kNotFound) {
        return kNotFound;
    }
    const std::string::size_type pos = text.rfind(pattern);
    return (pos != kNotFound && pos >= from) ? pos : kNotFound;
}

// Returns text[marker + skip, end), or an empty string when either position
// is kNotFound or the range is empty/inverted. `skip` is the byte length of
// the control codes that sit between `marker` and the wanted text.
std::string text_between(const std::string& text, std::string::size_type marker,
                         std::string::size_type skip, std::string::size_type end) {
    if (marker == kNotFound || end == kNotFound) {
        return std::string();
    }
    const std::string::size_type begin = marker + skip;
    if (begin >= end || begin > text.size()) {
        return std::string();
    }
    return text.substr(begin, end - begin);
}

// Returns `text` with every occurrence of every character in `unwanted` removed.
std::string strip_chars(std::string text, const std::string& unwanted) {
    for (std::string::size_type k = 0; k < unwanted.size(); ++k) {
        // Two-iterator erase: drops the whole tail left behind by std::remove,
        // and is a no-op (not undefined behaviour) when the character is absent.
        text.erase(std::remove(text.begin(), text.end(), unwanted[k]), text.end());
    }
    return text;
}

}  // namespace

// Extracts the headword, pinyin and four text paragraphs from one hard-coded,
// control-code-tagged dictionary entry and prints them as one concatenated line.
int main() {
    // Raw entry string. Pre-processing already applied to it:
    // every 〖 was replaced by '+', every 〗 by '-', and all 【 】 were removed.
    const std::string uts="+HT5SS-+JP3-+CX2-+CT-阿鼻地狱+CX-+HT-+WTXT-+JP-+KG2-+CT(-ā+CT)- bí dì yù+KG2-+HT8.25,8.5SS-+JP5-+ZK(*2#-+HT8.5,8.75K-+CS%100,0,0,0-阿鼻:梵语译音,意为“无间”,即痛苦没有间断的意思。“阿鼻地狱”为佛教传说中八大地狱里最下层、最痛苦的地狱。+CS-+HT8.25,8.5SS-《敦煌变文集·目连缘起》:“七日之间,母身将死,堕阿鼻地狱,受无间之余殃。”元·无名氏《来生债》四折:“若不是点化真言,险堕了阿鼻地狱。”柯灵《从〈秋瑾传〉说到〈赛金花〉》:“八国联军铁蹄下的故都,烧杀淫掠,如阿鼻地狱,尽人皆知。”+HT8.5,8.75K-+CS%100,0,0,0-也比喻无法摆脱、难以忍受的痛苦境地。+CS-+HT8.25,8.5SS-冯雪峰《上饶集中营·炼狱杂记》:“但也有少数意志薄弱的……逐步上当,终至堕入阿鼻地狱。”+CS-+HT8.25,8.5SS-";
    // Alternative sample entry:
    //string uts = "+HT5SS-+JP3-+CX2-+CT-哀兵必胜+CX-+HT-+WTXT-+JP-+KG*2-+CT(-āi+CT)- bīnɡ bì shènɡ+KG*2-+HT8.25,8.5SS-+JP5- +HT8.5,8.75K-+CS%100,0,0,0-哀兵:由于受压或遭受危难而处在悲愤中的军队。+HT8.5,8.75K-+CS%100,0,0,0-指受压迫而悲愤地奋起反抗的军队一定能胜利。常用以鼓励处于劣势的一方要建立必胜的信心和勇气。+CS-+HT8.25,8.5SS-语本《老子·六十九章》:“故抗兵相加,哀者胜矣。”+CS-+HT8.25,8.5SS-宗璞《南渡记》三章:“我们让人欺负够了,全国百姓谁不愿打!岂不闻哀兵必胜啊!”+CS-+HT8.25,8.5SS-";

    const std::string chapter = "3-1";

    // NOTE(review): the published source of this literal is damaged — the text
    // that opened each piece of markup before `chapter` appears to have been
    // stripped. Only the surviving text is reproduced here; restore the missing
    // opening markup before relying on the output.
    const std::string header = chapter + "\" role=\"汉语辞书条目库\">" + chapter + ".title\">";

    // Byte lengths of the control codes skipped in front of each paragraph.
    const std::string::size_type kSkipAfterS = 12;         // "S%100,0,0,0-" counted from the 'S'
    const std::string::size_type kSkipAfterSS = 3;         // "SS-"
    const std::string::size_type kSkipColourCode = 14;     // "+CS%100,0,0,0-"
    const std::string::size_type kSkipResetAndFont = 18;   // "+CS-" followed by "+HT8.25,8.5SS-"

    // Headword: the text after the first "T-" up to the next '+' control code.
    // Delimiting by '+' instead of a fixed byte count keeps multi-byte
    // characters intact regardless of the headword's length or encoding.
    std::string word;
    const std::string::size_type word_marker = uts.find("T-");
    if (word_marker != kNotFound) {
        std::string::size_type word_end = uts.find('+', word_marker + 2);
        if (word_end == kNotFound) {
            word_end = uts.size();
        }
        word = text_between(uts, word_marker, 2, word_end);
    }

    // Pinyin: the text after the first "(-" up to the next "+K", with the
    // characters of the embedded "+CT)-" control code stripped out.
    const std::string::size_type pinyin_marker = uts.find("(-");
    const std::string pinyin = strip_chars(
        text_between(uts, pinyin_marker, 2, first_at_or_after(uts, "+K", pinyin_marker)),
        "CT+-)");

    // Explanation: after the first "S%..." colour code, up to the first "+CS-".
    const std::string::size_type text1_marker = uts.find("S%");
    const std::string text1 = text_between(
        uts, text1_marker, kSkipAfterS, first_at_or_after(uts, "+CS-", text1_marker));

    // Third-from-last paragraph: after the third "SS", up to the last "+HT8.5".
    const std::string::size_type text2_marker = find_nth(uts, "SS", 3);
    const std::string text2 = text_between(
        uts, text2_marker, kSkipAfterSS, last_at_or_after(uts, "+HT8.5", text2_marker));

    // Second-from-last paragraph: after the second "+CS%10...", up to the first "+CS-".
    const std::string::size_type text3_marker = find_nth(uts, "+CS%10", 2);
    const std::string text3 = text_between(
        uts, text3_marker, kSkipColourCode, first_at_or_after(uts, "+CS-", text3_marker));

    // Last paragraph: after the second "+CS-" (and the font code that follows
    // it), up to the last later "+CS-".
    const std::string::size_type text4_marker = find_nth(uts, "+CS-", 2);
    const std::string text4 = text_between(
        uts, text4_marker, kSkipResetAndFont,
        text4_marker == kNotFound ? kNotFound : last_at_or_after(uts, "+CS-", text4_marker + 1));

    // NOTE(review): the empty literals below are kept exactly as they appear in
    // the published source; they presumably held XML markup that was stripped
    // when the code was posted — confirm and restore the intended tags.
    std::string fins = header;
    fins = fins + word + "" + "" + word + "";
    fins = fins + pinyin + "新华成语词典";
    fins = fins + text1 + "";
    fins = fins + text2 + "";
    fins = fins + text3 + "";
    fins = fins + text4 + "";

    std::cout << fins << '\n';
    return 0;
}

// Related topic (from the original post): xml