VC编码转换

    很多人为 VC 中的编码转换犯愁,有人甚至根据编码规范自己动手去写转换代码。查阅资料后我发现根本不需要这么做,下面总结一下实现方式。

CodePages枚举的定义(参看了.net中的定义)

// Code page identifiers. The numeric value of each enumerator is what the
// conversion functions in this file pass as the CodePage argument of
// WideCharToMultiByte / MultiByteToWideChar.
// NOTE(review): the names appear to follow the .NET encoding names with
// characters that are invalid in identifiers replaced by '_' - confirm.
enum CodePages
{
    // 37 - 1149
    IBM037 = 37,
    IBM437 = 437,
    IBM500 = 500,
    ASMO_708 = 708,
    DOS_720 = 720,
    ibm737 = 737,
    ibm775 = 775,
    ibm850 = 850,
    ibm852 = 852,
    IBM855 = 855,
    ibm857 = 857,
    IBM00858 = 858,
    IBM860 = 860,
    ibm861 = 861,
    DOS_862 = 862,
    IBM863 = 863,
    IBM864 = 864,
    IBM865 = 865,
    cp866 = 866,
    ibm869 = 869,
    IBM870 = 870,
    windows_874 = 874,
    cp875 = 875,
    shift_jis = 932,
    gb2312 = 936,
    ks_c_5601_1987 = 949,
    big5 = 950,
    IBM1026 = 1026,
    IBM01047 = 1047,
    IBM01140 = 1140,
    IBM01141 = 1141,
    IBM01142 = 1142,
    IBM01143 = 1143,
    IBM01144 = 1144,
    IBM01145 = 1145,
    IBM01146 = 1146,
    IBM01147 = 1147,
    IBM01148 = 1148,
    IBM01149 = 1149,

    // 1200 - 1361
    utf_16 = 1200,
    unicodeFFFE = 1201,
    windows_1250 = 1250,
    windows_1251 = 1251,
    Windows_1252 = 1252,
    windows_1253 = 1253,
    windows_1254 = 1254,
    windows_1255 = 1255,
    windows_1256 = 1256,
    windows_1257 = 1257,
    windows_1258 = 1258,
    Johab = 1361,

    // 10000 - 10082: macintosh / x_mac_* entries
    macintosh = 10000,
    x_mac_japanese = 10001,
    x_mac_chinesetrad = 10002,
    x_mac_korean = 10003,
    x_mac_arabic = 10004,
    x_mac_hebrew = 10005,
    x_mac_greek = 10006,
    x_mac_cyrillic = 10007,
    x_mac_chinesesimp = 10008,
    x_mac_romanian = 10010,
    x_mac_ukrainian = 10017,
    x_mac_thai = 10021,
    x_mac_ce = 10029,
    x_mac_icelandic = 10079,
    x_mac_turkish = 10081,
    x_mac_croatian = 10082,

    // 12000 - 12001: utf_32 entries
    utf_32 = 12000,
    utf_32BE = 12001,

    // 20000 - 21866
    x_Chinese_CNS = 20000,
    x_cp20001 = 20001,
    x_Chinese_Eten = 20002,
    x_cp20003 = 20003,
    x_cp20004 = 20004,
    x_cp20005 = 20005,
    x_IA5 = 20105,
    x_IA5_German = 20106,
    x_IA5_Swedish = 20107,
    x_IA5_Norwegian = 20108,
    us_ascii = 20127,
    x_cp20261 = 20261,
    x_cp20269 = 20269,
    IBM273 = 20273,
    IBM277 = 20277,
    IBM278 = 20278,
    IBM280 = 20280,
    IBM284 = 20284,
    IBM285 = 20285,
    IBM290 = 20290,
    IBM297 = 20297,
    IBM420 = 20420,
    IBM423 = 20423,
    IBM424 = 20424,
    x_EBCDIC_KoreanExtended = 20833,
    IBM_Thai = 20838,
    koi8_r = 20866,
    IBM871 = 20871,
    IBM880 = 20880,
    IBM905 = 20905,
    IBM00924 = 20924,
    EUC_JP = 20932,   // distinct from euc_jp (51932) below; names differ only in case
    x_cp20936 = 20936,
    x_cp20949 = 20949,
    cp1025 = 21025,
    koi8_u = 21866,

    // 28591 - 38598: iso_8859_* entries and x_Europa
    iso_8859_1 = 28591,
    iso_8859_2 = 28592,
    iso_8859_3 = 28593,
    iso_8859_4 = 28594,
    iso_8859_5 = 28595,
    iso_8859_6 = 28596,
    iso_8859_7 = 28597,
    iso_8859_8 = 28598,
    iso_8859_9 = 28599,
    iso_8859_13 = 28603,
    iso_8859_15 = 28605,
    x_Europa = 29001,
    iso_8859_8_i = 38598,

    // 50220 - 54936
    iso_2022_jp = 50220,
    csISO2022JP = 50221,
    iso_2022_kr = 50225,
    x_cp50227 = 50227,
    euc_jp = 51932,
    EUC_CN = 51936,
    euc_kr = 51949,
    hz_gb_2312 = 52936,
    GB18030 = 54936,

    // 57002 - 57011: x_iscii_* entries
    x_iscii_de = 57002,
    x_iscii_be = 57003,
    x_iscii_ta = 57004,
    x_iscii_te = 57005,
    x_iscii_as = 57006,
    x_iscii_or = 57007,
    x_iscii_ka = 57008,
    x_iscii_ma = 57009,
    x_iscii_gu = 57010,
    x_iscii_pa = 57011,

    // 65000 - 65001: utf_7 / utf_8
    utf_7 = 65000,
    utf_8 = 65001
};

下面几个转换函数一看就懂,主要调用 Windows API 的 WideCharToMultiByte 和 MultiByteToWideChar 函数(需要包含 <windows.h> 和 <string>,并引入 std 命名空间)。

void UnicodeToOther(const wstring &src, string& result, CodePages codePage)
{
    int n = WideCharToMultiByte(codePage, 0, src.c_str(), -1, 0, 0, 0, 0 );
    result.resize(n);
    ::WideCharToMultiByte(codePage, 0, src.c_str(), -1, (char*)result.c_str(), (int)result.length(), 0, 0 );
}



void OtherToUnicode(const string& src, wstring& result, CodePages codePage)
{
    int n = MultiByteToWideChar(codePage, 0, src.c_str(), -1, NULL, 0);
    result.resize(n);
    ::MultiByteToWideChar(codePage, 0, src.c_str(), -1, (TCHAR*)result.c_str(), (int)result.length());
}

void EncodingConvert(const string& src, CodePages srcCodePage, string& result, CodePages resultCodePage)
{
    wstring buffer;
    OtherToUnicode(src, buffer, srcCodePage);
    UnicodeToOther(buffer, result, resultCodePage);
}

你可能感兴趣的:(编码转换)