linux下面 UTF-8 UTF-16/ UTF-32 互转代码

#include <stdint.h>
#include <stdio.h>
#include <wchar.h>
 
/**
 * Decode a NUL-terminated UTF-8 byte string into a wide-character string.
 *
 * Sequences of 1 to 6 bytes are accepted (the original, pre-RFC 3629 UTF-8
 * layout), matching what UnicodeToUTF8 can produce. Decoding stops at the
 * first malformed sequence: a stray continuation byte, a 0xFE/0xFF lead
 * byte, or a multi-byte sequence that is truncated or interrupted by a
 * non-continuation byte. Everything decoded before that point is kept.
 *
 * @param pmbs  UTF-8 input, NUL-terminated.
 * @param pwcs  Output buffer for the decoded characters.
 * @param size  Capacity of pwcs in wchar_t elements, terminator included.
 * @return Number of wchar_t elements written, including the terminating 0;
 *         0 if either pointer is NULL or size is not positive (nothing is
 *         written in that case).
 */
int UTF8ToUnicode(const char *pmbs, wchar_t *pwcs, int size)
{
    int cnt = 0;

    if (pmbs == NULL || pwcs == NULL || size <= 0) {
        return 0;
    }
    // Reserve one slot up front for the terminating zero.
    size--;

    // Work on unsigned bytes so that values above 0x7F never sign-extend.
    const unsigned char *src = (const unsigned char *)pmbs;

    while (*src != 0 && size > 0) {
        unsigned char lead = *src;
        uint32_t cp;

        if (lead < 0x80U) {
            cp = lead;
            src++;
        } else {
            // The number of leading 1 bits in the lead byte is the
            // total length of the sequence.
            int len = 0;
            unsigned char probe = lead;
            while (probe & 0x80U) {
                probe = (unsigned char)(probe << 1);
                len++;
            }
            // len == 1 is a stray continuation byte; len > 6 is 0xFE/0xFF.
            if (len < 2 || len > 6) {
                break;
            }

            // Check every continuation byte before consuming any of them.
            // The scan stops at the first mismatch, which includes the NUL
            // terminator, so it never reads past the end of the input.
            int i;
            for (i = 1; i < len; i++) {
                if ((src[i] & 0xC0U) != 0x80U) {
                    break;
                }
            }
            if (i < len) {
                break;
            }

            // Accumulate in an unsigned 32-bit value: shifting a signed
            // wchar_t in place could overflow for long sequences.
            cp = lead & (0xFFU >> len);
            for (i = 1; i < len; i++) {
                cp = (cp << 6) | (src[i] & 0x3FU);
            }
            src += len;
        }

        *pwcs++ = (wchar_t)cp;
        size--;
        cnt++;
    }

    *pwcs = 0;
    cnt++;
    return cnt;
}
 
/**
 * Encode a zero-terminated wide-character string as UTF-8.
 *
 * Values below 0x80000000 are encoded using 1 to 6 bytes. Encoding stops
 * early when the next character does not fit in the remaining space or
 * when a value cannot be encoded; the output is zero-terminated either way.
 *
 * @param pwcs  Wide-character input, zero-terminated.
 * @param pmbs  Output buffer for the UTF-8 bytes.
 * @param size  Capacity of pmbs in bytes, terminator included.
 * @return Number of bytes written, including the terminating 0; 0 if either
 *         pointer is NULL or size is not positive (nothing is written then).
 */
int UnicodeToUTF8(const wchar_t *pwcs, char *pmbs, int size)
{
    int written = 0;

    if (pwcs == NULL || pmbs == NULL || size <= 0) {
        return 0;
    }
    // Keep one byte in reserve for the terminating zero.
    size--;

    for (; *pwcs != 0 && size > 0; pwcs++) {
        unsigned int cp = (unsigned int)*pwcs;
        int nbytes;

        // Pick the sequence length from the magnitude of the value.
        if (cp < 0x00000080U) {
            nbytes = 1;
        } else if (cp < 0x00000800U) {
            nbytes = 2;
        } else if (cp < 0x00010000U) {
            nbytes = 3;
        } else if (cp < 0x00200000U) {
            nbytes = 4;
        } else if (cp < 0x04000000U) {
            nbytes = 5;
        } else if (cp < 0x80000000U) {
            nbytes = 6;
        } else {
            // Value cannot be represented.
            break;
        }

        // Not enough room left for this character.
        if (size < nbytes) {
            break;
        }

        if (nbytes == 1) {
            *pmbs++ = (char)cp;
        } else {
            int shift = 6 * (nbytes - 1);

            // Lead byte: nbytes one-bits, then the topmost payload bits.
            *pmbs++ = (char)((0xFFU << (8 - nbytes)) | (cp >> shift));
            // Continuation bytes: 10xxxxxx, six payload bits each.
            while (shift > 0) {
                shift -= 6;
                *pmbs++ = (char)(0x80U | ((cp >> shift) & 0x3FU));
            }
        }

        size -= nbytes;
        written += nbytes;
    }

    *pmbs = 0;
    return written + 1;
}
 
/* Demo driver: runs one conversion in each direction and prints the
 * element count each conversion reports. */
int main(void)
{
    // Wide string -> UTF-8.
    // Test this part on Linux; the Windows console cannot print UTF-8 strings.
    wchar_t wide_src[] = L"测试文字";
    char utf8_dst[256] = { 0 };
    int utf8_len = UnicodeToUTF8(wide_src, utf8_dst,
                                 sizeof utf8_dst / sizeof utf8_dst[0]);
    printf("%d\n", utf8_len);

    // UTF-8 -> wide string.
    // Test this part on Linux; on Windows the char type is not UTF-8 encoded.
    char utf8_src[] = "测试文字";
    wchar_t wide_dst[256] = { 0 };
    int wide_len = UTF8ToUnicode(utf8_src, wide_dst,
                                 sizeof wide_dst / sizeof wide_dst[0]);
    printf("%d\n", wide_len);

    return 0;
}


Windows 下面只需要调用 setlocale 就可以了

setlocale(LC_ALL,"");     // Select the environment's default locale; alternatively setlocale(LC_ALL, "chs")
wcstombs(dest, source, length);
setlocale(LC_ALL,"C");     // Switch back to the default "C" locale



你可能感兴趣的:(编程)