wchar_t 转换UTF16编码

wchar_t 在 Windows 下是 UTF-16 编码(16 位),在 Linux 下则是 UTF-32 编码(32 位),这种差异有时会带来代码移植问题。

下列转换宏提供了 wchar_t 字符串与 UTF-16 编码字符串之间的双向转换(WCS2UTF16 与 UTF162WCS)。

 

 

/*
 * wchar_t <-> UTF-16 conversion macros.
 *
 * Usage (inside a function body):
 *
 *     USES_WCSUTF16_CONV;
 *     unsigned short *u16 = WCS2UTF16(wide_string);
 *     wchar_t        *wcs = UTF162WCS(utf16_string);
 *
 * When WIN32 is defined the macros expand to their argument unchanged.
 * Otherwise the result lives in a buffer obtained from alloca(), so it is
 * only valid until the calling function returns and must not be freed.
 * Avoid using the macros inside loops or on very long strings: every
 * expansion consumes additional stack space.
 */
#ifdef WIN32

#define USES_WCSUTF16_CONV
#define WCS2UTF16(wcs) (wcs)
#define UTF162WCS(utf16) (utf16)

#else /* !WIN32 */

#include <stddef.h> /* NULL, size_t, wchar_t */
#include <string.h> /* memcpy */
#if defined(__linux__) || defined(__APPLE__) || defined(__sun)
#include <alloca.h> /* alloca */
#else
#include <stdlib.h> /* some systems declare alloca here instead */
#endif

/*
 * Convert a wide string to UTF-16.
 *
 * wcs   - source wide string.
 * utf16 - destination buffer; must have room for the full UTF-16 encoding
 *         (use _wcsu16_utf16len(wcs) + 1 units for a NUL-terminated string).
 * len   - upper bound on the number of source elements to read.
 *
 * Conversion stops after the first NUL has been copied or after `len`
 * source elements, whichever comes first.  Stopping at the NUL matters:
 * WCS2UTF16 passes the UTF-16 length here, which is larger than the source
 * length whenever the string contains characters above U+FFFF, and reading
 * `len` source elements unconditionally would run past both buffers.
 *
 * Returns `utf16`.
 */
static unsigned short *_wcsu16_wcs2utf16(const wchar_t *wcs, unsigned short *utf16, int len)
{
    unsigned short *out = utf16;
    int i;

    for (i = 0; i < len; i++) {
        int wc = (int)wcs[i];

        if (wc > 0xFFFF) {
            /* Supplementary plane: split into a surrogate pair. */
            wc -= 0x00010000L;
            *out++ = (unsigned short)(0xD800 | (wc >> 10));
            *out++ = (unsigned short)(0xDC00 | (wc & 0x03FF));
        } else {
            *out++ = (unsigned short)wc;
            if (wc == 0) {
                break; /* terminator copied: nothing valid follows */
            }
        }
    }
    return utf16;
}

/*
 * Number of UTF-16 code units needed to encode the NUL-terminated wide
 * string `wcs`, not counting the terminator.
 */
static int _wcsu16_utf16len(const wchar_t *wcs)
{
    int units = 0;
    int wc;

    while ((wc = (int)*wcs++) != 0) {
        units += (wc > 0xFFFF) ? 2 : 1;
    }
    return units;
}

/*
 * Number of UTF-16 code units in the NUL-terminated string `utf16`, not
 * counting the terminator.  A surrogate pair counts as two units, so the
 * result is an upper bound on the number of wide characters it decodes to.
 */
static int _wcsu16_wcslen(const unsigned short *utf16)
{
    const unsigned short *p = utf16;

    while (*p != 0) {
        p++;
    }
    return (int)(p - utf16);
}

/*
 * Convert UTF-16 to a wide string.
 *
 * utf16 - source code units.
 * wcs   - destination buffer with room for at least `len` elements.
 * len   - number of source code units to convert, INCLUDING the terminating
 *         NUL (this is what UTF162WCS passes).
 *
 * Nothing is written when len <= 0.  When wchar_t is wider than 16 bits,
 * valid surrogate pairs are combined into one code point, an unpaired
 * surrogate is passed through unchanged, and the last element written is
 * forced to NUL.
 *
 * Returns `wcs`.
 */
static wchar_t *_wcsu16_utf162wcs(const unsigned short *utf16, wchar_t *wcs, int len)
{
    if (len <= 0) {
        return wcs;
    }

    if (sizeof(unsigned short) == sizeof(wchar_t)) {
        /* wchar_t already holds UTF-16 units: copy exactly `len` of them
         * (terminator included) so the len-element buffer is not overrun. */
        memcpy(wcs, utf16, (size_t)len * sizeof(wchar_t));
        return wcs;
    } else {
        const unsigned short *end = utf16 + len;
        wchar_t *out = wcs;

        while (utf16 < end) {
            unsigned long cp = *utf16++;

            /* Combine only a high surrogate that is really followed, within
             * bounds, by a low surrogate. */
            if (cp >= 0xD800 && cp < 0xDC00 && utf16 < end &&
                *utf16 >= 0xDC00 && *utf16 <= 0xDFFF) {
                unsigned long low = *utf16++;
                cp = (((cp & 0x3FF) << 10) | (low & 0x3FF)) + 0x10000;
            }
            *out++ = (wchar_t)cp;
        }
        out[-1] = 0; /* len > 0, so at least one element was written */
        return wcs;
    }
}

/* Declares the temporaries used by WCS2UTF16 / UTF162WCS in this scope. */
#define USES_WCSUTF16_CONV \
    int _len = 0; (void)_len; \
    const wchar_t *_wcs = NULL; (void)_wcs; \
    const unsigned short *_utf16 = NULL; (void)_utf16

/* Wide string -> UTF-16 in an alloca() buffer; NULL in, NULL out. */
#define WCS2UTF16(wcs) \
    (((_wcs = (wcs)) == NULL) ? NULL : ( \
        _len = (_wcsu16_utf16len(_wcs) + 1), \
        _wcsu16_wcs2utf16(_wcs, \
            (unsigned short *)alloca((size_t)_len * sizeof(unsigned short)), \
            _len)))

/* UTF-16 -> wide string in an alloca() buffer; NULL in, NULL out. */
#define UTF162WCS(utf16) \
    (((_utf16 = (utf16)) == NULL) ? NULL : ( \
        _len = (_wcsu16_wcslen(_utf16) + 1), \
        _wcsu16_utf162wcs(_utf16, \
            (wchar_t *)alloca((size_t)_len * sizeof(wchar_t)), \
            _len)))

#endif /* WIN32 */

你可能感兴趣的:(C/C++)