1, 基本概念,几种编码方式
1,unicode: 是一个字符集; utf16, UCS-2编码,little endian格式
2,Unicode big endian编码
3,utf8: unicode的一种编码方式(不是独立的字符集),变长编码,一个字符占1~4个字节
4,ansi: 本地编码
英文-ascii ,英文字符和二进制之间的关系,包含128个字符;128--255扩展字符,
简体中文- gb2312,DBCS编码,一个汉字占两个英文字符的宽度,是对ascii的中文扩展:一个不大于127的字符的意义与原来相同,但两个大于127的字符连在一起时,就表示一个汉字;数字、标点等符号既在127以内存在,也在127以外存在:在127以内的是半角字符,在127以外(双字节)的是全角字符。字符集范围: GB2312 < GBK 标准 < GB18030;
繁体中文-big5
2,字符串长度: "abc汉字" unicode(UTF-16): 5个字符,一个字符两个字节,共10个字节; dbcs: 长度为7(7个字节),每个汉字占两个英文字符
3, 实现,如果有多国语言,则用unicode来显示。工程内部用utf8来传递
::MessageBoxW(NULL, utf8Wstr.c_str(), L"utf8Wstr", MB_OK);
#ifndef __K_UTILITY_H__ #define __K_UTILITY_H__ /************************************************************************/ /* 编码转换 utf8 - gb2312 在unix平台中可以使用iconv来做转换 在windows平台可以用MultiByteToWideChar/WideCharToMultiByte 函数. char - wchar_t 使用CRT库的mbstowcs()函数和wcstombs()函数,平台无关,需设定locale */ /************************************************************************/ #include "OPPOS.h" #include <locale.h> #ifndef OPP_WIN32 #include <iconv.h> #endif #include <string> using namespace std; inline string IntToString( int nVal ); inline string Int64ToString( __int64 liVal ); inline string FloatToString( float fVal ); inline int StringToInt( const char * pVal ); inline __int64 StringToInt64( const char * pVal ); inline float StringToFloat( const char* pVal ); inline int WideStrToInt( const wchar_t * pVal ); inline __int64 WideStrToInt64( const wchar_t * pVal ); inline float WideStrToFloat( const wchar_t * pVal ); inline wstring IntToWideStr( int nVal ); inline wstring Int64ToWideStr( __int64 lnVal ); inline wstring FloatToWideStr( float fVal ); inline string ws2s(const wstring& ws); inline wstring s2ws(const string& s); //GB2312 转为 UTF-8 inline void GB2312ToUTF_8(string& pOut,char *pText, int pLen); //UTF-8 转为 GB2312 inline void UTF_8ToGB2312(string &pOut, char *pText, int pLen); #ifndef OPP_WIN32 //代码转换:从一种编码转为另一种编码 inline int code_convert(char *from_charset,char *to_charset,char *inbuf,int inlen,char *outbuf,int outlen); //UNICODE码转为GB2312码 inline int u2g(char *inbuf,int inlen,char *outbuf,int outlen); //GB2312码转为UNICODE码 inline int g2u(char *inbuf,size_t inlen,char *outbuf,size_t outlen); #else // 把UTF-8转换成Unicode inline void UTF_8ToUnicode(WCHAR* pOut,char *pText); // Unicode 转换成UTF-8 inline void UnicodeToUTF_8(char* pOut,WCHAR* pText); // 把Unicode 转换成 GB2312 inline void UnicodeToGB2312(char* pOut,unsigned short uData); // GB2312 转换成 Unicode inline void Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer); //GB2312 转为 UTF-8 inline void GB2312ToUTF_8ByWin(string& pOut,char *pText, 
int pLen); //UTF-8 转为 GB2312 inline void UTF_8ToGB2312ByWin(string &pOut, char *pText, int pLen); inline char* UTF8ToString(const char* src, char* dest, int dest_size); #endif ////////////////////////////////////////////////////////////////////////// inline string IntToString( int nVal ) { // integer MAX : 4294967295L char Buf[16] = {'\0'}; _itoa_s(nVal, Buf, sizeof(Buf), 10L); return string(Buf); } inline string Int64ToString( __int64 liVal ) { // integer_64 MAX : 18446744073709551615L char Buf[32] = {'\0'}; _i64toa_s(liVal, Buf, sizeof(Buf), 10L); return string(Buf); } inline string FloatToString( float fVal ) { char Buf[32] = {'\0'}; sprintf_s( Buf, sizeof(Buf), "%f", fVal); return string(Buf); } inline int StringToInt( const char * pVal ) { assert(pVal); return ( ::atoi(pVal) ); } inline __int64 StringToInt64( const char * pVal ) { assert(pVal); return ( ::_atoi64(pVal) ); } inline float StringToFloat( const char* pVal ) { assert(pVal); return float( ::atof(pVal) ); } inline int WideStrToInt( const wchar_t * pVal ) { assert(pVal); return (::_wtoi(pVal)); } inline __int64 WideStrToInt64( const wchar_t * pVal ) { assert(pVal); return (::_wtoi64(pVal)); } inline float WideStrToFloat( const wchar_t * pVal ) { assert(pVal); return ((float)::_wtof(pVal)); } inline wstring IntToWideStr( int nVal ) { wchar_t buf[32] = {L"\0"}; _itow_s( nVal, buf, 32, 10 ); return wstring(buf); } inline wstring Int64ToWideStr( __int64 lnVal ) { wchar_t buf[32] = {L"\0"}; _i64tow_s( lnVal, buf, 32, 10 ); return wstring(buf); } inline wstring FloatToWideStr( float fVal ) { wchar_t buf[32] = {L"\0"}; wsprintfW(buf, L"%f", fVal); return wstring(buf); } inline string ws2s(const wstring& ws) { string curLocale = setlocale(LC_ALL, NULL); // curLocale = "C"; //以gbk页码来翻译为中文的双字节 setlocale(LC_ALL, "chs"); const wchar_t* _Source = ws.c_str(); size_t _Dsize = 2 * ws.size() + 1; char *_Dest = new char[_Dsize]; memset(_Dest,0,_Dsize); wcstombs(_Dest,_Source,_Dsize); string result = _Dest; delete 
[]_Dest; setlocale(LC_ALL, curLocale.c_str()); return result; } inline wstring s2ws(const string& s) { //以gbk页码来翻译为中文的双字节 setlocale(LC_ALL, "chs"); const char* _Source = s.c_str(); size_t _Dsize = s.size() + 1; wchar_t *_Dest = new wchar_t[_Dsize]; wmemset(_Dest, 0, _Dsize); mbstowcs(_Dest,_Source,_Dsize); wstring result = _Dest; delete []_Dest; setlocale(LC_ALL, "C"); return result; } #ifndef OPP_WIN32 //代码转换:从一种编码转为另一种编码 int code_convert(char *from_charset,char *to_charset,char *inbuf,int inlen,char *outbuf,int outlen) { iconv_t cd; int rc; char **pin = &inbuf; char **pout = &outbuf; cd = iconv_open(to_charset,from_charset); if (cd==0) return -1; memset(outbuf,0,outlen); if (iconv(cd,pin,&inlen,pout,&outlen)==-1) return -1; iconv_close(cd); return 0; } //UNICODE码转为GB2312码 int u2g(char *inbuf,int inlen,char *outbuf,int outlen) { return code_convert("utf-8","gb2312",inbuf,inlen,outbuf,outlen); } //GB2312码转为UNICODE码 int g2u(char *inbuf,size_t inlen,char *outbuf,size_t outlen) { return code_convert("gb2312","utf-8",inbuf,inlen,outbuf,outlen); } #else // 把UTF-8转换成Unicode void UTF_8ToUnicode(WCHAR* pOut,char *pText) { char* uchar = (char *)pOut; uchar[1] = ((pText[0] & 0x0F) << 4) + ((pText[1] >> 2) & 0x0F); uchar[0] = ((pText[1] & 0x03) << 6) + (pText[2] & 0x3F); return; } // Unicode 转换成UTF-8 void UnicodeToUTF_8(char* pOut,WCHAR* pText) { // 注意 WCHAR高低字的顺序,低字节在前,高字节在后 char* pchar = (char *)pText; pOut[0] = (0xE0 | ((pchar[1] & 0xF0) >> 4)); pOut[1] = (0x80 | ((pchar[1] & 0x0F) << 2)) + ((pchar[0] & 0xC0) >> 6); pOut[2] = (0x80 | (pchar[0] & 0x3F)); return; } // 把Unicode 转换成 GB2312 void UnicodeToGB2312(char* pOut,unsigned short uData) { WideCharToMultiByte(CP_ACP,NULL,(WCHAR*)&uData,1,pOut,sizeof(WCHAR),NULL,NULL); return; } // GB2312 转换成 Unicode void Gb2312ToUnicode(WCHAR* pOut,char *gbBuffer) { ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,pOut,1); return; } //GB2312 转为 UTF-8 void GB2312ToUTF_8ByWin(string& pOut,char *pText, int pLen) { char buf[4]; char* 
rst = new char[pLen + (pLen >> 2) + 2]; memset(buf,0,4); memset(rst,0,pLen + (pLen >> 2) + 2); int i = 0; int j = 0; while(i < pLen) { //如果是英文直接复制就可以 if( *(pText + i) >= 0) { rst[j++] = pText[i++]; } else { WCHAR pbuffer; Gb2312ToUnicode(&pbuffer,pText+i); UnicodeToUTF_8(buf,&pbuffer); unsigned short int tmp = 0; tmp = rst[j] = buf[0]; tmp = rst[j+1] = buf[1]; tmp = rst[j+2] = buf[2]; j += 3; i += 2; } } rst[j] = '\0'; //返回结果 pOut = rst; delete []rst; return; } char* UTF8ToString(const char* src, char* dest, int dest_size) { wchar_t wbuffer[2048]; #ifdef _WIN32 MultiByteToWideChar(CP_ACP, 0, src, -1, wbuffer, 2048); WideCharToMultiByte(CP_UTF8, 0, wbuffer, -1, dest, dest_size, NULL, NULL); #else mbstowcs(wbuffer, src, 2048); wcstombs(dest, wbuffer, dest_size); #endif return dest; } //UTF-8 转为 GB2312 void UTF_8ToGB2312ByWin(string &pOut, char *pText, int pLen) { char * newBuf = new char[pLen+1]; newBuf[pLen] = '\0'; char Ctemp[4] = {"\0"}; int i =0; int j = 0; while(i < pLen) { if(pText[i] > 0) { newBuf[j++] = pText[i++]; } else { WCHAR Wtemp; UTF_8ToUnicode(&Wtemp,pText + i); UnicodeToGB2312(Ctemp,Wtemp); newBuf[j] = Ctemp[0]; newBuf[j + 1] = Ctemp[1]; i += 3; j += 2; } } newBuf[j] = '\0'; pOut = newBuf; delete []newBuf; return; } #endif //GB2312 转为 UTF-8 inline void GB2312ToUTF_8(string& pOut,char *pText, int pLen) { #ifndef OPP_WIN32 int outLen = pLen + (pLen >> 2) + 2; g2u(pText, pLen, pOut, outLen); #else GB2312ToUTF_8ByWin(pOut, pText, pLen); #endif } //UTF-8 转为 GB2312 inline void UTF_8ToGB2312(string &pOut, char *pText, int pLen) { #ifndef OPP_WIN32 u2g(pText, pLen, pOut, pLen+1); #else UTF_8ToGB2312ByWin(pOut, pText, pLen); #endif } #endif // end of __K_UTILITY_H__
url: http://blog.csdn.net/lijie_sh/article/details/4396872