Unicode,UTF8,GB2312,UCS2,GBK之间的转换
这里整理了几种编码格式之间的转换函数。这类转换平时用得不多,但在做短信协议时经常会遇到。最近在做短信平台接口,顺手总结了几个,并不全面。
//////////////////////////////////////////////////////////////////////////////
// Percent-encodes a NUL-terminated byte string (for example GB2312 Chinese
// text) so it can be placed in a URL.
//
// ASCII letters and digits are copied unchanged, a space becomes '+', and
// every other byte is written as '%' followed by the results of toHex() on its
// high and low nibble (toHex is defined elsewhere; presumably it maps 0..15 to
// a hex digit character).
//
// pInBuf : NUL-terminated input, processed one byte at a time.
//          NOTE(review): the byte-wise walk assumes a non-UNICODE build where
//          TCHAR is char - confirm.
// szOut  : receives the encoded, NUL-terminated text. The caller must provide
//          room for three output bytes per input byte plus the terminator.
// Returns the number of bytes written, not counting the terminator
// (0 if either pointer is NULL).
int URLEncode(LPCTSTR pInBuf,LPTSTR szOut)
{
    if (pInBuf == NULL || szOut == NULL) {
        return 0;
    }

    LPBYTE pInTmp = (LPBYTE)pInBuf;
    LPBYTE pOutTmp = (LPBYTE)szOut;

    while (*pInTmp) {
        // isalnum() is locale dependent; restrict it to the ASCII range so
        // that bytes of multi-byte characters are always percent-encoded.
        if (*pInTmp < 0x80 && isalnum(*pInTmp)) {
            *pOutTmp++ = *pInTmp;
        } else if (*pInTmp == ' ') {
            // Only a real space may be folded into '+'; tabs and line breaks
            // must stay distinguishable, so they fall through to %HH.
            *pOutTmp++ = '+';
        } else {
            *pOutTmp++ = '%';
            *pOutTmp++ = toHex(*pInTmp >> 4);
            *pOutTmp++ = toHex(*pInTmp & 0xF);
        }
        pInTmp++;
    }

    *pOutTmp = '\0';
    return (int)(pOutTmp - (LPBYTE)szOut);
}
///////////////////////////////////////////////////////////////////
// Encodes one UTF-16 code unit as a three-byte UTF-8 sequence.
//
// pOut   : buffer of at least four characters; receives the three encoded
//          bytes followed by a NUL terminator.
// wcText : the code unit to encode.
// Returns pOut.
//
// The three-byte form is emitted unconditionally, so the result is only
// well-formed UTF-8 for values in U+0800..U+FFFF. The width is deliberately
// kept fixed because callers may copy exactly three bytes from the result.
LPCTSTR UnicodeToUTF8Char(LPTSTR pOut,WCHAR wcText)
{
    // Work on the numeric value instead of the in-memory bytes of wcText so
    // the result does not depend on byte order or on the width of TCHAR.
    pOut[0] = 0xE0 | ((wcText >> 12) & 0x0F);   // 1110xxxx : bits 15..12
    pOut[1] = 0x80 | ((wcText >> 6) & 0x3F);    // 10xxxxxx : bits 11..6
    pOut[2] = 0x80 | (wcText & 0x3F);           // 10xxxxxx : bits 5..0
    pOut[3] = '\0';
    return pOut;
}
// Converts a GB2312 (single/double-byte) string to UTF-8.
//
// pUTF8Out     : output buffer; receives the UTF-8 bytes followed by a NUL
//                terminator. At most three bytes are produced per two input
//                bytes, so (GB2312Len * 3) / 2 + 2 bytes are always enough.
// pGB2312Input : input bytes; need not be NUL-terminated.
// GB2312Len    : number of input bytes to convert.
// Returns pUTF8Out (NULL if pUTF8Out is NULL).
//
// Bytes below 0x80 are copied as ASCII. Any other byte is taken as the lead
// byte of a two-byte character and decoded with the system ANSI code page
// (CP_ACP). A byte that cannot be decoded, including a lead byte cut off at
// the end of the input, is replaced by '?' and skipped on its own.
//
// NOTE(review): CP_ACP only means GB2312/GBK when the system code page is set
// that way - consider passing code page 936 explicitly.
// NOTE(review): TCHAR buffers are handed straight to the narrow-string API,
// which assumes a non-UNICODE build.
LPCTSTR GB2312ToUTF8(LPTSTR pUTF8Out,LPCTSTR pGB2312Input, int GB2312Len)
{
    if (pUTF8Out == NULL) {
        return NULL;
    }

    LPCTSTR lpReturn = (LPCTSTR)pUTF8Out;
    if (pGB2312Input == NULL) {
        GB2312Len = 0;
    }
    LPCTSTR pGB2312Cursor = pGB2312Input;
    LPCTSTR pGB2312InputEnd = pGB2312Cursor + GB2312Len;
    WCHAR wcBuffer = 0;

    while (pGB2312Cursor < pGB2312InputEnd) {
        // Test the high bit instead of "> 0": that also classifies 0x00 as
        // ASCII and does not depend on whether char is signed.
        if ((*pGB2312Cursor & 0x80) == 0) {
            *pUTF8Out++ = *pGB2312Cursor++;
            continue;
        }

        // Never read past the end, and never use wcBuffer when the API did
        // not deliver exactly one wide character.
        if (pGB2312InputEnd - pGB2312Cursor < 2 ||
            ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pGB2312Cursor, 2, &wcBuffer, 1) != 1) {
            *pUTF8Out++ = '?';
            pGB2312Cursor++;
            continue;
        }

        // Emit the shortest UTF-8 form; forcing three bytes would create
        // overlong (invalid) sequences for code points below U+0800.
        if (wcBuffer < 0x80) {
            *pUTF8Out++ = (CHAR)wcBuffer;
        } else if (wcBuffer < 0x800) {
            *pUTF8Out++ = (CHAR)(0xC0 | (wcBuffer >> 6));
            *pUTF8Out++ = (CHAR)(0x80 | (wcBuffer & 0x3F));
        } else {
            *pUTF8Out++ = (CHAR)(0xE0 | (wcBuffer >> 12));
            *pUTF8Out++ = (CHAR)(0x80 | ((wcBuffer >> 6) & 0x3F));
            *pUTF8Out++ = (CHAR)(0x80 | (wcBuffer & 0x3F));
        }
        pGB2312Cursor += 2;
    }

    *pUTF8Out = '\0';
    return lpReturn;
}
int UTF8ToGB(const char* str,char *out)
{
WCHAR *strSrc;
TCHAR *szRes;
int len;
//获得临时变量的大小
int i = MultiByteToWideChar(CP_UTF8, 0, str, -1, NULL, 0);
strSrc = new WCHAR[i+1];
MultiByteToWideChar(CP_UTF8, 0, str, -1, strSrc, i);
//获得临时变量的大小
i = WideCharToMultiByte(CP_ACP, 0, strSrc, -1, NULL, 0, NULL, NULL);
szRes = new TCHAR[i+1];
WideCharToMultiByte(CP_ACP, 0, strSrc, -1, szRes, i, NULL, NULL);
len = (i+1)*sizeof(CHAR);
memcpy(out,szRes,len);
out[len+1] ='/0';
delete []strSrc;
delete []szRes;
return len;
}
// Converts a GB2312 string to ASCII-only web text in which every double-byte
// character is written as a decimal numeric character reference "&#N;".
//
// pWebGB2312Out : output buffer; receives the text followed by a NUL
//                 terminator. A two-byte character expands to at most eight
//                 bytes ("&#65535;"), so 4 * GB2312Len + 1 bytes are enough.
// pGB2312Input  : input bytes; need not be NUL-terminated.
// GB2312Len     : number of input bytes to convert.
// Returns pWebGB2312Out (NULL if pWebGB2312Out is NULL).
//
// Bytes below 0x80 are copied as ASCII. Any other byte is taken as the lead
// byte of a two-byte character and decoded with the system ANSI code page
// (CP_ACP). A byte that cannot be decoded, including a lead byte cut off at
// the end of the input, is replaced by '?' and skipped on its own.
//
// NOTE(review): CP_ACP only means GB2312/GBK when the system code page is set
// that way - consider passing code page 936 explicitly.
// NOTE(review): TCHAR buffers are handed straight to narrow-string functions,
// which assumes a non-UNICODE build.
LPCTSTR GB2312ToWebGB2312(LPTSTR pWebGB2312Out,LPCTSTR pGB2312Input, int GB2312Len)
{
    if (pWebGB2312Out == NULL) {
        return NULL;
    }

    LPCTSTR lpReturn = (LPCTSTR)pWebGB2312Out;
    if (pGB2312Input == NULL) {
        GB2312Len = 0;
    }
    LPCTSTR pGB2312Cursor = pGB2312Input;
    LPCTSTR pGB2312InputEnd = pGB2312Cursor + GB2312Len;
    WCHAR wcBuffer = 0;

    while (pGB2312Cursor < pGB2312InputEnd) {
        // Test the high bit instead of "> 0": that also classifies 0x00 as
        // ASCII and does not depend on whether char is signed.
        if ((*pGB2312Cursor & 0x80) == 0) {
            *pWebGB2312Out++ = *pGB2312Cursor++;
            continue;
        }

        // Never read past the end, and never print wcBuffer when the API did
        // not deliver exactly one wide character.
        if (pGB2312InputEnd - pGB2312Cursor < 2 ||
            ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, pGB2312Cursor, 2, &wcBuffer, 1) != 1) {
            *pWebGB2312Out++ = '?';
            pGB2312Cursor++;
            continue;
        }

        pWebGB2312Out += sprintf(pWebGB2312Out, "&#%d;", (int)wcBuffer);
        pGB2312Cursor += 2;
    }

    *pWebGB2312Out = '\0';
    return lpReturn;
}
// Encodes an array of UCS-2 code units as a NUL-terminated UTF-8 string.
//
// ucs2  : input code units; `count` elements are read.
// count : number of code units; zero or a negative value yields an empty
//         string.
// utf8  : output buffer; receives the encoded bytes and a NUL terminator.
//         Up to 3 * count + 1 bytes are written.
//
// Does nothing when either pointer is NULL. Every code unit is encoded on its
// own (one byte below 0x80, two bytes below 0x800, three bytes otherwise);
// surrogate pairs are not combined.
void UCS2toUTF8(unsigned short *ucs2, int count, char *utf8)
{
    if (ucs2 == NULL || utf8 == NULL) {
        return;
    }

    char *out = utf8;
    for (int i = 0; i < count; i++) {
        const unsigned int unicode = ucs2[i];
        if (unicode < 0x80) {
            *out++ = (char)unicode;                          // 0xxxxxxx
        } else if (unicode < 0x800) {
            *out++ = (char)(0xC0 | (unicode >> 6));          // 110xxxxx
            *out++ = (char)(0x80 | (unicode & 0x3F));        // 10xxxxxx
        } else {
            *out++ = (char)(0xE0 | (unicode >> 12));         // 1110xxxx
            *out++ = (char)(0x80 | ((unicode >> 6) & 0x3F)); // 10xxxxxx
            *out++ = (char)(0x80 | (unicode & 0x3F));        // 10xxxxxx
        }
    }

    // Terminate with a real NUL character.
    *out = '\0';
}
///////////////////////////////////////////////////////////////////
//UCS2转换UTF8
///////////////////////////////////////////////////////////////////
///////////////////////////////////////////////////////////////////
//UTF8转换GB
///////////////////////////////////////////////////////////////////