// 以下是GB2312编码与UTF-8编码相互转换的代码,自测通过。
// 思路:GB2312与UTF-8之间的转换以UNICODE作为中间编码:先将源编码转化为UNICODE,再由UNICODE转化为目标编码。
//------------------------------------------------------------------------------------------------------------------------------
#ifdef LINUXCODE
#include <iconv.h>
#include <stdio.h>
#include <string.h>
#endif
//这里再包含其它头文件
//这里是由UTF-8转化为UNICODE,其中pText是需要转换内容的指针,pOut是转换后内容的指针
int cU8xU(WCHAR* pOut,char *pText)
{
int ret = 0;
char* uchar = (char *)pOut;
unsigned cIn = (unsigned char)pText[0];
if(cIn<0x80)
{ // ASCII 0x00 ~ 0x7f
pOut[0] = pText[0];
}
else if(cIn<0xdf)
{
uchar[0] = (pText[0]<<6)|(pText[1]&0x3f);
uchar[1] = (pText[0]>>2)&0x0f;
ret = 1;
}
else if(cIn<0xef)
{
uchar[0] = (pText[1]<<6)|(pText[2]&0x3f);
uchar[1] = (pText[0]<<4)|((pText[1]>>2)&0x0f);
ret = 2;
}
else if(cIn<0xf7)
{
uchar[0] = (pText[2]<<6)|(pText[3]&0x3f);
uchar[1] = (pText[1]<<4)|((pText[2]>>2)&0x0f);
uchar[2] = ((pText[0]<<2)&0x1c)|((pText[1]>>4)&0x03);
ret = 3;
}
return ret;
}
//代码转换:从一种编码转为另一种编码,这里是linux下的转换
#ifdef LINUXCODE
int code_convert(char *from_charset,char *to_charset,char *inbuf,int inlen,char *outbuf,int outlen)
{
iconv_t cd;
int rc;
char **pin = &inbuf;
char **pout = &outbuf;
cd = iconv_open(to_charset,from_charset);
if (cd==(iconv_t)-1)
{
perror("open");
iconv_close(cd);
return 0;
}
//memset(outbuf,0,outlen);
rc = iconv(cd,pin,(size_t *)&inlen,pout,(size_t*)&outlen);
if(rc == -1)
{
perror("conv");
iconv_close(cd);
return 0;
}
iconv_close(cd);
return rc;
}
#endif
#ifndef LINUXCODE
int cUxG(char* pOut,WCHAR* pText)
{
int ret = 0;
if(pText[0]<0x80)
{
pOut[0] = (char)pText[0];
}
else
{
::WideCharToMultiByte(CP_ACP,0,pText,1,pOut,sizeof(WCHAR),NULL,NULL);
ret = 1;
}
return ret;
}
#endif
//UTF-8转GB2312
int sU8xG(char* pOut,char* pText,int Len)
{
#ifdef LINUXCODE
code_convert("utf-8","gb2312",pText,Len,pOut,1024);
return strlen(pOut);
#else
int i,j;
WCHAR buf;
for(i=0,j=0;i
if((unsigned)pText[i]<0x80)
{ // ASCII 0x00 ~ 0x7f
pOut[j] = pText[i];
}
else
{
i += cU8xU(&buf,&pText[i]);
j += cUxG(&pOut[j],&buf);
}
}
return j;
#endif
}
// GB2312转Unicode,单字 ------------------
#ifndef LINUXCODE
int cGxU(WCHAR* pOut,char* pText)
{
int ret = 0;
if((unsigned)pText[0]<0x80)
{ // ASCII 0x00 ~ 0x7f
pOut[0] = (WCHAR)pText[0];
}
else
{
::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,pText,2,pOut,1);
//::mbstowcs((wchar_t *)pOut, pText, 1);
ret = 1;
}
return ret;
}
#endif
//UNOCODE 转化为UTF-8
int cUxU8(char* pOut,WCHAR* pText)
{
int ret = 0;
unsigned char* pchar = (unsigned char *)pText;
if(pText[0]<=0x7f)
{ // ASCII 0x00 ~ 0x7f
pOut[0] = (char)pchar[0];
}
else if(pText[0]<=0x7ff)
{ // 0x080 ~ 0x7ff
pOut[0] = 0xc0|(pchar[1]<<2)|(pchar[0]>>6);
pOut[1] = 0x80|(pchar[0]&0x3f);
ret = 1;
}
else
{ // 0x0800 ~ 0xFFFF
pOut[0] = 0xe0|(pchar[1]>>4);
pOut[1] = 0x80|((pchar[1]&0x0f)<<2)|(pchar[0]>>6);
pOut[2] = 0x80|(pchar[0]&0x3f);
ret = 2;
}
return ret;
}
//GB2312转UTF-8
int sGxU8(char* pOut,char* pText,int Len)
{
#ifdef LINUXCODE
code_convert("gb2312","utf-8",pText,Len,pOut,1024);
return strlen(pOut);
#else
int i,j;
WCHAR buf;
for(i=0,j=0;i
if((unsigned)pText[i]<0x80)
{
pOut[j] = pText[i];
}
else
{
i += cGxU(&buf,&pText[i]);
j += cUxU8(&pOut[j],&buf);
}
}
return j;
#endif
}