GB2312编码与UTF-8编码的相互转换代码(linux windows下通用)

以下是GB2312编码与UTF-8编码相互转换的代码,已自测通过。

思路:在Windows下,GB2312与UTF-8的相互转换以UNICODE作为中间编码,即先转换为UNICODE,再转换为目标编码;在Linux下则直接调用iconv完成转换。

//------------------------------------------------------------------------------------------------------------------------------

#ifdef LINUXCODE
#include <iconv.h>
#endif
#include <stdio.h>
#include <string.h>
//这里再包含其它头文件 

 

//这里是由UTF-8转化为UNICODE,其中pText是需要转换内容的指针,pOut是转换后内容的指针
int cU8xU(WCHAR* pOut,char *pText)
{
 int ret = 0;
 char* uchar = (char *)pOut;
 unsigned cIn = (unsigned char)pText[0];
 if(cIn<0x80)
 {              // ASCII  0x00 ~ 0x7f
        pOut[0] = pText[0];
 }
 else if(cIn<0xdf)
 {
  uchar[0] = (pText[0]<<6)|(pText[1]&0x3f);
        uchar[1] = (pText[0]>>2)&0x0f;
        ret = 1;
 }
 else if(cIn<0xef)
 {
        uchar[0] = (pText[1]<<6)|(pText[2]&0x3f);
        uchar[1] = (pText[0]<<4)|((pText[1]>>2)&0x0f);
        ret = 2;
 }
 else if(cIn<0xf7)
 {
        uchar[0] = (pText[2]<<6)|(pText[3]&0x3f);
        uchar[1] = (pText[1]<<4)|((pText[2]>>2)&0x0f);
        uchar[2] = ((pText[0]<<2)&0x1c)|((pText[1]>>4)&0x03);
        ret = 3;
 }
 return ret;
}

//代码转换:从一种编码转为另一种编码,这里是linux下的转换
#ifdef LINUXCODE
int code_convert(char *from_charset,char *to_charset,char *inbuf,int inlen,char *outbuf,int outlen)
{
        iconv_t cd;
        int rc;
        char **pin = &inbuf;
        char **pout = &outbuf;

        cd = iconv_open(to_charset,from_charset);
        if (cd==(iconv_t)-1)
        {
                perror("open");
                iconv_close(cd);
                return 0;
        }
        //memset(outbuf,0,outlen);
        rc = iconv(cd,pin,(size_t *)&inlen,pout,(size_t*)&outlen);
        if(rc == -1)
        {
                perror("conv");
                iconv_close(cd);
                return 0;
        }
        iconv_close(cd);
        return rc;
}
#endif

#ifndef LINUXCODE
int cUxG(char* pOut,WCHAR* pText)
{
 int ret = 0;
 if(pText[0]<0x80)
 {
        pOut[0] = (char)pText[0];
 }
 else
 {
  ::WideCharToMultiByte(CP_ACP,0,pText,1,pOut,sizeof(WCHAR),NULL,NULL);
  ret = 1;
 }
 
 return ret;
}
#endif

//UTF-8转GB2312

int sU8xG(char* pOut,char* pText,int Len)
{
#ifdef LINUXCODE
 code_convert("utf-8","gb2312",pText,Len,pOut,1024);
 return strlen(pOut);
#else
 int i,j;
 WCHAR buf;
 for(i=0,j=0;i  {
  if((unsigned)pText[i]<0x80)
  {       // ASCII  0x00 ~ 0x7f
           pOut[j] = pText[i];
  }
  else
  {
           i += cU8xU(&buf,&pText[i]);
           j += cUxG(&pOut[j],&buf);
         }
      }
       return j;
#endif
}

// GB2312转Unicode,单字 ------------------
#ifndef LINUXCODE
int cGxU(WCHAR* pOut,char* pText)
{
     int ret = 0;
      if((unsigned)pText[0]<0x80)
   {        // ASCII  0x00 ~ 0x7f
        pOut[0] = (WCHAR)pText[0];
      }
   else
   {
         ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,pText,2,pOut,1);
         //::mbstowcs((wchar_t *)pOut, pText, 1);
     ret = 1;
    }
   return ret;
     
}
#endif

//UNOCODE 转化为UTF-8

int cUxU8(char* pOut,WCHAR* pText)
{
 int ret = 0;
 unsigned char* pchar = (unsigned char *)pText;
 if(pText[0]<=0x7f)
 {         // ASCII  0x00 ~ 0x7f
  pOut[0] = (char)pchar[0];
 }
 else if(pText[0]<=0x7ff)
 {  // 0x080 ~ 0x7ff
        pOut[0] = 0xc0|(pchar[1]<<2)|(pchar[0]>>6);
        pOut[1] = 0x80|(pchar[0]&0x3f);
        ret = 1;
 }
 else
 {                      // 0x0800 ~ 0xFFFF
         pOut[0] = 0xe0|(pchar[1]>>4);
         pOut[1] = 0x80|((pchar[1]&0x0f)<<2)|(pchar[0]>>6);
         pOut[2] = 0x80|(pchar[0]&0x3f);
   ret = 2;
 }
      return ret;

  //GB2312转UTF-8
int sGxU8(char* pOut,char* pText,int Len)
{
#ifdef LINUXCODE
 code_convert("gb2312","utf-8",pText,Len,pOut,1024);
 return strlen(pOut);
#else
 int i,j;
 WCHAR buf;
 for(i=0,j=0;i  {
        if((unsigned)pText[i]<0x80)
    {
            pOut[j] = pText[i];
        }
    else
    {
            i += cGxU(&buf,&pText[i]);
            j += cUxU8(&pOut[j],&buf);
        }
  }
  return j;
#endif
}

你可能感兴趣的:(工作开发)