代码页转换&简繁体转换

/*-----------------------------------------------------------------------------*/
// 代码页转换 & 简繁体转换
/*-----------------------------------------------------------------------------*/

// One phrase-dependent conversion rule. Rules are stored in a WORD_MAP under
// the simplified character they apply to (see SetNotNormalWordMap).
typedef struct _tagWORD_CHAR
{
 // Phrase that must surround the keyed character for the rule to apply.
 string strWord;
 // Traditional character intended to stand in for the keyed character
 // when the phrase matches.
 string strChar;
}WORD_CHAR;

// Keyed by a single character; the value is the list of phrase rules
// registered for that character.
typedef map< string, vector < WORD_CHAR > > WORD_MAP;
// Keyed by a single character; the value is a positive ordinal assigned at
// registration time. ConvertString only tests whether the value is in range,
// so the map effectively acts as a set.
typedef map< string, int > CHAR_MAP;


// Registers the built-in phrase rules in _NotNormalWordMap.
//
// Each rule is appended (push_back) to the list stored under its key
// character, so calling this twice on the same map duplicates the rules.
// To add a rule, append a { key, phrase, replacement } row to the table.
void SetNotNormalWordMap( WORD_MAP &_NotNormalWordMap )
{
    static const struct
    {
        const char *szKey;   // character the rule is filed under
        const char *szWord;  // phrase containing that character
        const char *szChar;  // replacement character for that phrase
    } kRules[] =
    {
        { "秋", "秋千", "鞦" },
        { "千", "秋千", "韆" },
        { "胡", "胡须", "鬍" },
        { "回", "回旋", "迴" },
        { "姜", "生姜", "薑" },
        { "漓", "漓江", "灕" }
    };

    const int nRuleCount = sizeof( kRules ) / sizeof( kRules[0] );
    for ( int n = 0; n < nRuleCount; ++n )
    {
        WORD_CHAR rule;
        rule.strWord = kRules[n].szWord;
        rule.strChar = kRules[n].szChar;
        _NotNormalWordMap[ kRules[n].szKey ].push_back( rule );
    }
}

// 非对称简繁体字
void SetNotNormalCharMap( CHAR_MAP &_NotNormalCharMap )
{
 int i = 1;
 
 _NotNormalCharMap[ "摆" ] = i++;
 _NotNormalCharMap[ "板" ] = i++;
 _NotNormalCharMap[ "辟" ] = i++;
 _NotNormalCharMap[ "表" ] = i++;
 _NotNormalCharMap[ "别" ] = i++;
 _NotNormalCharMap[ "卜" ] = i++;
 _NotNormalCharMap[ "才" ] = i++;
 _NotNormalCharMap[ "厂" ] = i++;
 _NotNormalCharMap[ "冲" ] = i++;
 _NotNormalCharMap[ "虫" ] = i++;
 _NotNormalCharMap[ "仇" ] = i++;
 _NotNormalCharMap[ "丑" ] = i++;
 _NotNormalCharMap[ "出" ] = i++;
 _NotNormalCharMap[ "担" ] = i++;
 _NotNormalCharMap[ "当" ] = i++;
 _NotNormalCharMap[ "党" ] = i++;
 _NotNormalCharMap[ "淀" ] = i++;
 _NotNormalCharMap[ "冬" ] = i++;
 _NotNormalCharMap[ "斗" ] = i++;
 _NotNormalCharMap[ "恶" ] = i++;
 _NotNormalCharMap[ "儿" ] = i++;
 _NotNormalCharMap[ "发" ] = i++;
 _NotNormalCharMap[ "范" ] = i++;
 _NotNormalCharMap[ "丰" ] = i++;
 _NotNormalCharMap[ "复" ] = i++;
 _NotNormalCharMap[ "干" ] = i++;
 _NotNormalCharMap[ "谷" ] = i++;
 _NotNormalCharMap[ "刮" ] = i++;
 _NotNormalCharMap[ "广" ] = i++;
 _NotNormalCharMap[ "柜" ] = i++;
 _NotNormalCharMap[ "合" ] = i++;
 _NotNormalCharMap[ "后" ] = i++;
 _NotNormalCharMap[ "胡" ] = i++;
 _NotNormalCharMap[ "划" ] = i++;
 _NotNormalCharMap[ "坏" ] = i++;
 _NotNormalCharMap[ "回" ] = i++;
 _NotNormalCharMap[ "汇" ] = i++;
 _NotNormalCharMap[ "伙" ] = i++;
 _NotNormalCharMap[ "获" ] = i++;
 _NotNormalCharMap[ "饥" ] = i++;
 _NotNormalCharMap[ "几" ] = i++;
 _NotNormalCharMap[ "家" ] = i++;
 _NotNormalCharMap[ "价" ] = i++;
 _NotNormalCharMap[ "姜" ] = i++;
 _NotNormalCharMap[ "借" ] = i++;
 _NotNormalCharMap[ "尽" ] = i++;
 _NotNormalCharMap[ "据" ] = i++;
 _NotNormalCharMap[ "卷" ] = i++;
 _NotNormalCharMap[ "克" ] = i++;
 _NotNormalCharMap[ "困" ] = i++;
 _NotNormalCharMap[ "腊" ] = i++;
 _NotNormalCharMap[ "蜡" ] = i++;
 _NotNormalCharMap[ "累" ] = i++;
 _NotNormalCharMap[ "漓" ] = i++;
 _NotNormalCharMap[ "里" ] = i++;
 _NotNormalCharMap[ "历" ] = i++;
 _NotNormalCharMap[ "帘" ] = i++;
 _NotNormalCharMap[ "了" ] = i++;
 _NotNormalCharMap[ "卤" ] = i++;
 _NotNormalCharMap[ "么" ] = i++;
 _NotNormalCharMap[ "霉" ] = i++;
 _NotNormalCharMap[ "蒙" ] = i++;
 _NotNormalCharMap[ "弥" ] = i++;
 _NotNormalCharMap[ "面" ] = i++;
 _NotNormalCharMap[ "蔑" ] = i++;
 _NotNormalCharMap[ "宁" ] = i++;
 _NotNormalCharMap[ "苹" ] = i++;
 _NotNormalCharMap[ "仆" ] = i++;
 _NotNormalCharMap[ "朴" ] = i++;
 _NotNormalCharMap[ "千" ] = i++;
 _NotNormalCharMap[ "签" ] = i++;
 _NotNormalCharMap[ "秋" ] = i++;
 _NotNormalCharMap[ "曲" ] = i++;
 _NotNormalCharMap[ "确" ] = i++;
 _NotNormalCharMap[ "舍" ] = i++;
 _NotNormalCharMap[ "沈" ] = i++;
 _NotNormalCharMap[ "胜" ] = i++;
 _NotNormalCharMap[ "适" ] = i++;
 _NotNormalCharMap[ "术" ] = i++;
 _NotNormalCharMap[ "松" ] = i++;
 _NotNormalCharMap[ "苏" ] = i++;
 _NotNormalCharMap[ "台" ] = i++;
 _NotNormalCharMap[ "坛" ] = i++;
 _NotNormalCharMap[ "体" ] = i++;
 _NotNormalCharMap[ "涂" ] = i++;
 _NotNormalCharMap[ "团" ] = i++;
 _NotNormalCharMap[ "万" ] = i++;
 _NotNormalCharMap[ "系" ] = i++;
 _NotNormalCharMap[ "纤" ] = i++;
 _NotNormalCharMap[ "咸" ] = i++;
 _NotNormalCharMap[ "向" ] = i++;
 _NotNormalCharMap[ "须" ] = i++;
 _NotNormalCharMap[ "旋" ] = i++;
 _NotNormalCharMap[ "药" ] = i++;
 _NotNormalCharMap[ "叶" ] = i++;
 _NotNormalCharMap[ "佣" ] = i++;
 _NotNormalCharMap[ "余" ] = i++;
 _NotNormalCharMap[ "与" ] = i++;
 _NotNormalCharMap[ "吁" ] = i++;
 _NotNormalCharMap[ "郁" ] = i++;
 _NotNormalCharMap[ "御" ] = i++;
 _NotNormalCharMap[ "愿" ] = i++;
 _NotNormalCharMap[ "云" ] = i++;
 _NotNormalCharMap[ "沄" ] = i++;
 _NotNormalCharMap[ "芸" ] = i++;
 _NotNormalCharMap[ "脏" ] = i++;
 _NotNormalCharMap[ "折" ] = i++;
 _NotNormalCharMap[ "征" ] = i++;
 _NotNormalCharMap[ "症" ] = i++;
 _NotNormalCharMap[ "只" ] = i++;
 _NotNormalCharMap[ "制" ] = i++;
 _NotNormalCharMap[ "致" ] = i++;
 _NotNormalCharMap[ "钟" ] = i++;
 _NotNormalCharMap[ "种" ] = i++;
 _NotNormalCharMap[ "朱" ] = i++;
 _NotNormalCharMap[ "筑" ] = i++;
 _NotNormalCharMap[ "准" ] = i++;
}

// GB2312 --> GBK: maps simplified Chinese text to traditional Chinese, in
// place, using LCMapStringA with the zh-CN locale.
//
// szBuf   NUL-terminated ANSI string; overwritten with the mapped text on
//         success and left untouched on failure.
// Returns true on success. Returns false when szBuf is NULL or empty, when
//         the mapping fails, or when the mapped text would be longer than
//         the input (the size of the caller's buffer is unknown, so a longer
//         result cannot be written safely).
bool __fastcall GB2GBK(char *szBuf)
{
    if( szBuf == NULL || szBuf[0] == '\0' )
        return false;

    const int nStrLen = static_cast<int>( strlen(szBuf) );
    const LCID lcid = MAKELCID(MAKELANGID(LANG_CHINESE, SUBLANG_CHINESE_SIMPLIFIED), SORT_CHINESE_PRC);

    // First call with a zero-sized destination only reports the required size.
    const int nNeeded = LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, szBuf, nStrLen, NULL, 0);
    if( nNeeded <= 0 || nNeeded > nStrLen )
        return false;

    // Map into a scratch buffer: source and destination must not overlap.
    // The source length is nStrLen (the input), not the size just reported.
    std::vector<char> mapped( nNeeded );
    const int nWritten = LCMapStringA(lcid, LCMAP_TRADITIONAL_CHINESE, szBuf, nStrLen, &mapped[0], nNeeded);
    if( nWritten <= 0 || nWritten > nStrLen )
        return false;

    // The input length excluded the terminator, so the output has none either.
    memcpy( szBuf, &mapped[0], nWritten );
    szBuf[nWritten] = '\0';

    return true;
}

// Code page conversion: re-encodes str from sourceCodepage to targetCodepage
// by going through UTF-16.
//
// str             text encoded in sourceCodepage
// sourceCodepage  code page identifier of the input (e.g. CP_UTF8)
// targetCodepage  code page identifier of the output (e.g. 936)
// Returns the re-encoded text, or an empty string if either conversion fails.
CString CharCodeConvert( const CString str, int sourceCodepage, int targetCodepage)
{
    // Passing -1 as the input length makes the API include the terminating
    // NUL in both the size it reports and the data it writes.
    const int unicodeLen = MultiByteToWideChar(sourceCodepage, 0, str, -1, NULL, 0);
    if ( unicodeLen <= 0 )
        return CString();

    std::vector<wchar_t> wide( unicodeLen + 1, L'\0' );
    if ( MultiByteToWideChar(sourceCodepage, 0, str, -1, &wide[0], unicodeLen) <= 0 )
        return CString();

    const int targetLen = WideCharToMultiByte(targetCodepage, 0, &wide[0], -1, NULL, 0, NULL, NULL);
    if ( targetLen <= 0 )
        return CString();

    std::vector<char> target( targetLen + 1, '\0' );
    if ( WideCharToMultiByte(targetCodepage, 0, &wide[0], -1, &target[0], targetLen, NULL, NULL) <= 0 )
        return CString();

    // The vectors release their storage automatically; the original freed
    // new[] memory with scalar delete, which is undefined behaviour.
    return CString( &target[0] );
}

CString ConvertString( const CString _strVal, const CHAR_MAP _mapNotNormalChar, const WORD_MAP _mapNotNormalWord )
{
 if ( _strVal.IsEmpty() )
 {
  return _strVal;
 }

 CString strResult = "";

 CHAR_MAP NotNormalCharMap = _mapNotNormalChar;
 WORD_MAP NotNormalWordMap = _mapNotNormalWord;

 int iSourceCodePage = CP_UTF8;
 int iTargetCodePage = 936;

 // UTF8 --> ANSI(GBK)
 CString strAnsi = CharCodeConvert( _strVal, iSourceCodePage, iTargetCodePage );
 strAnsi.TrimLeft();
 strAnsi.TrimRight();

 // 简体 --> 繁体
 string strName = strAnsi;
 int nLen = strName.size();
 for ( int nIndex = 0; nIndex < nLen; nIndex ++ )
 {
  WORD wChar = (BYTE)strName[nIndex];

  if ( ::IsDBCSLeadByteEx( iTargetCodePage, (BYTE)wChar ) )
  {
   nIndex ++;

   WORD wHighByte = wChar;
   WORD wLowByte = (BYTE)strName[nIndex];
   WORD wCharByte = (wHighByte << 8) | wLowByte;

   // 简体字 (GB2312汉字编码范围内)
   if ( (wHighByte >= 0xB0 && wHighByte <= 0xF7) && (wLowByte >= 0xA1 && wLowByte <= 0xFE) )
   {
    char *cpCharTemp = new char[ 2+1 ];
    memset( cpCharTemp, 0, 2+1 );
    cpCharTemp[0] = strName[nIndex-1];
    cpCharTemp[1] = strName[nIndex];

    // 非对称简繁体字
    if ( ( NotNormalCharMap[ cpCharTemp ] <= 0 ) || ( NotNormalCharMap[ cpCharTemp ] > NotNormalCharMap.size() ) )
    {
     GB2GBK( cpCharTemp );
    }
    else
    {
     if ( NotNormalWordMap[ cpCharTemp ].size() > 0 )
     {
      string strSourceChar = cpCharTemp;

      vector< WORD_CHAR >::iterator itWord_Char;
      for ( itWord_Char = (NotNormalWordMap[ cpCharTemp ]).begin(); itWord_Char != (NotNormalWordMap[ cpCharTemp ]).end(); itWord_Char++ )
      {
       WORD_CHAR *word_char = itWord_Char;
       string strTargetWord = word_char->strWord;
       string strTargetChar = word_char->strChar;

       int iTargetCharIndex = strTargetWord.find( strSourceChar );
       int iTargetWordLen = strTargetWord.size();
       
       int iSourceCharIndex = strName.find( strSourceChar );

       string strSourceWord = strName.substr( iSourceCharIndex - iTargetCharIndex, iTargetWordLen );

       int iCompareResult = strSourceWord.compare( strTargetWord );
       if ( !iCompareResult )
       {
        char cpTempChar[2] = {0};
        strcpy( cpTempChar, strTargetChar.c_str() );
       }
      }
     }
     TRACE( "%s,%s/n", strName, cpCharTemp );

     delete[] cpCharTemp;
    }
   }
  }
 }

 return strResult;
}
 

你可能感兴趣的:(字符编码&字符集)