将读入的文本文件数据转换为UNICODE

 

//文本转换为UNICODE
//pInByte 从文本文件读入的数据
//iInBytesLen 原始数据字节长度
//pWchString 返回转换后的UNICODE数据(以0结束)
//返回转换后UNICODE数据长度
int ConvertText2Unicode(const BYTE *pInByte, const int iInBytesLen, LPWSTR *pWchString)
{
  int iWChDataLen = 0;

  if(pInByte && iInBytesLen > 0 && pWchString)
  {
    WCHAR *pWchData = NULL;

    if(iInBytesLen > 3 
      && pInByte[0] == '\xEF' && pInByte[1] == '\xBB' && pInByte[2] == '\xBF')//UTF8-BOM
    {
      int icp = CP_UTF8;
      LPCSTR pSrcIn = (LPCSTR)(pInByte+3); //skip head
      int iSrcLen = iInBytesLen-3;
      iWChDataLen = MultiByteToWideChar(icp, 0, pSrcIn, iSrcLen, NULL, 0);
      pWchData = new WCHAR[iWChDataLen + 1];
      MultiByteToWideChar(icp, 0, pSrcIn, iSrcLen, pWchData, iWChDataLen);    
      pWchData[iWChDataLen] = 0;
    }
    else if(iInBytesLen > 2
      && pInByte[0] == '\xFF' && pInByte[1] == '\xFE') //UNICODE
    {
      LPCWSTR pSrcIn = (LPCWSTR)(pInByte+2); //skip head
      iWChDataLen = (iInBytesLen-2)/2;
      pWchData = new WCHAR[iWChDataLen + 1];
      memcpy(pWchData, pSrcIn, iWChDataLen*sizeof(WCHAR));
      pWchData[iWChDataLen] = 0;
    }
    else if(iInBytesLen > 2
      && pInByte[0] == '\xFE' && pInByte[1] == '\xFF') //UNICODE Big-endian
    {
      const BYTE *pSrcIn = pInByte+2; //skip head
      iWChDataLen = (iInBytesLen-2)/2;
      pWchData = new WCHAR[iWChDataLen + 1];
      //大小端转换
      for(int iSrc=0,iDst=0; iDst 0) //常规文本
    {
      int icp = CP_THREAD_ACP; //按当前线程代码页转换
      LPCSTR pSrcIn = (LPCSTR)pInByte; 
      iWChDataLen = MultiByteToWideChar(icp, 0, pSrcIn, iInBytesLen, NULL, 0);
      pWchData = new WCHAR[iWChDataLen + 1];
      MultiByteToWideChar(icp, 0, pSrcIn, iInBytesLen, pWchData, iWChDataLen);    
      pWchData[iWChDataLen] = 0;
    }
    else
    {
    }

    *pWchString = pWchData;
  }

  return iWChDataLen;
}

 

// Test routine: reads the file named by szInFile, converts its contents with
// ConvertText2Unicode and writes the resulting text to the debugger output.
// NOTE(review): the enclosing function signature is not part of this snippet;
// szInFile must be provided by the surrounding scope.
{
  BYTE *pInByte = NULL;
  int iInBytesLen = 0; // bytes actually read; stays 0 when the read fails
  WCHAR *pWchData = NULL;
  int iWChDataLen = 0;

  // Read the whole file into memory.
  try
  {
    CFile file(szInFile, CFile::modeRead);
    const int iFileLen = (int)file.GetLength();
    if(iFileLen > 0)
    {
      // Allocate two extra bytes and zero the whole buffer so the data is
      // always followed by zero bytes.
      pInByte = new BYTE[iFileLen + 2];
      memset(pInByte, 0, iFileLen + 2);
      // Record the count Read reports rather than the file length, so a short
      // read is not over-reported and a throwing Read leaves the length at 0.
      iInBytesLen = (int)file.Read(pInByte, iFileLen);
    }
    file.Close();
  }
  catch (CFileException* e)
  {
    e->ReportError();
    e->Delete();
  }

  if(pInByte && iInBytesLen > 0)
  {
    iWChDataLen = ConvertText2Unicode(pInByte, iInBytesLen, &pWchData);

    if(iWChDataLen > 0 && pWchData)
    {
      OutputDebugStringW(pWchData);
      OutputDebugStringW(L"\r\n");
    }
  }

  // delete [] on a null pointer is a no-op, so no guards are needed.
  delete [] pWchData;
  delete [] pInByte;
}

 

你可能感兴趣的:(文档/视,算法,其它)