最近用 Windows API 进行文件操作,在使用 UNICODE 字符集后,发现只要往文件中写入中文就会出现乱码。最后用 UltraEdit 看了一下,发现原因是文件缺少 UNICODE 文件头(BOM)。
现在将UNICODE 文件头的所有相关信息都放在这里用于标识,如下:
unicode文件头的标识
Byte-order mark Description
EF BB BF UTF-8
FF FE UTF-16 aka UCS-2, little endian
FE FF UTF-16 aka UCS-2, big endian
FF FE 00 00 UTF-32 aka UCS-4, little endian.
00 00 FE FF UTF-32 aka UCS-4, big-endian.
我在创建文件后,即往文件中写入一个两字节的值:0xFEFF(使用小端存储方式,文件中的实际字节为 FF FE)。截取我的一个代码实现,如下:
/// Creates a fresh "SetupInfo.log" under the folder reported for CSIDL_APPDATA
/// and writes an initial timestamped "Create New Log." entry to it.
///
/// The resulting path is stored in the global g_strLogPath. In _UNICODE
/// builds a single UTF-16 little-endian byte-order mark (bytes FF FE) is
/// written at the very start of the file, before the first log line.
///
/// @return TRUE when the file was created and the header (if any) and the
///         first log line were written successfully; FALSE otherwise.
BOOL CreateSetupLog()
{
    BOOL bRet = FALSE;
    TCHAR szAppDataPath[MAX_PATH] = {0};
    if (SHGetSpecialFolderPath(NULL, szAppDataPath, CSIDL_APPDATA, FALSE))
    {
        g_strLogPath.Format(_T("%s\\SetupInfo.log"), szAppDataPath);
        if (PathFileExists(g_strLogPath))
        {
            // Remove any log left over from a previous run.
            DeleteFile(g_strLogPath);
        }
        else
        {
            // The log does not exist yet; its parent directory may not either.
            TCHAR szDirPath[MAX_PATH] = {0};
            // Explicit pointer conversion: a string object must not be passed
            // through a C varargs list as-is.
            _sntprintf(szDirPath, MAX_PATH - 1, _T("%s"),
                       static_cast<const TCHAR*>(g_strLogPath));
            PathRemoveFileSpec(szDirPath);
            // NOTE(review): presumably creates the directory (and parents) —
            // confirm against Util::IOMisc.
            Util::IOMisc::ForceCreateDirectory(szDirPath);
        }

        HANDLE hFile = CreateFile(g_strLogPath, GENERIC_WRITE, FILE_SHARE_WRITE, NULL,
                                  CREATE_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
        if (hFile != INVALID_HANDLE_VALUE)
        {
            SYSTEMTIME tmNow;
            GetLocalTime(&tmNow);

            // Zero-initialised and formatted with one element held back, so the
            // text stays NUL-terminated even if the output is truncated.
            TCHAR szInfo[1024] = {0};
            _sntprintf(szInfo, SIZEOF(szInfo) - 1,
                       _T("%02d-%02d-%02d %02d:%02d:%02d Create New Log.\r\n\r\n"),
                       tmNow.wYear, tmNow.wMonth, tmNow.wDay,
                       tmNow.wHour, tmNow.wMinute, tmNow.wSecond);

            DWORD dwWrite = 0;
            BOOL bHeaderOk = TRUE;
#ifdef _UNICODE
            // Write the UTF-16 LE byte-order mark exactly once. The text buffer
            // itself must NOT also start with 0xFEFF, otherwise the file would
            // begin with two BOMs and editors would show a stray character.
            const TCHAR chBom = 0xFEFF;
            bHeaderOk = WriteFile(hFile, &chBom, sizeof(chBom), &dwWrite, NULL)
                        && dwWrite == sizeof(chBom);
#endif
            if (bHeaderOk)
            {
                const DWORD cbInfo = static_cast<DWORD>(_tcslen(szInfo) * sizeof(TCHAR));
                if (WriteFile(hFile, szInfo, cbInfo, &dwWrite, NULL))
                {
                    bRet = TRUE;
                }
            }
            CloseHandle(hFile);
        }
    }
    return bRet;
}