VC修改CStdioFile类, 兼容一行一行读写Unicode的和非Unicode的log

有些人写Unicode的log文件时, 不知道要在文件开头写入BOM标志位 (即 #define nUNICODE_BOM 0xFEFF)。在Unicode环境下的VC代码中, 使用MFC的CStdioFile类调用ReadString()函数无法正确读取这类文件的内容。所以需要重写一个CStdioFileEx类。

 

FileEx.h文件:

// Byte-order mark value. WriteString() writes it as the first wchar_t of a Unicode file and
// IsFileUnicode() compares the first wchar_t of a file against it.
#define nUNICODE_BOM      0xFEFF  // Unicode "byte order mark" which goes at start of file
// CR+LF pair that ReadString() strips from the end of each line it returns.
#define sNEWLINE        _T("\r\n") // New line characters
// Passed to WideCharToMultiByte() as the replacement for characters the code page cannot represent.
#define sDEFAULT_UNICODE_FILLER_CHAR "#"   // Filler char used when no conversion from Unicode to local code page is possible

// CStdioFileEx extends MFC's CStdioFile so that text files can be read and written line by
// line whether they are Unicode (the file starts with the nUNICODE_BOM marker) or multibyte,
// in both _UNICODE and non-_UNICODE builds.
class CStdioFileEx: public CStdioFile
{
public:
	// Default constructor: clears the Unicode flag and the stored open flags.
	CStdioFileEx();
	// Constructs the object for lpszFileName; nOpenFlags goes through ProcessFlags() first.
	CStdioFileEx( LPCTSTR lpszFileName, UINT nOpenFlags );

	// Runs ProcessFlags() on nOpenFlags, then forwards to CStdioFile::Open().
	virtual BOOL Open( LPCTSTR lpszFileName, UINT nOpenFlags, CFileException* pError = NULL );
	// Reads one line into rString with its trailing line break removed.
	// Returns FALSE when no data could be read.
	virtual BOOL ReadString(CString& rString);
	// Writes lpsz as Unicode when the file was opened with modeWriteUnicode, otherwise as multibyte.
	virtual void WriteString( LPCTSTR lpsz );
	// Returns the Unicode flag that ProcessFlags() set for the file.
	bool    IsFileUnicodeText() { return m_bIsUnicodeText; } 
	// Returns the file length in characters (the byte-order mark is not counted for Unicode
	// files); returns 0 when no stream is open.
	unsigned long GetCharCount();

	// Additional flag to allow Unicode text writing
	static const UINT modeWriteUnicode;

	// static utility functions

	// --------------------------------------------------------------------------------------------
	//
	// CStdioFileEx::GetUnicodeStringFromMultiByteString()
	//
	// --------------------------------------------------------------------------------------------
	// Returns:    bool - true when the conversion succeeded
	// Parameters: char *  szMultiByteString  (IN) Multi-byte input string (null-terminated)
	//     wchar_t*  szUnicodeString  (OUT) Unicode output string
	//     short   nUnicodeBufferSize (IN) Size of Unicode output buffer; it is handed to
	//                  MultiByteToWideChar() as a count of wide characters
	//     UINT   nCodePage    (IN) Code page used to perform conversion
	//                  Default = -1 (Get local code page).
	//
	// Purpose:  Gets a Unicode string from a MultiByte string.
	// Notes:  The buffer size is a short, so it cannot exceed 32767.
	// Exceptions: None.
	//
	static bool  GetUnicodeStringFromMultiByteString(char * szMultiByteString,wchar_t* szUnicodeString,
		short nUnicodeBufferSize,UINT nCodePage=-1);

	// --------------------------------------------------------------------------------------------
	//
	// CStdioFileEx::GetMultiByteStringFromUnicodeString()
	//
	// --------------------------------------------------------------------------------------------
	// Returns:    BOOL - non-zero when the conversion succeeded, FALSE otherwise
	// Parameters: wchar_t * szUnicodeString   (IN) Unicode input string (null-terminated)
	//     char*   szMultiByteString   (OUT) Multibyte output string
	//     short   nMultiByteBufferSize  (IN) Multibyte buffer size; it is handed to
	//                   WideCharToMultiByte() as a count of bytes
	//     UINT   nCodePage     (IN) Code page used to perform conversion
	//                   Default = -1 (Get local code page).
	//
	// Purpose:  Gets a MultiByte string from a Unicode string.
	// Notes:  Characters that cannot be converted are replaced by sDEFAULT_UNICODE_FILLER_CHAR.
	//     The buffer size is a short, so it cannot exceed 32767.
	// Exceptions: None.
	//
	static BOOL   GetMultiByteStringFromUnicodeString(wchar_t * szUnicodeString,char* szMultiByteString,
		short nMultiByteBufferSize,UINT nCodePage=-1);


	// --------------------------------------------------------------------------------------------
	//
	// CStdioFileEx::IsFileUnicode()
	//
	// --------------------------------------------------------------------------------------------
	// Returns:    bool - true when the file starts with the Unicode byte-order mark
	// Parameters: const CString& sFilePath  (IN) Path of the file to inspect
	//
	// Purpose:  Determines whether a file is Unicode by reading the first character and detecting
	//     whether it's the Unicode byte marker.
	// Notes:  Returns false when the file cannot be opened.
	// Exceptions: None.
	//
	static bool IsFileUnicode(const CString& sFilePath);
	// Set by ProcessFlags(): true when the file being opened was detected as Unicode.
	// NOTE(review): this member is public although IsFileUnicodeText() exposes it - confirm
	// whether outside code writes to it before restricting access.
	bool  m_bIsUnicodeText;
protected:
	// Examines the open flags, runs Unicode detection where applicable, may switch nOpenFlags
	// from text to binary mode, stores the result in m_nFlags and returns it.
	UINT ProcessFlags(const CString& sFilePath, UINT& nOpenFlags);

	
	// Open flags as left by the last ProcessFlags() call; WriteString() tests modeWriteUnicode in it.
	UINT  m_nFlags;
};


FileEx.cpp:
#include "StdAfx.h"
#include "FileEx.h"

// Extra open flag understood by CStdioFileEx: combine it with CFile::modeWrite or
// CFile::modeReadWrite to make WriteString() emit Unicode text preceded by a byte-order mark.
// NOTE(review): Open() passes this bit on to CStdioFile::Open() unchanged; 0x20000 may coincide
// with a CFile open flag in newer MFC versions - confirm against the afx.h in use.
/*static*/ const UINT CStdioFileEx::modeWriteUnicode = 0x20000; // Add this flag to write in Unicode

// Default constructor: builds the CStdioFile base with its default constructor and starts
// with Unicode detection off and no open flags recorded.
CStdioFileEx::CStdioFileEx()
	: CStdioFile()
	, m_bIsUnicodeText(false)
	, m_nFlags(0)
{
}

// Constructs the object and opens lpszFileName with nOpenFlags.
//
// The flags are preprocessed by ProcessFlags() (through Open()), so a BOM-prefixed file opened
// for reading in text mode is switched to binary mode.
//
// Calling ProcessFlags() from the base-class initializer, as was done before, invokes a member
// function before the base sub-object exists (undefined behaviour) and lets it read members
// that have not been initialized yet. The members are therefore initialized first and the file
// is opened from the constructor body.
//
// Throws a CFileException on failure, like the CStdioFile(LPCTSTR, UINT) constructor.
CStdioFileEx::CStdioFileEx(LPCTSTR lpszFileName,UINT nOpenFlags)
	: CStdioFile()
	, m_bIsUnicodeText(false)
	, m_nFlags(0)
{
	CFileException exOpen;

	// Qualified call: the behaviour of this class is wanted, not that of a further-derived override
	if (!CStdioFileEx::Open(lpszFileName, nOpenFlags, &exOpen))
	{
		AfxThrowFileException(exOpen.m_cause, exOpen.m_lOsError, exOpen.m_strFileName);
	}
}

// Opens lpszFileName. The requested flags are first run through ProcessFlags(), which performs
// the Unicode detection and may replace text mode by binary mode; the resulting flags are the
// ones handed to CStdioFile::Open(). Returns what CStdioFile::Open() returns.
BOOL CStdioFileEx::Open(LPCTSTR lpszFileName,UINT nOpenFlags,CFileException* pError /*=NULL*/)
{
	const UINT nEffectiveFlags = ProcessFlags(lpszFileName, nOpenFlags);

	return CStdioFile::Open(lpszFileName, nEffectiveFlags, pError);
}

// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::ReadString()
//
// --------------------------------------------------------------------------------------------
// Returns:    BOOL - TRUE when a line was read, FALSE at end of file, on a read failure or
//                    when no stream is open
// Parameters: CString& rString  (OUT) The line, without its trailing line break
//
// Purpose:  Reads one line from the file, converting between Unicode and multibyte when the
//     encoding of the file differs from the character set the program was built for.
//     Override of base class function.
// Notes:  - Lines that need conversion are read in pieces of at most 4095 characters.
//     - Conversion buffers live on the stack, so nothing can leak on any exit path (the
//       previous heap buffers leaked at end of file and were freed with scalar delete).
//     - rString is emptied when nothing is read or when the conversion fails.
// Exceptions: Whatever CStdioFile::ReadString()/Read()/GetPosition() throw.
//
BOOL CStdioFileEx::ReadString(CString& rString)
{
	const int nMAX_LINE_CHARS = 4096;
	BOOL   bReadData = FALSE;

	// Nothing can be read without an open stream
	ASSERT(m_pStream != NULL);
	if (m_pStream == NULL)
	{
		rString.Empty();
		return FALSE;
	}

	// If at position 0 of a Unicode file, discard byte-order mark before reading
	if (m_bIsUnicodeText && GetPosition() == 0)
	{
		wchar_t cDummy = 0;
		Read(&cDummy, sizeof(wchar_t));
	}

	// If compiled for Unicode
#ifdef _UNICODE
	if (m_bIsUnicodeText)
	{
		// Unicode file in a Unicode build: no conversion needed
		bReadData = CStdioFile::ReadString(rString);
	}
	else
	{
		// Multibyte file in a Unicode build: read as multibyte, convert to Unicode
		char  szMultiByteString[nMAX_LINE_CHARS];
		wchar_t  szUnicodeString[nMAX_LINE_CHARS];

		rString.Empty();

		if (fgets(szMultiByteString, nMAX_LINE_CHARS, m_pStream) != NULL)
		{
			bReadData = TRUE;
			if (GetUnicodeStringFromMultiByteString(szMultiByteString, szUnicodeString, nMAX_LINE_CHARS))
			{
				rString = szUnicodeString;
			}
		}
	}
#else
	if (!m_bIsUnicodeText)
	{
		// Multibyte file in a multibyte build: no conversion needed
		bReadData = CStdioFile::ReadString(rString);
	}
	else
	{
		// Unicode file in a multibyte build: read as Unicode, convert to multibyte.
		// A wide character can become two bytes in a double-byte code page, so the
		// multibyte buffer is twice as large as the wide buffer.
		const int nMAX_LINE_BYTES = nMAX_LINE_CHARS * 2;
		wchar_t  szUnicodeString[nMAX_LINE_CHARS];
		char  szMultiByteString[nMAX_LINE_BYTES];

		rString.Empty();

		if (fgetws(szUnicodeString, nMAX_LINE_CHARS, m_pStream) != NULL)
		{
			bReadData = TRUE;
			if (GetMultiByteStringFromUnicodeString(szUnicodeString, szMultiByteString, nMAX_LINE_BYTES))
			{
				rString = szMultiByteString;
			}
		}
	}
#endif

	// Then remove the end-of-line: either a CR+LF pair or a single trailing CR or LF
	if (bReadData)
	{
		const int nLen = rString.GetLength();

		if (nLen > 1 && rString.Mid(nLen - 2) == sNEWLINE)
		{
			rString = rString.Left(nLen - 2);
		}
		else if (nLen != 0)
		{
			const TCHAR chLast = rString.GetAt(nLen - 1);
			if (chLast == _T('\r') || chLast == _T('\n'))
			{
				rString = rString.Left(nLen - 1);
			}
		}
	}

	return bReadData;
}

// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::WriteString()
//
// --------------------------------------------------------------------------------------------
// Returns:    void
// Parameters: LPCTSTR lpsz  (IN) Null-terminated string to write
//
// Purpose:  Writes string to file either in Unicode or multibyte, depending on whether the caller specified the
//     CStdioFileEx::modeWriteUnicode flag. Override of base class function.
// Notes:  If writing in Unicode we need to:
//      a) Write the Byte-order-mark at the beginning of the file
//      b) Write all strings in byte-mode
//     - If we were compiled in Unicode, we need to convert Unicode to multibyte if
//      we want to write in multibyte
//     - If we were compiled in multi-byte, we need to convert multibyte to Unicode if
//      we want to write in Unicode.
//     - Converted text is sized by asking the conversion function for the required length and
//      exactly the converted length is written. A character-count guess is wrong for
//      double-byte code pages, where one wide character can be two bytes.
//     - Nothing is written when lpsz is NULL or no stream is open (asserts in debug builds).
// Exceptions: Whatever CFile::Write()/CStdioFile::WriteString() throw; conversion buffers are
//     released before the exception propagates.
//
void CStdioFileEx::WriteString(LPCTSTR lpsz)
{
	ASSERT(lpsz != NULL);
	ASSERT(m_pStream != NULL);
	if (lpsz == NULL || m_pStream == NULL)
	{
		return;
	}

	const bool bWriteUnicode = (m_nFlags & CStdioFileEx::modeWriteUnicode) != 0;

	// If writing Unicode and at position 0, write byte-order mark before writing anything else
	if (bWriteUnicode && GetPosition() == 0)
	{
		const wchar_t cBOM = (wchar_t)nUNICODE_BOM;
		CFile::Write(&cBOM, sizeof(wchar_t));
	}

	// If compiled in Unicode...
#ifdef _UNICODE
	const int nSourceChars = lstrlen(lpsz);

	// If writing Unicode, no conversion needed
	if (bWriteUnicode)
	{
		// Write in byte mode
		CFile::Write(lpsz, (UINT)(nSourceChars * sizeof(wchar_t)));
	}
	// Else if we don't want to write Unicode, need to convert
	else if (nSourceChars > 0)
	{
		const UINT nCodePage = GetACP();

		// UTF-7/UTF-8 accept neither conversion flags nor a default character; every other
		// code page keeps the flags and filler used by GetMultiByteStringFromUnicodeString()
		DWORD  dwFlags = WC_COMPOSITECHECK | WC_SEPCHARS;
		LPCSTR pszFiller = sDEFAULT_UNICODE_FILLER_CHAR;
		if (nCodePage == CP_UTF8 || nCodePage == CP_UTF7)
		{
			dwFlags = 0;
			pszFiller = NULL;
		}

		// First pass: how many bytes does the converted text need?
		const int nBytesNeeded = ::WideCharToMultiByte(nCodePage, dwFlags, lpsz, nSourceChars,
			NULL, 0, pszFiller, NULL);
		ASSERT(nBytesNeeded > 0);

		if (nBytesNeeded > 0)
		{
			char* pszMultiByteString = new char[nBytesNeeded];

			try
			{
				// Second pass: convert, then write exactly what was produced
				const int nBytesConverted = ::WideCharToMultiByte(nCodePage, dwFlags, lpsz, nSourceChars,
					pszMultiByteString, nBytesNeeded, pszFiller, NULL);
				ASSERT(nBytesConverted > 0);

				if (nBytesConverted > 0)
				{
					CFile::Write((const void*)pszMultiByteString, (UINT)nBytesConverted);
				}
			}
			catch (...)
			{
				delete [] pszMultiByteString;
				throw;
			}

			delete [] pszMultiByteString;
		}
	}
	// Else if *not* compiled in Unicode
#else
	// If writing Unicode, need to convert
	if (bWriteUnicode)
	{
		const int nSourceBytes = lstrlen(lpsz);

		if (nSourceBytes > 0)
		{
			const UINT nCodePage = GetACP();

			// First pass: how many wide characters does the converted text need?
			// (dwFlags 0 selects the default, precomposed, behaviour and is valid for every code page)
			const int nCharsNeeded = ::MultiByteToWideChar(nCodePage, 0, lpsz, nSourceBytes, NULL, 0);
			ASSERT(nCharsNeeded > 0);

			if (nCharsNeeded > 0)
			{
				wchar_t* pszUnicodeString = new wchar_t[nCharsNeeded];

				try
				{
					// Second pass: convert, then write exactly what was produced
					const int nCharsConverted = ::MultiByteToWideChar(nCodePage, 0, lpsz, nSourceBytes,
						pszUnicodeString, nCharsNeeded);
					ASSERT(nCharsConverted > 0);

					if (nCharsConverted > 0)
					{
						// Write in byte mode
						CFile::Write(pszUnicodeString, (UINT)(nCharsConverted * sizeof(wchar_t)));
					}
				}
				catch (...)
				{
					delete [] pszUnicodeString;
					throw;
				}

				delete [] pszUnicodeString;
			}
		}
	}
	// Else if we don't want to write Unicode, no conversion needed
	else
	{
		// Do standard stuff
		CStdioFile::WriteString(lpsz);
	}

#endif
}

// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::ProcessFlags()
//
// --------------------------------------------------------------------------------------------
// Returns:    UINT - the (possibly adjusted) open flags, also stored in m_nFlags
// Parameters: const CString& sFilePath  (IN) Path of the file about to be opened
//     UINT&   nOpenFlags  (IN/OUT) Requested open flags; text mode is replaced by binary mode
//                  when the file is detected as Unicode
//
// Purpose:  Decides whether the file is Unicode text and adjusts the open flags accordingly.
// Notes:  Detection only runs for a file opened in text mode that is neither being created
//     nor opened write-only. The decision is based on the flags of THIS call (nOpenFlags);
//     testing m_nFlags instead would use the flags of a previous open, or an uninitialized
//     value, because m_nFlags is only updated at the end of this function.
// Exceptions: None.
//
UINT CStdioFileEx::ProcessFlags(const CString& sFilePath, UINT& nOpenFlags)
{
	m_bIsUnicodeText = false;

	// If we have writeUnicode we must have write or writeRead as well
#ifdef _DEBUG
	if (nOpenFlags & CStdioFileEx::modeWriteUnicode)
	{
		ASSERT((nOpenFlags & CFile::modeWrite) || (nOpenFlags & CFile::modeReadWrite));
	}
#endif

	const bool bTextMode = (nOpenFlags & CFile::typeText) != 0;
	const bool bCreating = (nOpenFlags & CFile::modeCreate) != 0;
	const bool bWriteOnly = (nOpenFlags & CFile::modeWrite) != 0;

	// If reading in text mode and not creating...
	if (bTextMode && !bCreating && !bWriteOnly)
	{
		m_bIsUnicodeText = IsFileUnicode(sFilePath);

		// If it's Unicode, switch to binary mode
		if (m_bIsUnicodeText)
		{
			nOpenFlags &= ~static_cast<UINT>(CFile::typeText);
			nOpenFlags |= CFile::typeBinary;
		}
	}

	m_nFlags = nOpenFlags;

	return nOpenFlags;
}

// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::IsFileUnicode()
//
// --------------------------------------------------------------------------------------------
// Returns:    bool - true when the file starts with the Unicode byte-order mark
// Parameters: const CString& sFilePath  (IN) Path of the file to inspect
//
// Purpose:  Determines whether a file is Unicode by reading the first character and detecting
//     whether it's the Unicode byte marker.
// Notes:  - Returns false when the file cannot be opened or is shorter than one wchar_t.
//     - The first wchar_t must be read completely before it is compared; a one-byte file
//       would otherwise be compared against a partly uninitialized value.
// Exceptions: None raised here; CFile::Read() may throw a CFileException.
//
/*static*/ bool CStdioFileEx::IsFileUnicode(const CString& sFilePath)
{
	bool    bIsUnicode = false;
	CFile    file;
	CFileException exFile;

	// Open file in binary mode and read first character
	if (file.Open(sFilePath, CFile::typeBinary | CFile::modeRead, &exFile))
	{
		wchar_t cFirstChar = 0;

		// If the first character is the Unicode byte-order marker, let's say it's Unicode
		if (file.Read(&cFirstChar, sizeof(wchar_t)) == sizeof(wchar_t)
			&& cFirstChar == (wchar_t)nUNICODE_BOM)
		{
			bIsUnicode = true;
		}

		file.Close();
	}
	else
	{
		// Handle error here if you like
	}

	return bIsUnicode;
}

// Returns the number of characters in the file: the file length in bytes divided by the size
// of one character (wchar_t for Unicode files, char otherwise). For Unicode files the
// byte-order mark is subtracted first. Returns 0 when no stream is open.
unsigned long CStdioFileEx::GetCharCount()
{
	if (!m_pStream)
	{
		return 0;
	}

	unsigned long cbContent = (unsigned long)GetLength();
	int cbPerChar = sizeof(char);

	if (m_bIsUnicodeText)
	{
		// The byte-order mark is not part of the text
		cbPerChar = sizeof(wchar_t);
		cbContent = cbContent - sizeof(wchar_t);
	}

	const unsigned long nChars = (cbContent / cbPerChar);
	return nChars;
}

// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetUnicodeStringFromMultiByteString()
//
// --------------------------------------------------------------------------------------------
// Returns:    bool
// Parameters: char *  szMultiByteString  (IN) Multi-byte input string
//     wchar_t*  szUnicodeString  (OUT) Unicode outputstring
//     short   nUnicodeBufferSize (IN) Size of Unicode output buffer
//     UINT   nCodePage    (IN) Code page used to perform conversion
//                  Default = -1 (Get local code page).
//
// Purpose:  Gets a Unicode string from a MultiByte string.
// Notes:  None.
// Exceptions: None.
//
bool CStdioFileEx::GetUnicodeStringFromMultiByteString(char * szMultiByteString, wchar_t* szUnicodeString, short nUnicodeBufferSize, UINT nCodePage)
{
	bool  bOK = true;
	int  nReturn = 0;
	CString sErrorMsg;

	if (szUnicodeString && szMultiByteString)
	{
		// If no code page specified, take default for system
		if (nCodePage == -1)
		{
			nCodePage = GetACP();
		}

		try 
		{
			nReturn = MultiByteToWideChar(nCodePage,MB_PRECOMPOSED,szMultiByteString,-1,szUnicodeString,nUnicodeBufferSize);

			if (nReturn == 0)
			{
				bOK = false;
			}
		}
		catch(...)
		{
			bOK = false;
		}
	}
	else
	{
		bOK = false;
	}

	ASSERT(bOK);
	return bOK;
}

// --------------------------------------------------------------------------------------------
//
// CStdioFileEx::GetMultiByteStringFromUnicodeString()
//
// --------------------------------------------------------------------------------------------
// Returns:    BOOL - TRUE when the whole string was converted, FALSE otherwise
// Parameters: wchar_t * szUnicodeString   (IN) Unicode input string (null-terminated)
//     char*   szMultiByteString   (OUT) Multibyte output string (null-terminated on success,
//                   empty when the conversion is rejected)
//     short   nMultiByteBufferSize  (IN) Multibyte buffer size, in bytes
//     UINT   nCodePage     (IN) Code page used to perform conversion
//                   Default = -1 (Get local code page).
//
// Purpose:  Gets a MultiByte string from a Unicode string
// Notes:  - Characters the code page cannot represent become sDEFAULT_UNICODE_FILLER_CHAR.
//     - A buffer size of 0 or less is rejected: WideCharToMultiByte() treats size 0 as a
//       "how large must the buffer be" query, returns non-zero and writes nothing.
//     - For UTF-7/UTF-8 the API accepts neither conversion flags nor a default character,
//       so both are omitted for those code pages.
// Exceptions: None.
//
BOOL CStdioFileEx::GetMultiByteStringFromUnicodeString(wchar_t * szUnicodeString, char* szMultiByteString, 
													   short nMultiByteBufferSize, UINT nCodePage)
{
	BOOL   bGotIt = FALSE;

	if (szUnicodeString && szMultiByteString && nMultiByteBufferSize > 0) 
	{
		// If no code page specified, take default for system
		if (nCodePage == (UINT)-1)
		{
			nCodePage = GetACP();
		}

		BOOL   bUsedDefChar = FALSE;
		DWORD  dwFlags = WC_COMPOSITECHECK | WC_SEPCHARS;
		LPCSTR pszFiller = sDEFAULT_UNICODE_FILLER_CHAR;
		LPBOOL pbUsedDefChar = &bUsedDefChar;

		if (nCodePage == CP_UTF8 || nCodePage == CP_UTF7)
		{
			dwFlags = 0;
			pszFiller = NULL;
			pbUsedDefChar = NULL;
		}

		try 
		{
			const int nBytes = WideCharToMultiByte(nCodePage, dwFlags,
				szUnicodeString, -1, szMultiByteString, nMultiByteBufferSize, pszFiller, pbUsedDefChar);

			// Report a real BOOL rather than the byte count
			bGotIt = (nBytes > 0) ? TRUE : FALSE;

			if (!bGotIt)
			{
				// Never hand back a partially filled, unterminated buffer
				szMultiByteString[0] = '\0';
			}
		}
		catch(...) 
		{
			TRACE(_T("Controlled exception in WideCharToMultiByte!\n"));
		}
	} 

	return bGotIt;
}

 

你可能感兴趣的:(VC修改CStdioFile类, 兼容一行一行读写Unicode的和非Unicode的log)