Windows下内存映射

我想用内存映射和直接读取文件分别实现MD5算法。

众所周知,该算法是线性复杂度,文件的读取可能成为效率的瓶颈。

以前曾写过CRT版本的,效率比我在windows下使用的WinMD5略低。

 

内存映射读取MapViewOfFile

第一步先打开文件,推荐使用CreateFile而不是OpenFile。msdn上OpenFile可以查到:

"Note  Only use this function with 16-bit versions of Windows. For newer applications, use the CreateFile function."

 

HANDLE WINAPI CreateFile( __in LPCTSTR lpFileName, __in DWORD dwDesiredAccess, __in DWORD dwShareMode, __in LPSECURITY_ATTRIBUTES lpSecurityAttributes, __in DWORD dwCreationDisposition, __in DWORD dwFlagsAndAttributes, __in HANDLE hTemplateFile );  

我们只需要读文件

hFile = CreateFile(argv[1], GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); 

文件打开失败时会返回 INVALID_HANDLE_VALUE。


第二步是建立文件映射

HANDLE WINAPI CreateFileMapping( __in HANDLE hFile, __in LPSECURITY_ATTRIBUTES lpAttributes, __in DWORD flProtect, __in DWORD dwMaximumSizeHigh, __in DWORD dwMaximumSizeLow, __in LPCTSTR lpName );  

由于不需要通过名字与其他进程共享这个映射对象,这里让 lpName = NULL

hMapping = CreateFileMapping(hFile, 0, PAGE_READONLY, 0, 0, 0); 

失败会返回NULL。

 

第三步把文件的某个片段映射到内存空间

LPVOID WINAPI MapViewOfFile( __in HANDLE hFileMappingObject, __in DWORD dwDesiredAccess, __in DWORD dwFileOffsetHigh, __in DWORD dwFileOffsetLow, __in SIZE_T dwNumberOfBytesToMap );  

对于小文件,直接一次性映射到内存就行了;但对于4G大小的文件,你会发现这一步会映射失败,因为32位进程的用户态地址空间默认最多只有2G。所以我们要分多次映射,每次取的offset必须是系统allocation granularity(分配粒度)的整数倍。

SYSTEM_INFO info; GetSystemInfo(&info); printf("%u\n", info.dwAllocationGranularity); 

我的是笔记本,操作系统Vista下的granularity = 65536。大多数的PC机应该都是这个数值。

令uPos 是偏移量,STEP是每次取出映射的长度

lpBuf = (PBYTE)MapViewOfFile(hMapping, FILE_MAP_READ, uPos >> 32, uPos & 0xffffffff, STEP); 

失败会返回 NULL。

 

获取文件大小,注意如果文件大小超过4G(即超出32位DWORD的表示范围),需要用GetFileSize的第2个参数取回文件大小的高32位

DWORD nFileSizeHigh = 0; UINT64 uSize = GetFileSize(hFile, &nFileSizeHigh); uSize |= (UINT64)nFileSizeHigh << 32; printf("%I64u byte(s)\n", uSize); 

完整代码如下

#include #include #include const UINT CHUNK_SIZE = 64; const UINT REMAIN_SIZE = 56; const UINT STEP = 1073741824; const BYTE padding[] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; void ClearUp(HANDLE hFile = NULL, HANDLE hMapping = NULL, PBYTE lpBuf = NULL) { if (lpBuf != NULL) UnmapViewOfFile(lpBuf); if (hMapping != NULL) CloseHandle(hMapping); if (hFile != INVALID_HANDLE_VALUE) CloseHandle(hFile); } inline UINT F(UINT x, UINT y, UINT z) { return x & y | ~x & z; } inline UINT G(UINT x, UINT y, UINT z) { return x & z | y & ~z; } inline UINT H(UINT x, UINT y, UINT z) { return x ^ y ^ z; } inline UINT I(UINT x, UINT y, UINT z) { return y ^ (x | ~z); } inline void FF(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += F(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } inline void GG(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += G(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } inline void HH(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += H(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } inline void II(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += I(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } UINT g_a = 0x67452301; UINT g_b = 0xefcdab89; UINT g_c = 0x98badcfe; UINT g_d = 0x10325476; void Transform(BYTE ch[]) { UINT a, b, c, d; PUINT x = (PUINT)ch; a = g_a; b = g_b; c = g_c; d = g_d; /* Round 1 */ FF(a, b, c, d, x[0], 7, 0xd76aa478); FF(d, a, b, c, x[1], 12, 0xe8c7b756); FF(c, d, a, b, x[2], 17, 0x242070db); FF(b, c, d, a, x[3], 22, 0xc1bdceee); FF(a, b, c, d, x[4], 7, 0xf57c0faf); FF(d, a, b, c, x[5], 12, 0x4787c62a); FF(c, d, a, b, x[6], 17, 0xa8304613); FF(b, c, d, a, x[7], 22, 0xfd469501); FF(a, b, c, d, x[8], 7, 0x698098d8); FF(d, a, b, c, x[9], 12, 0x8b44f7af); FF(c, d, a, b, x[10], 17, 0xffff5bb1); FF(b, c, d, a, 
x[11], 22, 0x895cd7be); FF(a, b, c, d, x[12], 7, 0x6b901122); FF(d, a, b, c, x[13], 12, 0xfd987193); FF(c, d, a, b, x[14], 17, 0xa679438e); FF(b, c, d, a, x[15], 22, 0x49b40821); /* Round 2 */ GG(a, b, c, d, x[1], 5, 0xf61e2562); GG(d, a, b, c, x[6], 9, 0xc040b340); GG(c, d, a, b, x[11], 14, 0x265e5a51); GG(b, c, d, a, x[0], 20, 0xe9b6c7aa); GG(a, b, c, d, x[5], 5, 0xd62f105d); GG(d, a, b, c, x[10], 9, 0x2441453); GG(c, d, a, b, x[15], 14, 0xd8a1e681); GG(b, c, d, a, x[4], 20, 0xe7d3fbc8); GG(a, b, c, d, x[9], 5, 0x21e1cde6); GG(d, a, b, c, x[14], 9, 0xc33707d6); GG(c, d, a, b, x[3], 14, 0xf4d50d87); GG(b, c, d, a, x[8], 20, 0x455a14ed); GG(a, b, c, d, x[13], 5, 0xa9e3e905); GG(d, a, b, c, x[2], 9, 0xfcefa3f8); GG(c, d, a, b, x[7], 14, 0x676f02d9); GG(b, c, d, a, x[12], 20, 0x8d2a4c8a); /* Round 3 */ HH(a, b, c, d, x[5], 4, 0xfffa3942); HH(d, a, b, c, x[8], 11, 0x8771f681); HH(c, d, a, b, x[11], 16, 0x6d9d6122); HH(b, c, d, a, x[14], 23, 0xfde5380c); HH(a, b, c, d, x[1], 4, 0xa4beea44); HH(d, a, b, c, x[4], 11, 0x4bdecfa9); HH(c, d, a, b, x[7], 16, 0xf6bb4b60); HH(b, c, d, a, x[10], 23, 0xbebfbc70); HH(a, b, c, d, x[13], 4, 0x289b7ec6); HH(d, a, b, c, x[0], 11, 0xeaa127fa); HH(c, d, a, b, x[3], 16, 0xd4ef3085); HH(b, c, d, a, x[6], 23, 0x4881d05); HH(a, b, c, d, x[9], 4, 0xd9d4d039); HH(d, a, b, c, x[12], 11, 0xe6db99e5); HH(c, d, a, b, x[15], 16, 0x1fa27cf8); HH(b, c, d, a, x[2], 23, 0xc4ac5665); /* Round 4 */ II(a, b, c, d, x[0], 6, 0xf4292244); II(d, a, b, c, x[7], 10, 0x432aff97); II(c, d, a, b, x[14], 15, 0xab9423a7); II(b, c, d, a, x[5], 21, 0xfc93a039); II(a, b, c, d, x[12], 6, 0x655b59c3); II(d, a, b, c, x[3], 10, 0x8f0ccc92); II(c, d, a, b, x[10], 15, 0xffeff47d); II(b, c, d, a, x[1], 21, 0x85845dd1); II(a, b, c, d, x[8], 6, 0x6fa87e4f); II(d, a, b, c, x[15], 10, 0xfe2ce6e0); II(c, d, a, b, x[6], 15, 0xa3014314); II(b, c, d, a, x[13], 21, 0x4e0811a1); II(a, b, c, d, x[4], 6, 0xf7537e82); II(d, a, b, c, x[11], 10, 0xbd3af235); II(c, d, a, b, x[2], 15, 
0x2ad7d2bb); II(b, c, d, a, x[9], 21, 0xeb86d391); g_a += a; g_b += b; g_c += c; g_d += d; } int main(int argc, char *argv[]) { // clock_t start, finish; // start = clock(); if (argc < 2) { fprintf(stderr, "MD5 "); return 1; } HANDLE hFile = NULL; HANDLE hMapping = NULL; PBYTE lpBuf = NULL; hFile = CreateFile(argv[1], GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); if (hFile == INVALID_HANDLE_VALUE) { fprintf(stderr, "Could not open file (error %d)/n", GetLastError()); ClearUp(); return 1; } DWORD nFileSizeHigh = 0; UINT64 uSize = GetFileSize(hFile, &nFileSizeHigh); uSize |= (UINT64)nFileSizeHigh << 32; printf("%I64u byte(s)/n", uSize); hMapping = CreateFileMapping(hFile, 0, PAGE_READONLY, 0, 0, 0); if (hMapping == NULL) { fprintf(stderr, "Could not create file mapping object (%d)./n", GetLastError()); ClearUp(hFile); return 1; } UINT64 uPos = 0; for (uPos = 0; uPos + STEP <= uSize; uPos += STEP) { lpBuf = (PBYTE)MapViewOfFile(hMapping, FILE_MAP_READ, uPos >> 32, uPos & 0xffffffff, STEP); if (lpBuf == NULL) { fprintf(stderr, "Could not map view of file (%d)./n", GetLastError()); ClearUp(hFile, hMapping); return 1; } for (UINT i = 0; i + CHUNK_SIZE <= STEP; i += CHUNK_SIZE) Transform(lpBuf + i); UnmapViewOfFile(lpBuf); } lpBuf = (PBYTE)MapViewOfFile(hMapping, FILE_MAP_READ, uPos >> 32, uPos & 0xffffffff, 0); if (lpBuf == NULL) { fprintf(stderr, "Could not map view of file (%d)./n", GetLastError()); ClearUp(hFile, hMapping); return 1; } UINT i = 0; for (i = 0; uPos + CHUNK_SIZE <= uSize; uPos += CHUNK_SIZE, i += CHUNK_SIZE) Transform(lpBuf + i); BYTE lpRemain[CHUNK_SIZE]; UINT uRemain = uSize - uPos; uSize <<= 3; memcpy(lpRemain, lpBuf + i, sizeof(BYTE) * uRemain); if (uRemain <= REMAIN_SIZE) { memcpy(lpRemain + uRemain, padding, sizeof(BYTE) * (REMAIN_SIZE - uRemain)); memcpy(lpRemain + REMAIN_SIZE, &uSize, sizeof(UINT64)); } else { memcpy(lpRemain + uRemain, padding, sizeof(BYTE) * (CHUNK_SIZE - uRemain)); Transform(lpRemain); memset(lpRemain, 0, sizeof(BYTE) * 
REMAIN_SIZE); memcpy(lpRemain + REMAIN_SIZE, &uSize, sizeof(UINT64)); } Transform(lpRemain); memcpy(lpRemain, &g_a, sizeof(UINT)); memcpy(lpRemain + 4, &g_b, sizeof(UINT)); memcpy(lpRemain + 8, &g_c, sizeof(UINT)); memcpy(lpRemain + 12, &g_d, sizeof(UINT)); printf("MD5: "); for (int i = 0; i < 16; ++i) printf("%02x", lpRemain[i]); printf("/n"); ClearUp(hFile, hMapping, lpBuf); // finish = clock(); printf("%.3lf/n", (double)(finish - start) / CLOCKS_PER_SEC); return 0; }  

 

直接文件读取ReadFile

大致上差不多,有个优化速度的地方就是不要每次读取64个byte。我是每次读取1024×64个byte,然后分1024次处理

这样速度上快一些,我原来CRT版本的就是这么做的。

完整代码如下

#include #include #include const UINT CHUNK_SIZE = 64; const UINT REMAIN_SIZE = 56; const BYTE padding[] = { 0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; inline UINT F(UINT x, UINT y, UINT z) { return x & y | ~x & z; } inline UINT G(UINT x, UINT y, UINT z) { return x & z | y & ~z; } inline UINT H(UINT x, UINT y, UINT z) { return x ^ y ^ z; } inline UINT I(UINT x, UINT y, UINT z) { return y ^ (x | ~z); } inline void FF(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += F(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } inline void GG(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += G(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } inline void HH(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += H(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } inline void II(UINT &a, UINT b, UINT c, UINT d, UINT m, int s, UINT t) { a += I(b, c, d) + m + t; a = a << s | a >> (32 - s); a += b; } UINT g_a = 0x67452301; UINT g_b = 0xefcdab89; UINT g_c = 0x98badcfe; UINT g_d = 0x10325476; void Transform(BYTE ch[]) { UINT a, b, c, d; PUINT x = (PUINT)ch; a = g_a; b = g_b; c = g_c; d = g_d; /* Round 1 */ FF(a, b, c, d, x[0], 7, 0xd76aa478); FF(d, a, b, c, x[1], 12, 0xe8c7b756); FF(c, d, a, b, x[2], 17, 0x242070db); FF(b, c, d, a, x[3], 22, 0xc1bdceee); FF(a, b, c, d, x[4], 7, 0xf57c0faf); FF(d, a, b, c, x[5], 12, 0x4787c62a); FF(c, d, a, b, x[6], 17, 0xa8304613); FF(b, c, d, a, x[7], 22, 0xfd469501); FF(a, b, c, d, x[8], 7, 0x698098d8); FF(d, a, b, c, x[9], 12, 0x8b44f7af); FF(c, d, a, b, x[10], 17, 0xffff5bb1); FF(b, c, d, a, x[11], 22, 0x895cd7be); FF(a, b, c, d, x[12], 7, 0x6b901122); FF(d, a, b, c, x[13], 12, 0xfd987193); FF(c, d, a, b, x[14], 17, 0xa679438e); FF(b, c, d, a, x[15], 22, 0x49b40821); /* Round 2 */ GG(a, b, c, d, x[1], 5, 0xf61e2562); GG(d, a, b, c, x[6], 9, 
0xc040b340); GG(c, d, a, b, x[11], 14, 0x265e5a51); GG(b, c, d, a, x[0], 20, 0xe9b6c7aa); GG(a, b, c, d, x[5], 5, 0xd62f105d); GG(d, a, b, c, x[10], 9, 0x2441453); GG(c, d, a, b, x[15], 14, 0xd8a1e681); GG(b, c, d, a, x[4], 20, 0xe7d3fbc8); GG(a, b, c, d, x[9], 5, 0x21e1cde6); GG(d, a, b, c, x[14], 9, 0xc33707d6); GG(c, d, a, b, x[3], 14, 0xf4d50d87); GG(b, c, d, a, x[8], 20, 0x455a14ed); GG(a, b, c, d, x[13], 5, 0xa9e3e905); GG(d, a, b, c, x[2], 9, 0xfcefa3f8); GG(c, d, a, b, x[7], 14, 0x676f02d9); GG(b, c, d, a, x[12], 20, 0x8d2a4c8a); /* Round 3 */ HH(a, b, c, d, x[5], 4, 0xfffa3942); HH(d, a, b, c, x[8], 11, 0x8771f681); HH(c, d, a, b, x[11], 16, 0x6d9d6122); HH(b, c, d, a, x[14], 23, 0xfde5380c); HH(a, b, c, d, x[1], 4, 0xa4beea44); HH(d, a, b, c, x[4], 11, 0x4bdecfa9); HH(c, d, a, b, x[7], 16, 0xf6bb4b60); HH(b, c, d, a, x[10], 23, 0xbebfbc70); HH(a, b, c, d, x[13], 4, 0x289b7ec6); HH(d, a, b, c, x[0], 11, 0xeaa127fa); HH(c, d, a, b, x[3], 16, 0xd4ef3085); HH(b, c, d, a, x[6], 23, 0x4881d05); HH(a, b, c, d, x[9], 4, 0xd9d4d039); HH(d, a, b, c, x[12], 11, 0xe6db99e5); HH(c, d, a, b, x[15], 16, 0x1fa27cf8); HH(b, c, d, a, x[2], 23, 0xc4ac5665); /* Round 4 */ II(a, b, c, d, x[0], 6, 0xf4292244); II(d, a, b, c, x[7], 10, 0x432aff97); II(c, d, a, b, x[14], 15, 0xab9423a7); II(b, c, d, a, x[5], 21, 0xfc93a039); II(a, b, c, d, x[12], 6, 0x655b59c3); II(d, a, b, c, x[3], 10, 0x8f0ccc92); II(c, d, a, b, x[10], 15, 0xffeff47d); II(b, c, d, a, x[1], 21, 0x85845dd1); II(a, b, c, d, x[8], 6, 0x6fa87e4f); II(d, a, b, c, x[15], 10, 0xfe2ce6e0); II(c, d, a, b, x[6], 15, 0xa3014314); II(b, c, d, a, x[13], 21, 0x4e0811a1); II(a, b, c, d, x[4], 6, 0xf7537e82); II(d, a, b, c, x[11], 10, 0xbd3af235); II(c, d, a, b, x[2], 15, 0x2ad7d2bb); II(b, c, d, a, x[9], 21, 0xeb86d391); g_a += a; g_b += b; g_c += c; g_d += d; } const UINT CHUNK_NUM = 1024; BYTE lpBuf[CHUNK_SIZE * CHUNK_NUM]; int main(int argc, char *argv[]) { // clock_t start, finish; // start = clock(); if (argc < 2) { 
fprintf(stderr, "MD5 "); return 1; } HANDLE hFile = NULL; hFile = CreateFile(argv[1], GENERIC_READ, 0, 0, OPEN_EXISTING, 0, 0); if (hFile == INVALID_HANDLE_VALUE) { fprintf(stderr, "Could not open file (error %d)/n", GetLastError()); return 1; } DWORD nFileSizeHigh; UINT64 uSize = GetFileSize(hFile, &nFileSizeHigh); uSize |= (UINT64)nFileSizeHigh << 32; printf("%I64u byte(s)/n", uSize); UINT64 uPos; DWORD nBytesRead; UINT uStep = CHUNK_SIZE * CHUNK_NUM; for (uPos = 0; uPos + uStep <= uSize; uPos += uStep) { ReadFile(hFile, lpBuf, uStep, &nBytesRead, 0); for (UINT i = 0; i < uStep; i += CHUNK_SIZE) Transform(lpBuf + i); } for (; uPos + CHUNK_SIZE <= uSize; uPos += CHUNK_SIZE) { ReadFile(hFile, lpBuf, CHUNK_SIZE, &nBytesRead, 0); Transform(lpBuf); } UINT uRemain = uSize - uPos; uSize <<= 3; ReadFile(hFile, lpBuf, uRemain, &nBytesRead, 0); if (uRemain <= REMAIN_SIZE) { memcpy(lpBuf + uRemain, padding, sizeof(BYTE) * (REMAIN_SIZE - uRemain)); memcpy(lpBuf + REMAIN_SIZE, &uSize, sizeof(UINT64)); } else { memcpy(lpBuf + uRemain, padding, sizeof(BYTE) * (CHUNK_SIZE - uRemain)); Transform(lpBuf); memset(lpBuf, 0, sizeof(BYTE) * REMAIN_SIZE); memcpy(lpBuf + REMAIN_SIZE, &uSize, sizeof(UINT64)); } Transform(lpBuf); memcpy(lpBuf, &g_a, sizeof(UINT)); memcpy(lpBuf + 4, &g_b, sizeof(UINT)); memcpy(lpBuf + 8, &g_c, sizeof(UINT)); memcpy(lpBuf + 12, &g_d, sizeof(UINT)); printf("MD5: "); for (int i = 0; i < 16; ++i) printf("%02x", lpBuf[i]); printf("/n"); CloseHandle(hFile); // finish = clock(); printf("%.3lf/n", (double)(finish - start) / CLOCKS_PER_SEC); return 0; }  

 

两种方法的速度比较

都不是很理想,对一个4G的大文件直接读取速度要快一些,大约260s。

500M左右的文件也是后者快一点点。没有感觉到内存映射的优势:(

内存映射有个小地方要注意的是:如果文件是0 byte的话,CreateFileMapping这步会失败,需要另外处理。

不过实际中不用考虑这个。

你可能感兴趣的:(Windows下内存映射)