参考:http://xiang201314.blog.51cto.com/1294467/642311
基本需求是:存在以文本方式存储的海量点坐标数据,需要快速读取到内存中。在测试1000万行(x y z)数据时,以fgets()方式逐行读取并解析大概需要一分钟,而以内存映射(MMap)方式大概只需要20秒。代码如下:
/*创建文件对象*/
HANDLE hFile = ::CreateFile(L"D:\\algorithm\\triangle\\triangle\\1_12.xyz",GENERIC_READ,0,NULL,
OPEN_ALWAYS,FILE_ATTRIBUTE_NORMAL,
NULL);
if (hFile == INVALID_HANDLE_VALUE)
{
printf("failed to create the file, error code is:%d ", GetLastError());
return false;
}
/*得到系统分配粒度*/
SYSTEM_INFO SysInfo;
GetSystemInfo(&SysInfo);
DWORD dwGran = SysInfo.dwAllocationGranularity;
/*得到文件尺寸*/
DWORD dwFileSizeHigh;
__int64 qwFileSize = GetFileSize(hFile, &dwFileSizeHigh);
qwFileSize |= (((__int64)dwFileSizeHigh) << 32);
/*创建文件映射对象*/
HANDLE hFileMapping = CreateFileMapping(hFile,NULL,PAGE_WRITECOPY,0, 0,NULL);
/*关闭文件对象*/
CloseHandle(hFile);
if (hFileMapping == NULL)
{
printf("failed to create the mapping file, error code is :%d ", GetLastError());
return false;
}
/*一次全部映射,等同于0*/
const char *lpbMapAddress = (const char *)MapViewOfFile(hFileMapping,FILE_MAP_COPY, 0, 0,qwFileSize);
if (lpbMapAddress == NULL)
{
printf("映射文件映射失败,错误代码:%d ", GetLastError());
return false;
}
/*开始解析,先按换行分割,再按空格分割。如果用strtok实现发现运行到一定数据量事出错,还没弄明白*/
int lp = 0;
int cp = 0;
int sp1 = -1;
int sp2 = -1;
char xstr[30] = {0};
char ystr[30] = {0};
char zstr[30] = {0};
REAL x,y,z;
int i = 0;
/*while(lpbMapAddress[i])*/
for (;i
{
if (lpbMapAddress[i] == ' ')
{
if ( sp1 == -1)
{
sp1 = i;
strncpy(xstr,&lpbMapAddress[lp],sp1 - lp);
x = atof(xstr);
//i++;
continue;
}
if (sp2 == -1)
{
sp2 = i;
strncpy(ystr,&lpbMapAddress[sp1+1],sp2 - sp1);
y = atof(ystr);
//i++;
continue;
}
}
if (lpbMapAddress[i] == '\n')
{
cp = i;
strncpy(zstr,&lpbMapAddress[sp2+1],cp - sp2 - 1);
z = atof(zstr);
m_xmin = min(m_xmin,x);
m_ymin = min(m_ymin,y);
m_zmin = min(m_zmin,z);
m_xmax = max(m_xmax,x);
m_ymax = max(m_ymax,y);
m_zmax = max(m_zmax,z);
appendVx2IO(x,y,z);
lp = cp+1;
sp1 = -1;
sp2 = -1;
}
//i++;
}
io.numberofpoints = m_coords_number;
UnmapViewOfFile(lpbMapAddress);
CloseHandle(hFileMapping);
备注:
1:如果分块逐次映射,可能会把一行数据切断在两次映射之间,这就需要对每块首尾的行进行检查,并设定两次映射之间的重叠宽度。为简便起见,这里暂时一次性映射整个文件。
2:编码是影响文件读取的另外一个问题。发现在定义了UNICODE的工程中,向CreateFile传入窄字符(char)路径时会出错;即使对路径做强制类型转换后CreateFile调用成功,CreateFileMapping也会返回NULL。观察发现,宏定义:
#ifdef UNICODE
#define CreateFile CreateFileW
#else
#define CreateFile CreateFileA
#endif // !UNICODE
可见,在定义了UNICODE时,CreateFile实际为CreateFileW,按宽字符(wchar_t)方式处理路径字符串;若路径是ASCII(窄字符)字符串,直接调用CreateFileA即可,相应地文件映射调用CreateFileMappingA。