How to write a large buffer into a binary file in C++, fast?
1GBps
// ConsoleApplication1.cpp : This file contains the 'main' function. Program execution begins and ends there.
//
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdint>
#include <cstdio>
#include <fstream>
#include <iostream>
#include <numeric>
#include <random>
#include <vector>
// Builds a buffer of `bytes` bytes filled with the values 0..N-1 (N = bytes / 8)
// in random order, used as the payload for the write benchmarks.
//
// bytes: requested buffer size in bytes; must be a multiple of sizeof(uint64_t)
//        (checked with assert, so only in debug builds).
// Returns the shuffled vector; it is empty when bytes == 0.
std::vector<uint64_t> GenerateData(std::size_t bytes)
{
    assert(bytes % sizeof(uint64_t) == 0);
    std::vector<uint64_t> data(bytes / sizeof(uint64_t));
    // Seed iota with a uint64_t: with a plain `0` the running counter is an int,
    // which overflows (undefined behaviour) once the buffer holds more than
    // INT_MAX elements.
    std::iota(data.begin(), data.end(), uint64_t{0});
    // A freshly seeded engine per call; the order differs from run to run.
    std::shuffle(data.begin(), data.end(), std::mt19937{ std::random_device{}() });
    return data;
}
// Benchmark variant 1: write the payload with a single std::fstream::write call.
//
// The payload is generated before the clock starts, so the measurement covers
// opening D:\file.binary, writing `bytes` bytes and closing the stream.
//
// bytes: payload size in bytes (forwarded to GenerateData).
// Returns the elapsed time in milliseconds, or -1 if the file could not be
// opened or the write/close reported an error (a failed write would otherwise
// show up as a misleadingly fast result).
long long option_1(std::size_t bytes)
{
    std::vector<uint64_t> data = GenerateData(bytes);

    // steady_clock is monotonic; high_resolution_clock may alias the wall clock.
    const auto startTime = std::chrono::steady_clock::now();
    std::fstream myfile("D:\\file.binary", std::ios::out | std::ios::binary);
    if (!myfile.is_open())
        return -1;
    // data() is valid even for an empty vector, unlike &data[0].
    myfile.write(reinterpret_cast<const char*>(data.data()), static_cast<std::streamsize>(bytes));
    myfile.close();  // flushes the stream buffer; sets failbit on error
    const auto endTime = std::chrono::steady_clock::now();

    if (!myfile)
        return -1;
    return std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
}
// Benchmark variant 2: write the payload with C stdio (fopen/fwrite/fclose).
//
// The payload is generated before the clock starts, so the measurement covers
// opening D:\file.binary, writing `bytes` bytes and closing the file.
//
// bytes: payload size in bytes (forwarded to GenerateData).
// Returns the elapsed time in milliseconds, or -1 if the file could not be
// opened, fewer than `bytes` bytes were written, or fclose failed.
long long option_2(std::size_t bytes)
{
    std::vector<uint64_t> data = GenerateData(bytes);

    // steady_clock is monotonic; high_resolution_clock may alias the wall clock.
    const auto startTime = std::chrono::steady_clock::now();
    FILE* file = fopen("D:\\file.binary", "wb");
    if (file == nullptr)
        return -1;  // passing a null FILE* to fwrite/fclose is undefined behaviour
    // data() is valid even for an empty vector, unlike &data[0].
    const std::size_t written = fwrite(data.data(), 1, bytes, file);
    const bool closedOk = (fclose(file) == 0);  // always close, even after a short write
    const auto endTime = std::chrono::steady_clock::now();

    if (written != bytes || !closedOk)
        return -1;
    return std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
}
// Benchmark variant 3: same std::fstream write as variant 1, but with
// std::ios_base::sync_with_stdio(false) called first.
//
// The payload is generated and the sync flag is changed before the clock
// starts, so the measurement covers opening D:\file.binary, writing `bytes`
// bytes and closing the stream.
//
// bytes: payload size in bytes (forwarded to GenerateData).
// Returns the elapsed time in milliseconds, or -1 if the file could not be
// opened or the write/close reported an error.
long long option_3(std::size_t bytes)
{
    std::vector<uint64_t> data = GenerateData(bytes);

    // This flag governs the standard streams (std::cout etc.), not the file
    // stream below; presumably it is here to check whether it makes any
    // difference to file throughput.
    // NOTE(review): calling it after output has already been written to
    // std::cout is implementation-defined.
    std::ios_base::sync_with_stdio(false);

    // steady_clock is monotonic; high_resolution_clock may alias the wall clock.
    const auto startTime = std::chrono::steady_clock::now();
    std::fstream myfile("D:\\file.binary", std::ios::out | std::ios::binary);
    if (!myfile.is_open())
        return -1;
    // data() is valid even for an empty vector, unlike &data[0].
    myfile.write(reinterpret_cast<const char*>(data.data()), static_cast<std::streamsize>(bytes));
    myfile.close();  // flushes the stream buffer; sets failbit on error
    const auto endTime = std::chrono::steady_clock::now();

    if (!myfile)
        return -1;
    return std::chrono::duration_cast<std::chrono::milliseconds>(endTime - startTime).count();
}
// Benchmark driver: runs each write variant over payload sizes that double
// from 1 MB up to and including 4 GB, printing one timing line per run.
// All sizes of one variant are run before moving on to the next variant.
int main()
{
    const std::size_t kB = 1024;
    const std::size_t MB = 1024 * kB;
    const std::size_t GB = 1024 * MB;

    // Label printed in front of each result, paired with the variant to run.
    struct Variant
    {
        const char* label;
        long long (*run)(std::size_t);
    };
    const Variant variants[] = {
        { "option1, ", option_1 },
        { "option2, ", option_2 },
        { "option3, ", option_3 },
    };

    for (const Variant& variant : variants)
    {
        for (std::size_t size = 1 * MB; size <= 4 * GB; size *= 2)
        {
            std::cout << variant.label << size / MB << "MB: " << variant.run(size) << "ms" << std::endl;
        }
    }
    return 0;
}
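#include <windows.h>
#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <numeric>
#include <random>
#include <thread>
#include <vector>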
// Produce the benchmark payload: `bytes` bytes of ints holding 0..N-1
// (N = bytes / sizeof(int)) in a randomly shuffled order.
// `bytes` must be a multiple of sizeof(int); this is only asserted.
std::vector<int> GenerateData(std::size_t bytes) {
    assert(bytes % sizeof(int) == 0);

    const std::size_t count = bytes / sizeof(int);
    std::vector<int> values(count);
    std::iota(values.begin(), values.end(), 0);

    // Seed a fresh Mersenne Twister from the system entropy source on every call.
    std::random_device entropy;
    std::mt19937 engine(entropy());
    std::shuffle(values.begin(), values.end(), engine);

    return values;
}
// Memory mapping
//
// Copies `size` bytes from `data` into the file D:\data_<id>.bin through a
// memory-mapped view of that file.
//
// data: source buffer; at least `size` bytes are read from it.
// size: number of bytes to write.
// id:   number used to build the file name and the mapping-object name.
//
// Returns 0 on success, -1 if the file cannot be opened/created, -2 if the
// file mapping cannot be created, -3 if the view cannot be mapped, and -4 if
// `size` is negative.
//
// NOTE(review): the file is opened with OPEN_ALWAYS, so an existing, larger
// file is not truncated. The view is unmapped without FlushViewOfFile /
// FlushFileBuffers, so returning does not by itself show the data has reached
// the disk - confirm this is what the benchmark intends to measure.
int map_write(int* data, int size, int id) {
    if (size < 0) {
        return -4;  // would otherwise wrap to a huge DWORD below
    }

    // Stack buffers: the original malloc'ed these and never freed them.
    char name[100];
    sprintf_s(name, sizeof(name), "D:\\data_%d.bin", id);
    // The explicit ANSI ("A") entry points take char* regardless of whether
    // UNICODE is defined for the build.
    HANDLE hFile = CreateFileA(name, GENERIC_READ | GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, NULL);
    if (hFile == INVALID_HANDLE_VALUE) {
        return -1;
    }
    // Yields the remainder of this thread's time slice; kept from the original
    // so the timing behaviour is unchanged (its purpose is not evident here).
    Sleep(0);
    DWORD dwFileSize = static_cast<DWORD>(size);
    char rname[100];
    sprintf_s(rname, sizeof(rname), "data_%d.bin", id);
    // Creating the mapping with a maximum size extends the file to that size.
    HANDLE hFileMap = CreateFileMappingA(hFile, NULL, PAGE_READWRITE, 0, dwFileSize, rname);
    if (hFileMap == NULL) {
        CloseHandle(hFile);
        return -2;
    }
    // Length 0 maps from the given offset to the end of the mapping object.
    PVOID pvFile = MapViewOfFile(hFileMap, FILE_MAP_WRITE, 0, 0, 0);
    if (pvFile == NULL) {
        CloseHandle(hFileMap);
        CloseHandle(hFile);
        return -3;
    }
    memcpy(pvFile, data, dwFileSize);  // the actual "write": copy into the mapped view
    UnmapViewOfFile(pvFile);
    CloseHandle(hFileMap);
    CloseHandle(hFile);
    return 0;
}
// Multi-thread memory mapping
//
// Splits the buffer into four consecutive chunks and writes each chunk to its
// own file (ids 10..13) on its own thread via map_write, then waits for all
// four threads to finish.
//
// data: start of the buffer to write.
// size: total number of bytes in the buffer; a negative value is ignored.
//
// The first three chunks hold size / sizeof(int) / 4 ints each; the last chunk
// also takes whatever is left over, so no trailing bytes are dropped when
// `size` is not a multiple of 4 * sizeof(int).
// The return codes of map_write are discarded, so failures are not reported.
void Mem2SSD_write(int* data, int size) {
    const int kThreads = 4;
    if (size < 0) {
        return;
    }

    // Whole ints per chunk, and the same expressed in bytes.
    const int part = size / static_cast<int>(sizeof(int)) / kThreads;
    const int partBytes = part * static_cast<int>(sizeof(int));

    std::thread workers[kThreads];
    for (int i = 0; i < kThreads; ++i) {
        const bool isLast = (i == kThreads - 1);
        const int chunkBytes = isLast ? size - partBytes * (kThreads - 1) : partBytes;
        workers[i] = std::thread(map_write, data + part * i, chunkBytes, 10 + i);
    }
    for (std::thread& worker : workers) {
        worker.join();
    }
}
// Test driver: ten rounds, each generating a fresh 1 GB payload and timing
// how long Mem2SSD_write takes to write it. Payload generation is outside
// the timed section. Waits for a key press before exiting.
int main() {
    const std::size_t kB = 1024;
    const std::size_t MB = 1024 * kB;
    const std::size_t GB = 1024 * MB;

    using Clock = std::chrono::high_resolution_clock;

    int round = 0;
    while (round < 10) {
        std::vector<int> payload = GenerateData(1 * GB);

        const Clock::time_point begin = Clock::now();
        Mem2SSD_write(payload.data(), 1 * GB);
        const Clock::time_point finish = Clock::now();

        const auto elapsedMs =
            std::chrono::duration_cast<std::chrono::milliseconds>(finish - begin).count();
        std::cout << "1G writing cost: " << elapsedMs << " ms" << std::endl;

        ++round;
    }

    system("pause");
    return 0;
}