处理大量 .nc 文件时,瓶颈卡在读和写上;数据全部放在机械硬盘上,只能考虑从代码入手优化。netCDF4 的 Dataset 构造函数有一个 memory 参数,默认为 None:如果没有设置 memory,访问数据时从硬盘读取;如果设置了(传入包含整个文件内容的内存缓冲区),则直接从内存读取。
import netCDF4 as nc # netcdf4 1.5.7
from time import perf_counter
# Baseline: do NOT preload the file; netCDF4 reads each variable from disk
# on demand. time1 accumulates the total wall-clock seconds over 10 runs.
time1 = 0
for i in range(10):
    start_time = perf_counter()
    # The with-statement closes the dataset even if reading a variable raises.
    with nc.Dataset(r'H:\test.nc') as dataset:
        # Iterating the variables mapping yields the variable names.
        for key in dataset.variables:
            # [:] forces the whole variable to be read into an array.
            data = dataset[key][:]
    end_time = perf_counter()
    time1 += end_time - start_time
# Preloaded: read the whole file into memory in one pass, then open the
# dataset from that buffer via the `memory` argument.
# time2 accumulates the total wall-clock seconds over 10 runs.
time2 = 0
for i in range(10):
    start_time = perf_counter()
    # One sequential read of the raw bytes; the handle is closed as soon as
    # the buffer has been filled.
    with open(r'H:\test.nc', 'rb') as file:
        memory = file.read()
    # The with-statement closes the dataset even if reading a variable raises.
    with nc.Dataset(r'H:\test.nc', mode='r', memory=memory) as dataset:
        # Iterating the variables mapping yields the variable names.
        for key in dataset.variables:
            # [:] forces the whole variable to be materialized as an array.
            data = dataset[key][:]
    # Drop the reference to the raw bytes early if they are no longer needed.
    memory = None
    end_time = perf_counter()
    time2 += end_time - start_time