1. 首先将所有数据裁剪到[28,62]N,[78,132]E:(sde_prepro.py)
# Clip the BCC-CSM2-MR SSP5-8.5 snow-depth (snd) run to the study window
# (28-62 N, 78-132 E) and save the subset for later processing.
gfdl = xr.open_dataset(r"F:\SD\All models\snd_LImon_BCC-CSM2-MR_ssp585_r1i1p1f1_gn_201501-210012.nc")
gfdl_sde = gfdl.snd.sel(lon=slice(78, 132), lat=slice(28, 62))
gfdl_sde.to_netcdf(r"D:\Data2022\ERA5\LICL\data_processed\snd_BCC-CSM2-MR_ssp585_menggugaoyuan_201501-210012.nc")
2. 历史数据(historical)时间范围裁剪到1981-2014年---->用来做偏差校正:
# Restrict the historical run to 1981-2014, the calibration period used for bias correction.
gfdl_sde = gfdl_sde.sel(time=slice("1981-01-01","2014-12-31"))
3. 将所有模式数据重采样到0.1°:(通过linux系统的python cdo完成)
# Write a CDO grid description of the ERA5 reference file to "grid"
!cdo griddes era5_sde_menggugaoyuan.nc > grid
# Inspect the grid of a CMIP model file
!cdo griddes snd_BCC-CSM2-MR_ssp126_menggugaoyuan_201501-210012.nc
# Bilinearly remap one CMIP file onto the ERA5 grid.
# NOTE(review): original comment said "from 0.25 to 0.05", but the step-3
# heading says 0.1 deg -- the stated resolutions are inconsistent; confirm.
!cdo remapbil,grid snd_NorESM2-MM_historical_menggugaoyuan_1981_2014.nc snd_NorESM2-MM_historical_menggugaoyuan_1981_2014_resample.nc
# Batch remap: every NorESM2-MM file gets a "resample_" counterpart
!for ifile in `ls snd_LImon_NorESM2-MM_*.nc`;do cdo remapbil,grid $ifile resample_$ifile; done
4. 进行偏差校正降尺度:
# Multi-year monthly-mean snow depth (climatology) for obs and model.
# (The original comment said "precipitation"; the variable is snow depth.)
obs_sde_multimonthlymean = obs_sde.groupby(obs_sde.time.dt.month).mean()
bcc_sde_multimonthlymean = bcc_sde.groupby(bcc_sde.time.dt.month).mean()
# Per-calendar-month additive bias: model climatology minus observed climatology.
delta_sde = bcc_sde_multimonthlymean - obs_sde_multimonthlymean
delta_sde = delta_sde.values.squeeze()  # indexed [month-1, ...] below
# Delta-method bias correction: subtract each month's bias from every
# model field of that month.
result = []
for i in range(1, 13):  # fixed: dropped the redundant "[:]" slice on range
    tmp_sde = bcc_sde.sel(time=bcc_sde.time.dt.month == i)
    bcc_sde_downscaled = tmp_sde - delta_sde[i - 1]
    result.append(bcc_sde_downscaled)
# Recombine the 12 per-month results into one dataset.
bcc_sde_downscaled_final = xr.merge(result)
5.评估降尺度结果精度
######精度评估######
def _report_accuracy(y_true, y_pred):
    """Print and return Pearson r, its p-value, and RMSE of two flattened fields."""
    r, p = pearsonr(y_true.ravel(), y_pred.ravel())
    rmse = np.sqrt(mean_squared_error(y_true.ravel(), y_pred.ravel()))
    print(r, p, rmse)
    return r, p, rmse

###### Accuracy assessment ######
bcc_sde_downscaled_final = bcc_sde_downscaled_final.snd
# NOTE(review): nan_to_num turns NaN (masked/ocean cells) into 0, i.e. they are
# scored as "zero snow depth" -- this can inflate agreement; confirm intended.
obs_sde = np.nan_to_num(obs_sde)
bcc_sde = np.nan_to_num(bcc_sde)
bcc_sde_downscaled_final = np.nan_to_num(bcc_sde_downscaled_final)
r1, p1, rmse1 = _report_accuracy(obs_sde, bcc_sde_downscaled_final)  # downscaled vs obs
r2, p2, rmse2 = _report_accuracy(obs_sde, bcc_sde)                   # raw model vs obs
6.实现批量
###############################附录######################################
#########批量降尺度##########(delta.py)
##############积雪数据偏差校正(批量版)########################
import xarray as xr
import numpy as np
import os
import glob
from scipy.stats import pearsonr
from sklearn.metrics import r2_score,mean_squared_error
import matplotlib.pyplot as plt
## Read the observation data (ERA5 snow depth), 1981-2014 calibration period
obs = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_processed\era5_sde_menggugaoyuan.nc")
obs_sde = obs.sde
obs_sde = obs_sde.sel(time=slice("1981-01-01", "2014-12-31"))
# Flip axis 1 (latitude); presumably ERA5 latitude runs north-to-south while
# the model grids run south-to-north -- TODO confirm against the files.
obs_sde = np.flip(obs_sde, 1)
## Read the historical model run and clip to the study window
gcm_his = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_his\snd_BCC-CSM2-MR_historical_menggugaoyuan_1981_2014_resample.nc")
gcm_his_sde = gcm_his.snd.sel(longitude=slice(80, 130), latitude=slice(30, 60))
# Multi-year monthly-mean snow depth (climatology) for obs and model.
# Hoisted out of the per-file loop: it does not depend on the future file.
obs_sde_multimonthlymean = obs_sde.groupby(obs_sde.time.dt.month).mean()
gcm_sde_multimonthlymean = gcm_his_sde.groupby(gcm_his_sde.time.dt.month).mean()
# Per-calendar-month additive bias: observed minus model climatology.
delta_sde = obs_sde_multimonthlymean - gcm_sde_multimonthlymean
delta_sde = delta_sde.values.squeeze()
## Downscale every future-scenario file of this model
for fur_path in glob.glob('D:\\Data2022\\ERA5\\LICL\\data_future\\BCC-CSM2-MR\\*'):
    gcm_fur_sde = xr.open_dataset(fur_path)['snd']
    gcm_fur_sde = gcm_fur_sde.sel(longitude=slice(80, 130), latitude=slice(30, 60))
    # Delta-method bias correction: add each month's bias to that month's fields.
    result = []
    for j in range(1, 13):
        tmp_sde = gcm_fur_sde.sel(time=gcm_fur_sde.time.dt.month == j)
        result.append(tmp_sde + delta_sde[j - 1])
    # Recombine the 12 per-month results and save.
    gcm_sde_downscaled_final = xr.merge(result)
    # BUG FIX: the original wrote "<name>.ncdownscaled" (path + 'downscaled'),
    # losing the .nc extension; keep it by splicing before the suffix.
    root, ext = os.path.splitext(fur_path)
    gcm_sde_downscaled_final.to_netcdf(root + '_downscaled' + ext)
7.将模式数据等权重集合
#####------------------------------------------- 等权重集合 -----------------------------------------
# Equal-weight ensemble mean of the historical model files.
pyfiles = glob.glob(r'D:\Data2022\ERA5\LICL\data_his\*.nc')
print(pyfiles)
# Accumulate the snd fields; start from the first file's array instead of a
# hard-coded np.zeros((408,301,501)) so the shape follows the data.
res = None
for fname in pyfiles:
    da = xr.open_dataset(fname)
    da = da.sel(latitude=slice(30, 60), longitude=slice(80, 130))
    # TODO (original note): leap-year days still need to be removed
    snd = da.snd
    res = snd.values if res is None else res + snd.values
# Generalized: divide by the actual file count, not a hard-coded 6, so the
# mean stays correct if the glob matches a different number of models.
res = res / len(pyfiles)
# Wrap the ensemble-mean snd into a Dataset on the last file's coordinates
data = xr.Dataset({"snd":
(('time','latitude','longitude'),res)},
coords={
"time":snd.time.values,
"latitude":snd.latitude.values,
"longitude":snd.longitude.values})
data.to_netcdf(r"D:\Data2022\ERA5\LICL\data_his\snd_equal_weighted_historical_menggugaoyuan_1981_2014_resample.nc")
8.裁剪到研究区范围进行评估
############裁剪--评估##########
import xarray as xr
import cmaps
import numpy as np
import geopandas as gpd
from osgeo import gdal
import regionmask
import matplotlib.pyplot as plt
# Open the ensemble-mean and observed snow-depth rasters
gcm_data = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_his\snd_equal_weighted_historical_menggugaoyuan_1981_2014_resample.nc")
obs_data = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_processed\era5_sde_menggugaoyuan.nc")
gcm_sde = gcm_data.snd
# Flip the observation's latitude axis to match the model grid orientation
obs_sde = np.flip(obs_data.sde, 1)
# Clip both fields with the Mongolian Plateau shapefile
neimenggu_shp = r'D:\Data2022\ERA5\LICL\menggugaoyuan_shp\geography_project.shp'  # study-area vector file
mask_gdf = gpd.read_file(neimenggu_shp)
# Rasterize the polygons onto the model grid; cells outside are NaN
neimenggu_mask = regionmask.mask_geopandas(mask_gdf, gcm_sde.longitude, gcm_sde.latitude)
inside_region = ~np.isnan(neimenggu_mask)
gcm_sde_menggugaoyuan = gcm_sde.where(inside_region)
obs_sde_menggugaoyuan = obs_sde.where(inside_region)
gcm_sde_menggugaoyuan.to_netcdf(r"D:\Data2022\ERA5\LICL\caijian_data\snd_equal_weighted_historical_menggugaoyuan_1981_2014_resample.nc")
obs_sde_menggugaoyuan.to_netcdf(r"D:\Data2022\ERA5\LICL\caijian_data\snd_ERA5_historical_menggugaoyuan_1981_2014_resample.nc")
# Quick visual sanity check of the clipped field
gcm_sde_menggugaoyuan.isel(time=1).plot()
plt.show()
9.拼接nc数据
## 拼接Nor模式数据
## Concatenate the NorESM2-MM monthly files into one series along time
path = 'F:\\Nor585\\' # directory holding the nc files
dirs = os.listdir(path)
print(dirs)
# Full paths of every file in the directory
filelist = [os.path.join(path, name) for name in dirs]
print(filelist)
# Open the snd variable of each file
alldata = [xr.open_dataset(fn)['snd'] for fn in filelist]
print(alldata)
da = xr.concat(alldata, dim='time')
print(da)
# sortby is the idiomatic replacement for the .loc-with-sorted() trick
da_sorted = da.sortby('time')
# Clip to the study window and save
da_sorted = da_sorted.sel(lon=slice(78, 132), lat=slice(28, 62))
da_sorted.to_netcdf(r"D:\Data2022\ERA5\LICL\data_processed\snd_LImon_NorESM2-MM_ssp585_menggugaoyuan_201501_210012.nc")