CMIP6数据统计降尺度思路与流程操作

1. 首先将所有数据裁剪到[28,62]N,[78,132]E:(sde_prepro.py)

# Crop the raw GCM snow-depth field (snd) to the study window
# [28, 62]N x [78, 132]E and write the subset back to disk.
ds = xr.open_dataset(r"F:\SD\All models\snd_LImon_BCC-CSM2-MR_ssp585_r1i1p1f1_gn_201501-210012.nc")
gfdl_sde = ds["snd"].sel(lon=slice(78, 132), lat=slice(28, 62))
gfdl_sde.to_netcdf(r"D:\Data2022\ERA5\LICL\data_processed\snd_BCC-CSM2-MR_ssp585_menggugaoyuan_201501-210012.nc")

2. 历史数据(historical)时间范围裁剪到1981-2014年---->用来做偏差校正:

# Restrict the historical record to the 1981-2014 bias-correction baseline.
gfdl_sde = gfdl_sde.sel(time=slice("1981-01-01","2014-12-31")) 

3. 将所有模式数据重采样到0.1°:(通过linux系统的python cdo完成)

# Dump the ERA5 grid description to a file (used below as the remap target).
!cdo griddes era5_sde_menggugaoyuan.nc > grid
# Inspect the grid of one CMIP6 file.
!cdo griddes snd_BCC-CSM2-MR_ssp126_menggugaoyuan_201501-210012.nc
# Bilinearly remap the CMIP data onto the ERA5 grid.
# NOTE(review): the original comment said "0.25 -> 0.05 deg" but step 3's
# heading says 0.1 deg — confirm the actual target resolution of the ERA5 grid.
!cdo remapbil,grid  snd_NorESM2-MM_historical_menggugaoyuan_1981_2014.nc  snd_NorESM2-MM_historical_menggugaoyuan_1981_2014_resample.nc
# Batch remap: every NorESM2-MM file in the directory.
!for ifile in `ls snd_LImon_NorESM2-MM_*.nc`;do cdo remapbil,grid $ifile resample_$ifile; done

4. 进行偏差校正降尺度:

# Delta-method bias correction.
# Multi-year monthly climatologies (one value per calendar month per cell)
# over the common baseline period.
obs_sde_multimonthlymean = obs_sde.groupby(obs_sde.time.dt.month).mean()
bcc_sde_multimonthlymean = bcc_sde.groupby(bcc_sde.time.dt.month).mean()

# Monthly bias of the model relative to the observations; shape (12, lat, lon)
# after squeezing — TODO confirm the climatology carries no extra singleton dims.
delta_sde = bcc_sde_multimonthlymean - obs_sde_multimonthlymean
delta_sde = delta_sde.values.squeeze()

# Correct each calendar month by subtracting that month's bias.
# (The redundant "[:]" slice on range() was removed; dead commented-out
# resample lines dropped.)
result = []
for month in range(1, 13):
    tmp_sde = bcc_sde.sel(time=bcc_sde.time.dt.month == month)
    result.append(tmp_sde - delta_sde[month - 1])

# Re-assemble the 12 monthly subsets; merge aligns on the union of the
# time coordinates, yielding one continuous series.
bcc_sde_downscaled_final = xr.merge(result)

5.评估降尺度结果精度

###### Accuracy assessment ######

bcc_sde_downscaled_final = bcc_sde_downscaled_final.snd
## Replace NaNs with zeros so the pointwise metrics are computable.
obs_sde = np.nan_to_num(obs_sde)
bcc_sde = np.nan_to_num(bcc_sde)
bcc_sde_downscaled_final = np.nan_to_num(bcc_sde_downscaled_final)
# Downscaled model vs. observations.
r1, p1 = pearsonr(obs_sde.ravel(), bcc_sde_downscaled_final.ravel())
rmse1 = np.sqrt(mean_squared_error(obs_sde.ravel(), bcc_sde_downscaled_final.ravel()))
print(r1, p1, rmse1)
# Raw (uncorrected) model vs. observations, for comparison.
r2, p2 = pearsonr(obs_sde.ravel(), bcc_sde.ravel())
rmse2 = np.sqrt(mean_squared_error(obs_sde.ravel(), bcc_sde.ravel()))
print(r2, p2, rmse2)

6.实现批量

###############################附录######################################
#########批量降尺度##########(delta.py)
##############积雪数据偏差校正(批量版)########################
import xarray as xr
import numpy as np
import os
import glob
from scipy.stats import pearsonr
from sklearn.metrics import r2_score,mean_squared_error
import matplotlib.pyplot as plt
## Observations: ERA5 snow depth, limited to the bias-correction baseline.
obs = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_processed\era5_sde_menggugaoyuan.nc")
obs_sde = obs.sde.sel(time=slice("1981-01-01", "2014-12-31"))
# Flip axis 1 (latitude) — presumably ERA5 stores latitude north-to-south
# while the remapped GCM grid runs south-to-north; TODO confirm.
obs_sde = np.flip(obs_sde, 1)

## Historical GCM run (already remapped onto the ERA5 grid).
gcm_his = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_his\snd_BCC-CSM2-MR_historical_menggugaoyuan_1981_2014_resample.nc")
gcm_his_sde = gcm_his.snd.sel(longitude=slice(80, 130), latitude=slice(30, 60))

## Every future-scenario file for this model.
gcm_future = glob.glob('D:\\Data2022\\ERA5\\LICL\\data_future\\BCC-CSM2-MR\\*')
# for each in glob.glob('D:\\Data2022\\ERA5\\LICL\\data_future\\resample_*'):

# The monthly climatologies and the bias depend only on the observations and
# the historical run — they are loop-invariant, so compute them ONCE instead
# of once per future file (the original recomputed them inside the loop).
obs_clim = obs_sde.groupby(obs_sde.time.dt.month).mean()
gcm_clim = gcm_his_sde.groupby(gcm_his_sde.time.dt.month).mean()
delta_sde = (obs_clim - gcm_clim).values.squeeze()

for fpath in gcm_future:
    # Future scenario field, cropped to the same window as the baseline data.
    gcm_fur_sde = xr.open_dataset(fpath)['snd']
    gcm_fur_sde = gcm_fur_sde.sel(longitude=slice(80, 130), latitude=slice(30, 60))
    # Delta-method correction: add each calendar month's bias term.
    result = []
    for month in range(1, 13):
        tmp_sde = gcm_fur_sde.sel(time=gcm_fur_sde.time.dt.month == month)
        result.append(tmp_sde + delta_sde[month - 1])
    # Merge the 12 monthly subsets back into one dataset.
    gcm_sde_downscaled_final = xr.merge(result)
    # NOTE(review): this produces names like "foo.ncdownscaled" (suffix after
    # the extension); kept byte-identical for compatibility with later steps.
    filename = fpath + 'downscaled'
    gcm_sde_downscaled_final.to_netcdf(filename)

7.将模式数据等权重集合

#####------------------------------------------- Equal-weight ensemble -----------------------------------------
# Average the snd field of every historical model file into one dataset.
pyfiles = glob.glob(r'D:\Data2022\ERA5\LICL\data_his\*.nc')
print(pyfiles)
# Accumulate lazily instead of pre-allocating a hard-coded (408, 301, 501)
# array, and divide by the actual file count instead of a hard-coded 6 —
# the original silently produced wrong means whenever the directory did not
# hold exactly six files.
res = None
for fn in pyfiles:
    da = xr.open_dataset(fn)
    da = da.sel(latitude=slice(30, 60), longitude=slice(80, 130))
    # TODO: leap-year records still need to be removed before averaging.
    snd = da.snd
    res = snd.values if res is None else res + snd.values
res = res / len(pyfiles)
# Wrap the multi-model mean in a Dataset, reusing the coordinates of the
# last file read (all files share one grid after remapping).
data = xr.Dataset({"snd":
                    (('time','latitude','longitude'),res)},
                    coords={
                    "time":snd.time.values,
                    "latitude":snd.latitude.values,
                    "longitude":snd.longitude.values})
data.to_netcdf(r"D:\Data2022\ERA5\LICL\data_his\snd_equal_weighted_historical_menggugaoyuan_1981_2014_resample.nc")

8.裁剪到研究区范围进行评估

############裁剪--评估##########
import xarray as xr
import cmaps
import numpy as np
import geopandas as gpd
from osgeo import gdal
import regionmask
import matplotlib.pyplot as plt

# Open the ensemble-mean GCM field and the ERA5 observations.
gcm_sde = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_his\snd_equal_weighted_historical_menggugaoyuan_1981_2014_resample.nc").snd
obs_sde = xr.open_dataset(r"D:\Data2022\ERA5\LICL\data_processed\era5_sde_menggugaoyuan.nc").sde
# Flip axis 1 (latitude) so the observation grid matches the GCM grid.
obs_sde = np.flip(obs_sde, 1)

# Clip both fields to the Mongolian Plateau shapefile.
neimenggu_shp = r'D:\Data2022\ERA5\LICL\menggugaoyuan_shp\geography_project.shp'
mask_gdf = gpd.read_file(neimenggu_shp)
# Rasterize the polygons onto the GCM grid; cells outside every polygon are NaN.
neimenggu_mask = regionmask.mask_geopandas(mask_gdf, gcm_sde.longitude, gcm_sde.latitude)
inside = ~np.isnan(neimenggu_mask)
gcm_sde_menggugaoyuan = gcm_sde.where(inside)
obs_sde_menggugaoyuan = obs_sde.where(inside)

gcm_sde_menggugaoyuan.to_netcdf(r"D:\Data2022\ERA5\LICL\caijian_data\snd_equal_weighted_historical_menggugaoyuan_1981_2014_resample.nc")
obs_sde_menggugaoyuan.to_netcdf(r"D:\Data2022\ERA5\LICL\caijian_data\snd_ERA5_historical_menggugaoyuan_1981_2014_resample.nc")
# Quick visual sanity check.
gcm_sde_menggugaoyuan.isel(time=1).plot()
plt.show()

9.拼接nc数据

## Stitch the NorESM2-MM files into one time series.
path = 'F:\\Nor585\\'  # directory holding the .nc pieces
# Build the absolute file list (comprehension replaces the manual append loop).
filelist = [os.path.join(path, name) for name in os.listdir(path)]
print(filelist)

# Load every piece's snd variable and concatenate along time.
alldata = [xr.open_dataset(fn)['snd'] for fn in filelist]
da = xr.concat(alldata, dim='time')
print(da)
# sortby('time') replaces the fancy .loc reindex with sorted values — same
# result, and it also guards against the arbitrary ordering of os.listdir().
da_sorted = da.sortby('time')
# Crop to the study window [28, 62]N x [78, 132]E.
da_sorted = da_sorted.sel(lon=slice(78, 132), lat=slice(28, 62))

da_sorted.to_netcdf(r"D:\Data2022\ERA5\LICL\data_processed\snd_LImon_NorESM2-MM_ssp585_menggugaoyuan_201501_210012.nc")

你可能感兴趣的:(算法,python,scikit-learn,matplotlib)