import pandas as pd
import numpy as np
import netCDF4 as nc
#站点信息
stations_info = pd.read_excel('china_weather_stations.xlsx', 'Sheet1',
index_col=None, na_values=['NA'])
lons = stations_info['经度'].to_numpy()
lats = stations_info['纬度'].to_numpy()
#ERA5数据
dataset = nc.Dataset("ERA5_single_level_2020_1-4-7-10.nc")
# 经纬度
longitude = dataset.variables['longitude'][:].data
latitude = dataset.variables['latitude'][:].data
# 2m温度
t2m = dataset.variables['t2m'][2].data # 2代表第二个数据,表示7月份的数据
我们使用的 ERA5 纬度从大到小,不符合使用习惯。需要这一步处理
latitude_old = latitude.copy()
t2m_old = t2m.copy()
nlats = len(latitude)
for i in range(nlats):
latitude[i] = latitude_old[nlats-1-i]
print(nlats-1-i, latitude_old[nlats-1-i])
t2m[i,:] = t2m_old[nlats-1-i,:]
将只在格点范围内的站点筛选出来,不在格点范围内的站点我们不进行插值;
此情况适用条件:站点数据范围比格点的数据范围大
# ERA5数据经纬度范围
lonMin = np.min(longitude)
lonMax = np.max(longitude)
latMin = np.min(latitude)
latMax = np.max(latitude)
#筛选范围内的气象站点
stations_info = stations_info[stations_info["经度"] >= lonMin]
stations_info = stations_info[stations_info["经度"] <= lonMax]
stations_info = stations_info[stations_info["纬度"] >= latMin]
stations_info = stations_info[stations_info["纬度"] <= latMax]
lonSta = stations_info['经度'].to_numpy()
latSta = stations_info['纬度'].to_numpy()
# 定义一个方法
def nearest_position(stn_lat, stn_lon, lat2d, lon2d):
"""获取最临近格点坐标索引
stn_lat : 站点纬度
stn_lon : 站点经度
lat2d : numpy.ndarray网格二维经度坐标
lon2d : numpy.ndarray网格二维纬度坐标
Return: (y_index, x_index)
"""
# 计算经纬度距离差(一个站点到每一个格点)
difflat = stn_lat - lat2d
difflon = stn_lon - lon2d
# 计算欧氏距离
rad = np.multiply(difflat,difflat)+np.multiply(difflon , difflon)
# 找到距离最小的值的索引
aa=np.where(rad==np.min(rad))
ind=np.squeeze(np.array(aa)) # 维度压缩
# 返回经纬度索引
return tuple(ind)
t2m_sta_nearest = []
# 将一维的经纬度数据网格二维化
lon2D, lat2D = np.meshgrid(longitude, latitude)
# 利用循环对所有站点进行计算
for i in range(len(lonSta)):
indexSta = nearest_position(latSta[i], lonSta[i], lat2D, lon2D)
jSta = indexSta[0] # 范围结果的维度索引
iSta = indexSta[1] # 范围结果的经度索引
# 将每一步结果 t2m[jSta, iSta] 添加到列表中
t2m_sta_nearest.append(t2m[jSta, iSta])
stations_info['t2m_nearest'] = np.array(t2m_sta_nearest)
# 重置索引(非必须)
stations_info.index = range(len(lonSta))
# 将数据保存为新的xlsx文件
stations_info.to_excel('weather_station_data.xlsx', sheet_name='Sheet1')
双线性插值:已知网格点Q12,Q22,Q11,Q21,但是要插值的点为P点,这就要用双线性插值了,首先在x轴方向上,对R1和R2两个点进行插值,这个很简单,然后根据R1和R2对P点进行插值,这就是所谓的双线性插值。在数学上,双线性插值是有两个变量的插值函数的线性插值扩展,其核心思想是在两个方向分别进行一次线性插值。
import pandas as pd
import numpy as np
import netCDF4 as nc
#站点信息
stations_info = pd.read_excel('china_weather_stations.xlsx', 'Sheet1',
index_col=None, na_values=['NA'])
lons = stations_info['经度'].to_numpy()
lats = stations_info['纬度'].to_numpy()
#ERA5数据
dataset = nc.Dataset("ERA5_single_level_2020_1-4-7-10.nc")
# 经纬度
longitude = dataset.variables['longitude'][:].data
latitude = dataset.variables['latitude'][:].data
# 2m温度
t2m = dataset.variables['t2m'][2].data # 2代表第二个数据,表示7月份的数据
不是所有的数据都需要这一步。
我们使用的 ERA5 纬度从大到小,不符合使用习惯。需要这一步处理。
latitude_old = latitude.copy()
t2m_old = t2m.copy()
nlats = len(latitude)
for i in range(nlats):
latitude[i] = latitude_old[nlats-1-i]
print(nlats-1-i, latitude_old[nlats-1-i])
t2m[i,:] = t2m_old[nlats-1-i,:]
将只在格点范围内的站点筛选出来,不在格点范围内的站点我们不进行插值;
此情况适用条件:站点数据范围比格点的数据范围大
# ERA5数据经纬度范围
lonMin = np.min(longitude)
lonMax = np.max(longitude)
latMin = np.min(latitude)
latMax = np.max(latitude)
#筛选范围内的气象站点
stations_info = stations_info[stations_info["经度"] >= lonMin]
stations_info = stations_info[stations_info["经度"] <= lonMax]
stations_info = stations_info[stations_info["纬度"] >= latMin]
stations_info = stations_info[stations_info["纬度"] <= latMax]
lonSta = stations_info['经度'].to_numpy()
latSta = stations_info['纬度'].to_numpy()
考虑代表性,推荐最邻近插值,双线性插值等于平滑了一次。
# 定义一个方法
def Bilinear_interp(lonSta, latSta, longitude, latitude, var):
var_sta = []
for i in range(len(lonSta)):
iSta = np.searchsorted(longitude, lonSta[i])
if longitude[iSta] > lonSta[i]:
iSta = iSta - 1 # 经度左下角点在经度array里的索引
jSta = np.searchsorted(latitude, latSta[i])
if latitude[jSta] > latSta[i]:
jSta = jSta - 1 # 纬度左下角点在纬度array里的索引
var11 = var[jSta, iSta] #### t2m -> var
var21 = var[jSta, iSta+1]
var12 = var[jSta+1, iSta]
var22 = var[jSta+1, iSta+1]
# 改变变量名,为了公式计算时候方便
x = lonSta[i]
y = latSta[i]
x1 = longitude[iSta]
x2 = longitude[iSta+1]
y1 = latitude[jSta]
y2 = latitude[jSta+1]
arg = 1.0 / ((x2 - x1)*(y2 - y1))
arg11 = arg * (x2 - x) * (y2 - y)
arg21 = arg * (x - x1) * (y2 - y)
arg12 = arg * (x2 - x) * (y - y1)
arg22 = arg * (x - x1) * (y - y1)
var_interp = arg11*var11 + arg21*var21 + arg12*var12 + arg22*var22
var_sta.append(var_interp)
return var_sta
t2m_sta_bilinear = Bilinear_interp(lonSta, latSta, longitude, latitude, t2m)
stations_info['t2m_bilinear'] = np.array(t2m_sta_bilinear)
stations_info.index = range(len(lonSta))
stations_info.to_excel('weather_station_data.xlsx', sheet_name='Sheet1')