数据集为Barcelona某段时间内的气象数据,其中包括温度、湿度以及风速等。本文将简单搭建一个ANN(人工神经网络)模型来对风速进行预测。
对于风速的预测,除了考虑历史风速数据外,还应该充分考虑其余气象因素的影响。因此,我们根据前24个时刻的风速+下一时刻的其余气象数据来预测下一时刻的风速。
数据预处理阶段,主要将某些列上的文本数据转为数值型数据,同时对原始数据进行归一化处理。文本数据如下所示:
经过转换后,上述各个类别分别被赋予不同的数值,比如"sky is clear"为0,"few clouds"为1。
def load_data(path='Barcelona/Barcelona.csv'):
    """Load the Barcelona weather CSV and prepare it for wind-speed modelling.

    Steps, in order:

    1. drop rows whose first column repeats an earlier row;
    2. forward-fill missing values;
    3. replace the text columns ``weather_main``, ``weather_description`` and
       ``weather_icon`` with integer codes numbered by first appearance;
    4. min-max scale columns 2..16 to [0, 1], except ``wind_speed``.

    ``wind_speed`` is left unscaled; its maximum and minimum are stored in the
    module-level globals ``Max`` and ``Min``.

    :param path: CSV file to read. Defaults to the location that was
        previously hard-coded.
    :return: the processed ``DataFrame``.
    """
    global Max, Min
    df = pd.read_csv(path)
    df.drop_duplicates(subset=[df.columns[0]], inplace=True)

    # Fill gaps *before* building the category maps so that a missing value
    # is not handed a code of its own. ``DataFrame.ffill`` is the supported
    # spelling; ``fillna(method='ffill')`` is deprecated in pandas.
    df.ffill(inplace=True)

    # Text -> integer code, numbered in order of first appearance.
    for name in ('weather_main', 'weather_description', 'weather_icon'):
        codes = {label: code for code, label in enumerate(df[name].unique())}
        df[name] = df[name].map(codes)

    # Extremes of the (unscaled) target column, published as globals.
    Max = np.max(df['wind_speed'])
    Min = np.min(df['wind_speed'])

    columns = df.columns
    for i in range(2, 17):
        column = columns[i]
        if column == 'wind_speed':
            continue
        df[column] = df[column].astype('float64')
        mx = np.max(df[column])
        mn = np.min(df[column])
        # A column with no spread (all zeros, any other constant, or all NaN)
        # cannot be min-max scaled: dividing by a zero range would fill it
        # with NaN. Leave such columns as they are.
        if not mx > mn:
            continue
        df[column] = (df[column] - mn) / (mx - mn)
    return df
利用当前时刻的气象数据和前24个小时的风速数据来预测当前时刻的风速:
def nn_seq():
    """Build sliding-window samples from ``load_data()`` and split them in three.

    Sample ``i`` pairs

    * an input of 38 values: ``wind_speed`` at rows ``i .. i+23`` followed by
      columns 2-6 and 8-16 of row ``i+24``, with
    * a one-element label: ``wind_speed`` at row ``i+24``.

    :return: ``(Dtr, Den, Dte)`` - the first 50%, the next 25% and the last
        25% of the samples, in row order. Every sample is an
        ``(input, label)`` tuple of 1-D float tensors.
    """
    print('处理数据:')
    frame = load_data()
    speed = torch.FloatTensor(frame['wind_speed'].tolist()).view(-1)
    rows = frame.values.tolist()

    # Column 7 is left out - presumably it is wind_speed itself, which is
    # already supplied through the 24-step history; confirm against the CSV.
    feature_cols = list(range(2, 7)) + list(range(8, 17))

    samples = []
    # NOTE(review): the loop stops 30 rows short of the end although a sample
    # only needs 24 history rows plus the target row - confirm the margin.
    for start in range(len(rows) - 30):
        target = start + 24
        inputs = [speed[k] for k in range(start, target)]
        inputs.extend(rows[target][c] for c in feature_cols)
        samples.append((
            torch.FloatTensor(inputs).view(-1),
            torch.FloatTensor([speed[target]]).view(-1),
        ))

    total = len(samples)
    half = int(total * 0.5)
    three_quarters = int(total * 0.75)
    Dtr = samples[:half]
    Den = samples[half:three_quarters]
    Dte = samples[three_quarters:]
    return Dtr, Den, Dte
任意输出其中一条数据:
(tensor([1.0000e+00, 1.0000e+00, 2.0000e+00, 1.0000e+00, 1.0000e+00, 1.0000e+00,
1.0000e+00, 1.0000e+00, 0.0000e+00, 1.0000e+00, 5.0000e+00, 0.0000e+00,
2.0000e+00, 0.0000e+00, 0.0000e+00, 5.0000e+00, 0.0000e+00, 2.0000e+00,
2.0000e+00, 5.0000e+00, 6.0000e+00, 5.0000e+00, 5.0000e+00, 5.0000e+00,
5.3102e-01, 5.5466e-01, 4.6885e-01, 1.0066e-03, 5.8000e-01, 6.6667e-01,
0.0000e+00, 0.0000e+00, 0.0000e+00, 0.0000e+00, 9.9338e-01, 0.0000e+00,
0.0000e+00, 0.0000e+00]), tensor([5.]))
数据按先后顺序被划分为三部分:Dtr(前50%)、Den(中间25%)以及Dte(最后25%)。Dtr用作训练集,Dte用作测试集;Den可作为验证集,但本文的训练代码中并未用到。
ANN模型搭建如下:
def ANN():
    """Train the 38-64-128-1 fully connected network and save a checkpoint.

    The training split returned by ``nn_seq()`` is fed one sample at a time
    for 50 epochs, minimising MSE with Adam (lr=0.001). The loss of the last
    sample is printed after every epoch and once more when training ends.
    The model weights, optimizer state and epoch count are then written to
    ``'Barcelona' + ANN_PATH``.
    """
    Dtr, _, _ = nn_seq()

    layers = [
        torch.nn.Linear(38, 64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 128),
        torch.nn.ReLU(),
        torch.nn.Linear(128, 1),
    ]
    model = torch.nn.Sequential(*layers).to(device)
    loss_function = nn.MSELoss().to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    epochs = 50
    for i in range(epochs):
        print('当前', i)
        for features, target in Dtr:
            # Clear the gradients left over from the previous sample.
            optimizer.zero_grad()
            prediction = model(features.to(device))
            single_loss = loss_function(prediction, target.to(device))
            single_loss.backward()
            optimizer.step()
        print(f'epoch: {i:3} loss: {single_loss.item():10.8f}')
    print(f'epoch: {i:3} loss: {single_loss.item():10.10f}')

    checkpoint = {
        'model': model.state_dict(),
        'optimizer': optimizer.state_dict(),
        'epoch': epochs,
    }
    torch.save(checkpoint, 'Barcelona' + ANN_PATH)
可以看到,模型定义的代码段为:
# Model definition: three fully connected layers (38 -> 64 -> 128 -> 1)
# with a ReLU activation after each of the first two.
my_nn = torch.nn.Sequential(
torch.nn.Linear(38, 64),
torch.nn.ReLU(),
torch.nn.Linear(64, 128),
torch.nn.ReLU(),
torch.nn.Linear(128, 1),
)
第一层全连接层输入维度为38(前24小时风速+14种气象数据),输出维度为64;第二层输入为64,输出128;第三层输入为128,输出为1。
def ANN_predict(ann, test_seq):
    """Run ``ann`` on every sample of ``test_seq`` with gradients disabled.

    :param ann: model called on each input tensor; every call must produce a
        single-element tensor, because ``.item()`` is applied to the result.
    :param test_seq: iterable of ``(input, label)`` pairs; labels are ignored.
    :return: NumPy array of shape ``(1, N)`` holding the N predictions.
    """
    with torch.no_grad():
        outputs = [ann(sample.to(device)).item() for sample, _ in test_seq]
    return np.array([outputs])
测试:
def test():
    """Evaluate the saved ANN checkpoint on the test split.

    Rebuilds the 38-64-128-1 network, loads the weights stored under
    ``'Barcelona' + ANN_PATH``, predicts every sample of ``Dte`` and prints
    the mean absolute error followed by the root mean squared error.
    """
    Dtr, Den, Dte = nn_seq()
    ann = torch.nn.Sequential(
        torch.nn.Linear(38, 64),
        torch.nn.ReLU(),
        torch.nn.Linear(64, 128),
        torch.nn.ReLU(),
        torch.nn.Linear(128, 1),
    )
    ann = ann.to(device)
    # map_location lets a checkpoint saved on another device (e.g. CUDA) be
    # loaded onto whatever ``device`` is in use now.
    checkpoint = torch.load('Barcelona' + ANN_PATH, map_location=device)
    ann.load_state_dict(checkpoint['model'])
    ann.eval()

    # ANN_predict returns shape (1, N); flatten it to line up with the labels.
    pred = ANN_predict(ann, Dte).ravel()
    # Ground truth: the one-element label tensor of every test sample.
    te_y = np.array([labels.item() for _, labels in Dte])
    print(mean_absolute_error(te_y, pred), np.sqrt(mean_squared_error(te_y, pred)))
ANN在Dte上的表现如下表所示:
| MAE | RMSE |
|---|---|
| 1.04 | 1.46 |