#引用所需要的库
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import torch
import torch.optim as optim#优化器
# Silence library warnings so the notebook output stays readable.
import warnings
warnings.filterwarnings("ignore")
%matplotlib inline
# Load the daily temperature records and preview the first rows.
features = pd.read_csv('temps.csv')
features.head()
year month day week temp_2 temp_1 average actual friend
0 2016 1 1 Fri 45 45 45.6 45 29
1 2016 1 2 Sat 44 45 45.7 44 61
2 2016 1 3 Sun 45 44 45.8 41 56
3 2016 1 4 Mon 44 41 45.9 40 53
4 2016 1 5 Tues 41 40 46.0 44 41
# Turn the separate year / month / day columns into datetime objects.
import datetime
years = features['year']
months = features['month']
days = features['day']
# Build each datetime directly from its integer parts (midnight of that day)
# instead of formatting a string and parsing it back.
dates = [
    datetime.datetime(int(year), int(month), int(day))
    for year, month, day in zip(years, months, days)
]
features.shape
(348, 9)
dates[:5]
[datetime.datetime(2016, 1, 1, 0, 0),
datetime.datetime(2016, 1, 2, 0, 0),
datetime.datetime(2016, 1, 3, 0, 0),
datetime.datetime(2016, 1, 4, 0, 0),
datetime.datetime(2016, 1, 5, 0, 0)]
# Quick look at what the data set looks like after encoding.
# One-hot encode the non-numeric columns. dtype=int keeps the indicator
# columns as 0/1 integers regardless of the pandas version's default dtype.
features = pd.get_dummies(features, dtype=int)
features.head(5)
year month day temp_2 temp_1 average actual friend week_Fri week_Mon week_Sat week_Sun week_Thurs week_Tues week_Wed
0 2016 1 1 45 45 45.6 45 29 1 0 0 0 0 0 0
1 2016 1 2 44 45 45.7 44 61 0 0 1 0 0 0 0
2 2016 1 3 45 44 45.8 41 56 0 0 0 1 0 0 0
3 2016 1 4 44 41 45.9 40 53 0 1 0 0 0 0 0
4 2016 1 5 41 40 46.0 44 41 0 0 0 0 0 1 0
features.shape
(348, 15)
# Labels (y): the 'actual' column is the value the model should predict.
labels = np.array(features['actual'])
# Drop the label column from the feature set; what remains is x.
features = features.drop('actual', axis=1)
# Keep the column names so columns can be looked up by name later.
feature_list = list(features.columns)
# Convert to a NumPy array (turned into tensors further down). Requesting
# float explicitly avoids an object-dtype array when the one-hot columns
# are stored as bool.
features = np.array(features, dtype=float)
features.shape
(348, 14)
# The columns live on very different scales, so standardise them
# to keep every input in a similar numeric range.
from sklearn import preprocessing
scaler = preprocessing.StandardScaler()
input_features = scaler.fit_transform(features)
# Build and train the network by hand (the verbose version).
# Convert the NumPy data to float64 tensors.
x = torch.tensor(input_features, dtype=torch.float64)
y = torch.tensor(labels, dtype=torch.float64)
# predictions below has shape (N, 1) while y has shape (N,). Subtracting
# them directly would broadcast to an (N, N) matrix and yield a wrong loss,
# so compare against a column-shaped view of the targets instead.
targets = y.view(-1, 1)

# Randomly initialise the parameters: inputs -> 128 hidden units -> 1 output.
# The input width is taken from the data rather than hard-coded.
weights = torch.randn((x.shape[1], 128), dtype=torch.float64, requires_grad=True)
biases = torch.randn(128, dtype=torch.float64, requires_grad=True)
weights2 = torch.randn((128, 1), dtype=torch.float64, requires_grad=True)
biases2 = torch.randn(1, dtype=torch.float64, requires_grad=True)

learning_rate = 0.001
losses = []

for i in range(1000):
    # Hidden layer followed by the ReLU activation.
    hidden = torch.relu(x.mm(weights) + biases)
    # Output layer: one predicted value per sample.
    predictions = hidden.mm(weights2) + biases2
    # Mean squared error over all samples.
    loss = torch.mean((predictions - targets) ** 2)
    losses.append(loss.item())
    if i % 100 == 0:
        print('loss:', loss)
    # Back-propagate to populate .grad on every parameter.
    loss.backward()
    # Plain gradient-descent step. no_grad() keeps the update itself out of
    # the autograd graph; gradients are zeroed afterwards because backward()
    # accumulates into .grad on every call.
    with torch.no_grad():
        for param in (weights, biases, weights2, biases2):
            param -= learning_rate * param.grad
            param.grad.zero_()
loss: tensor(8652.8872, dtype=torch.float64, grad_fn=)
loss: tensor(155.4351, dtype=torch.float64, grad_fn=)
loss: tensor(147.5643, dtype=torch.float64, grad_fn=)
loss: tensor(144.6621, dtype=torch.float64, grad_fn=)
loss: tensor(143.1741, dtype=torch.float64, grad_fn=)
loss: tensor(142.2740, dtype=torch.float64, grad_fn=)
loss: tensor(141.6748, dtype=torch.float64, grad_fn=)
loss: tensor(141.2530, dtype=torch.float64, grad_fn=)
loss: tensor(140.9336, dtype=torch.float64, grad_fn=)
loss: tensor(140.6799, dtype=torch.float64, grad_fn=)
# Same model expressed with torch.nn building blocks and trained in
# mini-batches with the Adam optimizer.
input_size = input_features.shape[1]
hidden_size = 128
output_size = 1
batch_size = 16
my_nn = torch.nn.Sequential(
    torch.nn.Linear(input_size, hidden_size),
    torch.nn.Sigmoid(),
    torch.nn.Linear(hidden_size, output_size),
)
cost = torch.nn.MSELoss(reduction='mean')
optimizer = torch.optim.Adam(my_nn.parameters(), lr=0.001)

# Convert the data once. Inputs and targets are plain data, so they do not
# need requires_grad. Targets are shaped (N, 1) to match the network output;
# a 1-D target would be broadcast against the (batch, 1) predictions.
train_x = torch.tensor(input_features, dtype=torch.float)
train_y = torch.tensor(labels, dtype=torch.float).view(-1, 1)

losses = []
for i in range(1000):
    batch_loss = []
    # Walk through the data in consecutive mini-batches; slicing past the
    # end simply yields a shorter final batch.
    for start in range(0, len(train_x), batch_size):
        xx = train_x[start:start + batch_size]
        yy = train_y[start:start + batch_size]
        prediction = my_nn(xx)
        loss = cost(prediction, yy)
        optimizer.zero_grad()
        # A fresh graph is built for every batch, so nothing has to be retained.
        loss.backward()
        optimizer.step()
        batch_loss.append(loss.item())
    # Record and print the mean batch loss every 100 epochs.
    if i % 100 == 0:
        losses.append(np.mean(batch_loss))
        print(i, np.mean(batch_loss))
0 4015.5623
100 38.040577
200 35.64831
300 35.261333
400 35.099106
500 34.968235
600 34.84836
700 34.728233
800 34.605637
900 34.48074
# Evaluate the model: predict on the full data set and plot the predictions
# against the actual values.
x = torch.tensor(input_features, dtype=torch.float)
# Inference only, so skip gradient tracking.
with torch.no_grad():
    predict = my_nn(x).numpy()

# Recover the calendar columns from the unscaled feature matrix.
months = features[:, feature_list.index('month')]
days = features[:, feature_list.index('day')]
years = features[:, feature_list.index('year')]

# Both frames cover the same rows, so they share the same dates.
dates = [
    datetime.datetime(int(year), int(month), int(day))
    for year, month, day in zip(years, months, days)
]
test_dates = list(dates)

true_data = pd.DataFrame(data={'date': dates, 'actual': labels})
predictions_data = pd.DataFrame(
    data={'date': test_dates, 'prediction': predict.reshape(-1)}
)

plt.plot(true_data['date'], true_data['actual'], 'b-', label='actual')
plt.plot(predictions_data['date'], predictions_data['prediction'], 'ro', label='prediction')
# rotation must be a number (or 'vertical' / 'horizontal'); the string '60'
# is rejected with a ValueError.
plt.xticks(rotation=60)
plt.legend()
plt.xlabel('Date')
plt.ylabel('Maximum Temperature (F)')
plt.title('Actual and Predicted Values')
plt.show()
ValueError: rotation must be ‘vertical’, ‘horizontal’ or a number, not 60