MLP (multilayer perceptron) is a commonly used feedforward neural network; an MLP trained with the backpropagation (BP) algorithm is often called a BP neural network. Each hidden node feeds the inner product of the input vector and its weight vector into the activation function, which here is ReLU. Because every parameter influences the output on an equal footing, the MLP is a global approximator of nonlinear mappings. Besides the MLPRegressor class provided by scikit-learn, we can build a far more customizable neural network with PyTorch. This article will not rehash the theoretical foundations of the MLP; instead it walks through how to build one in practice.
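For comparison, here is a minimal sketch of the scikit-learn route; the hyperparameters (hidden_layer_sizes, solver, learning rate) are chosen to mirror the PyTorch network built below and are illustrative, not a prescribed setup:

from sklearn.neural_network import MLPRegressor

# Hidden-layer sizes mirror the PyTorch network built below (20, 20, 5);
# the SGD solver and learning rate echo the training setup used later
mlp = MLPRegressor(hidden_layer_sizes=(20, 20, 5), activation="relu",
                   solver="sgd", learning_rate_init=1e-3, max_iter=300)
# Usage: mlp.fit(x_train_s, y_train); y_pred = mlp.predict(x_test_s)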
Building the MLP layer by layer by subclassing nn.Module
The network's input dimension is 8 (the eight features); the hidden layers have 20, 20, and 5 units respectively, and the output layer has a single unit, the predicted value. At the end of the forward pass the output tensor is flattened to a one-dimensional vector of length batch_size, so each sample in the batch yields one prediction.
import torch.nn as nn
import torch.nn.functional as F

class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        # Three hidden layers (8 -> 20 -> 20 -> 5) and a single-unit output layer
        self.hidden1 = nn.Linear(in_features=8, out_features=20, bias=True)
        self.hidden2 = nn.Linear(20, 20)
        self.hidden3 = nn.Linear(20, 5)
        self.predict = nn.Linear(5, 1)

    def forward(self, x):
        x = F.relu(self.hidden1(x))
        x = F.relu(self.hidden2(x))
        x = F.relu(self.hidden3(x))
        output = self.predict(x)
        # Flatten (batch_size, 1) to (batch_size,) so it matches the target shape
        out = output.view(-1)
        return out
Use Graphviz to visualize this simple MLP. Here a 64×8 tensor is fed in, representing a single batch of 64 samples with 8 features each.
import torch
from net import MLP
from torchviz import make_dot

# A dummy 64x8 input: one batch of 64 samples with 8 features each
tensor = torch.zeros(64, 8).requires_grad_(True)
model = MLP()
net = model(tensor)
# Build the computation graph from the output, labelling the model
# parameters and the input tensor
net_strc = make_dot(net, params=dict(list(model.named_parameters()) + [('tensor', tensor)]))
net_strc.view()
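make_dot returns a graphviz Digraph, so instead of opening the system viewer you can also write the graph straight to an image file; a small sketch (the output filename mlp_structure is an arbitrary choice):

# Write the graph to mlp_structure.png without opening a viewer
net_strc.render("mlp_structure", format="png", cleanup=True)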
Here we use the fetch_california_housing dataset provided by scikit-learn; it has 8 input features, matching the input size of the network's first layer. scikit-learn's StandardScaler and train_test_split are used to standardize the inputs and split the data into training and test sets.
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

def dataset():
    house_data = fetch_california_housing()
    # 70/30 train/test split with a fixed random seed for reproducibility
    x_train, x_test, y_train, y_test = train_test_split(
        house_data.data, house_data.target, test_size=0.3, random_state=42)
    # Fit the scaler on the training set only, then apply it to the test set
    scale = StandardScaler()
    x_train_s = scale.fit_transform(x_train)
    x_test_s = scale.transform(x_test)
    return x_train_s, x_test_s, y_train, y_test
The function returns the standardized inputs for the training and validation sets, along with the corresponding ground-truth targets.
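As a quick sanity check (the shapes follow from the 20,640-sample California housing data and the 30% test split):

x_train_s, x_test_s, y_train, y_test = dataset()
print(x_train_s.shape, x_test_s.shape)  # (14448, 8) (6192, 8)
print(y_train.shape, y_test.shape)      # (14448,) (6192,)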
Besides loading the network we built and the data, we also need to choose an optimizer and a loss function. For this MLP regression network we use the stochastic gradient descent optimizer (SGD) and the mean squared error loss (MSE loss).
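As a reminder of what MSELoss computes, a tiny worked example:

import torch
import torch.nn as nn

loss_func = nn.MSELoss()
pred = torch.tensor([2.0, 0.5])
true = torch.tensor([1.0, 1.5])
# ((2-1)^2 + (0.5-1.5)^2) / 2 = 1.0
print(loss_func(pred, true))  # tensor(1.)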
Here draw_plot(Full_Epoch, train_loss_list, train_acc_list, val_loss_list, val_acc_list) plots how the loss and the accuracy evolve during training. Accuracy is evaluated with the r2_score function, i.e. the coefficient of determination between the true values and the predictions; it is at most 1 (and can even be negative for a poor fit), with values close to 1 indicating a good fit. train_one_epoch(model, loss_func, epoch, epoch_size, epoch_size_val, gen, gen_val, Full_Epoch) trains the network for one epoch.
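A small example of r2_score on hand-picked values (the numbers are illustrative):

import numpy as np
from sklearn.metrics import r2_score

y_true = np.array([3.0, -0.5, 2.0, 7.0])
y_pred = np.array([2.5, 0.0, 2.0, 8.0])
# R^2 = 1 - SS_res / SS_tot; 1.0 would be a perfect fit
print(r2_score(y_true, y_pred))  # ~0.9486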
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.metrics import r2_score
from torch.utils.data import DataLoader, TensorDataset
from tqdm import tqdm

from data import dataset
from net import MLP
def draw_plot(Full_Epoch, train_loss_list, train_acc_list, val_loss_list, val_acc_list):
    x = range(0, Full_Epoch)
    plt.figure(figsize=(13, 13))
    # Top panel: training and validation loss curves
    plt.subplot(2, 1, 1)
    plt.plot(x, train_loss_list, color="blue", label="train_loss", linewidth=2)
    plt.plot(x, val_loss_list, color="orange", label="val_loss", linewidth=2)
    plt.title("Loss_curve", fontsize=20)
    plt.xlabel("Epochs", fontsize=15)
    plt.ylabel("Loss", fontsize=15)
    plt.legend()
    # Bottom panel: training and validation R^2 ("accuracy") curves
    plt.subplot(2, 1, 2)
    plt.plot(x, train_acc_list, color="blue", label="train_acc", linewidth=2)
    plt.plot(x, val_acc_list, color="orange", label="val_acc", linewidth=2)
    plt.title("Acc_curve", fontsize=20)
    plt.xlabel("Epochs", fontsize=15)
    plt.ylabel("Accuracy", fontsize=15)
    plt.legend()
    plt.savefig("Loss&acc.jpg")
def train_one_epoch(model, loss_func, epoch, epoch_size, epoch_size_val, gen, gen_val, Full_Epoch):
    train_loss = 0
    val_loss = 0
    total_loss = 0
    total_val_loss = 0
    print(f"Epoch {epoch+1}: training started")
    model.train()
    with tqdm(total=epoch_size, desc=f'Epoch {epoch+1}/{Full_Epoch}', mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            data, target = batch
            optimizer.zero_grad()
            output = model(data)
            loss = loss_func(output, target)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
            # Running average of the training loss over the batches so far
            train_loss = total_loss / (iteration + 1)
            # R^2 on the current batch serves as the accuracy metric
            train_acc = r2_score(target.detach().numpy(), output.detach().numpy())
            pbar.set_postfix(**{"total_loss": train_loss,
                                "learning_rate": 1e-3,
                                "Acc": train_acc})
            pbar.update(1)  # advance the progress bar
    print(f"Epoch {epoch+1}: weight update finished")
    print(f"Epoch {epoch+1}: evaluation started")
    model.eval()
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch+1}/{Full_Epoch}', mininterval=0.3) as pbar:
        with torch.no_grad():  # no gradients are needed during evaluation
            for iteration, batch in enumerate(gen_val):
                if iteration >= epoch_size_val:
                    break
                data, target = batch
                output = model(data)
                loss = loss_func(output, target)
                total_val_loss += loss.item()
                val_loss = total_val_loss / (iteration + 1)
                val_acc = r2_score(target.numpy(), output.numpy())
                pbar.set_postfix(**{"val_loss": val_loss,
                                    "Acc": val_acc})
                pbar.update(1)  # advance the progress bar
    if epoch + 1 == Full_Epoch:
        # val_loss is already an average, so it goes into the filename directly
        os.makedirs("weights", exist_ok=True)
        torch.save(model.state_dict(),
                   'weights/mlp_weights-epoch%d-Total_loss%.4f-val_loss%.4f.pkl' % ((epoch+1), train_loss, val_loss))
    return train_loss, train_acc, val_loss, val_acc
if __name__ == "__main__":
    Full_Epoch = 300
    Batch_size = 64
    lr = 1e-3
    loss_and_acc_curve = True
    train_loss_list = []
    val_loss_list = []
    train_acc_list = []
    val_acc_list = []
    # Load the standardized data and convert it to float32 tensors
    x_train_s, x_test_s, y_train, y_test = dataset()
    x_train_st = torch.from_numpy(x_train_s.astype(np.float32))
    x_test_st = torch.from_numpy(x_test_s.astype(np.float32))
    y_train_t = torch.from_numpy(y_train.astype(np.float32))
    y_test_t = torch.from_numpy(y_test.astype(np.float32))
    train_dataset = TensorDataset(x_train_st, y_train_t)
    test_dataset = TensorDataset(x_test_st, y_test_t)
    gen = DataLoader(train_dataset, batch_size=Batch_size, num_workers=0,
                     pin_memory=True, drop_last=True, shuffle=True)
    gen_val = DataLoader(test_dataset, batch_size=Batch_size, num_workers=0,
                         pin_memory=True, drop_last=True, shuffle=True)
    model = MLP()
    optimizer = optim.SGD(model.parameters(), lr)
    loss_func = nn.MSELoss()
    # Number of complete batches per epoch for training and validation
    epoch_size = y_train_t.size(0) // Batch_size
    epoch_size_val = y_test_t.size(0) // Batch_size
    for epoch in range(0, Full_Epoch):
        train_loss, train_acc, val_loss, val_acc = train_one_epoch(
            model, loss_func, epoch, epoch_size, epoch_size_val, gen, gen_val, Full_Epoch)
        train_loss_list.append(train_loss)
        train_acc_list.append(train_acc)
        val_loss_list.append(val_loss)
        val_acc_list.append(val_acc)
    if loss_and_acc_curve:
        draw_plot(Full_Epoch, train_loss_list, train_acc_list, val_loss_list, val_acc_list)
After training for 300 epochs, we obtain the curves of the loss and accuracy over the course of training. The loss essentially stabilizes after about 200 epochs, while the accuracy stays in the 0.5 to 0.75 range.
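To reuse the trained network without retraining, the saved state_dict can be loaded back into a fresh MLP instance; a minimal sketch (the checkpoint path is a placeholder, since the real filename embeds the final loss values):

import torch
from net import MLP

model = MLP()
# Placeholder path: the actual filename embeds the final loss values
model.load_state_dict(torch.load("weights/mlp_weights-epoch300-....pkl"))
model.eval()  # switch to evaluation mode before predicting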
Although the MLP is an extremely simple network, and off-the-shelf functions already offer a quick way to build one, constructing it by hand deepens one's understanding of how networks are assembled and how parameters propagate, laying the groundwork for deeper convolutional neural networks.