输入: 训练集 $D=\{(\boldsymbol{x}_k, \boldsymbol{y}_k)\}_{k=1}^{m}$;
$\quad\quad$ 学习率 $\eta$.
过程:
1: 在 $(0,1)$ 范围内随机初始化网络中所有连接权和阈值
2: **repeat**
3: $\quad$ **for all** $(\boldsymbol{x}_k, \boldsymbol{y}_k) \in D$ **do**
4: $\quad\quad$ 根据当前参数计算当前样本的输出 $\hat{\boldsymbol{y}}_k$
5: $\quad\quad$ 计算输出层神经元的梯度项 $g_j$
6: $\quad\quad$ 计算隐层神经元的梯度项 $e_h$
7: $\quad\quad$ 更新连接权 $w_{hj}, v_{ih}$ 与阈值 $\theta_j, \gamma_h$
8: $\quad$ **end for**
9: **until** 达到停止条件
输出: 连接权与阈值确定的多层前馈神经网络
首先将西瓜数据集中的特征进行离散化后,建立数据集。
def dataSet():
    """Build the discretized watermelon data set (西瓜数据集 3.0).

    Returns:
        X: ndarray of shape (17, 8) -- one row per sample; the first 6 columns
           are discretized categorical attributes, the last 2 are density and
           sugar content.
        Y: ndarray of shape (17, 1) -- column vector of binary labels
           (1 = good melon, 0 = bad melon).
    """
    x = [[2, 3, 3, 2, 1, 2, 3, 3, 3, 2, 1, 1, 2, 1, 3, 1, 2],
         [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 1, 2, 2, 2, 1, 1],
         [2, 3, 2, 3, 2, 2, 2, 2, 3, 1, 1, 2, 2, 3, 2, 2, 3],
         [3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 1, 1, 2, 2, 3, 1, 2],
         [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 1, 1, 2, 3, 2],
         [1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1],
         [0.697, 0.774, 0.634, 0.668, 0.556, 0.403, 0.481, 0.437, 0.666, 0.243, 0.245, 0.343, 0.639, 0.657, 0.360,
          0.593, 0.719],
         [0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091, 0.267, 0.057, 0.099, 0.161, 0.198, 0.370,
          0.042, 0.103]]
    # Each inner list above holds ONE attribute across all samples;
    # transposing makes rows = samples and columns = attributes.
    x = np.array(x).T
    y = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # Reshape to a column vector so callers can rely on Y.shape[1] and on
    # Y[k] being a length-1 array (np.array(y).T on a 1-D array is a no-op).
    y = np.array(y).reshape(-1, 1)
    # BUG FIX: the original returned the undefined names X, Y (NameError).
    return x, y
定义激活函数,本文选用Sigmoid函数
def sigmod(x):
    """Logistic sigmoid activation: 1 / (1 + e^(-x)), elementwise.

    The (misspelled) name is kept because other code in this file calls it.
    """
    neg_exp = np.exp(-x)
    return 1.0 / (1.0 + neg_exp)
搭建神经网络
def bpstand(hideNum):
    """Standard (stochastic) error back-propagation: one sample per update.

    Follows the convention of subtracting thresholds from the weighted input
    (z = a.dot(W) - b), hence the minus signs in the forward pass and in the
    threshold updates below.

    Args:
        hideNum: number of hidden-layer neurons.
    """
    X, Y = dataSet()
    # Tolerate a 1-D label vector; the code below needs Y to be 2-D (m, out).
    Y = Y.reshape(X.shape[0], -1)
    # Random (0, 1) initialization of all weights and thresholds.
    V = np.random.rand(X.shape[1], hideNum)   # input -> hidden weights
    V_b = np.random.rand(1, hideNum)          # hidden thresholds (gamma)
    W = np.random.rand(hideNum, Y.shape[1])   # hidden -> output weights
    W_b = np.random.rand(1, Y.shape[1])       # output thresholds (theta)
    rate = 0.1             # learning rate eta
    error = 0.001          # stop once the last sample's loss drops below this
    maxTrainNum = 1000000  # hard cap on the number of single-sample updates
    trainNum = 0
    loss = 10              # sentinel so the first loop test passes
    while (loss > error) and (trainNum < maxTrainNum):
        for k in range(X.shape[0]):  # standard BP: one sample at a time
            H = sigmod(X[k, :].dot(V) - V_b)   # hidden-layer output (1, hideNum)
            Y_ = sigmod(H.dot(W) - W_b)        # network output y_hat (1, out)
            # Squared-error loss of the CURRENT sample only; this is what the
            # while-condition checks, not the whole-set error.
            # (np.sum replaces the builtin sum, which returned a 1-element array.)
            loss = np.sum((Y[k] - Y_) ** 2) * 0.5
            g = Y_ * (1 - Y_) * (Y[k] - Y_)    # output-layer gradient term
            e = H * (1 - H) * g.dot(W.T)       # hidden-layer gradient term
            # With z = a.dot(W) - b: dL/dW = -a^T g and dL/db = +g, so
            # weights are updated with '+' and thresholds with '-'.
            W += rate * H.T.dot(g)
            W_b -= rate * g
            V += rate * X[k].reshape(1, X[k].size).T.dot(e)
            V_b -= rate * e
            trainNum += 1
    print("总训练次数:", trainNum)
    print("最终损失:", loss)
    print("V:", V)
    print("V_b:", V_b)
    print("W:", W)
    print("W_b:", W_b)
主函数
if __name__ == '__main__':
    # BUG FIX: the original followed this with an unguarded bpAccum(5) call,
    # but bpAccum is never defined in this file and would raise NameError.
    bpstand(5)
全部代码
import numpy as np
def dataSet():
    """Build the discretized watermelon data set (西瓜数据集 3.0).

    Returns:
        X: ndarray of shape (17, 8) -- one row per sample; the first 6 columns
           are discretized categorical attributes, the last 2 are density and
           sugar content.
        Y: ndarray of shape (17, 1) -- column vector of binary labels
           (1 = good melon, 0 = bad melon).
    """
    x = [[2, 3, 3, 2, 1, 2, 3, 3, 3, 2, 1, 1, 2, 1, 3, 1, 2],
         [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 1, 2, 2, 2, 1, 1],
         [2, 3, 2, 3, 2, 2, 2, 2, 3, 1, 1, 2, 2, 3, 2, 2, 3],
         [3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 1, 1, 2, 2, 3, 1, 2],
         [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 1, 1, 2, 3, 2],
         [1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1],
         [0.697, 0.774, 0.634, 0.668, 0.556, 0.403, 0.481, 0.437, 0.666, 0.243, 0.245, 0.343, 0.639, 0.657, 0.360,
          0.593, 0.719],
         [0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091, 0.267, 0.057, 0.099, 0.161, 0.198, 0.370,
          0.042, 0.103]]
    # Each inner list above holds ONE attribute across all samples;
    # transposing makes rows = samples and columns = attributes.
    x = np.array(x).T
    y = [1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    # Reshape to a column vector so callers can rely on Y.shape[1] and on
    # Y[k] being a length-1 array (np.array(y).T on a 1-D array is a no-op).
    y = np.array(y).reshape(-1, 1)
    # BUG FIX: the original returned the undefined names X, Y (NameError).
    return x, y
def sigmod(x):
    """Logistic sigmoid activation: 1 / (1 + e^(-x)), elementwise.

    The (misspelled) name is kept because other code in this file calls it.
    """
    neg_exp = np.exp(-x)
    return 1.0 / (1.0 + neg_exp)
def bpstand(hideNum):
    """Standard (stochastic) error back-propagation: one sample per update.

    Follows the convention of subtracting thresholds from the weighted input
    (z = a.dot(W) - b), hence the minus signs in the forward pass and in the
    threshold updates below.

    Args:
        hideNum: number of hidden-layer neurons.
    """
    X, Y = dataSet()
    # Tolerate a 1-D label vector; the code below needs Y to be 2-D (m, out).
    Y = Y.reshape(X.shape[0], -1)
    # Random (0, 1) initialization of all weights and thresholds.
    V = np.random.rand(X.shape[1], hideNum)   # input -> hidden weights
    V_b = np.random.rand(1, hideNum)          # hidden thresholds (gamma)
    W = np.random.rand(hideNum, Y.shape[1])   # hidden -> output weights
    W_b = np.random.rand(1, Y.shape[1])       # output thresholds (theta)
    rate = 0.1             # learning rate eta
    error = 0.001          # stop once the last sample's loss drops below this
    maxTrainNum = 1000000  # hard cap on the number of single-sample updates
    trainNum = 0
    loss = 10              # sentinel so the first loop test passes
    while (loss > error) and (trainNum < maxTrainNum):
        for k in range(X.shape[0]):  # standard BP: one sample at a time
            H = sigmod(X[k, :].dot(V) - V_b)   # hidden-layer output (1, hideNum)
            Y_ = sigmod(H.dot(W) - W_b)        # network output y_hat (1, out)
            # Squared-error loss of the CURRENT sample only; this is what the
            # while-condition checks, not the whole-set error.
            # (np.sum replaces the builtin sum, which returned a 1-element array.)
            loss = np.sum((Y[k] - Y_) ** 2) * 0.5
            g = Y_ * (1 - Y_) * (Y[k] - Y_)    # output-layer gradient term
            e = H * (1 - H) * g.dot(W.T)       # hidden-layer gradient term
            # With z = a.dot(W) - b: dL/dW = -a^T g and dL/db = +g, so
            # weights are updated with '+' and thresholds with '-'.
            W += rate * H.T.dot(g)
            W_b -= rate * g
            V += rate * X[k].reshape(1, X[k].size).T.dot(e)
            V_b -= rate * e
            trainNum += 1
    print("总训练次数:", trainNum)
    print("最终损失:", loss)
    print("V:", V)
    print("V_b:", V_b)
    print("W:", W)
    print("W_b:", W_b)
if __name__ == '__main__':
    # BUG FIX: the original called bpAccum(5), which is never defined in this
    # file (only the standard-BP variant bpstand exists) and raises NameError.
    bpstand(5)
因为 PyTorch 只能处理 Tensor 格式的数据,所以需要先读入数据集,并将其转化为 Tensor 形式。
# Read in the data set and convert it to tensors (PyTorch consumes tensors).
# Each inner list holds ONE attribute across all 17 samples; transposing
# makes rows = samples and columns = attributes.
x = np.array([
    [2, 3, 3, 2, 1, 2, 3, 3, 3, 2, 1, 1, 2, 1, 3, 1, 2],
    [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 1, 2, 2, 2, 1, 1],
    [2, 3, 2, 3, 2, 2, 2, 2, 3, 1, 1, 2, 2, 3, 2, 2, 3],
    [3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 1, 1, 2, 2, 3, 1, 2],
    [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 1, 1, 2, 3, 2],
    [1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1],
    [0.697, 0.774, 0.634, 0.668, 0.556, 0.403, 0.481, 0.437, 0.666, 0.243, 0.245, 0.343, 0.639, 0.657, 0.360, 0.593, 0.719],
    [0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091, 0.267, 0.057, 0.099, 0.161, 0.198, 0.370, 0.042, 0.103],
]).T
x = torch.from_numpy(x).type(torch.FloatTensor)  # features as float32
y = np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
y = torch.from_numpy(y).type(torch.LongTensor)   # class indices as int64
# Sanity check: x should be torch.Size([m, n]) and y torch.Size([m]) with
# m = 17 samples, n = 8 features -- a mismatch surfaces later as a RuntimeError.
# print(x.shape)
# print(y.shape)
通常将数据集转化为TensorDataset,这样之后可以直接利用其中的功能,对数据集进行抽样等操作
# Wrap the feature/label tensors in a TensorDataset so DataLoader can
# index and sample them together.
torch_dataset = Data.TensorDataset(x, y)
DataLoader可以对数据进行加载,取样,设置batch等
# Put the dataset into a DataLoader.
# BATCH_SIZE = 5: each mini-batch holds 5 samples (the last of the 17 holds 2).
BATCH_SIZE = 5
loader = Data.DataLoader(
    dataset=torch_dataset,  # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini batch size
    shuffle=True,           # reshuffle the samples every epoch
    num_workers=2,          # worker subprocesses for loading
    # NOTE(review): num_workers > 0 in a module-level script with no
    # `if __name__ == "__main__"` guard can crash/hang on platforms that
    # spawn workers (Windows/macOS) -- confirm, or use num_workers=0.
)
建立神经网络
# 建立神经网络
class bpnn(torch.nn.Module):
    """Single-hidden-layer feed-forward network.

    forward() applies a sigmoid on the hidden layer and returns per-class
    log-probabilities (log_softmax), intended to be paired with F.nll_loss.
    """

    def __init__(self, n_features, n_hidden, n_output):
        super(bpnn, self).__init__()
        self.hidden = torch.nn.Linear(n_features, n_hidden)   # input -> hidden
        self.predict = torch.nn.Linear(n_hidden, n_output)    # hidden -> output

    def forward(self, x):
        hidden_act = torch.sigmoid(self.hidden(x))
        return F.log_softmax(self.predict(hidden_act), dim=1)
设置神经网络,有8个特征,隐层输出数设置为16,有“好瓜”“坏瓜”两个类别,所以输出数量为2
# 8 input features, 16 hidden units, 2 output classes (good / bad melon).
fnet1 = bpnn(8, 16, 2)
print(fnet1)
设置优化器optimizer,选择随机梯度下降算法SGD,除此之外还有AdaGrad、RMSProp、Adam等;学习率lr设置为0.6
# SGD optimizer over all of fnet1's parameters, learning rate 0.6.
optimizer = torch.optim.SGD(fnet1.parameters(), lr=0.6)
# NOTE(review): loss_func is never used below -- the training loop calls
# F.nll_loss, which matches the network's log_softmax output; this MSELoss
# appears to be leftover. Confirm before removing.
loss_func = torch.nn.MSELoss()
开始训练
# Training loop: 100 epochs over the whole data set.
# px / py record the global step index and the loss at that step, for plotting.
px, py = [], []
i = 0  # global step counter
for epoch in range(100):
    for step, (batch_x, batch_y) in enumerate(loader):  # one mini-batch per step
        # BUG FIX: train on the mini-batch yielded by the loader; the original
        # fed the full (x, y) every step, which made the DataLoader pointless.
        prediction = fnet1(batch_x)          # forward pass: log-probabilities
        loss = F.nll_loss(prediction, batch_y)
        optimizer.zero_grad()                # clear gradients from the last step
        loss.backward()                      # back-propagate
        optimizer.step()                     # update parameters
        # BUG FIX: the original x-axis index `2*epoch+step+1` assumed 2 batches
        # per epoch; with 17 samples and batch_size 5 there are 4 per epoch.
        i += 1
        px.append(i)
        py.append(loss.item())
        # print(i, " loss: ", loss.item())
将损失函数的变化可视化
# Visualization: loss against training step.
plt.figure(figsize=(6, 4), dpi=144)
plt.plot(px, py, 'r-', lw=1)  # red solid line, width 1
# y-axis ticks at 0.0, 0.1, ..., 1.5
plt.yticks([x * 0.1 for x in range(16)])
plt.show()
全部代码
import numpy as np
import torch.utils.data as Data
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
import matplotlib as mpl
# Select the TkAgg GUI backend so plt.show() opens a window.
mpl.use('TkAgg')
# Read in the data set and convert it to tensors (PyTorch consumes tensors).
# Each inner list holds ONE attribute across all 17 samples; transposing
# makes rows = samples and columns = attributes.
x = np.array([
    [2, 3, 3, 2, 1, 2, 3, 3, 3, 2, 1, 1, 2, 1, 3, 1, 2],
    [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 1, 2, 2, 2, 1, 1],
    [2, 3, 2, 3, 2, 2, 2, 2, 3, 1, 1, 2, 2, 3, 2, 2, 3],
    [3, 3, 3, 3, 3, 3, 2, 3, 2, 3, 1, 1, 2, 2, 3, 1, 2],
    [1, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 1, 1, 2, 3, 2],
    [1, 1, 1, 1, 1, 2, 2, 1, 1, 2, 1, 2, 1, 1, 2, 1, 1],
    [0.697, 0.774, 0.634, 0.668, 0.556, 0.403, 0.481, 0.437, 0.666, 0.243, 0.245, 0.343, 0.639, 0.657, 0.360, 0.593, 0.719],
    [0.460, 0.376, 0.264, 0.318, 0.215, 0.237, 0.149, 0.211, 0.091, 0.267, 0.057, 0.099, 0.161, 0.198, 0.370, 0.042, 0.103],
]).T
x = torch.from_numpy(x).type(torch.FloatTensor)  # features as float32
y = np.array([1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0])
y = torch.from_numpy(y).type(torch.LongTensor)   # class indices as int64
# Sanity check: x should be torch.Size([m, n]) and y torch.Size([m]) with
# m = 17 samples, n = 8 features -- a mismatch surfaces later as a RuntimeError.
# print(x.shape)
# print(y.shape)
# Wrap the feature/label tensors in a TensorDataset so DataLoader can
# index and sample them together.
torch_dataset = Data.TensorDataset(x, y)
# Put the dataset into a DataLoader.
# BATCH_SIZE = 5: each mini-batch holds 5 samples (the last of the 17 holds 2).
BATCH_SIZE = 5
loader = Data.DataLoader(
    dataset=torch_dataset,  # torch TensorDataset format
    batch_size=BATCH_SIZE,  # mini batch size
    shuffle=True,           # reshuffle the samples every epoch
    num_workers=2,          # worker subprocesses for loading
    # NOTE(review): num_workers > 0 in a module-level script with no
    # `if __name__ == "__main__"` guard can crash/hang on platforms that
    # spawn workers (Windows/macOS) -- confirm, or use num_workers=0.
)
# Build the neural network.
class bpnn(torch.nn.Module):
    """Single-hidden-layer feed-forward network.

    forward() applies a sigmoid on the hidden layer and returns per-class
    log-probabilities (log_softmax), intended to be paired with F.nll_loss.
    """

    def __init__(self, n_features, n_hidden, n_output):
        super(bpnn, self).__init__()
        self.hidden = torch.nn.Linear(n_features, n_hidden)   # input -> hidden
        self.predict = torch.nn.Linear(n_hidden, n_output)    # hidden -> output

    def forward(self, x):
        hidden_act = torch.sigmoid(self.hidden(x))
        return F.log_softmax(self.predict(hidden_act), dim=1)
# 8 input features, 16 hidden units, 2 output classes (good / bad melon).
fnet1 = bpnn(8, 16, 2)
print(fnet1)
# SGD optimizer over all of fnet1's parameters, learning rate 0.6.
optimizer = torch.optim.SGD(fnet1.parameters(), lr=0.6)
# NOTE(review): loss_func is never used below -- the training loop calls
# F.nll_loss, which matches the network's log_softmax output; this MSELoss
# appears to be leftover. Confirm before removing.
loss_func = torch.nn.MSELoss()
# Training loop: 100 epochs over the whole data set.
# px / py record the global step index and the loss at that step, for plotting.
px, py = [], []
i = 0  # global step counter
for epoch in range(100):
    for step, (batch_x, batch_y) in enumerate(loader):  # one mini-batch per step
        # BUG FIX: train on the mini-batch yielded by the loader; the original
        # fed the full (x, y) every step, which made the DataLoader pointless.
        prediction = fnet1(batch_x)          # forward pass: log-probabilities
        loss = F.nll_loss(prediction, batch_y)
        optimizer.zero_grad()                # clear gradients from the last step
        loss.backward()                      # back-propagate
        optimizer.step()                     # update parameters
        # BUG FIX: the original x-axis index `2*epoch+step+1` assumed 2 batches
        # per epoch; with 17 samples and batch_size 5 there are 4 per epoch.
        i += 1
        px.append(i)
        py.append(loss.item())
        # print(i, " loss: ", loss.item())
# Visualization: loss against training step.
plt.figure(figsize=(6, 4), dpi=144)
plt.plot(px, py, 'r-', lw=1)  # red solid line, width 1
# y-axis ticks at 0.0, 0.1, ..., 1.5
plt.yticks([x * 0.1 for x in range(16)])
plt.show()