模型构建步骤:
1.定义:找到目标函数,将输入属性映射为输出属性。
2.模型假设:用一条直线拟合数据
3.模型评估:采用损失函数来评价预测值和真实值的接近程度。argmin是机器学习常用的函数,用来寻找使损失函数最小时的参数取值。
4.梯度下降算法:也叫最速下降法。用来求代价函数(损失函数或者叫目标函数)的最小值。基本思想:随机选择一组参数初始值,计算损失或者代价,然后寻找能让代价下降最多的另一组参数,反复迭代直至达到一个局部最优。
数据集为奥运会自由泳冠军数据,
创建文件display_olympics_freestyle100m.py
添加代码如下:
import torch
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def main():
# 加载奥运会数据
file_path = "../datasets/Freestyle100m.csv"
dataset = pd.read_csv(file_path)
dataset_tensor = torch.tensor(dataset.values) # 转换为tensor
x = dataset_tensor[:, :-1]
y = dataset_tensor[:, -1]
# 绘图
plt.plot(x.numpy(), y.numpy(), 'o')
plt.xlabel(u'奥运会年')
plt.ylabel(u'取胜时间(秒)')
plt.show()
if __name__ == "__main__":
main()
横坐标为奥运举办年份,纵坐标为取胜时间
思路:
1.建立线性回归模型;
2.损失函数及损失函数求导
3.利用损失函数求导,实现梯度函数
梯度下降的收敛可以通过设置最大迭代次数或者判断参数不再改变来确定已经收敛。更新参数使用全部样本成为批量梯度下降法(BGD);使用一个样本成为随机梯度下降法(SGD);使用部分样本叫小批量梯度下降法,兼顾前两种优缺点。
4.重复:计算预测值(前向传播),然后计算损失和梯度;更新参数;
创建文件linear_regression_from_scratch.py
实现代码如下:
# -*- coding: utf-8 -*-
"""
从头开始实现线性回归模型
"""
import torch
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def model(x, w, b):
""" 回归模型 """
return w * x + b
def loss_fn(y_pred, y):
""" 损失函数 """
loss = (y_pred - y) ** 2
return loss.mean()
def grad_loss_fn(y_pred, y):
""" 损失函数求导 """
return 2 * (y_pred - y)
def grad_fn(x, y, y_pred):
""" 梯度函数 """
grad_w = grad_loss_fn(y_pred, y) * x
grad_b = grad_loss_fn(y_pred, y)
return torch.stack([grad_w.mean(), grad_b.mean()])
def model_training(x, y, n_epochs, learning_rate, params, print_params=True):
""" 训练 """
for epoch in range(1, n_epochs + 1):
w, b = params
# 前向传播
y_pred = model(x, w, b)
# 计算损失
loss = loss_fn(y_pred, y)
# 梯度
grad = grad_fn(x, y, y_pred)
# 更新参数
params = params - learning_rate * grad
if epoch == 1 or epoch % 500 == 1:
print('轮次:%d,\t损失:%f' % (epoch, float(loss)))
if print_params:
print(f'参数:{params.detach().numpy()}')
print(f'梯度:{grad.detach().numpy()}')
return params
def main():
# 加载奥运会数据
file_path = "../datasets/Freestyle100m.csv"
dataset = pd.read_csv(file_path)
dataset_tensor = torch.tensor(dataset.values) # 转换为tensor
xx = dataset_tensor[:, 0]
y = dataset_tensor[:, -1]
# 为方便数值运算,将原来的举办年减去第一届奥运会年
x = xx - xx[0]
# 模型参数初始化
w = torch.zeros(1)
b = torch.zeros(1)
# 直接用未优化的模型来预测
y_pred = model(x, w, b)
print('未优化的模型的预测结果:', y_pred.detach().numpy())
loss = loss_fn(y_pred, y)
print('未优化的模型的损失:', loss.detach().numpy())
params = model_training(
x=x,
y=y,
n_epochs=50000,
learning_rate=0.00017,
params=torch.tensor([0.0, 0.0]))
print('梯度下降找到的w和b:%f %f \n' % (params[0], params[1]))
y_pred = model(x, *params)
print(f'优化后的模型的预测结果:{y_pred.detach().numpy()}\n')
# 绘图
plt.plot(xx.numpy(), y_pred.detach().numpy(), label=u'模型')
plt.plot(xx.numpy(), y.numpy(), 'o', label=u'数据')
plt.xlabel(u'奥运会年')
plt.ylabel(u'取胜时间(秒)')
plt.legend()
plt.show()
if __name__ == "__main__":
main()
运行结果:
pytorch提供自动求导功能,计算梯度。只要提供前向表达式,就可以计算任意节点梯度。
只需要在张量的构造中添加requires_grad=True参数即可。
detach().requires_grad_()表示将张量分离同时设置requires_grad=True。
创建文件linear_regression_autograd.py
完整实现代码如下:
# -*- coding: utf-8 -*-
"""
使用自动求导训练线性回归模型
"""
import torch
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def model(x, w, b):
""" 回归模型 """
return w * x + b
def loss_fn(y_pred, y):
""" 损失函数 """
loss = (y_pred - y) ** 2
return loss.mean()
def model_training(x, y, params, n_epochs, learning_rate):
""" 训练 """
for epoch in range(1, n_epochs + 1):
# 梯度清零
if params.grad is not None:
params.grad.zero_()
# 前向传播
y_pred = model(x, *params)
# 计算损失
loss = loss_fn(y_pred, y)
# 反向传播
loss.backward()
# 更新参数
params = (params - learning_rate * params.grad).detach().requires_grad_()
if epoch == 1 or epoch % 500 == 0:
print('轮次:%d,\t损失:%f' % (epoch, float(loss)))
return params
def main():
# 加载奥运会数据
file_path = "../datasets/Freestyle100m.csv"
dataset = pd.read_csv(file_path)
dataset_tensor = torch.tensor(dataset.values) # 转换为tensor
xx = dataset_tensor[:, 0]
y = dataset_tensor[:, -1]
# 为方便数值运算,将原来的举办年减去第一届奥运会年
x = xx - xx[0]
# 初始化模型参数
params = torch.tensor([0.0, 0.0], requires_grad=True)
print('params.grad is None: ', params.grad is None)
loss = loss_fn(model(x, *params), y)
loss.backward()
print(params.grad)
if params.grad is not None:
params.grad.zero_()
params = model_training(
x=x,
y=y,
params=torch.tensor([0.0, 0.0], requires_grad=True),
n_epochs=50000,
learning_rate=0.00017)
print('梯度下降找到的w和b:%f %f \n' % (params[0], params[1]))
y_pred = model(x, *params)
print(f'优化后的模型的预测结果:{y_pred.detach().numpy()}\n')
# 绘图
plt.plot(xx.numpy(), y_pred.detach().numpy(), label=u'模型')
plt.plot(xx.numpy(), y.numpy(), 'o', label=u'数据')
plt.xlabel(u'奥运会年')
plt.ylabel(u'取胜时间(秒)')
plt.legend()
plt.show()
if __name__ == "__main__":
main()
优化器(optimizer)用来优化梯度计算。包括SGD 优化,Adam。
创建文件linear_regression_optimizers.py
完整实现代码如下:
# -*- coding: utf-8 -*-
"""
使用优化器训练线性回归模型
"""
import torch
import torch.optim as optim
import pandas as pd
from matplotlib import pyplot as plt
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def model(x, w, b):
""" 回归模型 """
return w * x + b
def loss_fn(y_pred, y):
""" 损失函数 """
loss = (y_pred - y) ** 2
return loss.mean()
def model_training(x, y, params, n_epochs, optimizer):
""" 训练 """
for epoch in range(1, n_epochs + 1):
# 前向传播
p_pred = model(x, *params)
# 计算损失
loss = loss_fn(p_pred, y)
# 梯度清零
optimizer.zero_grad()
# 反向传播
loss.backward()
# 更新参数
optimizer.step()
if epoch == 1 or epoch % 500 == 0:
print('轮次:%d,\t损失:%f' % (epoch, float(loss)))
return params
def main():
# 加载奥运会数据
file_path = "../datasets/Freestyle100m.csv"
dataset = pd.read_csv(file_path)
dataset_tensor = torch.tensor(dataset.values) # 转换为tensor
xx = dataset_tensor[:, 0]
y = dataset_tensor[:, -1]
# 为方便数值运算,将原来的举办年减去第一届奥运会年
x = xx - xx[0]
# 参数
params = torch.tensor([0.0, 0.0], requires_grad=True)
learning_rate = 0.00017
# 使用optim模块中的现成优化器
optimizer = optim.SGD([params], lr=learning_rate)
# 手工做一次优化
y_pred = model(x, *params)
loss = loss_fn(y_pred, y)
# 梯度清零
optimizer.zero_grad()
# 优化
loss.backward()
optimizer.step()
# 打印一次优化后的参数
print('一次优化后的参数:', params)
# 手工再做一次优化
params = torch.tensor([0.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)
y_pred = model(x, *params)
loss = loss_fn(y_pred, y)
# 梯度清零
optimizer.zero_grad()
# 优化
loss.backward()
optimizer.step()
# 打印再一次优化后的参数
print('再一次优化后的参数:', params)
print('\n使用SGD训练模型')
params = torch.tensor([0.0, 0.0], requires_grad=True)
optimizer = optim.SGD([params], lr=learning_rate)
params = model_training(
x=x,
y=y,
params=params,
n_epochs=50000,
optimizer=optimizer)
print('SGD优化器找到的w和b:%f %f \n' % (params[0], params[1]))
print('\n使用Adam再训练一次模型')
# 注意学习率和训练轮次都不同于SGD,说明Adam是高级优化函数
params = torch.tensor([0.0, 0.0], requires_grad=True)
learning_rate = 1e-1
optimizer = optim.Adam([params], lr=learning_rate)
params = model_training(
x=x,
y=y,
params=params,
n_epochs=2000,
optimizer=optimizer)
print('Adam优化器找到的w和b:%f %f \n' % (params[0], params[1]))
# 再做一次预测
y_pred = model(x, *params)
# 绘图
plt.plot(xx.numpy(), y_pred.detach().numpy(), label=u'模型')
plt.plot(xx.numpy(), y.numpy(), 'o', label=u'数据')
plt.xlabel(u'奥运会年')
plt.ylabel(u'取胜时间(秒)')
plt.legend()
plt.show()
if __name__ == "__main__":
main()
pytorch提供nn模块,包含有Lineaar、Sequential、Model和各种损失函数。
步骤:
1.加载数据集,划分划分训练集验证集
2.定义线性模型
3.设置优化器
4.分别使用自定义的损失函数和nn模块使用开箱即用的损失函数
# -*- coding: utf-8 -*-
"""
使用torch.nn.Linear训练线性回归模型
"""
import torch
import torch.optim as optim
import torch.nn as nn
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def loss_fn(y_pred, y):
""" 损失函数 """
loss = (y_pred - y) ** 2
return loss.mean()
def model_training(x_train, y_train, x_val, y_val, n_epochs, optimizer, model, loss_fn):
""" 训练 """
for epoch in range(1, n_epochs + 1):
# 前向传播
y_pred_train = model(x_train)
loss_train = loss_fn(y_pred_train, y_train)
with torch.no_grad():
y_pred_val = model(x_val)
loss_val = loss_fn(y_pred_val, y_val)
# 梯度清零
optimizer.zero_grad()
# 反向传播
loss_train.backward()
# 更新参数
optimizer.step()
if epoch == 1 or epoch % 100 == 0:
print(f'轮次:{epoch},\t训练损失:{float(loss_train)},\t验证损失:{float(loss_val)}')
def main():
# 加载奥运会数据
file_path = "../datasets/Freestyle100m.csv"
dataset = pd.read_csv(file_path)
dataset_tensor = torch.tensor(dataset.values) # 转换为tensor
xx = dataset_tensor[:, 0]
y = dataset_tensor[:, -1]
# 为方便数值运算,将原来的举办年减去第一届奥运会年
x = xx - xx[0]
# 扩充一维,变为nn可以接受的shape和数据类型
x = x.unsqueeze(1).float()
y = y.unsqueeze(1).float()
# 划分训练集验证集
n_samples = x.shape[0]
n_train = int(0.7 * n_samples)
# 随机置乱
shuffled_idx = torch.randperm(n_samples)
train_idx = shuffled_idx[:n_train]
val_idx = shuffled_idx[n_train:]
x_train = x[train_idx]
y_train = y[train_idx]
x_val = x[val_idx]
y_val = y[val_idx]
# 定义模型
linear_model = nn.Linear(1, 1)
# 使用optim模块中的现成优化器
learning_rate = 1e-1
epochs = 3000
optimizer = optim.Adam(linear_model.parameters(), lr=learning_rate)
# 使用自定义的损失函数
model_training(
x_train=x_train,
y_train=y_train,
x_val=x_val,
y_val=y_val,
n_epochs=epochs,
optimizer=optimizer,
model=linear_model,
loss_fn=loss_fn)
print()
print(linear_model.weight)
print(linear_model.bias)
linear_model = nn.Linear(1, 1)
optimizer = optim.Adam(linear_model.parameters(), lr=learning_rate)
# 使用开箱即用的损失函数
model_training(
x_train=x_train,
y_train=y_train,
x_val=x_val,
y_val=y_val,
n_epochs=epochs,
optimizer=optimizer,
model=linear_model,
loss_fn=nn.MSELoss())
print('优化后的模型参数')
print(linear_model.weight)
print(linear_model.bias)
# 绘图
plt.plot(xx.numpy(), y.numpy(), 'go', label=u'数据')
x_range = torch.arange(min(xx), max(xx)).unsqueeze(1)
plt.plot(x_range.numpy(), linear_model(x_range - xx[0]).detach().numpy(), 'r-', label=u'模型')
plt.plot(xx.numpy(), linear_model(x).detach().numpy(), 'bx', label=u'预测')
plt.xlabel(u'奥运会年')
plt.ylabel(u'取胜时间(秒)')
plt.legend()
plt.show()
if __name__ == "__main__":
main()
Sequential是一种容器,添加并连接模块,最终形成一个网络模型。
创建文件linear_regression_nn_sequential.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
使用torch.nn.Sequential类训练线性回归模型
"""
import torch
import torch.optim as optim
import torch.nn as nn
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def model_training(x_train, y_train, x_val, y_val, n_epochs, optimizer, model, loss_fn):
""" 训练 """
for epoch in range(1, n_epochs + 1):
# 前向传播
y_pred_train = model(x_train)
loss_train = loss_fn(y_pred_train, y_train)
y_pred_val = model(x_val)
loss_val = loss_fn(y_pred_val, y_val)
# 梯度清零
optimizer.zero_grad()
# 反向传播
loss_train.backward()
# 更新参数
optimizer.step()
if epoch == 1 or epoch % 100 == 0:
print(f'轮次:{epoch},\t训练损失:{float(loss_train)},\t验证损失:{float(loss_val)}')
def main():
learning_rate = 1e-1
# 设置随机数种子
seed = 1
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# 加载奥运会数据
file_path = "../datasets/Freestyle100m.csv"
dataset = pd.read_csv(file_path)
dataset_tensor = torch.tensor(dataset.values) # 转换为tensor
xx = dataset_tensor[:, 0]
y = dataset_tensor[:, -1]
# 为方便数值运算,将原来的举办年减去第一届奥运会年
x = xx - xx[0]
# 扩充一维,变为nn可以接受的shape和数据类型
x = x.unsqueeze(1).float()
y = y.unsqueeze(1).float()
# 划分训练集验证集
n_samples = x.shape[0]
n_trin = int(0.7 * n_samples)
# 随机置乱
shuffled_idx = torch.randperm(n_samples)
train_idx = shuffled_idx[:n_trin]
val_idx = shuffled_idx[n_trin:]
x_train = x[train_idx]
y_train = y[train_idx]
x_val = x[val_idx]
y_val = y[val_idx]
# 定义模型
# 读者可尝试使用两种模型定义中的一种,看看有什么区别
# 第一种定义
# seq_model = nn.Sequential(
# nn.Linear(1, 10),
# nn.LogSigmoid(),
# nn.Linear(10, 1)
# )
# 第二种定义
from collections import OrderedDict
seq_model = nn.Sequential(OrderedDict([
('hidden_linear', nn.Linear(1, 10)),
('hidden_activation', nn.LogSigmoid()),
('output_linear', nn.Linear(10, 1))
]))
# 打印模型信息
print(seq_model)
for name, param in seq_model.named_parameters():
print("{:21} {:19} {}".format(name, str(param.shape), param.numel()))
optimizer = optim.Adam(seq_model.parameters(), lr=learning_rate)
# 使用开箱即用的损失函数
model_training(
x_train=x_train,
y_train=y_train,
x_val=x_val,
y_val=y_val,
n_epochs=3000,
optimizer=optimizer,
model=seq_model,
loss_fn=nn.MSELoss())
print('优化后的模型参数')
for name, param in seq_model.named_parameters():
print(name, param)
# 绘图
plt.plot(xx.numpy(), y.numpy(), 'go', label=u'数据')
x_range = torch.arange(min(xx), max(xx)).unsqueeze(1)
plt.plot(x_range.numpy(), seq_model(x_range - xx[0]).detach().numpy(), 'r-', label=u'模型')
plt.plot(xx.numpy(), seq_model(x).detach().numpy(), 'bx', label=u'预测')
plt.xlabel(u'奥运会年')
plt.ylabel(u'取胜时间(秒)')
plt.legend()
plt.show()
if __name__ == "__main__":
main()
使用nn.Module实现自定义线性模型。
创建文件linear_regression_nn_module.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
使用torch.nn.Module训练线性回归模型
"""
import torch
import torch.optim as optim
import torch.nn as nn
from matplotlib import pyplot as plt
import pandas as pd
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
class LinearModel(nn.Module):
""" 自定义模型 """
def __init__(self):
super().__init__()
self.hidden_linear = nn.Linear(1, 10)
self.activation = nn.LogSigmoid()
self.output_linear = nn.Linear(10, 1)
def forward(self, input):
output = self.hidden_linear(input)
output = self.activation(output)
output = self.output_linear(output)
return output
def model_training(x_train, y_train, x_val, y_val, n_epochs, optimizer, model, loss_fn):
""" 训练 """
for epoch in range(1, n_epochs + 1):
# 前向传播
y_pred_train = model(x_train)
loss_train = loss_fn(y_pred_train, y_train)
y_pred_val = model(x_val)
loss_val = loss_fn(y_pred_val, y_val)
# 梯度清零
optimizer.zero_grad()
# 反向传播
loss_train.backward()
# 更新参数
optimizer.step()
if epoch == 1 or epoch % 100 == 0:
print(f'轮次:{epoch},\t训练损失:{float(loss_train)},\t验证损失:{float(loss_val)}')
def main():
learning_rate = 1e-1
# 设置随机数种子
seed = 1
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# 加载奥运会数据
file_path = "../datasets/Freestyle100m.csv"
dataset = pd.read_csv(file_path)
dataset_tensor = torch.tensor(dataset.values) # 转换为tensor
xx = dataset_tensor[:, 0]
y = dataset_tensor[:, -1]
# 为方便数值运算,将原来的举办年减去第一届奥运会年
x = xx - xx[0]
# 扩充一维,变为nn可以接受的shape和数据类型
x = x.unsqueeze(1).float()
y = y.unsqueeze(1).float()
# 划分训练集验证集
n_samples = x.shape[0]
n_trin = int(0.7 * n_samples)
# 随机置乱
shuffled_idx = torch.randperm(n_samples)
train_idx = shuffled_idx[:n_trin]
val_idx = shuffled_idx[n_trin:]
x_train = x[train_idx]
y_train = y[train_idx]
x_val = x[val_idx]
y_val = y[val_idx]
# 定义模型
linear_model = LinearModel()
# 打印模型信息
print(linear_model)
for name, param in linear_model.named_parameters():
print("{:21} {:19} {}".format(name, str(param.shape), param.numel()))
optimizer = optim.Adam(linear_model.parameters(), lr=learning_rate)
# 使用开箱即用的损失函数
model_training(
x_train=x_train,
y_train=y_train,
x_val=x_val,
y_val=y_val,
n_epochs=3000,
optimizer=optimizer,
model=linear_model,
loss_fn=nn.MSELoss())
print('优化后的模型参数')
for name, param in linear_model.named_parameters():
print(name, param)
# 绘图
plt.plot(xx.numpy(), y.numpy(), 'go', label=u'数据')
x_range = torch.arange(min(xx), max(xx)).unsqueeze(1)
plt.plot(x_range.numpy(), linear_model(x_range - xx[0]).detach().numpy(), 'r-', label=u'模型')
plt.plot(xx.numpy(), linear_model(x).detach().numpy(), 'bx', label=u'预测')
plt.xlabel(u'奥运会年')
plt.ylabel(u'取胜时间(秒)')
plt.legend()
plt.show()
if __name__ == "__main__":
main()
线性回归解决回归问题;逻辑回归主要解决分类问题。区别在于前者目标值连续;后者是离散的。
可以将逻辑回归视为神经网络的一个神经元
思路:
1.假设函数:使用激活函数求输出。Sigmoid函数很好模拟了阶跃函数
2.代价函数:求代价使用负对数似然代价函数表示。
3.逻辑回归梯度下降算法
创建文件logistic_regression_from_scratch.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
从头开始实现逻辑回归模型
"""
import torch
import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def sigmoid(z):
""" S型激活函数 """
g = 1 / (1 + torch.exp(-z))
return g
def model(x, w, b):
""" 逻辑回归模型 """
return sigmoid(x.mv(w) + b)
def loss_fn(y_pred, y):
""" 损失函数 """
loss = - y.mul(y_pred.view_as(y)) - (1 - y).mul(1 - y_pred.view_as(y))
return loss.mean()
def grad_loss_fn(y_pred, y):
""" 损失函数求导 """
return y_pred.view_as(y) - y
def grad_fn(x, y, y_pred):
""" 梯度函数 """
grad_w = grad_loss_fn(y_pred, y) * x
grad_b = grad_loss_fn(y_pred, y)
return torch.cat((grad_w.mean(dim=0), grad_b.mean().unsqueeze(0)), 0)
def model_training(x, y, n_epochs, learning_rate, params, print_params=True):
""" 训练 """
for epoch in range(1, n_epochs + 1):
w, b = params[: -1], params[-1]
# 前向传播
y_pred = model(x, w, b)
# 计算损失
loss = loss_fn(y_pred, y)
# 梯度
grad = grad_fn(x, y, y_pred)
# 更新参数
params -= learning_rate * grad
if epoch == 1 or epoch % 10 == 1:
print('轮次:%d,\t损失:%f' % (epoch, float(loss)))
if print_params:
print(f'参数:{params.detach().numpy()}')
print(f'梯度:{grad.detach().numpy()}')
return params
def generate_data():
""" 随机生成数据 """
np.random.seed(0)
# 使用二个高斯分布来生成随机数据
phi = 0.6
means = [[0.0, -3.2], [1.0, 3.5]]
covs = np.zeros((2, 2, 2))
covs[0] = [[0.58, -0.05], [-0.05, 1.55]]
covs[1] = [[0.65, -0.15], [-0.15, 1.12]]
priors = [1 - phi, phi]
n = 100
x = np.zeros((n, 2))
y = np.zeros((n, 1))
# 选择一个高斯,并随机抽样数据
for i in range(n):
comp = np.random.choice(2, p=priors)
x[i] = np.random.multivariate_normal(means[comp], covs[comp], 1)
y[i] = comp
return torch.from_numpy(x).float(), torch.from_numpy(y).float()
def plot_decision_boundary(x, y, theta):
"""
绘制二元分类问题的决策边界
输入参数
x:输入,y:输出,theta:参数
输出参数
无
"""
plt.figure()
neg_x = x[np.where(y[:, 0] == 0)]
pos_x = x[np.where(y[:, 0] == 1)]
neg = plt.scatter(neg_x[:, 0], neg_x[:, 1], c='b', marker='o')
pos = plt.scatter(pos_x[:, 0], pos_x[:, 1], c='r', marker='+')
# 绘制决策边界
# 只需要两点便可以定义一条直线,选择两个端点
plot_x = np.array([min(x[:, 0]), max(x[:, 0])])
# 计算决策边界线,theta0 + theta1*x + theta2*y = 0
# 已知x,可计算y
plot_y = np.dot(np.divide(-1, theta[1]), (theta[2] + np.dot(theta[0], plot_x)))
lr, = plt.plot(plot_x, plot_y, 'g') # 绘制拟合直线
# 坐标
plt.xlabel('x1')
plt.ylabel('x2')
# 图例
plt.legend([neg, pos, lr], ['负例', '正例', u'决策边界'], loc='lower right')
plt.show()
def main():
# 随机生成数据
x, y = generate_data()
# 模型参数初始化
w = torch.zeros(2)
b = torch.zeros(1)
# 直接用未优化的模型来预测
y_pred = model(x, w, b)
print('未优化的模型的预测结果:', y_pred.detach().numpy())
loss = loss_fn(y_pred, y)
print('未优化的模型的损失:', loss.detach().numpy())
params = model_training(
x=x,
y=y,
n_epochs=500,
learning_rate=0.1,
params=torch.tensor([0.0, 0.0, 0.0]))
print(f'梯度下降找到的w和b:{params.numpy()} \n')
y_pred = model(x, params[: -1], params[-1])
print(f'优化后的模型的预测结果:{y_pred.detach().numpy()}\n')
# 绘制决策边界
plot_decision_boundary(x, y, params)
if __name__ == "__main__":
main()
使用nn.Sequential定义模型;nn.Sigmoid定义激活函数。
创建文件logistic_regression_concise.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
简洁实现逻辑回归模型
"""
import torch
import torch.optim as optim
import torch.nn as nn
import numpy as np
from matplotlib import pyplot as plt
import matplotlib as mpl
# 防止plt汉字乱码
mpl.rcParams[u'font.sans-serif'] = ['SimHei']
mpl.rcParams['axes.unicode_minus'] = False
def model_training(x, y, n_epochs, optimizer, model, loss_fn):
""" 训练 """
for epoch in range(1, n_epochs + 1):
# 前向传播
y_pred = model(x)
# 计算损失
loss = loss_fn(y_pred, y)
# 梯度清零
optimizer.zero_grad()
# 反向传播
loss.backward()
# 更新参数
optimizer.step()
if epoch == 1 or epoch % 10 == 1:
print('轮次:%d,\t损失:%f' % (epoch, float(loss)))
def generate_data():
""" 随机生成数据 """
np.random.seed(0)
# 使用二个高斯分布来生成随机数据
phi = 0.6
means = [[0.0, -3.2], [1.0, 3.5]]
covs = np.zeros((2, 2, 2))
covs[0] = [[0.58, -0.05], [-0.05, 1.55]]
covs[1] = [[0.65, -0.15], [-0.15, 1.12]]
priors = [1 - phi, phi]
n = 100
x = np.zeros((n, 2))
y = np.zeros((n, 1))
# 选择一个高斯,并随机抽样数据
for i in range(n):
comp = np.random.choice(2, p=priors)
x[i] = np.random.multivariate_normal(means[comp], covs[comp], 1)
y[i] = comp
return torch.from_numpy(x).float(), torch.from_numpy(y).float()
def plot_decision_boundary(x, y, model):
"""
绘制二元分类问题的决策边界
输入参数
x:输入,y:输出,model:模型
输出参数
无
"""
plt.figure()
neg_x = x[np.where(y[:, 0] == 0)]
pos_x = x[np.where(y[:, 0] == 1)]
neg = plt.scatter(neg_x[:, 0], neg_x[:, 1], c='b', marker='o')
pos = plt.scatter(pos_x[:, 0], pos_x[:, 1], c='r', marker='+')
# 绘制决策边界
# 网格范围
u = np.linspace(min(x[:, 0]), max(x[:, 0]), 150)
v = np.linspace(min(x[:, 1]), max(x[:, 1]), 150)
uu, vv = np.meshgrid(u, v) # 生成网格数据
z = model(torch.cat((torch.from_numpy(uu.ravel()).float().unsqueeze(1),
torch.from_numpy(vv.ravel()).float().unsqueeze(1)), 1))
# 保持维度一致
z = z.detach().numpy().reshape(uu.shape)
# 画图
plt.contour(uu, vv, z, 0)
# 坐标
plt.xlabel('x1')
plt.ylabel('x2')
# 图例
plt.legend([neg, pos], ['负例', '正例'], loc='lower right')
plt.show()
def main():
# 随机生成数据
x, y = generate_data()
# 定义模型
seq_model = nn.Sequential(
nn.Linear(2, 1),
nn.Sigmoid()
)
# 打印模型信息
print(seq_model)
# 模型参数初始化
print('\n使用SGD训练模型')
learning_rate = 0.1
# 使用optim模块中的现成优化器
optimizer = optim.SGD(seq_model.parameters(), lr=learning_rate)
model_training(
x=x,
y=y,
n_epochs=500,
optimizer=optimizer,
model=seq_model,
loss_fn=nn.BCELoss()
)
print('优化后的模型参数')
for name, param in seq_model.named_parameters():
print(name, param)
y_pred = seq_model(x)
print(f'优化后的模型的预测结果:{y_pred.detach().numpy()}\n')
# 绘制决策边界
plot_decision_boundary(x, y, seq_model)
if __name__ == "__main__":
main()
解决多元分类问题可以使用softmax回归。softmax回归的输出单元数k>2,对应k个类别的预测概率分布。训练好模型后,选择概率最大的类别作为预测类别。
思想:
1.假设函数,概率分布归一化
2.代价函数使用交叉熵
创建文件softmax_regression_from_scratch.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
从头开始实现softmax回归
"""
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils import data
import numpy as np
# 超参数
num_inputs = 784
num_outputs = 10
def softmax(z):
"""
实现Softmax激活函数。每一个Z减去一个max值是为了避免数值溢出
输入参数:
z:二维Tensor数组
返回:
a:softmax(z)输出,与z的shape一致
"""
z_rescale = z - torch.max(z, dim=1, keepdim=True)[0]
a = torch.exp(z_rescale) / torch.sum(torch.exp(z_rescale), dim=1, keepdim=True)
assert (a.shape == z.shape)
return a
def model(x, w, b):
""" 定义Softmax模型 """
return softmax(torch.mm(x.view((-1, num_inputs)), w) + b)
def cross_entropy(y_hat, y):
""" 计算交叉熵 """
return - torch.log(y_hat.gather(1, y.view(-1, 1)))
def grad_loss_fn(y_pred, y):
""" 损失函数求导 """
return y_pred - torch.nn.functional.one_hot(y, num_classes=10)
def grad_fn(x, y, y_pred):
""" 梯度函数 """
grad_w = torch.mm(x.view((-1, num_inputs)).T, grad_loss_fn(y_pred, y))
grad_b = grad_loss_fn(y_pred, y)
return [grad_w, grad_b.mean(dim=0)]
def accuracy(y_hat, y):
""" 计算分类准确率 """
_, pred_y = torch.max(y_hat, 1)
return (pred_y == y).mean().item()
def evaluate_accuracy(data_iter, net, w, b):
""" 计算准确率 """
correct_acc, n = 0.0, 0
for x, y in data_iter:
_, pred_y = torch.max(net(x, w, b).data, 1)
correct_acc += (pred_y == y).sum().item()
n += y.shape[0]
return correct_acc / n
def train(net, train_iter, test_iter, loss, num_epochs, params, lr):
""" 模型训练 """
w, b = params
for epoch in range(num_epochs):
train_loss_sum, train_acc_sum, n = 0.0, 0.0, 0
for x, y in train_iter:
# 前向传播
y_pred = net(x, w, b)
# 计算损失
loss_mini_batch = loss(y_pred, y).sum()
# 梯度
grad = grad_fn(x, y, y_pred)
# 更新参数
for i in range(len(params)):
params[i] -= lr * grad[i]
# 统计
train_loss_sum += loss_mini_batch.item()
train_acc_sum += (y_pred.argmax(dim=1) == y).sum().item()
n += y.shape[0]
# 测试准确率
test_acc = evaluate_accuracy(test_iter, net, w, b)
print('轮次:%d,损失%.4f,训练准确率:%.3f,测试准确率:%.3f'
% (epoch + 1, train_loss_sum / n, train_acc_sum / n, test_acc))
def get_labels_by_indices(labels):
""" 返回目标索引对应的标签字符串 """
text_labels = ['t-shirt', 'trouser', 'pullover', 'dress', 'coat', 'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot']
return [text_labels[int(i)] for i in labels]
def main():
# 超参数
num_epochs, learning_rate, batch_size = 5, 0.001, 32
# 加载数据集
mnist_train = torchvision.datasets.FashionMNIST(root='../datasets/FashionMNIST', train=True, download=False,
transform=transforms.ToTensor())
train_iter = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
mnist_test = torchvision.datasets.FashionMNIST(root='../datasets/FashionMNIST', train=False, download=False,
transform=transforms.ToTensor())
test_iter = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
# 参数
w = torch.tensor(np.random.normal(0, 0.01, (num_inputs, num_outputs)), dtype=torch.float, requires_grad=False)
b = torch.zeros(num_outputs, dtype=torch.float, requires_grad=False)
# 训练模型
train(model, train_iter, test_iter, cross_entropy, num_epochs, [w, b], learning_rate)
# 显示部分测试数据的真实标签和预测标签
x, y = next(iter(test_iter))
true_labels = get_labels_by_indices(y.numpy())
pred_labels = get_labels_by_indices(model(x, w, b).argmax(dim=1).numpy())
for true_label, pred_label in zip(true_labels, pred_labels):
print(f"真实标签:{true_label}\t预测标签:{pred_label}")
if __name__ == '__main__':
main()
使用Pytorch的自动求导功能。
创建文件softmax_regression.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
softmax回归实现
"""
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.utils import data
import torch.nn.functional as F
class Net(nn.Module):
""" 定义Softmax模型 """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(784, 10)
def forward(self, x):
# 展平数据 (n, 1, 28, 28) --> (n, 784)
x = x.view(-1, 784)
return F.softmax(self.fc1(x), dim=1)
def train(epochs, train_loader, model, optimizer, loss_fn, print_every):
""" 迭代训练模型 """
for epoch in range(epochs):
# 每次输入batch_idx个数据
loss_acc = 0.0 # 累计损失
for batch_idx, (data, target) in enumerate(train_loader):
# 梯度清零
optimizer.zero_grad()
# 前向传播
output = model(data)
# 损失
loss = loss_fn(output, target)
# 反向传播
loss.backward()
# 更新参数
optimizer.step()
loss_acc += loss.item()
if batch_idx % print_every == print_every - 1:
print('[%d, %5d] 损失: %.3f' % (epoch + 1, batch_idx + 1, loss_acc / print_every))
loss_acc = 0.0
print('完成训练!')
def test(model, test_loader):
""" 测试 """
correct = 0
total = 0
# 预测不需要梯度来修改参数
with torch.no_grad():
for data in test_loader:
images, labels = data
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('在测试集中的预测准确率: {:.2%}'.format(correct / total))
def main():
# 超参数
num_epochs = 5
batch_size = 16
print_every = 200
learning_rate = 0.001
# 加载数据集
mnist_train = torchvision.datasets.FashionMNIST(root='../datasets/FashionMNIST', train=True, download=False,
transform=transforms.ToTensor())
train_loader = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
mnist_test = torchvision.datasets.FashionMNIST(root='../datasets/FashionMNIST', train=False, download=False,
transform=transforms.ToTensor())
test_loader = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
model = Net()
# 交叉熵损失函数
loss_fn = nn.CrossEntropyLoss()
# 优化算法
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
train(num_epochs, train_loader, model, optimizer, loss_fn, print_every)
test(model, test_loader)
if __name__ == '__main__':
main()
运行结果:
轮次:1,损失0.6634,训练准确率:0.782,测试准确率:0.805
轮次:2,损失0.5175,训练准确率:0.826,测试准确率:0.817
轮次:3,损失0.4881,训练准确率:0.835,测试准确率:0.827
轮次:4,损失0.4720,训练准确率:0.840,测试准确率:0.830
轮次:5,损失0.4616,训练准确率:0.844,测试准确率:0.830
真实标签:ankle boot 预测标签:ankle boot
真实标签:pullover 预测标签:pullover
真实标签:trouser 预测标签:trouser
真实标签:trouser 预测标签:trouser
真实标签:shirt 预测标签:shirt
真实标签:trouser 预测标签:trouser
真实标签:coat 预测标签:coat
真实标签:shirt 预测标签:shirt
真实标签:sandal 预测标签:sandal
真实标签:sneaker 预测标签:sneaker
真实标签:coat 预测标签:coat
真实标签:sandal 预测标签:sandal
真实标签:sneaker 预测标签:sandal
真实标签:dress 预测标签:dress
真实标签:coat 预测标签:coat
真实标签:trouser 预测标签:trouser
真实标签:pullover 预测标签:pullover
真实标签:coat 预测标签:pullover
真实标签:bag 预测标签:bag
真实标签:t-shirt 预测标签:t-shirt
真实标签:pullover 预测标签:pullover
真实标签:sandal 预测标签:sneaker
真实标签:sneaker 预测标签:sneaker
真实标签:ankle boot 预测标签:sneaker
真实标签:trouser 预测标签:trouser
真实标签:coat 预测标签:pullover
真实标签:shirt 预测标签:shirt
真实标签:t-shirt 预测标签:t-shirt
真实标签:ankle boot 预测标签:ankle boot
真实标签:dress 预测标签:dress
真实标签:bag 预测标签:bag
真实标签:bag 预测标签:bag
线性回归实际上就是一个线性神经元。多变量线性回归实现了加权求和计算。增加了激活函数的神经元直接称为神经元。
创建文件plot_sigmoid.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
绘制Sigmoid激活函数
"""
import matplotlib.pyplot as plt
import numpy as np
import torch
# x取值,start到stop范围内间隔均匀的100个数
x_vals = np.linspace(start=-10., stop=10., num=100)
activation = torch.nn.Sigmoid()
# Sigmoid激活函数
print(activation(torch.Tensor([-1., 0., 1.])))
y_vals = activation(torch.Tensor(x_vals))
# 绘制Sigmoid激活函数图像
plt.plot(x_vals, y_vals, "r--", label="Sigmoid", linewidth=1.5)
plt.ylim([-0.5, 1.5])
plt.legend(loc="upper left")
plt.grid(axis="y")
plt.show()
它是Sigmoid函数的变体,放大并平移
# -*- coding: utf-8 -*-
"""
绘制双曲正切Tanh激活函数
"""
import matplotlib.pyplot as plt
import numpy as np
import torch
# x取值,start到stop范围内间隔均匀的100个数
x_vals = np.linspace(start=-10., stop=10., num=100)
activation = torch.nn.Tanh()
# 双曲正切激活函数
print(activation(torch.Tensor([-1., 0., 1.])))
y_vals = activation(torch.Tensor(x_vals))
# 绘制Tanh激活函数图像
plt.plot(x_vals, y_vals, "r--", label="Tanh", linewidth=1.5)
plt.ylim([-1.5, 1.5])
plt.legend(loc="upper left")
plt.grid(axis="y")
plt.show()
创建文件display_olympics_freestyle100m.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
绘制ReLU激活函数
"""
import matplotlib.pyplot as plt
import numpy as np
import torch
# x取值,start到stop范围内间隔均匀的100个数
x_vals = np.linspace(start=-10., stop=10., num=100)
relu = torch.nn.ReLU()
relu6 = torch.nn.ReLU6()
# ReLU激活函数
print("ReLU: ", relu(torch.Tensor([-5., 5., 10.])))
y_relu = relu(torch.Tensor(x_vals))
# ReLU-6激活函数
print("ReLU-6: ", relu6(torch.Tensor([-5., 5., 10.])))
y_relu6 = relu6(torch.Tensor(x_vals))
# 绘制ReLU激活函数图像
plt.plot(x_vals, y_relu, "r:", label="ReLU", linewidth=1.5)
plt.plot(x_vals, y_relu6, "b-.", label="ReLU6", linewidth=1.5)
plt.ylim([-1, 8])
plt.legend(loc="upper left")
plt.grid(axis="y")
plt.show()
运行结果:
创建文件display_olympics_freestyle100m.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
绘制LeakyReLU激活函数
"""
import matplotlib.pyplot as plt
import numpy as np
import torch
# x取值,start到stop范围内间隔均匀的100个数
x_vals = np.linspace(start=-10., stop=10., num=100)
leaky_relu = torch.nn.LeakyReLU()
elu = torch.nn.ELU()
# ReLU激活函数
print("Leaky ReLU: ", leaky_relu(torch.Tensor([-5., 5., 10.])))
y_leaky_relu = leaky_relu(torch.Tensor(x_vals))
# ELU激活函数
print("ELU: ", elu(torch.Tensor([-5., 5., 10.])))
y_elu = elu(torch.Tensor(x_vals))
# 绘制ReLU激活函数图像
plt.plot(x_vals, y_leaky_relu, "r:", label="Leaky ReLU", linewidth=1.5)
plt.plot(x_vals, y_elu, "b-.", label="ELU", linewidth=1.5)
plt.ylim([-1, 8])
plt.legend(loc="upper left")
plt.grid(axis="y")
plt.show()
运行结果:
主要用于神经网络最后一层,解决多元分类问题
中间层选ReLU;二元分类选Sigmoid;多元分类选Softmax函数;深层网络一般避免Tanh函数和Sigmoid函数;如果出现较多死亡神经元,用Leaky ReLU和ELU函数代替
主要思想:
1.神经网络表示和结构;
2.前向传播;
3.代价函数:训练权重和偏置。一般采用交叉熵代价函数
4.BP算法:即反向传播算法。用于优化神经网络参数算法,推荐使用Pytorch的自动求导解决方案。
nn.Sequential定义一个简单的神经网络模型;
另一种方式是使用nn.Module定义神经网络。
两种方式实现如下,
创建文件neural_networks_initialization.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
PyTorch定义神经网络及参数初始化
"""
import torch
from torch import nn
from torch.nn import init
num_inputs, num_hiddens, num_outputs = 4, 5, 3
# nn.Sequential定义一个简单的神经网络模型
seq_model = nn.Sequential(
nn.Linear(num_inputs, num_hiddens),
nn.ReLU(),
nn.Linear(num_hiddens, num_hiddens),
nn.ReLU(),
nn.Linear(num_hiddens, num_outputs),
nn.Softmax(dim=1),
)
class Net(nn.Module):
""" 定义一个简单的神经网络模型 """
def __init__(self):
super().__init__()
self.fc1 = nn.Linear(num_inputs, num_hiddens)
self.act1 = nn.ReLU()
self.fc2 = nn.Linear(num_hiddens, num_hiddens)
self.act2 = nn.ReLU()
self.fc3 = nn.Linear(num_hiddens, num_outputs)
self.act3 = nn.Softmax(dim=1)
def forward(self, x):
x = self.act1(self.fc1(x))
x = self.act2(self.fc2(x))
return self.act3(self.fc3(x))
def weights_init(m):
""" 初始化网络权重 """
if isinstance(m, nn.Conv2d):
init.normal_(m.weight.data)
# init.xavier_normal_(m.weight.data)
# init.kaiming_normal_(m.weight.data) # 卷积层参数初始化
m.bias.data.fill_(0)
elif isinstance(m, nn.Linear):
m.weight.data.normal_() # 全连接层参数初始化
def main():
# 初始化
for params in seq_model.parameters():
init.normal_(params, mean=0, std=0.01)
print("nn.Sequential定义的网络:")
print(seq_model)
for name, param in seq_model.named_parameters():
print(name, param.shape)
# 随机初始化输入x
x = torch.randn(10, 4)
y_hat = seq_model(x)
print(y_hat)
model = Net()
# 网络参数初始化
model.apply(weights_init) # apply函数会递归搜索网络中的子模块并应用初始化
print("nnn.Module定义的网络:")
print(model)
y_hat = model(x)
print(y_hat)
if __name__ == "__main__":
main()
需求:FashionMNIST分类问题。
创建文件neural_networks_demo1.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
PyTorch实现神经网络示例一
"""
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.utils import data
from torch.nn import init
num_inputs, num_hiddens, num_outputs = 784, 256, 10
# nn.Sequential模型
net = nn.Sequential(
nn.Linear(num_inputs, num_hiddens),
nn.ReLU(),
nn.Linear(num_hiddens, num_outputs),
)
for params in net.parameters():
init.normal_(params, mean=0, std=0.01)
def train(epochs, train_loader, model, optimizer, loss_fn, print_every):
""" 迭代训练模型 """
for epoch in range(epochs):
loss_acc = 0.0 # 累计损失
# 每次输入第batch_idx批数据
for batch_idx, (data, target) in enumerate(train_loader):
# 梯度清零
optimizer.zero_grad()
# 前向传播
output = model(data.view(data.shape[0], -1))
# 损失
loss = loss_fn(output, target)
# 反向传播
loss.backward()
# 更新参数
optimizer.step()
loss_acc += loss.item()
if batch_idx % print_every == print_every - 1:
print('[%d, %5d] 损失: %.3f' % (epoch + 1, batch_idx + 1, loss_acc / print_every))
loss_acc = 0.0
print('完成训练!')
def test(model, test_loader):
""" 模型测试 """
correct = 0
total = 0
# 预测不需要梯度来修改参数
with torch.no_grad():
for data in test_loader:
images, labels = data
outputs = model(images.view(images.shape[0], -1))
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('在测试集中的预测准确率: {:.2%}'.format(correct / total))
def main():
# 超参数
num_epochs = 5
batch_size = 16
print_every = 200
learning_rate = 0.001
# 加载数据集
mnist_train = torchvision.datasets.FashionMNIST(
root='../datasets/FashionMNIST', train=True, download=False,
transform=transforms.ToTensor())
train_loader = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
mnist_test = torchvision.datasets.FashionMNIST(
root='../datasets/FashionMNIST', train=False, download=False,
transform=transforms.ToTensor())
test_loader = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
# 交叉熵损失函数
loss_fn = nn.CrossEntropyLoss()
# 优化算法
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)
# 训练与评估
train(num_epochs, train_loader, net, optimizer, loss_fn, print_every)
test(net, test_loader)
if __name__ == '__main__':
main()
示例二使用nn.Module定义网络模型
创建文件neural_networks_demo2.py
添加代码如下:
# -*- coding: utf-8 -*-
"""
PyTorch实现神经网络示例二
"""
import torch
import torchvision
import torchvision.transforms as transforms
from torch import nn
from torch.utils import data
import torch.nn.functional as F
num_inputs, num_hiddens, num_outputs = 784, 256, 10
class Net(nn.Module):
""" nn.Module定义网络模型 """
def __init__(self):
super(Net, self).__init__()
self.fc1 = nn.Linear(num_inputs, num_hiddens)
self.fc2 = nn.Linear(num_hiddens, num_outputs)
def forward(self, x):
# 展平数据 (n, 1, 28, 28) --> (n, 784)
x = x.view(x.shape[0], -1)
x = F.relu(self.fc1(x))
return F.log_softmax(self.fc2(x), dim=1)
def train(epochs, train_loader, model, optimizer, loss_fn, print_every):
""" 迭代训练模型 """
for epoch in range(epochs):
loss_acc = 0.0 # 累计损失
# 每次输入第batch_idx批数据
for batch_idx, (data, target) in enumerate(train_loader):
# 梯度清零
optimizer.zero_grad()
# 前向传播
output = model(data)
# 损失
loss = loss_fn(output, target)
# 反向传播
loss.backward()
# 更新参数
optimizer.step()
loss_acc += loss.item()
if batch_idx % print_every == print_every - 1:
print('[%d, %5d] 损失: %.3f' % (epoch + 1, batch_idx + 1, loss_acc / print_every))
loss_acc = 0.0
print('完成训练!')
def test(model, test_loader):
""" 模型测试 """
correct = 0
total = 0
# 预测不需要梯度来修改参数
with torch.no_grad():
for data in test_loader:
images, labels = data
outputs = model(images)
_, predicted = torch.max(outputs.data, 1)
total += labels.size(0)
correct += (predicted == labels).sum().item()
print('在测试集中的预测准确率: {:.2%}'.format(correct / total))
def main():
# 超参数
num_epochs = 5
batch_size = 16
print_every = 200
learning_rate = 0.001
# 加载数据集
mnist_train = torchvision.datasets.FashionMNIST(
root='../datasets/FashionMNIST', train=True, download=False,
transform=transforms.ToTensor())
train_loader = data.DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
mnist_test = torchvision.datasets.FashionMNIST(
root='../datasets/FashionMNIST', train=False, download=False,
transform=transforms.ToTensor())
test_loader = data.DataLoader(mnist_test, batch_size=batch_size, shuffle=False)
model = Net()
# 负对数似然损失函数
loss_fn = nn.NLLLoss()
# 优化算法
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# 训练与评估
train(num_epochs, train_loader, model, optimizer, loss_fn, print_every)
test(model, test_loader)
if __name__ == '__main__':
main()
# -*- coding: utf-8 -*-
"""
两层神经网络解决异或问题
"""
import torch
from torch import nn
def main():
# 设置随机数种子
seed = 1
torch.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
# 定义输入单元数、隐藏层单元数、输出层单元数
n_in, n_h, n_out = 2, 2, 1
# 创建XOR数据集
x = torch.tensor([[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]])
y = torch.tensor([[0.0], [1.0], [1.0], [0.0]])
# 创建模型
model = nn.Sequential(nn.Linear(n_in, n_h),
nn.Sigmoid(),
nn.Linear(n_h, n_out),
nn.Sigmoid())
# 损失函数
criterion = nn.MSELoss()
# 优化器
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
# 梯度下降
for epoch in range(1, 1001):
# 前向传播
y_pred = model(x)
# 计算损失
loss = criterion(y_pred, y)
if epoch == 1 or epoch % 100 == 0:
print(f'轮次:{epoch},\t损失:{loss.item()}')
# 梯度置零
optimizer.zero_grad()
# 反向传播
loss.backward()
# 更新参数
optimizer.step()
# 预测
y_pred = model(x)
print("输入为:\n{}".format(x))
print('输出为:\n{}'.format(y_pred))
if __name__ == "__main__":
main()
# -*- coding: utf-8 -*-
"""
Fizz Buzz神经网络实现
"""
import numpy as np
import torch
def binary_encode(i, num_digits):
""" 将输入的十进制i转换为位数是num_digits的二进制 """
return np.array([i >> d & 1 for d in range(num_digits)])
def fizz_buzz_encode(i):
""" 将输入数字编码为标签 """
if i % 3 == 0 and i % 5 == 0:
return 3
elif i % 5 == 0:
return 2
elif i % 3 == 0:
return 1
else:
return 0
def fizz_buzz_decode(i, encode):
""" 解码为人类可读的输出 """
return [str(i), "Fizz", "Buzz", "FizzBuzz"][encode]
# 超参数
learning_rate = 0.05
epochs = 3000
batch_size = 128
num_digits = 10
num_hidden = 100
# 构建训练集。使用101~1024作为训练数据
train_x = torch.tensor([binary_encode(i, num_digits) for i in range(101, 2 ** num_digits)]).float()
train_y = torch.tensor([fizz_buzz_encode(i) for i in range(101, 2 ** num_digits)]).long()
# 随机置乱
torch.manual_seed(123)
rand_idx = torch.randperm(len(train_y))
train_x = train_x[rand_idx, :]
train_y = train_y[rand_idx]
# 定义网络模型
model = torch.nn.Sequential(
torch.nn.Linear(num_digits, num_hidden),
torch.nn.ReLU(),
torch.nn.Linear(num_hidden, 4)
)
# 定义损失函数和优化器
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
# 模型训练
model.train()
for epoch in range(epochs):
for start in range(0, len(train_x), batch_size):
end = start + batch_size
batch_x = train_x[start:end]
batch_y = train_y[start:end]
y_hat = model(batch_x)
loss = criterion(y_hat, batch_y)
optimizer.zero_grad()
loss.backward()
optimizer.step()
# 计算训练损失
loss = criterion(model(train_x), train_y).item()
print(f'Epoch:{epoch}\tLoss:{loss}')
# 预测1~100
model.eval()
test_x = torch.tensor([binary_encode(i, num_digits) for i in range(1, 101)]).float()
test_y = torch.tensor([fizz_buzz_encode(i) for i in range(1, 101)]).long()
with torch.no_grad():
pred_y = model(test_x)
predictions = zip(range(1, 101), list(pred_y.max(1)[1].data.tolist()))
print("预测:", [fizz_buzz_decode(i, x) for (i, x) in predictions])
acc = np.sum(pred_y.max(1)[1].numpy() == np.array(test_y)) / len(test_y)
print("准确率:{:.2%}".format(acc))