PyTorch official online documentation
A Torch tensor is conceptually similar to a Tensor in TensorFlow: it can be viewed as a matrix that can be computed on the GPU;
If you are familiar with numpy, you can also think of it as a GPU version of ndarray;
With this deep learning framework in place, all we need to do is design the task pipeline and the network architecture;
CPU version:
pip install torch==1.3.0+cpu torchvision==0.4.1+cpu -f https://download.pytorch.org/whl/torch_stable.html
GPU version:
pip install torch==1.3.0 torchvision==0.4.1 -f https://download.pytorch.org/whl/torch_stable.html (defaults to the CUDA 10 build)
See the official site for installation details. The course videos use version 1.3.0; my machine actually has 1.7.0 installed (most likely the GPU build, since I never deliberately ran the CPU-only install command). Either way, the main goal is to learn how to use PyTorch;
CUDA: NVIDIA's GPU acceleration library for its graphics cards; download link
import torch
torch.__version__          # installed PyTorch version
torch.version.cuda         # CUDA version the build was compiled against (None for CPU builds)
torch.cuda.is_available()  # whether a usable GPU is present
'1.7.0'
!pip show torch
Name: torch
Version: 1.7.0
Summary: Tensors and Dynamic neural networks in Python with strong GPU acceleration
Home-page: https://pytorch.org/
Author: PyTorch Team
Author-email: [email protected]
License: BSD-3
Location: /Users/huaqiang/anaconda3/lib/python3.7/site-packages
Requires: future, numpy, typing-extensions, dataclasses
Required-by: torchvision, torchaudio, pytorch-pretrained-bert
Note that the locally installed libraries depending on torch are: torchvision, torchaudio, pytorch-pretrained-bert
# Create a matrix
x = torch.empty(5, 3)
x # the type is tensor
# Data type conversion
x.to(torch.float)
tensor([[1.1210e-44, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00]])
# Randomly generate a matrix
torch.rand(5,3)
tensor([[0.4939, 0.6952, 0.3724],
[0.3051, 0.1697, 0.6733],
[0.2311, 0.2673, 0.2252],
[0.0205, 0.5017, 0.8799],
[0.6741, 0.4258, 0.1572]])
# All-zeros matrix
torch.zeros(5,3,dtype=torch.long)
tensor([[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0],
[0, 0, 0]])
# Many methods work much like their numpy counterparts
torch.randn_like(x, dtype=torch.float)
tensor([[-0.7219, -1.0058, 0.1401],
[-0.1806, -0.3656, 0.8092],
[ 0.8398, 0.2060, 0.9734],
[-0.1092, 0.4415, -0.0103],
[-0.6904, -1.5415, 0.1186]])
torch.randn_like?
# Check the matrix size
x.size()
torch.Size([5, 3])
# Basic operations
x + x
torch.add(x,x)
tensor([[2.2421e-44, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00],
[0.0000e+00, 0.0000e+00, 0.0000e+00]])
# Indexing and slicing
x[:, 1]
tensor([0., 0., 0., 0., 0.])
# Common methods
# view: change the shape of a matrix
x = torch.randn(4,4)
y = x.view(16)
z = x.view(-1, 8)
z
tensor([[ 0.2263, -0.2230, -0.1979, 1.1429, -0.6950, 0.2761, -0.1115, -0.1601],
[ 0.5172, 0.3535, 1.6254, 0.2054, 0.5812, 0.3431, 0.1358, -1.4275]])
# Interoperating with numpy
a = torch.ones(5)
a
tensor([1., 1., 1., 1., 1.])
b = a.numpy()
b
array([1., 1., 1., 1., 1.], dtype=float32)
c = torch.from_numpy(b)
c
tensor([1., 1., 1., 1., 1.])
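A small aside (a sketch using the tensors above): for CPU tensors, a.numpy() and torch.from_numpy() share memory with their source, so in-place changes show up on both sides.
a = torch.ones(5)
b = a.numpy()   # b shares memory with a (CPU tensors only)
a.add_(1)       # in-place add on the tensor
print(a)        # tensor([2., 2., 2., 2., 2.])
print(b)        # [2. 2. 2. 2. 2.] -- the numpy array changed as well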
Backpropagation
import torch
# Specify a tensor that requires gradients: method 1
x = torch.randn(3,4,requires_grad=True)
# Specify a tensor that requires gradients: method 2
y = torch.randn(3,4)
y.requires_grad = True
# Define the computation
t = x + y
t = t.sum()
# Run backpropagation
t.backward()
# Inspect the gradient (computed automatically)
# t is not a leaf tensor, so t.grad stays None unless t.retain_grad() is called before backward(); inspect a leaf such as x instead
x.grad
y = w * x
z = y + b
# By the chain rule: the partial derivative of z w.r.t. x = (dz/dy) * (dy/dx)
x = torch.rand(1)
x
tensor([0.6581])
b = torch.rand(1, requires_grad=True)
w = torch.rand(1, requires_grad=True)
y = w * x
z = y + b
z
tensor([1.3465], grad_fn=<AddBackward0>)
# Check which tensors require gradients
x.requires_grad,b.requires_grad,w.requires_grad, y.requires_grad
(False, True, True, True)
# Check which tensors are leaf nodes (not important)
x.is_leaf, b.is_leaf, w.is_leaf, y.is_leaf, z.is_leaf
(True, True, True, False, False)
# Backpropagation. Remember to zero the gradients: if they are not cleared, they accumulate
z.backward(retain_graph=True)
# The computed gradients
w.grad
tensor([0.6581])
b.grad # if the gradients are not zeroed, repeated backward calls accumulate into .grad
tensor([1.])
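A small sketch (reusing z, w and b from above) of how gradients accumulate across repeated backward calls and how to clear them:
z.backward(retain_graph=True)
w.grad          # the same gradient has now been added a second time (accumulated)
w.grad.zero_()  # clear it manually; with an optimizer you would call optimizer.zero_grad()
b.grad.zero_()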
# Each training step therefore follows the cycle: zero the gradients
# compute (forward and backward pass)
# update the parameters
import torch
import torch.nn as nn
import numpy as np
x_values = [i for i in range(11)]
x_train = np.array(x_values, dtype=np.float32)
x_train = x_train.reshape(-1,1)
x_train.shape
(11, 1)
y_values = [2*i + 1 for i in x_values]
y_train = np.array(y_values, dtype=np.float32)
y_train = y_train.reshape(-1,1)
y_train.shape
(11, 1)
class LinearRegressionModel(nn.Module):
"""
Linear regression model
"""
def __init__(self, input_dim, output_dim):
super(LinearRegressionModel, self).__init__()
# Fully connected layer
self.linear = nn.Linear(input_dim, output_dim)
def forward(self, x):
"""
Forward pass
"""
out = self.linear(x)
return out
input_dim = 1
output_dim = 1
# A univariate linear equation, so the input and output dimensions are both 1
model = LinearRegressionModel(input_dim, output_dim)
epochs = 1000
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate) # specify the model parameters to optimize and the learning rate
criterion = nn.MSELoss() # regression loss: mean squared error
for epoch in range(epochs):
# Convert to tensors
inputs = torch.from_numpy(x_train)
labels = torch.from_numpy(y_train)
# Zero the gradients (must be done in every iteration)
optimizer.zero_grad()
# Forward pass
outputs = model(inputs)
# Compute the loss
loss = criterion(outputs, labels)
# Backward pass
loss.backward()
# Update the weight parameters
optimizer.step()
if (epoch % 50) == 0:
print("epoch {}, loss {}".format(epoch, loss.item()))
epoch 0, loss 1.5593505653388462e-11
epoch 50, loss 1.5593505653388462e-11
epoch 100, loss 1.5593505653388462e-11
epoch 150, loss 1.5593505653388462e-11
epoch 200, loss 1.5593505653388462e-11
epoch 250, loss 1.5593505653388462e-11
epoch 300, loss 1.5593505653388462e-11
epoch 350, loss 1.5593505653388462e-11
epoch 400, loss 1.5593505653388462e-11
epoch 450, loss 1.5593505653388462e-11
epoch 500, loss 1.5593505653388462e-11
epoch 550, loss 1.5593505653388462e-11
epoch 600, loss 1.5593505653388462e-11
epoch 650, loss 1.5593505653388462e-11
epoch 700, loss 1.5593505653388462e-11
epoch 750, loss 1.5593505653388462e-11
epoch 800, loss 1.5593505653388462e-11
epoch 850, loss 1.5593505653388462e-11
epoch 900, loss 1.5593505653388462e-11
epoch 950, loss 1.5593505653388462e-11
# Test data: convert to tensor and disable gradients
# Batch prediction over a whole dataset is supported
predicted = model(torch.from_numpy(x_train).requires_grad_(False))
# Convert the result back to numpy
predicted = predicted.data.numpy()
# Print
predicted
array([[ 0.99999267],
[ 2.9999938 ],
[ 4.999995 ],
[ 6.9999967 ],
[ 8.999997 ],
[10.999998 ],
[13. ],
[15.000001 ],
[17.000002 ],
[19.000004 ],
[21.000004 ]], dtype=float32)
torch.save(model.state_dict(), './model.pkl') # either a .pkl or a .pth extension works
model.load_state_dict(torch.load('./model.pkl'))
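A small sketch of the usual reload workflow (reusing the LinearRegressionModel class defined above): recreate the architecture first, then load the saved weights into it.
loaded_model = LinearRegressionModel(input_dim, output_dim)  # same architecture as when saving
loaded_model.load_state_dict(torch.load('./model.pkl'))
loaded_model.eval()  # switch to inference mode before predicting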
All that is needed is to move the data and the model onto CUDA.
Note: whenever you want to train on the GPU, both the model and the input tensors need a .to(device); on CPU this can basically be ignored!
train_on_gpu = torch.cuda.is_available()
if not train_on_gpu:
print('CUDA is not available, use CPU')
else:
print('CUDA is available, use GPU')
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
# The model
model.to(device)
LinearRegressionModel(
(linear): Linear(in_features=1, out_features=1, bias=True)
)
# The data
# ...
for epoch in range(epochs):
# Convert to tensors
inputs = torch.from_numpy(x_train).to(device)
labels = torch.from_numpy(y_train).to(device)
# ...
This is the simplest possible example; later sections build on it.
from torch import tensor
x = tensor(100) # scalar
x
tensor(100)
x = tensor([1,2,3,4,5,6]) # vector
x.dim(), x.size()
(1, torch.Size([6]))
x = tensor([[1,2],[3,4]]) # matrix
x.dim(), x.size()
(2, torch.Size([2, 2]))
# Matrix operations
x.matmul(x) # matrix multiplication: (rows x cols) * (cols x rows)
x*x # element-wise (Hadamard) product: corresponding elements multiplied
tensor([[ 1, 4],
[ 9, 16]])
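For comparison, the matrix product gives different values; worked by hand it is [[1*1+2*3, 1*2+2*4], [3*1+4*3, 3*2+4*4]]:
x.matmul(x)
tensor([[ 7, 10],
        [15, 22]])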
x = tensor([[[1,2],[3,4]],[[1,2],[3,4]]]) # 3-dimensional tensor
x.dim(), x.size()
(3, torch.Size([2, 2, 2]))
Load a ready-made network architecture together with its parameters and use it directly;
For example, a model for image segmentation built on ResNet (from the same model family) can be loaded through the hub module and used right away;
import torch
# Example 1
model = torch.hub.load("pytorch/vision:v0.4.2", "deeplabv3_resnet101", pretrained=True)
When training models with PyTorch, pay attention to one detail: sometimes training looks fine but testing misbehaves.
In that case check whether the model contains BatchNorm or Dropout layers; if it does, be careful!
Add the following line before testing, to rule out the influence of BatchNorm and Dropout on the test results:
model = model.eval()
# Example 2
# See the official docs for how to use this; it is only a usage illustration
# Note that ResNet expects 3-channel (RGB) image input by default; to change the input layer or kernel sizes, download the source and customize it
model = torch.hub.load('pytorch/vision:v0.4.2', 'resnet18', pretrained = True)
# List the models supported by a given release tag
torch.hub.list("pytorch/vision:v1.7.0")
from torchvision import transforms
preprocess = transforms.Compose([
transforms.ToTensor(),  # convert to a tensor first (a lambda such as `lambda x: torch.tensor(x)` would also work)
transforms.Normalize(mean=[0.485,0.456,0.406], std=[0.229,0.224,0.225]),  # Normalize works on tensors, so it must come after ToTensor
])
# Read the data
import pandas as pd
features = pd.read_csv('temps.csv') # the data source is not specified here; this is only an example
features.head()
# Visualize the data
import matplotlib.pyplot as plt
plt.style.use('fivethirtyeight')
# Set up the layout
fig, ((ax1,ax2),(ax3,ax4)) = plt.subplots(nrows=2, ncols=2, figsize=(10,10))
fig.autofmt_xdate(rotation = 45)
# Label values
ax1.plot(dates, features['temp_1']) # here dates is a Series of datetime.datetime values
ax1.set_xlabel('')
ax1.set_ylabel('Temp')
ax1.set_title('temp_1')
...
plt.tight_layout(pad=2)
# Quantify the categorical data: one-hot encoding
features = pd.get_dummies(features) # automatically one-hot encodes the categorical columns, adding one column per category
# Labels
labels = np.array(features['actual'])
# Features
features = features.drop('actual', axis=1)
features = np.array(features)
Feature scaling is a field of its own; here we only apply a simple standardization.
On normalization:
Standardization (StandardScaler) transforms the data towards a standard normal distribution, i.e. mean 0 and standard deviation 1: x' = (x - mean) / std.
Note: every feature is scaled independently.
from sklearn import preprocessing
# Standardization from the preprocessing module
scaler = preprocessing.StandardScaler()
input_features = scaler.fit_transform(features)
# A first attempt at building the network by hand
# Convert the features and labels to tensors
x = torch.tensor(input_features, dtype=torch.float)
y = torch.tensor(labels, dtype=torch.float)
# Initialize the weight parameters
w1 = torch.randn((14,128), dtype=torch.float, requires_grad=True) # (input nodes, output nodes), i.e. the number of weights; dtype must match x
b1 = torch.randn(128, dtype=torch.float, requires_grad=True) # one bias per output node
w2 = torch.randn((128,1), dtype=torch.float, requires_grad=True)
b2 = torch.randn(1, dtype=torch.float, requires_grad=True)
learning_rate = 0.01
losses = []
for i in range(1000):
# Forward pass
# Hidden layer
hidden1 = x.mm(w1) + b1
# Activation function
hidden1 = torch.relu(hidden1)
# Output layer (the output layer of a regression usually has no activation)
predictions = hidden1.mm(w2) + b2
# Compute the loss
loss = torch.mean((predictions - y) ** 2)
losses.append(loss.data.numpy())
if i%100 == 0:
print('loss ', loss)
# Backward pass: compute the gradients of the weights and biases
loss.backward()
# Update the parameters with the computed gradients
w1.data.add_(- learning_rate * w1.grad.data) # step against the gradient direction, i.e. gradient descent!
b1.data.add_(- learning_rate * b1.grad.data)
w2.data.add_(- learning_rate * w2.grad.data)
b2.data.add_(- learning_rate * b2.grad.data)
# Clear the gradients after every iteration
w1.grad.data.zero_()
b1.grad.data.zero_()
w2.grad.data.zero_()
b2.grad.data.zero_()
input_size = input_features.shape[1] # 14
hidden_size = 128
output_size = 1
batch_size = 16
my_nn = torch.nn.Sequential(
torch.nn.Linear(input_size, hidden_size), # 14 128
torch.nn.Sigmoid(),
torch.nn.Linear(hidden_size, output_size) # 128 1
)
cost = torch.nn.MSELoss(reduction='mean')
# Adam adapts the learning rate dynamically during training
optimizer = torch.optim.Adam(my_nn.parameters(), lr = 0.001)
losses = []
for i in range(1000):
batch_loss = []
for start in range(0, len(input_features), batch_size):
end = start + batch_size if start + batch_size < len(input_features) else len(input_features)
xx = torch.tensor(input_features[start:end], dtype=torch.float, requires_grad=True)
yy = torch.tensor(labels[start:end], dtype=torch.float, requires_grad=True)
prediction = my_nn(xx)
# Compute the loss
loss = cost(prediction, yy)
# Zero the gradients
optimizer.zero_grad()
# Backward pass
loss.backward(retain_graph=True)
# Update the weights
optimizer.step()
batch_loss.append(loss.data.numpy())
losses.append(np.mean(batch_loss))
# If you have two losses, run backward on the first with retain_graph=True, then backward on the second
loss1.backward(retain_graph=True)
loss2.backward() # after this call all intermediate buffers are freed, ready for the next iteration
optimizer.step() # update the parameters
predictions_data = pd.DataFrame(data={'date': test_dates, 'prediction': predict.reshape(-1)})
# Ground-truth values
plt.plot(true_data['date'], true_data['actual'], 'b-', label='actual')
# Predicted values
plt.plot(predictions_data['date'], predictions_data['prediction'], 'ro', label='prediction')
plt.xticks(rotation='60')
plt.legend()
# Axis labels and title
plt.xlabel('Date')
plt.ylabel('Maximum Temperature(F)')
plt.title('Actual and Predicted Values')
from pathlib import Path
import requests
DATA_PATH = Path("./")
PATH = DATA_PATH.joinpath("mnist")
PATH.mkdir(parents=True, exist_ok = True)
URL = "https://deeplearning.net/data/mnist/"
FILENAME = "mnist.pkl.gz"
if not PATH.joinpath(FILENAME).exists():
content = requests.get(URL + FILENAME).content
PATH.joinpath(FILENAME).open("wb").write(content)
import pickle
import gzip
with gzip.open(PATH.joinpath(FILENAME).as_posix(), 'rb') as f:
((x_train, y_train),(x_valid, y_valid), _) = pickle.load(f, encoding="latin-1")
import torch
x_train, y_train, x_valid, y_valid = map(torch.tensor, (x_train, y_train, x_valid, y_valid))
n,c = x_train.shape # 50000 784
torch.nn.functional and nn.Module contain many of the same layers and functions.
In general, if the module has learnable parameters (fully connected layers, convolution layers), use nn.Module; in other cases (activation functions and the like) torch.nn.functional is usually simpler.
import torch.nn.functional as F
loss_func = F.cross_entropy # cross-entropy loss function
from torch import nn
class Mnist_NN(nn.Module):
def __init__(self):
super().__init__()
self.hidden1 = nn.Linear(784,128)
self.hidden2 = nn.Linear(128, 256)
self.out = nn.Linear(256, 10)
def forward(self, x):
x = F.relu(self.hidden1(x))
x = F.relu(self.hidden2(x))
x = self.out(x)
return x
net = Mnist_NN()
net
Mnist_NN(
(hidden1): Linear(in_features=784, out_features=128, bias=True)
(hidden2): Linear(in_features=128, out_features=256, bias=True)
(out): Linear(in_features=256, out_features=10, bias=True)
)
for name, parameter in net.named_parameters():
print(name, parameter)
# As the output below shows, the weights and biases have already received a default initialization
hidden1.weight Parameter containing:
tensor([[-0.0121, -0.0119, 0.0291, ..., -0.0314, -0.0081, -0.0311],
[ 0.0301, -0.0210, 0.0006, ..., 0.0050, -0.0212, 0.0179],
[ 0.0057, -0.0355, 0.0155, ..., 0.0067, 0.0139, 0.0002],
...,
[-0.0029, 0.0183, 0.0194, ..., -0.0106, 0.0158, 0.0061],
[-0.0013, -0.0339, -0.0330, ..., -0.0333, -0.0178, -0.0026],
[ 0.0004, -0.0095, 0.0294, ..., 0.0269, 0.0053, -0.0250]],
requires_grad=True)
hidden1.bias Parameter containing:
tensor([-0.0079, 0.0189, -0.0224, 0.0196, -0.0167, -0.0351, 0.0348, 0.0326,
-0.0246, 0.0104, -0.0343, -0.0244, 0.0128, -0.0209, -0.0303, -0.0273,
0.0288, -0.0331, -0.0252, 0.0125, -0.0058, 0.0228, 0.0015, -0.0196,
0.0012, -0.0315, 0.0192, 0.0124, 0.0351, -0.0166, -0.0168, 0.0273,
-0.0088, -0.0256, -0.0308, -0.0045, -0.0281, -0.0104, 0.0344, 0.0009,
-0.0109, -0.0161, -0.0107, 0.0178, -0.0305, -0.0202, 0.0267, 0.0192,
0.0105, 0.0046, 0.0307, 0.0040, 0.0148, 0.0258, 0.0095, 0.0023,
-0.0240, -0.0101, -0.0061, 0.0294, 0.0022, -0.0062, 0.0230, 0.0247,
0.0153, -0.0237, 0.0122, -0.0292, -0.0139, -0.0119, -0.0081, -0.0264,
-0.0348, 0.0222, 0.0169, 0.0255, -0.0256, -0.0245, -0.0203, -0.0322,
0.0117, -0.0348, 0.0005, 0.0271, 0.0070, -0.0210, -0.0135, 0.0231,
0.0313, 0.0170, 0.0075, 0.0045, -0.0162, -0.0270, -0.0287, 0.0178,
0.0266, 0.0202, 0.0132, -0.0266, -0.0147, -0.0355, 0.0305, -0.0153,
0.0170, 0.0196, -0.0052, 0.0135, -0.0041, -0.0311, 0.0151, 0.0299,
0.0164, -0.0266, 0.0298, 0.0089, 0.0040, 0.0215, -0.0292, 0.0261,
-0.0068, 0.0134, -0.0175, 0.0100, -0.0259, -0.0343, 0.0221, 0.0091],
requires_grad=True)
hidden2.weight Parameter containing:
tensor([[-0.0138, 0.0018, 0.0492, ..., -0.0544, 0.0216, -0.0444],
[-0.0704, 0.0180, 0.0561, ..., 0.0159, -0.0545, 0.0343],
[ 0.0589, -0.0858, 0.0026, ..., 0.0580, -0.0159, 0.0037],
...,
[ 0.0330, 0.0457, 0.0251, ..., -0.0283, 0.0518, -0.0401],
[-0.0650, 0.0187, 0.0630, ..., -0.0114, 0.0528, -0.0251],
[-0.0845, 0.0079, -0.0572, ..., 0.0079, 0.0322, 0.0063]],
requires_grad=True)
hidden2.bias Parameter containing:
tensor([ 0.0530, 0.0186, 0.0146, 0.0608, 0.0069, -0.0686, 0.0218, 0.0767,
-0.0182, 0.0708, 0.0669, 0.0129, -0.0360, 0.0675, -0.0438, 0.0881,
0.0467, -0.0576, 0.0810, -0.0279, 0.0005, 0.0056, -0.0721, 0.0251,
-0.0234, -0.0450, -0.0055, 0.0360, -0.0597, 0.0589, 0.0472, 0.0255,
-0.0277, 0.0169, -0.0694, -0.0523, 0.0286, -0.0680, -0.0882, -0.0283,
-0.0865, -0.0615, 0.0689, -0.0313, -0.0140, 0.0227, -0.0170, 0.0283,
-0.0658, -0.0867, 0.0062, 0.0749, -0.0255, -0.0078, 0.0012, -0.0393,
-0.0592, 0.0813, -0.0329, 0.0652, -0.0711, 0.0228, 0.0639, -0.0544,
0.0190, -0.0730, -0.0472, 0.0656, -0.0265, -0.0491, -0.0242, 0.0071,
-0.0104, 0.0037, -0.0688, 0.0876, 0.0622, 0.0402, 0.0303, -0.0660,
-0.0626, -0.0795, -0.0596, 0.0621, -0.0872, -0.0303, 0.0277, 0.0455,
-0.0697, -0.0115, -0.0614, 0.0848, -0.0765, 0.0294, 0.0193, -0.0664,
-0.0789, 0.0371, -0.0728, 0.0078, 0.0364, 0.0207, -0.0679, 0.0656,
-0.0081, -0.0842, 0.0132, 0.0061, 0.0040, 0.0557, -0.0358, 0.0005,
0.0851, 0.0861, 0.0835, 0.0575, 0.0181, -0.0221, 0.0345, 0.0641,
-0.0793, -0.0544, 0.0100, 0.0471, -0.0876, -0.0841, -0.0258, -0.0244,
-0.0377, -0.0069, 0.0318, -0.0057, 0.0261, -0.0152, 0.0860, 0.0839,
-0.0253, -0.0428, 0.0522, 0.0066, 0.0391, -0.0203, 0.0230, 0.0775,
-0.0704, -0.0413, 0.0795, -0.0632, 0.0198, 0.0659, 0.0117, 0.0151,
-0.0242, 0.0247, -0.0596, 0.0510, 0.0175, 0.0616, -0.0332, 0.0247,
-0.0575, 0.0602, 0.0005, 0.0414, 0.0765, -0.0860, 0.0755, -0.0076,
0.0344, -0.0461, 0.0870, -0.0586, -0.0855, 0.0680, 0.0575, 0.0854,
0.0273, -0.0400, 0.0722, 0.0444, -0.0481, -0.0644, -0.0326, -0.0254,
-0.0647, -0.0219, -0.0749, 0.0125, -0.0190, -0.0629, -0.0741, 0.0216,
-0.0523, -0.0616, 0.0121, -0.0336, 0.0537, 0.0562, 0.0806, -0.0404,
-0.0225, -0.0065, 0.0344, 0.0081, -0.0157, 0.0564, 0.0677, 0.0241,
0.0397, -0.0017, 0.0182, 0.0116, -0.0565, 0.0758, -0.0114, 0.0069,
0.0124, -0.0581, -0.0884, 0.0070, -0.0547, 0.0024, 0.0799, 0.0262,
0.0043, -0.0258, -0.0785, 0.0143, 0.0109, -0.0842, 0.0127, 0.0413,
0.0400, -0.0521, -0.0245, -0.0350, -0.0184, -0.0392, 0.0776, 0.0390,
-0.0140, -0.0051, 0.0574, -0.0570, 0.0646, 0.0629, -0.0366, -0.0699,
0.0290, 0.0873, 0.0322, -0.0728, -0.0201, 0.0787, -0.0738, 0.0686],
requires_grad=True)
out.weight Parameter containing:
tensor([[ 0.0306, 0.0503, 0.0477, ..., 0.0012, -0.0360, -0.0068],
[ 0.0572, 0.0507, 0.0242, ..., -0.0033, -0.0352, 0.0509],
[-0.0387, 0.0337, -0.0617, ..., -0.0443, -0.0426, 0.0191],
...,
[-0.0314, 0.0423, -0.0113, ..., 0.0493, 0.0156, -0.0470],
[ 0.0421, 0.0125, 0.0003, ..., 0.0182, -0.0492, 0.0498],
[ 0.0048, 0.0066, 0.0072, ..., -0.0420, 0.0363, 0.0458]],
requires_grad=True)
out.bias Parameter containing:
tensor([-0.0589, 0.0040, -0.0272, -0.0404, 0.0200, 0.0508, -0.0154, -0.0271,
0.0022, 0.0566], requires_grad=True)
from torch.utils.data import TensorDataset, DataLoader, Dataset
# Note: here x_train and y_train are both tensors!
train_ds = TensorDataset(x_train, y_train) # a custom dataset could also be defined
train_dl = DataLoader(train_ds, batch_size=12, shuffle=True)
valid_ds = TensorDataset(x_valid, y_valid)
valid_dl = DataLoader(valid_ds, batch_size=1)
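A quick sketch pulling one batch from the DataLoader above, just to check the shapes:
xb, yb = next(iter(train_dl))
xb.shape, yb.shape  # (torch.Size([12, 784]), torch.Size([12])) with batch_size=12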
def loss_batch(model, loss_func, xb, yb, opt=None):
loss = loss_func(model(xb),yb)
if opt is not None:
loss.backward()
opt.step()
opt.zero_grad()
return loss.item(), len(xb)
# A custom fit loop
def fit(steps, model, loss_func, opt, train_dl, valid_dl):
for step in range(steps):
model.train()
# Each iteration yields one batch
for xb, yb in train_dl:
loss_batch(model, loss_func, xb, yb, opt)
# Evaluate while training
model.eval()
with torch.no_grad():
losses, nums = zip(*[loss_batch(model, loss_func, xb, yb) for xb,yb in valid_dl])
# np.multiply is element-wise: each batch loss is weighted by its batch size, which gives the average loss over the whole validation set
val_loss = np.sum(np.multiply(losses, nums)) / np.sum(nums)
print("step {}, val_loss {}".format(step, val_loss))
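A minimal usage sketch for the fit helper (reusing the Mnist_NN model, loss_func and the DataLoaders defined above; the optimizer choice here is only an assumption):
from torch import optim

model = Mnist_NN()
opt = optim.SGD(model.parameters(), lr=0.001)
fit(20, model, loss_func, opt, train_dl, valid_dl)  # steps = number of epochs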
class PAD2000(Dataset):
def __init__(self, mode="train", train_type="1", feature_c="A"):
self.indir = "../data/npy_data_mel_92/" # path where the feature matrices are stored
self.mode = mode
if mode == "train":
filepath = "../data_list/train_" + train_type + ".txt" # path of the training data list
elif mode == "val":
filepath = "../data_list/val_" + train_type + ".txt" # path of the validation data list
elif mode == "test":
filepath = "../data_list/test_" + train_type + ".txt" # path of the test data list
elif mode == "use":
filepath = "../data_list/use_" + train_type + ".txt" # path of the list of data for real use
with open(filepath, "r") as fp:
self.file_list = [line.rstrip() for line in fp]
self.train_type = train_type
self.feature_c = feature_c
def __getitem__(self, index):
# Transforms applied to the training data
transform_train = transforms.Compose(
[
# Per-sample (local) normalization
lambda x: x.astype(np.float32) / (np.max(np.abs(x))),
lambda x: torch.Tensor(x),
]
)
# Transforms applied to the test data
transform_test = transforms.Compose(
[
lambda x: x.astype(np.float32) / (np.max(np.abs(x))),
lambda x: torch.Tensor(x),
]
)
file_name = self.file_list[index].strip()
file_id, v_A, v_D, v_P = file_name.split(";")
# Convert to float and normalize globally (in most scenarios the data has very likely been normalized during preprocessing already!)
v_A = (float(v_A)- 1) / 8.0 # convert the rating to a float
v_D = (float(v_D)- 1) / 8.0
v_P = (float(v_P)- 1) / 8.0
d_label = v_A
if self.feature_c == "A":
d_label = v_A
elif self.feature_c == "D":
d_label = v_D
elif self.feature_c == "P":
d_label = v_P
# in_path = self.indir+filename+'.npy'
in_path = self.indir + file_id + ".npy"
data = np.load(in_path)
if self.mode == "train":
data = transform_train(data)
else:
data = transform_test(data)
if self.mode != "use":
return data, d_label, 0
else:
return data, d_label, int(file_id)
def __len__(self):
return len(self.file_list)
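A minimal usage sketch for the custom dataset above, wrapped in a DataLoader (the paths and arguments are simply the ones the class assumes):
from torch.utils.data import DataLoader

train_set = PAD2000(mode="train", train_type="1", feature_c="A")
train_loader = DataLoader(train_set, batch_size=16, shuffle=True)
for data, label, _ in train_loader:
    print(data.shape, label)  # each item is (feature matrix, rating label, file id or 0)
    break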
import re
test = "awefa:bwefawe:cefaewf"
# Replace everything from the start up to and including the first full-width colon with the empty string
item = re.sub(r"^((?!:).)+:", "", test.strip())
item
'bwefawe:cefaewf'
# Replace only the first full-width colon in the string
test.replace("：","",1)
'awefabwefawe：cefaewf'
# Horizontal stacking of np.array rows (np.vstack is the vertical counterpart)
test1 = np.array([[1,2,3,4,5]])
test1
array([[1, 2, 3, 4, 5]])
test2 = np.array([[6,2,3,4,5]])
test2
array([[6, 2, 3, 4, 5]])
np.hstack((test1,test2))
array([[1, 2, 3, 4, 5, 6, 2, 3, 4, 5]])
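For genuinely vertical stacking, np.vstack stacks the two rows into a 2 x 5 array instead:
np.vstack((test1,test2))
array([[1, 2, 3, 4, 5],
       [6, 2, 3, 4, 5]])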
Convolution extracts features and exploits the GPU's parallel computation;
A linear layer needs its input flattened into a vector, whereas a convolution layer can run the convolution directly on the high-dimensional matrix;
For an input batch of shape N * H * W * C (e.g. N * 28 * 28 * C), each filter spans all C input channels and produces one value per spatial position, so one filter yields one new output channel: as many filters as you use, that many new channels you get;
The output spatial size is:
H2 = (H1 - Fh + 2P)/S + 1
W2 = (W1 - Fw + 2P)/S + 1
where H1/W1 are the input height/width, Fh/Fw the filter height/width, P the padding and S the stride;
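A quick sketch checking the formula with an actual nn.Conv2d on a 1 x 28 x 28 input (the MNIST case used later): (28 - 5 + 2*2)/1 + 1 = 28.
import torch
import torch.nn as nn

conv = nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5, stride=1, padding=2)
x = torch.randn(1, 1, 28, 28)  # N x C x H x W
conv(x).shape                  # torch.Size([1, 16, 28, 28])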
Parameter sharing (also called weight sharing): as a convolution kernel slides over the sub-regions of the matrix, it uses the same kernel parameters everywhere;
Downsampling can be understood as compression, i.e. a selection from the original features;
The selection loses some features, which is usually compensated by enlarging the output dimension (the number of kernels);
Note: a pooling layer does not involve any matrix computation;
conv # extract features
relu # nonlinear activation function
conv
relu
pool # downsample the extracted features
conv
relu
conv
relu
pool
conv
relu
conv
relu
pool
Flatten into a vector: use adaptive pooling, or flatten the extracted feature matrix directly, so it can feed the fc layers
fc # fully connected layers for the final output
A "layer" of a neural network usually means a layer with parameter computation:
H(x) = F(x) + x
H(x) = F(x) + x1 (x1 is x concatenated with its 1*1 convolution, to expand the dimension)
The identity shortcut adds a fallback path: stacking more layers can no longer make the network worse. A network is not automatically better just because it is deeper, and ResNet solves exactly this, allowing dozens or even hundreds of layers to be stacked without increasing the loss;
It is a classic network that can be used as a first choice for everyday tasks;
Whether a task is regression or classification mostly depends on the loss function and on how the final fully connected layer is wired, so ResNet can be seen as a general-purpose backbone;
There is also the Inception architecture from Google, worth looking up if you are interested
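A small sketch of a residual block illustrating H(x) = F(x) + x (my own illustration of the idea, not the exact torchvision implementation; the layer sizes are arbitrary):
import torch
import torch.nn as nn
import torch.nn.functional as F

class BasicResidualBlock(nn.Module):
    def __init__(self, in_channels, out_channels, stride=1):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels, out_channels, 3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channels)
        self.conv2 = nn.Conv2d(out_channels, out_channels, 3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # 1*1 convolution on the shortcut when the shape changes (the dimension-expanding case)
        self.shortcut = nn.Identity()
        if stride != 1 or in_channels != out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv2d(in_channels, out_channels, 1, stride=stride, bias=False),
                nn.BatchNorm2d(out_channels),
            )

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))  # F(x): first convolution + activation
        out = self.bn2(self.conv2(out))        # F(x): second convolution
        out = out + self.shortcut(x)           # H(x) = F(x) + x
        return F.relu(out)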
A value in the current layer is computed from an h*w region of the previous layer; that region is its receptive field, i.e. how large an area of the original input a single point can "see";
Small convolution kernels need fewer parameters and extract features more finely; more convolution steps also add more nonlinear transformations without increasing the number of weight parameters (this is exactly the idea behind VGG: do the feature extraction with small kernels)
The training procedure is the same as for an ordinary neural network;
Building the convolutional network:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Still using MNIST
input_size = 28
num_classes = 10
num_epochs = 3
batch_size = 64
# The built-in dataset module downloads MNIST automatically
train_dataset = datasets.MNIST(root="./data",train=True,transform=transforms.ToTensor(),download=True)
test_dataset = datasets.MNIST(root="./data",train=False,transform=transforms.ToTensor())
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)
class CNNClassificationNet(nn.Module):
def __init__(self):
super(CNNClassificationNet, self).__init__()
self.conv1 = nn.Sequential(
nn.Conv2d(
in_channels = 1, # MNIST images are grayscale, so 1 input channel
out_channels = 16,
kernel_size= 5,
stride = 1,
padding = 2,
), # by the output-size formula, the feature map here is 16*28*28
nn.ReLU(),
nn.MaxPool2d(kernel_size=2) # pool over 2*2 regions; the output is 16*14*14
)
self.conv2 = nn.Sequential(
nn.Conv2d(16,32,5,1,2), # output 32*14*14
nn.ReLU(),
nn.MaxPool2d(2) # output 32*7*7
)
self.out = nn.Linear(32*7*7, 10)
def forward(self,x):
x = self.conv1(x)
x = self.conv2(x)
x = x.view(x.size(0), -1) # flatten: batch_size * (32*7*7)
output = self.out(x)
return output
# A helper to evaluate accuracy
def accuracy(predects, labels):
# Note: torch.max(predects.data, 1) returns a (values, indices) tuple; the indices are the predicted classes
preds = torch.max(predects.data, 1)[1]
rights = preds.eq(labels.data.view_as(preds)).sum()
return rights, len(labels)
net = CNNClassificationNet() # instantiate the network
criterion = nn.CrossEntropyLoss() # loss function
optimizer = optim.Adam(net.parameters(), lr=0.001) # define the optimizer (Adam)
for epoch in range(num_epochs):
train_rights = [] # results for the current epoch
for batch_idx,(data, target) in enumerate(train_loader):
net.train() # training mode
output = net(data)
loss = criterion(output, target)
optimizer.zero_grad() # zero the gradients
loss.backward() # backward pass
optimizer.step() # update the weights
right = accuracy(output, target) # accuracy on the current batch
train_rights.append(right)
if batch_idx % 100 == 0:
net.eval() # evaluation mode
val_rights = []
for (data, target) in test_loader:
output = net(data)
right = accuracy(output, target)
val_rights.append(right)
# Accuracy on the training set and the validation set
train_r = (sum([tup[0] for tup in train_rights]), sum([tup[1] for tup in train_rights]))
val_r = (sum([tup[0] for tup in val_rights]), sum([tup[1] for tup in val_rights]))
# Training progress
print("epoch = {} [{}/{}]".format(epoch, batch_idx * batch_size, len(train_loader.dataset)))
# Loss and accuracy
print("loss = {:.6f}\t train_acc = {:.2f}\t val_acc = {:.2f}".format(
loss.data,
1.0 * train_r[0].numpy() / train_r[1],
1.0 * val_r[0].numpy() / val_r[1],
))
# If you save a model, you normally save the one that performs best on the validation set (here: highest accuracy); in other words, the validation set is used for model selection
epoch = 0 [0/60000]
loss = 2.334581 train_acc = 0.08 val_acc = 0.15
epoch = 0 [6400/60000]
loss = 0.193672 train_acc = 0.76 val_acc = 0.92
epoch = 0 [12800/60000]
loss = 0.136826 train_acc = 0.85 val_acc = 0.96
epoch = 0 [19200/60000]
loss = 0.195767 train_acc = 0.88 val_acc = 0.97
epoch = 0 [25600/60000]
loss = 0.181388 train_acc = 0.90 val_acc = 0.97
epoch = 0 [32000/60000]
loss = 0.131864 train_acc = 0.92 val_acc = 0.98
epoch = 0 [38400/60000]
loss = 0.151021 train_acc = 0.93 val_acc = 0.98
epoch = 0 [44800/60000]
loss = 0.035240 train_acc = 0.93 val_acc = 0.98
epoch = 0 [51200/60000]
loss = 0.071616 train_acc = 0.94 val_acc = 0.98
epoch = 0 [57600/60000]
loss = 0.076871 train_acc = 0.94 val_acc = 0.98
epoch = 1 [0/60000]
loss = 0.062000 train_acc = 0.97 val_acc = 0.98
epoch = 1 [6400/60000]
loss = 0.132351 train_acc = 0.98 val_acc = 0.98
epoch = 1 [12800/60000]
loss = 0.028411 train_acc = 0.98 val_acc = 0.98
epoch = 1 [19200/60000]
loss = 0.134709 train_acc = 0.98 val_acc = 0.99
epoch = 1 [25600/60000]
loss = 0.040207 train_acc = 0.98 val_acc = 0.99
epoch = 1 [32000/60000]
loss = 0.059077 train_acc = 0.98 val_acc = 0.99
epoch = 1 [38400/60000]
loss = 0.032424 train_acc = 0.98 val_acc = 0.98
epoch = 1 [44800/60000]
loss = 0.089060 train_acc = 0.98 val_acc = 0.99
epoch = 1 [51200/60000]
loss = 0.021472 train_acc = 0.98 val_acc = 0.99
epoch = 1 [57600/60000]
loss = 0.029995 train_acc = 0.98 val_acc = 0.98
epoch = 2 [0/60000]
loss = 0.014603 train_acc = 1.00 val_acc = 0.99
epoch = 2 [6400/60000]
loss = 0.034530 train_acc = 0.99 val_acc = 0.99
epoch = 2 [12800/60000]
loss = 0.020201 train_acc = 0.99 val_acc = 0.99
epoch = 2 [19200/60000]
loss = 0.037817 train_acc = 0.99 val_acc = 0.99
epoch = 2 [25600/60000]
loss = 0.068633 train_acc = 0.99 val_acc = 0.99
epoch = 2 [32000/60000]
loss = 0.110888 train_acc = 0.99 val_acc = 0.99
epoch = 2 [38400/60000]
loss = 0.142145 train_acc = 0.99 val_acc = 0.99
epoch = 2 [44800/60000]
loss = 0.006270 train_acc = 0.99 val_acc = 0.99
epoch = 2 [51200/60000]
loss = 0.012815 train_acc = 0.99 val_acc = 0.99
epoch = 2 [57600/60000]
loss = 0.019446 train_acc = 0.99 val_acc = 0.99
torchvision.datasets
Commonly used datasets.
torchvision.models
Implementations of the classic network architectures (pretrained weights can be loaded; using a pretrained model helps convergence). See the official docs for details. Note: torchvision is installed separately.
torchvision.transforms
Data preprocessing module with the common normalization and data-augmentation methods.
import torchvision
from torchvision import transforms, models, datasets
train_transforms = transforms.Compose([
transforms.RandomRotation(45), # random rotation between -45 and +45 degrees
transforms.CenterCrop(224), # crop starting from the center
transforms.RandomHorizontalFlip(p=0.5), # random horizontal flip, p is the probability
transforms.RandomVerticalFlip(p=0.5), # random vertical flip, p is the probability
# Parameters are brightness, contrast, saturation, hue
transforms.ColorJitter(brightness=0.2, contrast=0.1, saturation=0.1, hue=0.1),
transforms.RandomGrayscale(p=0.025), # convert to grayscale with this probability; with multiple channels, all channels get the same value
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225]) # standardization; the mean and std here are the precomputed ImageNet statistics
])
val_transforms = transforms.Compose([
transforms.Resize(256), # resize the shorter side to 256, which speeds up validation
transforms.CenterCrop(224),
transforms.ToTensor(),
transforms.Normalize([0.485,0.456,0.406],[0.229,0.224,0.225])
])
# Undo the preprocessing to recover the image
image = tensor.to("cpu").clone().detach() # move to cpu, clone, and detach a gradient-free tensor
image = image.numpy().squeeze() # back to numpy, drop the singleton dimension
image = image.transpose(1,2,0) # swap dimensions: c*h*w => h*w*c
image = image * np.array((0.229,0.224,0.225)) + np.array((0.485,0.456,0.406)) # undo the normalization
image = image.clip(0,1) # clip: values below 0 become 0, values above 1 become 1
Using transfer learning helps the model converge faster.
model = models.resnet152(pretrained=True) # this automatically downloads the pretrained weights (.pth)
To freeze the pretrained layers, use a set_parameter_requires_grad helper:
def set_parameter_requires_grad(model, feature_extraction):
if feature_extraction:
for param in model.parameters():
param.requires_grad = False # disable gradients so these parameters are not updated
# Grab the input size of the original fully connected layer
num_ftrs = model.fc.in_features
# Replace the original fully connected layer and change the number of output nodes to 10; print the model to see the layer names
# Softmax(dim=1): operates on each row and makes every row sum to 1
# LogSoftmax is just the log of Softmax, with range (-inf, 0]
model.fc = nn.Sequential(nn.Linear(num_ftrs, 10), nn.LogSoftmax(dim=1))
# Note here:
# With the LogSoftmax activation, use F.nll_loss to compute the loss
# If the loss is computed with cross-entropy directly, the LogSoftmax activation is not needed
# CrossEntropyLoss is equivalent to LogSoftmax + NLLLoss
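A small sketch verifying that equivalence on made-up data (logits and targets are just for illustration):
import torch
import torch.nn as nn

logits = torch.randn(4, 10)  # raw scores for 4 samples and 10 classes
targets = torch.tensor([1, 0, 4, 9])
ce = nn.CrossEntropyLoss()(logits, targets)
nll = nn.NLLLoss()(nn.LogSoftmax(dim=1)(logits), targets)
torch.allclose(ce, nll)  # True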
# Print the layers whose weights will be updated
params_to_update = []
for name, param in model.named_parameters():
if param.requires_grad == True:
print(name)
params_to_update.append(param)
optimizer = optim.Adam(params_to_update, lr=1e-2)
# Learning-rate decay: every 7 epochs the learning rate is multiplied by 0.1
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)
# With the LogSoftmax activation, the loss function must be nn.NLLLoss
criterion = nn.NLLLoss()
# The learning rate during (or after) training can be read like this
LR = [optimizer.param_groups[0]['lr']]
# Copy the model parameters and save them (example)
import copy
best_model_wb = copy.deepcopy(model.state_dict())
state = {
'state_dict': model.state_dict(),
'best_acc':best_acc,
'optimizer':optimizer.state_dict()
}
torch.save(state, filepath)
# During training, the region that should update weights can be wrapped in a with statement
with torch.set_grad_enabled(True): # True in training mode, False during evaluation
outputs = model(inputs)
loss = criterion(outputs, labels)
# ... then zero the gradients, run the backward pass and update the parameters
# After each training epoch, compute the loss on the current validation set,
# then step the scheduler to apply the learning-rate decay
scheduler.step() # StepLR needs no argument; pass the validation loss (e.g. epoch_loss) only when using ReduceLROnPlateau
# Load the weights (example)
model.load_state_dict(best_model_wb) # note this is a method call; do not worry about the return value and do not reassign model
# Load a saved checkpoint and continue training
checkpoint = torch.load(filename)
best_acc = checkpoint['best_acc']
model.load_state_dict(checkpoint['state_dict'])
optimizer.load_state_dict(checkpoint['optimizer'])
# Predict on new data
image = ...
model.eval()
if train_on_gpu:
output = model(image.cuda())
else:
output = model(image)
The remaining topics are skipped because text processing is covered by BERT and the rest is not needed for now;
Take the template currently in use and gradually understand, upgrade and optimize it.