# Define a function that sets the random seed for all relevant libraries
import random
import numpy as np
import torch

def setup_seed(seed):
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    np.random.seed(seed)
    random.seed(seed)
    torch.backends.cudnn.deterministic = True

# Set the random seed
setup_seed(1234)
# Preprocess the data and train the model
# ...
# ...
If you only use PyTorch, I don't think it is necessary to also seed numpy and random. I am also not sure what effect the line torch.backends.cudnn.deterministic = True actually has, because it raised an error when I called it! In short, if only PyTorch is involved, I would call the code below:
import torch

# Specify the seed
m_seed = 20
# Set the seed
torch.manual_seed(m_seed)
torch.cuda.manual_seed_all(m_seed)
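As a side note on the cudnn line from the first snippet: my understanding is that torch.backends.cudnn.deterministic = True asks cuDNN to pick deterministic algorithms, and it is usually paired with disabling the cuDNN auto-tuner. This only matters when the model actually runs through cuDNN on a GPU; it is irrelevant to the CPU-only demos below. A minimal sketch of such a setup (these are standard torch.backends.cudnn flags, not something the demos in this post rely on):

import torch

torch.manual_seed(20)
torch.cuda.manual_seed_all(20)
# Ask cuDNN to use deterministic kernels
torch.backends.cudnn.deterministic = True
# Disable the auto-tuner, which may otherwise select different kernels between runs
torch.backends.cudnn.benchmark = False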
To test whether setting PyTorch's random seed actually works, I wrote the following two pieces of code for reference.
① A simple gradient-descent demo
import torch

# Configure torch print options
torch.set_printoptions(linewidth=2048, precision=4, sci_mode=False)

# Specify the seed
m_seed = 20
# Set the seed
torch.manual_seed(m_seed)
torch.cuda.manual_seed_all(m_seed)

# Test whether the seed works (a tiny gradient-descent example)
# A simple quadratic function
def x_pow(x):
    return (x - 2) * (x - 2) - 2.0

# Specify the training target
objective = -2.0
# Randomly generate the trainable parameter (here the only trainable parameter is x)
random_value = 100 * torch.rand(1, 1)
print('random value: ', random_value.item())
parameter = random_value.clone().detach().requires_grad_(True)
print('parameter: ', parameter)
# Set up the optimizer
optimizer = torch.optim.Adam([parameter], lr=0.001)
for epoch in range(70001):
    # Forward pass: compute the loss
    loss = x_pow(parameter) - objective
    optimizer.zero_grad()
    # Backward pass: update the parameter
    loss.backward()
    optimizer.step()
    if epoch % 10000 == 0:
        print('epoch[%d], loss: %f' % (epoch, loss.item()))
# Print the final optimized parameter
print('parameter (after optimize): ', parameter)
Output of the 1st run:
random value: 56.14835739135742
parameter: tensor([[56.1484]], requires_grad=True)
epoch[0], loss: 2932.044678
epoch[10000], loss: 1963.557739
epoch[20000], loss: 1194.294312
epoch[30000], loss: 618.942871
epoch[40000], loss: 235.499451
epoch[50000], loss: 38.484642
epoch[60000], loss: 0.000442
epoch[70000], loss: 0.000000
parameter (after optimize): tensor([[2.0000]], requires_grad=True)
Output of the 2nd run:
random value: 56.14835739135742
parameter: tensor([[56.1484]], requires_grad=True)
epoch[0], loss: 2932.044678
epoch[10000], loss: 1963.557739
epoch[20000], loss: 1194.294312
epoch[30000], loss: 618.942871
epoch[40000], loss: 235.499451
epoch[50000], loss: 38.484642
epoch[60000], loss: 0.000442
epoch[70000], loss: 0.000000
parameter (after optimize): tensor([[2.0000]], requires_grad=True)
The two runs produce identical output, which shows that setting PyTorch's random seed really does take effect. But sometimes we also need a DataLoader: can setting the random seed make the DataLoader load the data in the same order as well? See my second piece of code.
② DataLoader loading demo
import torch
from torch.utils.data import DataLoader, Dataset

# Configure torch print options (unrelated to the topic of this post; can be ignored)
torch.set_printoptions(linewidth=2048, precision=4, sci_mode=False)

# Specify the seed
m_seed = 20
# Set the seed
torch.manual_seed(m_seed)
torch.cuda.manual_seed_all(m_seed)

# Define a dataset
class DIYDataset(Dataset):
    def __init__(self):
        self.n = 6
        self.x_data = torch.tensor([1, 2, 4, 9, 7, 8])  # training features
        self.y_data = torch.tensor([0, 0, 0, 1, 1, 1])  # training labels

    def __getitem__(self, index):
        return self.x_data[index], self.y_data[index]

    def __len__(self):
        return self.n

# Create a dataset object
diy_dataset = DIYDataset()
# Create a dataloader object
loader = DataLoader(dataset=diy_dataset, batch_size=4, shuffle=True)
# Simulate training: repeatedly load the data
for epoch in range(4):
    for each_batch, each_batch_data in enumerate(loader):
        x, label = each_batch_data[0].tolist(), each_batch_data[1].tolist()
        print('the %dth batch:' % each_batch, x, label, end=' ')
    print()
Output of the 1st run:
the 0th batch: [1, 4, 7, 9] [0, 0, 1, 1] the 1th batch: [2, 8] [0, 1]
the 0th batch: [7, 9, 8, 2] [1, 1, 1, 0] the 1th batch: [4, 1] [0, 0]
the 0th batch: [8, 1, 9, 4] [1, 0, 1, 0] the 1th batch: [7, 2] [1, 0]
the 0th batch: [8, 1, 9, 2] [1, 0, 1, 0] the 1th batch: [7, 4] [1, 0]
Output of the 2nd run:
the 0th batch: [1, 4, 7, 9] [0, 0, 1, 1] the 1th batch: [2, 8] [0, 1]
the 0th batch: [7, 9, 8, 2] [1, 1, 1, 0] the 1th batch: [4, 1] [0, 0]
the 0th batch: [8, 1, 9, 4] [1, 0, 1, 0] the 1th batch: [7, 2] [1, 0]
the 0th batch: [8, 1, 9, 2] [1, 0, 1, 0] the 1th batch: [7, 4] [1, 0]
The two runs again produce identical output, which once more shows that setting PyTorch's random seed takes effect.
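One caveat worth adding: the demo above uses the default single-process loading. With num_workers > 0, each worker process gets its own random state, and PyTorch's reproducibility notes recommend seeding the workers and passing an explicit generator to the DataLoader. A minimal sketch along those lines, reusing diy_dataset and m_seed from the demo above (the seed_worker helper and the generator g are not part of the original demo):

import random
import numpy as np
import torch
from torch.utils.data import DataLoader

def seed_worker(worker_id):
    # Derive a per-worker seed from the base seed torch assigned to this worker
    worker_seed = torch.initial_seed() % 2**32
    np.random.seed(worker_seed)
    random.seed(worker_seed)

# Explicit generator that controls the shuffling order
g = torch.Generator()
g.manual_seed(m_seed)

loader = DataLoader(dataset=diy_dataset, batch_size=4, shuffle=True,
                    num_workers=2, worker_init_fn=seed_worker, generator=g)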