Import the commonly used packages
import os
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset,DataLoader
import torch.optim as optim
## other task-specific packages can be imported as needed
Setting up the GPU
# Option 1 (most common): create a `device` object and move models/tensors onto it
device = torch.device("cuda:1" if torch.cuda.is_available() else "cpu")
# Option 2: set the visible GPUs globally via an environment variable
os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
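With the `device` object in hand, moving a model and a batch of data to the GPU looks like this (a minimal sketch; the `nn.Linear` model and the random inputs are just placeholders):
model = nn.Linear(4, 2).to(device)        # move the model's parameters onto the device
inputs = torch.randn(8, 4).to(device)     # move a batch of data onto the same device
outputs = model(inputs)                   # the computation now runs on that device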
class MyDataset(Dataset):
    def __init__(self, df):
        self.df = df                      # assuming df is a pandas DataFrame
    def __getitem__(self, index):
        ## for NLP tasks it is best to do the tokenization here inside the Dataset
        row = self.df.iloc[index]
        return dict(row)
    def __len__(self):
        return len(self.df)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=4, shuffle=True, drop_last=True)
val_loader = torch.utils.data.DataLoader(val_data, batch_size=batch_size, num_workers=4, shuffle=False)
Sometimes we want layers that hold no parameters; for these you can simply write the computation directly in Python, as in the sketch below.
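For example, a layer that just centers its input (a minimal sketch; `CenteredLayer` is an illustrative name, not from the original notes):
class CenteredLayer(nn.Module):
    def __init__(self):
        super(CenteredLayer, self).__init__()
    def forward(self, x):
        return x - x.mean()    # pure computation, no learnable parameters

layer = CenteredLayer()
print(layer(torch.tensor([1.0, 2.0, 3.0, 4.0])))   # tensor([-1.5000, -0.5000,  0.5000,  1.5000])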
Layers with parameters can be defined directly like this (although in practice you usually just reach for the ready-made modules):
class MyDictDense(nn.Module):
    def __init__(self):
        super(MyDictDense, self).__init__()
        self.params = nn.ParameterDict({
            'linear1': nn.Parameter(torch.randn(4, 4)),
            'linear2': nn.Parameter(torch.randn(4, 1))
        })
        self.params.update({'linear3': nn.Parameter(torch.randn(4, 2))})  # add a new entry
    def forward(self, x, choice='linear1'):
        return torch.mm(x, self.params[choice])
net = MyDictDense()
print(net)
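Calling the layer with a different `choice` selects a different parameter block (a small usage sketch):
x = torch.ones(1, 4)
print(net(x, 'linear1').shape)   # torch.Size([1, 4])
print(net(x, 'linear3').shape)   # torch.Size([1, 2])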
Understanding the 2D convolution layer by writing it by hand
import torch
from torch import nn
# The convolution operation (2D cross-correlation)
def corr2d(X, K):  ## e.g. X is (3, 3) and K is (2, 2)
    h, w = K.shape  # h = 2, w = 2
    X, K = X.float(), K.float()
    Y = torch.zeros((X.shape[0] - h + 1, X.shape[1] - w + 1))  ## Y is (2, 2)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            # take the h x w window of X at (i, j), multiply it element-wise with K, then sum
            Y[i, j] = (X[i: i + h, j: j + w] * K).sum()
    return Y
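A quick sanity check of corr2d on a small example (the values follow directly from the definition above):
X = torch.arange(9).reshape(3, 3)
K = torch.tensor([[0, 1], [2, 3]])
print(corr2d(X, K))
# tensor([[19., 25.],
#         [37., 43.]])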
# A 2D convolution layer
class Conv2D(nn.Module):
    def __init__(self, kernel_size):
        super(Conv2D, self).__init__()
        # here the kernel is a learnable parameter matrix
        self.weight = nn.Parameter(torch.randn(kernel_size))
        self.bias = nn.Parameter(torch.randn(1))
    def forward(self, x):
        return corr2d(x, self.weight) + self.bias
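Instantiating it and pushing a small input through (a sketch; the kernel starts out random, so the output values are random too):
conv = Conv2D(kernel_size=(2, 2))
Y = conv(torch.ones(3, 3))
print(Y.shape)   # torch.Size([2, 2])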
Pooling layers (quite easy to follow)
import torch
from torch import nn
def pool2d(X, pool_size, mode='max'):
    p_h, p_w = pool_size
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1))
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            if mode == 'max':
                Y[i, j] = X[i: i + p_h, j: j + p_w].max()
            elif mode == 'avg':
                Y[i, j] = X[i: i + p_h, j: j + p_w].mean()
    return Y
Of course, PyTorch already implements all of these layers (and their shape bookkeeping) for us, e.g. nn.Conv2d, nn.MaxPool2d, nn.AvgPool2d.
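As a quick check, the handwritten pool2d agrees with the built-in nn.MaxPool2d on a small example (a sketch; stride=1 so the windows line up):
X = torch.arange(9, dtype=torch.float).reshape(1, 1, 3, 3)   # NCHW layout expected by nn.MaxPool2d
print(pool2d(X[0, 0], (2, 2)))          # tensor([[4., 5.], [7., 8.]])
print(nn.MaxPool2d(2, stride=1)(X))     # same values, with the batch/channel dims kept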
Similarly, there are many ways to initialize model parameters.
People usually write it like this:
def initialize_weights(self):
    ## iterate over every layer in the network
    for m in self.modules():
        # is it a Conv2d layer?
        if isinstance(m, nn.Conv2d):
            torch.nn.init.xavier_normal_(m.weight.data)
            # does it have a bias?
            if m.bias is not None:
                torch.nn.init.constant_(m.bias.data, 0.3)
        elif isinstance(m, nn.Linear):
            torch.nn.init.normal_(m.weight.data, 0.1)   # normal distribution with mean 0.1
            if m.bias is not None:
                torch.nn.init.zeros_(m.bias.data)
        elif isinstance(m, nn.BatchNorm2d):
            m.weight.data.fill_(1)
            m.bias.data.zero_()
When we define a network and create an instance, its parameters are instantiated automatically (with PyTorch's default initialization).
class MLP(nn.Module):
    def __init__(self, **kwargs):
        super(MLP, self).__init__(**kwargs)
        self.hidden = nn.Conv2d(1, 1, 3)
        self.act = nn.ReLU()
        self.output = nn.Linear(10, 1)
    def forward(self, x):
        h = self.act(self.hidden(x))
        return self.output(h)
If we then call the initialization method above, the parameters change again accordingly.
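A minimal sketch of that, assuming initialize_weights is the standalone function defined above (its `self` argument can simply be the network instance):
net = MLP()
print(net.hidden.weight.data.std())   # PyTorch's default initialization
initialize_weights(net)               # re-initialize with the rules above
print(net.hidden.weight.data.std())   # the values have changed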
torch.nn.BCELoss(weight=None, size_average=None, reduce=None, reduction='mean')
torch.nn.CrossEntropyLoss(weight=None, size_average=None, ignore_index=-100, reduce=None, reduction='mean')
torch.nn.L1Loss(size_average=None, reduce=None, reduction='mean')
torch.nn.MSELoss(size_average=None, reduce=None, reduction='mean')
torch.nn.SmoothL1Loss(size_average=None, reduce=None, reduction='mean', beta=1.0)
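These are the common built-in loss functions; calling one looks like this (a small sketch with random inputs):
criterion = nn.CrossEntropyLoss()
logits = torch.randn(3, 5)            # 3 samples, 5 classes (raw scores, not softmaxed)
target = torch.tensor([1, 0, 4])      # class indices
loss = criterion(logits, target)
print(loss.item())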
PyTorch ships literally dozens of optimizers, so which one should we use?
torch.optim provides all the basic ones, and they share a common base class:
from collections import defaultdict

class Optimizer(object):
    def __init__(self, params, defaults):
        self.defaults = defaults
        ## e.g. {'lr': 0.1, 'momentum': 0.9, 'dampening': 0, 'weight_decay': 0, 'nesterov': False}
        self.state = defaultdict(dict)
        ## caches per-parameter state (e.g. momentum buffers)
        self.param_groups = []
        ## the managed parameter groups: a list of dicts, each holding params, lr, momentum, dampening, weight_decay, nesterov
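Since param_groups is just a list of dicts, passing several groups lets different parts of a model use different hyperparameters (a sketch with a hypothetical two-layer model):
model = nn.Sequential(nn.Linear(4, 8), nn.Linear(8, 1))
opt = torch.optim.SGD([
    {'params': model[0].parameters(), 'lr': 0.1},    # first layer: larger learning rate
    {'params': model[1].parameters(), 'lr': 0.01},   # second layer: smaller learning rate
], momentum=0.9)
print(len(opt.param_groups))       # 2
print(opt.param_groups[0]['lr'])   # 0.1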
The optimizer's main methods:
optimizer.zero_grad()        # clear the accumulated gradients
optimizer.step()             # update the parameters
optimizer.state_dict()       # get the optimizer's state as a dict
optimizer.load_state_dict()  # load a saved state dict to resume training where you left off
weight = weight - learning_rate * gradient
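For plain SGD (no momentum or weight decay), optimizer.step() performs exactly this update; a quick sketch to convince ourselves:
w = torch.randn(3, requires_grad=True)
opt = torch.optim.SGD([w], lr=0.1)
(w ** 2).sum().backward()
expected = w.detach() - 0.1 * w.grad          # weight - learning_rate * gradient
opt.step()
print(torch.allclose(w.detach(), expected))   # True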
### pseudocode version of the whole pipeline
class mymodel(nn.Module):
    def __init__(self):
        super().__init__()
        # define the layers here
    def forward(self, x):
        # compute the outputs here
        return outputs
## the training / validation loops
def train_loop(train_loader, model, optimizer, epoch):
    model.train()
    train_loss = 0.0
    for batch in train_loader:
        batch = batch.to(CFG.device)   # assumes the batch object supports .to() (e.g. a tokenizer's BatchEncoding)
        optimizer.zero_grad()
        loss = model(**batch)          # the model is assumed to return the loss directly
        loss.backward()
        optimizer.step()
        train_loss += loss.item() * batch_size
    train_loss = train_loss / len(train_loader.dataset)
    print('Epoch: {} \tTraining Loss: {:.6f}'.format(epoch, train_loss))
def valid_loop(val_loader, model, epoch):
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for batch in val_loader:
            batch = batch.to(CFG.device)
            loss, y_preds = model(**batch)   # here the model is assumed to return (loss, predictions)
            val_loss += loss.item() * batch_size
    val_loss = val_loss / len(val_loader.dataset)
    print('Epoch: {} \tValid Loss: {:.6f}'.format(epoch, val_loss))
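A minimal driver tying the two loops together (a sketch; CFG.epochs is a hypothetical config field, and model, optimizer and the loaders are assumed to already be built as above):
for epoch in range(1, CFG.epochs + 1):                # CFG.epochs is a hypothetical config value
    train_loop(train_loader, model, optimizer, epoch)
    valid_loop(val_loader, model, epoch)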