1. Overfitting: generalization performance degrades
Goal: reduce overfitting
Push the network weights toward 0 to reduce model complexity
L2-regularization
device=torch.device('cuda:0')
net=MLP().to(device)
## weight_decay adds an L2 penalty (coefficient 0.01) on all parameters
optimizer=optim.SGD(net.parameters(),lr=learning_rate,weight_decay=0.01)
criteon=nn.CrossEntropyLoss().to(device)
L1-regularization
#L1-regularization
regularization_loss=0
for param in model.parameters():
    ## accumulate the sum of absolute parameter values (L1 norm)
    regularization_loss+=torch.sum(torch.abs(param))
classify_loss=criteon(logits,target)
loss=classify_loss+0.01*regularization_loss
optimizer.zero_grad()
loss.backward()
optimizer.step()
Momentum
##momentum
optimizer=torch.optim.SGD(model.parameters(),args.lr,momentum=args.momentum,weight_decay=args.weight_decay)
scheduler=ReduceLROnPlateau(optimizer,'min')
for epoch in range(args.start_epoch,args.epochs):
    train(train_loader,model,criterion,optimizer,epoch)
    result_avg,loss_val=validate(val_loader,model,criterion,epoch)
    scheduler.step(loss_val)
Learning rate
###learning rate: learning-rate scheduling schemes
##Scheme 1: monitor a metric (here the validation loss) and decay the learning rate when it stops improving
optimizer=torch.optim.SGD(model.parameters(),args.lr,momentum=args.momentum,weight_decay=args.weight_decay)
scheduler=ReduceLROnPlateau(optimizer,'min')
for epoch in range(args.start_epoch,args.epochs):
    train(train_loader,model,criterion,optimizer,epoch)
    result_avg,loss_val=validate(val_loader,model,criterion,epoch)
    scheduler.step(loss_val)
##Scheme 2
###decay by a fixed step size
scheduler=StepLR(optimizer,step_size=30,gamma=0.1)
for epoch in range(100):
    train(...)
    validate(...)
    ## multiply the learning rate by gamma every 30 epochs (step after training each epoch)
    scheduler.step()
1. Dropout
Effect: learning less to learn better
Each connection is dropped with probability p ∈ [0,1]
##add Dropout
net_dropped=torch.nn.Sequential(
    torch.nn.Linear(784,200),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(200,200),
    torch.nn.Dropout(0.5),
    torch.nn.ReLU(),
    torch.nn.Linear(200,10),
)
Clarification
torch.nn.Dropout(p=dropout_prob)
p is the drop probability: p=0.1 means each connection is dropped with probability 0.1
tf.nn.dropout(keep_prob)
keep_prob is the keep probability: keep_prob=0.1 means the drop probability is 0.9
At test time Dropout must be switched off so that all connections are used
for epoch in range(epochs):
    #train
    net_dropped.train()
    for batch_idx,(data,target) in enumerate(train_loader):
        ...
    ##switch to evaluation mode: the dropped connections are used again
    net_dropped.eval()
    test_loss=0
    correct=0
    for data,target in test_loader:
        ...
Stochastic Gradient Descent
Stochastic
Not the same as random: the gradient is computed on a random mini-batch, so it is a noisy estimate of the full-batch gradient whose noise follows a distribution rather than being arbitrary (see the sketch below)
Deterministic
The full-batch gradient, computed over the whole dataset, is deterministic
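A minimal sketch (toy data and names of my own, assuming only import torch) showing that a mini-batch gradient is a noisy estimate of the full-batch gradient:
import torch

## toy linear-regression data: y = 2x + noise
x=torch.randn(1000,1)
y=2*x+0.1*torch.randn(1000,1)
w=torch.zeros(1,requires_grad=True)

def mse(idx):
    ## mean squared error on the selected samples
    return ((x[idx]*w-y[idx])**2).mean()

## deterministic: gradient over the full dataset
mse(torch.arange(1000)).backward()
full_grad=w.grad.clone()
w.grad.zero_()

## stochastic: gradient over a random mini-batch, a noisy estimate of full_grad
batch=torch.randint(0,1000,(32,))
mse(batch).backward()
print(full_grad,w.grad)   ## close, but not identical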
##2D convolution layer
layer=nn.Conv2d(1,3,kernel_size=3,stride=1,padding=0)
x=torch.rand(1,1,28,28)
out=layer.forward(x)
## out.shape: torch.Size([1,3,26,26])
layer=nn.Conv2d(1,3,kernel_size=3,stride=1,padding=1)
out=layer.forward(x)
## out.shape: torch.Size([1,3,28,28])
##stride=2 halves the spatial size, which acts as downsampling
layer=nn.Conv2d(1,3,kernel_size=3,stride=2,padding=1)
out=layer.forward(x)
## out.shape: torch.Size([1,3,14,14])
## calling the layer directly is the recommended way (it dispatches to forward)
out=layer(x)
## out.shape: torch.Size([1,3,14,14])
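The output sizes above follow the standard convolution output-size formula; a quick sketch (the helper name conv_out is my own) verifying the three cases:
def conv_out(size,kernel_size,stride,padding):
    ## floor((H + 2*padding - kernel_size) / stride) + 1
    return (size+2*padding-kernel_size)//stride+1

print(conv_out(28,3,1,0))  ## 26
print(conv_out(28,3,1,1))  ## 28
print(conv_out(28,3,2,1))  ## 14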
Another way to call the convolution operation
###the functional interface
w=torch.rand(16,3,5,5)
b=torch.rand(16)
x=torch.randn(1,3,28,28)
out=F.conv2d(x,w,b,stride=1,padding=1)
out=F.conv2d(x,w,b,stride=2,padding=2)
Batch Normalization
Faster convergence
Better chance of reaching a good optimum
More robust: training is more stable and tolerates larger learning rates
##BatchNormalization
y=torch.rand(100,16,784)
layer=nn.BatchNorm1d(16)
## run a forward pass so that the running statistics are updated
out=layer(y)
##μ (running mean)
print(layer.running_mean)
##σ^2 (running variance)
print(layer.running_var)
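For image tensors the 2D variant is used; a short sketch (shapes chosen arbitrarily, assuming torch and torch.nn as nn are imported as in the rest of these notes) showing nn.BatchNorm2d with its learnable γ (weight) and β (bias) and the switch to evaluation mode:
x=torch.rand(1,16,7,7)        ## (batch, channels, H, W)
layer=nn.BatchNorm2d(16)
out=layer(x)
print(layer.weight.shape)     ## γ: torch.Size([16])
print(layer.bias.shape)       ## β: torch.Size([16])
## at test time switch to eval mode so the running statistics are used
layer.eval()
out=layer(x)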
Deep residual network (ResNet) structure
#!/usr/bin/env python
# -*- coding:utf-8 _*-
"""
@author: JMS
@file: RESNET_1.py
@time: 2022/08/13
@desc:
"""
class ResBlk(nn.Module):
    def __init__(self,ch_in,ch_out):
        super(ResBlk,self).__init__()
        self.conv1=nn.Conv2d(ch_in,ch_out,kernel_size=3,stride=1,padding=1)
        self.bn1=nn.BatchNorm2d(ch_out)
        self.conv2=nn.Conv2d(ch_out,ch_out,kernel_size=3,stride=1,padding=1)
        self.bn2=nn.BatchNorm2d(ch_out)
        ##if ch_in != ch_out, use a 1x1 convolution on the shortcut so the channels match
        self.extra=nn.Sequential()
        if ch_out!=ch_in:
            self.extra=nn.Sequential(nn.Conv2d(ch_in,ch_out,kernel_size=1,stride=1),nn.BatchNorm2d(ch_out))

    def forward(self,x):
        out=F.relu(self.bn1(self.conv1(x)))
        out=self.bn2(self.conv2(out))
        ##shortcut: element-wise addition of the (possibly projected) input
        out=self.extra(x)+out
        return out
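A quick usage check of the block above (the input shape is chosen arbitrarily):
blk=ResBlk(64,128)
tmp=torch.randn(2,64,32,32)
out=blk(tmp)
print(out.shape)   ## torch.Size([2, 128, 32, 32])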
The nn.Module class
Provides basic building blocks: nn.Linear, nn.BatchNorm2d, nn.Conv2d
An nn.Module can be nested inside another nn.Module
Advantages:
1. Contains the common neural network modules:
Linear, ReLU, Sigmoid, Conv2d, ConvTranspose2d, Dropout, etc.
2. Container
Custom classes and built-in modules can be composed (e.g. with nn.Sequential), and self.net(x) runs the whole chain in one call
3. parameters
Manages all parameters of the module and its children conveniently (see the sketch after the example below)
4. modules
Gives direct access to the child modules
class BasicNet(nn.Module):
    def __init__(self):
        super(BasicNet,self).__init__()
        self.net=nn.Linear(4,3)

    def forward(self,x):
        return self.net(x)

class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.net=nn.Sequential(BasicNet(),nn.ReLU(),nn.Linear(3,2))

    def forward(self,x):
        return self.net(x)
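A small sketch of points 3 and 4 above (using the Net class just defined, and assuming optim is torch.optim as in the earlier snippets): parameters() can be handed straight to an optimizer, and named_modules() walks the nested sub-modules:
net=Net()
## point 3: the parameters of Net, BasicNet and the nested Linear layers are collected automatically
optimizer=optim.SGD(net.parameters(),lr=1e-3)
## point 4: iterate over the module tree
for name,module in net.named_modules():
    print(name,module)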
5. to(device)
Move the module and its parameters to the CPU or a GPU
device=torch.device('cuda')
net=Net()
net.to(device)
6. save and load
Save and load the model parameters (the state dict)
##load
net.load_state_dict(torch.load('ckpt.md1'))
##save
torch.save(net.state_dict(),'ckpt.md1')
7.train/test
##train
net.train()
##test
net.eval()
8. Implementing your own layer
class MyLinear(nn.Module):
    def __init__(self,inp,outp):
        super(MyLinear,self).__init__()
        ##nn.Parameter registers the tensors as trainable parameters (requires_grad=True)
        self.w=nn.Parameter(torch.randn(outp,inp))
        self.b=nn.Parameter(torch.randn(outp))

    def forward(self,x):
        x=x@self.w.t()+self.b
        return x
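Usage is the same as for the built-in nn.Linear (the shapes below are arbitrary):
layer=MyLinear(4,3)
out=layer(torch.randn(2,4))
print(out.shape)                  ## torch.Size([2, 3])
## w and b are found automatically because they are nn.Parameter
print(list(layer.parameters()))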
Data augmentation, taking image data as an example
1. Flip
import torch.utils.data
train_loader=torch.utils.data.DataLoader(
    datasets.MNIST('../data',train=True,download=True,
                   ##pack all the operations into a single transform
                   transform=transforms.Compose([
                       ##horizontal flip
                       transforms.RandomHorizontalFlip(),
                       ##vertical flip
                       transforms.RandomVerticalFlip(),
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size,shuffle=True)
2. Rotate
train_loader=torch.utils.data.DataLoader(
    datasets.MNIST('../data',train=True,download=True,
                   transform=transforms.Compose([
                       ##rotate by a random angle within ±15°
                       transforms.RandomRotation(15),
                       ##rotate by a random angle sampled from the range [90°, 270°]
                       transforms.RandomRotation([90,270]),
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size,shuffle=True)
3. Scale
train_loader=torch.utils.data.DataLoader(
    datasets.MNIST('../data',train=True,download=True,
                   transform=transforms.Compose([
                       ##resize the image to 32x32
                       transforms.Resize([32,32]),
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size,shuffle=True)
4. Crop Part
train_loader=torch.utils.data.DataLoader(
    datasets.MNIST('../data',train=True,download=True,
                   transform=transforms.Compose([
                       ##crop a random 28x28 patch
                       transforms.RandomCrop([28,28]),
                       transforms.ToTensor(),
                   ])),
    batch_size=batch_size,shuffle=True)
5. Noise: add noise
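torchvision has no built-in noise transform, so one option is a custom transforms.Lambda applied after ToTensor; a minimal sketch (the noise level 0.1 is an arbitrary choice, other names follow the snippets above):
train_loader=torch.utils.data.DataLoader(
    datasets.MNIST('../data',train=True,download=True,
                   transform=transforms.Compose([
                       transforms.ToTensor(),
                       ##add zero-mean Gaussian noise to the tensor image
                       transforms.Lambda(lambda x: x + 0.1*torch.randn_like(x)),
                   ])),
    batch_size=batch_size,shuffle=True)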