提示:文章写完后,目录可以自动生成,如何生成可参考右边的帮助文档
深度学习对于CV领域十分重要,ResNet作为开创先河的深度残差学习框架对深层网络的训练起到推动作用,使100层甚至1000层的网络训练成为可能。在此使用ResNet对CIFAR-10(32乘32大小,10类别)或CIFAR-100数据集(32乘32大小,100类别)进行训练。
本项目使用Resnet18和Resnet50对CIFAR10数据集进行分类。
ResNet网络提出是在2015年,论文网址如下: https://arxiv.org/abs/1512.03385
Resnet之前的网络进行层之间的堆叠,发现会产生梯度爆炸或者梯度消失问题,即使使用batch normalization能在一定程度上缓解问题,但是发现深层网络的正确率反而比浅层网络还低。很多人就会思考:如果在浅层网络上加上一些恒等映射的层构成更深的网络,效果不应该下降才是;但SGD无法自行找到这样的解,需要在训练时进行一定的引导,所以提出了残差网络模型。
论文中18和34层的残差块和50层以上的不太一样,如下图所示:
这里学习的是所求映射H(x)与输入x的差值F(x)=H(x)-x,输出结果是F(x)+x。在进行梯度运算时,F(x)+x中的恒等项x可以将上一层的梯度直接传递下来,一定程度上缓解了链式求导(越乘越小)导致的梯度消失问题。
对于网络的更深入了解可以看李沐老师的讲解:
https://www.bilibili.com/video/BV1Fb4y1h73E?spm_id_from=333.999.0.0
ResidualBlock_low是对18和34层的残差块的设计,ResidualBlock_high是对50层的残差块的设计。不同之处在于self.left不一样,也就是上图的两种残差块,最后加上shortcut即可。残差块是对图片的通道数进行不断地变化。
虽然Pytorch中有集成好的ResNet模型(https://pytorch.org/hub/pytorch_vision_resnet/),但是自己手写更能增加对模型的认识。
对于CIFAR-100的训练要将num_classes=10改为100。
代码如下:
import torch
import torch.nn as nn
import torch.nn.functional as F
class ResidualBlock_low(nn.Module):
    """Basic residual block for ResNet-18/34: two 3x3 convs plus a shortcut.

    Args:
        inchannel: number of input channels.
        outchannel: number of output channels.
        stride: stride of the first conv; stride=2 halves the spatial size.
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock_low, self).__init__()
        # Main path F(x): 3x3 conv (may downsample) -> BN -> ReLU -> 3x3 conv -> BN.
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, outchannel, kernel_size=3, stride=stride, padding=1, bias=False),
            nn.BatchNorm2d(outchannel),
            nn.ReLU(inplace=True),
            nn.Conv2d(outchannel, outchannel, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(outchannel)
        )
        # Shortcut is the identity unless the spatial size or channel count
        # changes, in which case a 1x1 conv projects the input to match.
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        """Return relu(F(x) + shortcut(x))."""
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResidualBlock_high(nn.Module):
    """Bottleneck residual block for ResNet-50+: 1x1 reduce -> 3x3 -> 1x1 expand.

    The inner two convs run at outchannel/4 channels (the bottleneck width).

    Args:
        inchannel: number of input channels.
        outchannel: number of output channels (4x the bottleneck width).
        stride: stride of the first conv; stride=2 halves the spatial size.
    """

    def __init__(self, inchannel, outchannel, stride=1):
        super(ResidualBlock_high, self).__init__()
        width = outchannel // 4  # bottleneck width
        # Main path F(x): 1x1 reduce -> 3x3 -> 1x1 expand, each followed by BN.
        self.left = nn.Sequential(
            nn.Conv2d(inchannel, width, kernel_size=1, stride=stride, padding=0, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
            nn.Conv2d(width, width, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(width),
            nn.ReLU(inplace=True),
            nn.Conv2d(width, outchannel, kernel_size=1, stride=1, padding=0, bias=False),
            nn.BatchNorm2d(outchannel),
        )
        # 1x1 projection shortcut when shape changes, identity otherwise.
        self.shortcut = nn.Sequential()
        if stride != 1 or inchannel != outchannel:
            self.shortcut = nn.Sequential(
                nn.Conv2d(inchannel, outchannel, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(outchannel)
            )

    def forward(self, x):
        """Return relu(F(x) + shortcut(x))."""
        # Original code evaluated self.shortcut(x) twice (once into an unused
        # variable); compute it once here.
        out = self.left(x)
        out += self.shortcut(x)
        out = F.relu(out)
        return out
class ResNet_18(nn.Module):
    """ResNet-18 for 32x32 inputs (CIFAR): a 3x3 stem (no max-pool), four
    stages of 2 basic blocks each (64/128/256/512 channels), global average
    pooling, and a linear classifier.

    Args:
        ResidualBlock: block class with signature (inchannel, outchannel, stride).
        num_classes: output size (10 for CIFAR-10, 100 for CIFAR-100).
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet_18, self).__init__()
        self.inchannel = 64
        # CIFAR-style stem: single 3x3 conv, stride 1 (images are only 32x32).
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # Stages 2-4 halve the spatial size via stride 2 in their first block.
        self.layer1 = self.make_layer(ResidualBlock, 64, 2, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 2, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 2, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 2, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Stack num_blocks blocks; only the first uses `stride`, the rest use 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:  # renamed from `stride` to avoid shadowing the parameter
            layers.append(block(self.inchannel, channels, s))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):                 # x: (N, 3, 32, 32)
        out = self.conv1(x)               # (N, 64, 32, 32)
        out = self.layer1(out)            # (N, 64, 32, 32)
        out = self.layer2(out)            # (N, 128, 16, 16)
        out = self.layer3(out)            # (N, 256, 8, 8)
        out = self.layer4(out)            # (N, 512, 4, 4)
        out = F.avg_pool2d(out, 4)        # (N, 512, 1, 1)
        out = out.view(out.size(0), -1)   # (N, 512)
        out = self.fc(out)
        return out
class ResNet_34(nn.Module):
    """ResNet-34 for 32x32 inputs (CIFAR): a 3x3 stem (no max-pool), four
    stages of 3/4/6/3 basic blocks (64/128/256/512 channels), global average
    pooling, and a linear classifier.

    Args:
        ResidualBlock: block class with signature (inchannel, outchannel, stride).
        num_classes: output size (10 for CIFAR-10, 100 for CIFAR-100).
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet_34, self).__init__()
        self.inchannel = 64
        # CIFAR-style stem: single 3x3 conv, stride 1 (images are only 32x32).
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # 3/4/6/3 blocks per stage; stages 2-4 downsample with stride 2.
        self.layer1 = self.make_layer(ResidualBlock, 64, 3, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 128, 4, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 256, 6, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 512, 3, stride=2)
        self.fc = nn.Linear(512, num_classes)

    def make_layer(self, block, channels, num_blocks, stride):
        """Stack num_blocks blocks; only the first uses `stride`, the rest use 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:  # renamed from `stride` to avoid shadowing the parameter
            layers.append(block(self.inchannel, channels, s))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):                 # x: (N, 3, 32, 32)
        out = self.conv1(x)               # (N, 64, 32, 32)
        out = self.layer1(out)            # (N, 64, 32, 32)
        out = self.layer2(out)            # (N, 128, 16, 16)
        out = self.layer3(out)            # (N, 256, 8, 8)
        out = self.layer4(out)            # (N, 512, 4, 4)
        out = F.avg_pool2d(out, 4)        # (N, 512, 1, 1)
        out = out.view(out.size(0), -1)   # (N, 512)
        out = self.fc(out)
        return out
class ResNet_50(nn.Module):
    """ResNet-50 for 32x32 inputs (CIFAR): a 3x3 stem (no max-pool), four
    stages of 3/4/6/3 bottleneck blocks (256/512/1024/2048 output channels),
    global average pooling, and a linear classifier.

    Note: the shape comments below are corrected — the original code carried
    stale comments copied from the 18-layer variant.

    Args:
        ResidualBlock: block class with signature (inchannel, outchannel, stride),
            expected to be a bottleneck block (ResidualBlock_high).
        num_classes: output size (10 for CIFAR-10, 100 for CIFAR-100).
    """

    def __init__(self, ResidualBlock, num_classes=10):
        super(ResNet_50, self).__init__()
        self.inchannel = 64
        # CIFAR-style stem: single 3x3 conv, stride 1 (images are only 32x32).
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        # 3/4/6/3 bottleneck blocks per stage; channel counts are 4x the
        # basic-block variant because of the 1x1 expansion conv.
        self.layer1 = self.make_layer(ResidualBlock, 256, 3, stride=1)
        self.layer2 = self.make_layer(ResidualBlock, 512, 4, stride=2)
        self.layer3 = self.make_layer(ResidualBlock, 1024, 6, stride=2)
        self.layer4 = self.make_layer(ResidualBlock, 2048, 3, stride=2)
        self.fc = nn.Linear(512 * 4, num_classes)  # 2048 features after pooling

    def make_layer(self, block, channels, num_blocks, stride):
        """Stack num_blocks blocks; only the first uses `stride`, the rest use 1."""
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for s in strides:  # renamed from `stride` to avoid shadowing the parameter
            layers.append(block(self.inchannel, channels, s))
            self.inchannel = channels
        return nn.Sequential(*layers)

    def forward(self, x):                 # x: (N, 3, 32, 32)
        out = self.conv1(x)               # (N, 64, 32, 32)
        out = self.layer1(out)            # (N, 256, 32, 32)
        out = self.layer2(out)            # (N, 512, 16, 16)
        out = self.layer3(out)            # (N, 1024, 8, 8)
        out = self.layer4(out)            # (N, 2048, 4, 4)
        out = F.avg_pool2d(out, 4)        # (N, 2048, 1, 1)
        out = out.view(out.size(0), -1)   # (N, 2048)
        out = self.fc(out)
        return out
def ResNet18():
    """Build a ResNet-18 (basic two-conv blocks, 2 per stage) with default num_classes=10."""
    return ResNet_18(ResidualBlock_low)
def ResNet34():
    """Build a ResNet-34 (basic two-conv blocks, 3/4/6/3 per stage) with default num_classes=10."""
    return ResNet_34(ResidualBlock_low)
def ResNet50():
    """Build a ResNet-50 (bottleneck blocks, 3/4/6/3 per stage) with default num_classes=10."""
    return ResNet_50(ResidualBlock_high)
这里对于lr的调整不一定是最优的,还是要根据自己训练时的收敛情况自行调整。模型保存在model文件夹里,日志保存在text文件夹里。中间可以终止训练,利用torch.load('model/net_019.pth')直接加载已保存的模型继续训练。
代码如下:
from resnet import *
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
# Training hyper-parameters.
max_epoch = 50   # total number of epochs
pre_epoch = 0    # starting epoch (nonzero when resuming from a checkpoint)
batch_size = 64
best_acc = 0.85  # threshold: only accuracies above this are logged as "best"
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Training-time augmentation.
transform_train = transforms.Compose([
transforms.RandomCrop(32, padding=4), # pad 4 px on each side, then randomly crop back to 32x32
transforms.RandomHorizontalFlip(), # flip horizontally with probability 0.5
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)), # per-channel (R,G,B) mean and std for normalization
])
# Test-time preprocessing: normalization only, no augmentation.
transform_test = transforms.Compose([
transforms.ToTensor(),
transforms.Normalize((0.4914, 0.4822, 0.4465), (0.2023, 0.1994, 0.2010)),
])
# download=False assumes the CIFAR-10 data already exists under ./data;
# set download=True on first run.
train_set = torchvision.datasets.CIFAR10(root='./data',train = True,download=False,transform=transform_train)
train_loader = DataLoader(train_set,batch_size=batch_size,shuffle=True)
test_set = torchvision.datasets.CIFAR10(root='./data',train=False,download=False,transform=transform_test)
test_loader = DataLoader(test_set,batch_size=batch_size,shuffle=False)
# Pick the model to train; uncomment one of the alternatives below to switch,
# or load a saved checkpoint to resume training.
#model = ResNet18().to(device)
#model = torch.load('model/net_019.pth')
#model = ResNet34().to(device)
model = ResNet50().to(device)
criterion = nn.CrossEntropyLoss()
if __name__ == "__main__":
    # acc.txt records per-epoch validation accuracy; log.txt records the
    # running training loss/accuracy every 20 iterations.
    with open('text/acc.txt', 'w') as f, open('text/log.txt', 'w') as f2:
        # Create the optimizer ONCE. The original code rebuilt it every
        # epoch, which silently discarded the SGD momentum buffers; here the
        # learning rate is updated in-place via param_groups instead.
        optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=5e-4)
        for epoch in range(pre_epoch, max_epoch):
            # Step-wise learning-rate schedule: 0.1 -> 0.01 -> 0.001.
            if epoch < 15:
                lr = 0.1
            elif epoch < 30:
                lr = 0.01
            else:
                lr = 0.001
            for group in optimizer.param_groups:
                group['lr'] = lr
            # ---- training phase ----
            model.train()
            train_loss = []
            train_accs = []
            for i, data in enumerate(train_loader, 0):
                inputs, labels = data
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
                # .item() detaches from the graph so we don't accumulate GPU
                # tensors; labels are already on `device`, no second .to().
                acc = (outputs.argmax(dim=-1) == labels).float().mean().item()
                train_loss.append(loss.item())
                train_accs.append(acc)
                if i % 20 == 0:
                    msg = ('[epoch:%d, iter:%d] Loss: %.03f | Acc: %.3f%% '
                           % (epoch + 1, (i + 1 + epoch * len(train_loader)),
                              sum(train_loss) / len(train_loss),
                              100. * sum(train_accs) / len(train_accs)))
                    print(msg)
                    f2.write(msg)
                    f2.write('\n')
                    f2.flush()
            # ---- validation phase ----
            print("**************************Waiting Test!****************************")
            model.eval()
            valid_loss = []
            valid_accs = []
            with torch.no_grad():  # hoisted around the whole loop: no autograd needed
                for images, labels in test_loader:
                    images, labels = images.to(device), labels.to(device)
                    outputs = model(images)
                    loss = criterion(outputs, labels)
                    acc = (outputs.argmax(dim=-1) == labels).float().mean().item()
                    valid_loss.append(loss.item())
                    valid_accs.append(acc)
            valid_loss = sum(valid_loss) / len(valid_loss)
            valid_acc = sum(valid_accs) / len(valid_accs)
            print(f"[ Valid | {epoch + 1:03d}/{max_epoch:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")
            # Save a full-model checkpoint every epoch (loadable via torch.load).
            torch.save(model, 'model/net_%03d.pth' % (epoch + 1))
            f.write("epoch=%03d,Accuracy= %.3f%%" % (epoch + 1, valid_acc))
            f.write('\n')
            f.flush()
            # Track the best validation accuracy seen so far; the context
            # manager replaces the original unclosed open()/close() pair.
            if valid_acc > best_acc:
                with open("text/best_acc.txt", "w") as f3:
                    f3.write("EPOCH=%d,best_acc= %.3f%%" % (epoch + 1, valid_acc))
                best_acc = valid_acc
本来想跑ImageNet数据集的,可奈何文件太大,无奈选择小的CIFAR100数据集。
18层的网络实现了93%的正确率,50层的还未训练结束。
迈入深度学习第一步!!