PyTorch code for the classic semantic segmentation networks. Read it alongside the VGG16 structure printed further down and the models are easy to follow; this single file covers FCN8/FCN16/FCN32, UNet, SegNet, and PSPNet. At the bottom, SegNet is picked apart as an example; the others can be read the same way.
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import models

class FCN8(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        feats = list(models.vgg16(pretrained=True).features.children())
        # Slice VGG16 so each stage ends with its pooling layer (indices
        # refer to the VGG16 structure printed at the end of this post):
        # feats -> through pool2 (stride 4), feat3 -> through pool3
        # (stride 8, 256 channels), feat4 -> through pool4 (stride 16,
        # 512 channels), feat5 -> through pool5 (stride 32, 512 channels).
        self.feats = nn.Sequential(*feats[0:10])
        self.feat3 = nn.Sequential(*feats[10:17])
        self.feat4 = nn.Sequential(*feats[17:24])
        self.feat5 = nn.Sequential(*feats[24:31])
        # Freeze the pretrained convolutions; requires_grad is an attribute
        # of the parameters, not of the module itself.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                for p in m.parameters():
                    p.requires_grad = False
        self.fconn = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
        )
        self.score_feat3 = nn.Conv2d(256, num_classes, 1)
        self.score_feat4 = nn.Conv2d(512, num_classes, 1)
        self.score_fconn = nn.Conv2d(4096, num_classes, 1)

    def forward(self, x):
        feats = self.feats(x)
        feat3 = self.feat3(feats)
        feat4 = self.feat4(feat3)
        feat5 = self.feat5(feat4)
        fconn = self.fconn(feat5)
        score_feat3 = self.score_feat3(feat3)
        score_feat4 = self.score_feat4(feat4)
        score_fconn = self.score_fconn(fconn)
        # Fuse the coarse score map with the pool4 and pool3 scores,
        # then upsample to the input resolution.
        score = F.upsample_bilinear(score_fconn, score_feat4.size()[2:])
        score += score_feat4
        score = F.upsample_bilinear(score, score_feat3.size()[2:])
        score += score_feat3
        return F.upsample_bilinear(score, x.size()[2:])
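A quick way to sanity-check the three-way fusion is to push a dummy batch through and confirm the scores come back at input resolution. A minimal smoke test (21 classes, as in PASCAL VOC, is my assumption; any input of at least 224x224 works, since the unpadded 7x7 conv in fconn needs a stride-32 map of at least 7x7):

model = FCN8(num_classes=21).eval()
with torch.no_grad():
    out = model(torch.randn(1, 3, 224, 224))
print(out.size())  # torch.Size([1, 21, 224, 224])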
class FCN16(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        feats = list(models.vgg16(pretrained=True).features.children())
        # feats runs through pool3; feat4 and feat5 end with pool4 and pool5.
        self.feats = nn.Sequential(*feats[0:17])
        self.feat4 = nn.Sequential(*feats[17:24])
        self.feat5 = nn.Sequential(*feats[24:31])
        self.fconn = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
        )
        self.score_fconn = nn.Conv2d(4096, num_classes, 1)
        self.score_feat4 = nn.Conv2d(512, num_classes, 1)

    def forward(self, x):
        feats = self.feats(x)
        feat4 = self.feat4(feats)
        feat5 = self.feat5(feat4)
        fconn = self.fconn(feat5)
        score_feat4 = self.score_feat4(feat4)
        score_fconn = self.score_fconn(fconn)
        # Single skip fusion with pool4, then upsample to the input size.
        score = F.upsample_bilinear(score_fconn, score_feat4.size()[2:])
        score += score_feat4
        return F.upsample_bilinear(score, x.size()[2:])
class FCN32(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.feats = models.vgg16(pretrained=True).features
        self.fconn = nn.Sequential(
            nn.Conv2d(512, 4096, 7),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Conv2d(4096, 4096, 1),
            nn.ReLU(inplace=True),
            nn.Dropout(),
        )
        self.score = nn.Conv2d(4096, num_classes, 1)

    def forward(self, x):
        feats = self.feats(x)
        fconn = self.fconn(feats)
        score = self.score(fconn)
        # No skip connections: upsample straight from the stride-32 map.
        return F.upsample_bilinear(score, x.size()[2:])
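All the F.upsample_bilinear calls in this file use an API that newer PyTorch releases deprecate; per the PyTorch docs it is equivalent to F.interpolate with mode='bilinear' and align_corners=True, so if the deprecation warnings bother you the calls can be swapped mechanically:

# Drop-in replacement for the deprecated F.upsample_bilinear(input, size).
def upsample_bilinear(x, size):
    return F.interpolate(x, size=size, mode='bilinear', align_corners=True)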
class UNetEnc(nn.Module):
    # Naming note: in this code "Enc" is the expanding (decoder) path and
    # "Dec" the contracting (encoder) path, the reverse of the usual naming.
    def __init__(self, in_channels, features, out_channels):
        super().__init__()
        self.up = nn.Sequential(
            nn.Conv2d(in_channels, features, 3),
            nn.ReLU(inplace=True),
            nn.Conv2d(features, features, 3),
            nn.ReLU(inplace=True),
            nn.ConvTranspose2d(features, out_channels, 2, stride=2),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        return self.up(x)

class UNetDec(nn.Module):
    def __init__(self, in_channels, out_channels, dropout=False):
        super().__init__()
        layers = [
            nn.Conv2d(in_channels, out_channels, 3),
            nn.ReLU(inplace=True),
            nn.Conv2d(out_channels, out_channels, 3),
            nn.ReLU(inplace=True),
        ]
        if dropout:
            layers += [nn.Dropout(.5)]
        layers += [nn.MaxPool2d(2, stride=2, ceil_mode=True)]
        self.down = nn.Sequential(*layers)

    def forward(self, x):
        return self.down(x)
class UNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.dec1 = UNetDec(3, 64)
        self.dec2 = UNetDec(64, 128)
        self.dec3 = UNetDec(128, 256)
        self.dec4 = UNetDec(256, 512, dropout=True)
        self.center = nn.Sequential(
            nn.Conv2d(512, 1024, 3),
            nn.ReLU(inplace=True),
            nn.Conv2d(1024, 1024, 3),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.ConvTranspose2d(1024, 512, 2, stride=2),
            nn.ReLU(inplace=True),
        )
        self.enc4 = UNetEnc(1024, 512, 256)
        self.enc3 = UNetEnc(512, 256, 128)
        self.enc2 = UNetEnc(256, 128, 64)
        self.enc1 = nn.Sequential(
            nn.Conv2d(128, 64, 3),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3),
            nn.ReLU(inplace=True),
        )
        self.final = nn.Conv2d(64, num_classes, 1)

    def forward(self, x):
        dec1 = self.dec1(x)
        dec2 = self.dec2(dec1)
        dec3 = self.dec3(dec2)
        dec4 = self.dec4(dec3)
        center = self.center(dec4)
        # The convolutions are unpadded, so encoder and decoder maps never
        # match exactly; the skip tensors are resized bilinearly before the
        # channel-wise concatenation (the paper crops instead).
        enc4 = self.enc4(torch.cat([
            center, F.upsample_bilinear(dec4, center.size()[2:])], 1))
        enc3 = self.enc3(torch.cat([
            enc4, F.upsample_bilinear(dec3, enc4.size()[2:])], 1))
        enc2 = self.enc2(torch.cat([
            enc3, F.upsample_bilinear(dec2, enc3.size()[2:])], 1))
        enc1 = self.enc1(torch.cat([
            enc2, F.upsample_bilinear(dec1, enc2.size()[2:])], 1))
        return F.upsample_bilinear(self.final(enc1), x.size()[2:])
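Because every 3x3 conv here is unpadded, each stage trims a few pixels off the map, which is exactly why the forward pass resizes the skip tensors before concatenating. A small check using the 572x572 input size from the U-Net paper (2 classes is an arbitrary choice for the demo):

net = UNet(num_classes=2).eval()
with torch.no_grad():
    out = net(torch.randn(1, 3, 572, 572))
print(out.size())  # torch.Size([1, 2, 572, 572]), thanks to the final resize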
class SegNetEnc(nn.Module):
    def __init__(self, in_channels, out_channels, num_layers):
        super().__init__()
        layers = [
            nn.Conv2d(in_channels, in_channels // 2, 3, padding=1),
            nn.BatchNorm2d(in_channels // 2),
            nn.ReLU(inplace=True),
        ]
        layers += [
            nn.Conv2d(in_channels // 2, in_channels // 2, 3, padding=1),
            nn.BatchNorm2d(in_channels // 2),
            nn.ReLU(inplace=True),
        ] * num_layers
        layers += [
            nn.Conv2d(in_channels // 2, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        self.encode = nn.Sequential(*layers)

    def forward(self, x):
        return self.encode(x)
class SegNet(nn.Module):
    def __init__(self, classes):
        super().__init__()
        vgg16 = models.vgg16(pretrained=True)
        features = vgg16.features
        # Each dec block is one VGG16 conv block without its pooling layer;
        # pooling is done manually in forward so the indices can be kept.
        self.dec1 = features[0:4]
        self.dec2 = features[5:9]
        self.dec3 = features[10:16]
        self.dec4 = features[17:23]
        self.dec5 = features[24:30]
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                for p in m.parameters():
                    p.requires_grad = False
        self.enc5 = SegNetEnc(512, 512, 1)
        self.enc4 = SegNetEnc(512, 256, 1)
        self.enc3 = SegNetEnc(256, 128, 1)
        self.enc2 = SegNetEnc(128, 64, 0)
        self.final = nn.Sequential(
            nn.Conv2d(64, classes, 3, padding=1),
            nn.BatchNorm2d(classes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        x1 = self.dec1(x)
        d1, m1 = F.max_pool2d(x1, kernel_size=2, stride=2, return_indices=True)
        x2 = self.dec2(d1)
        d2, m2 = F.max_pool2d(x2, kernel_size=2, stride=2, return_indices=True)
        x3 = self.dec3(d2)
        d3, m3 = F.max_pool2d(x3, kernel_size=2, stride=2, return_indices=True)
        x4 = self.dec4(d3)
        d4, m4 = F.max_pool2d(x4, kernel_size=2, stride=2, return_indices=True)
        x5 = self.dec5(d4)
        d5, m5 = F.max_pool2d(x5, kernel_size=2, stride=2, return_indices=True)

        def upsample(d):
            e5 = self.enc5(F.max_unpool2d(d, m5, kernel_size=2, stride=2, output_size=x5.size()))
            e4 = self.enc4(F.max_unpool2d(e5, m4, kernel_size=2, stride=2, output_size=x4.size()))
            e3 = self.enc3(F.max_unpool2d(e4, m3, kernel_size=2, stride=2, output_size=x3.size()))
            e2 = self.enc2(F.max_unpool2d(e3, m2, kernel_size=2, stride=2, output_size=x2.size()))
            e1 = F.max_unpool2d(e2, m1, kernel_size=2, stride=2, output_size=x1.size())
            return e1

        e = upsample(d5)
        # One last conv + BatchNorm2d + ReLU on the fully upsampled map,
        # projecting straight to the number of classes. Note that ReLU, not
        # softmax, is the final activation; log-softmax is left to the loss.
        return self.final(e)
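The pool/unpool pairing is the defining trick of SegNet: max_pool2d with return_indices=True remembers where each maximum came from, and max_unpool2d scatters values back to exactly those positions, leaving zeros everywhere else. A tiny standalone demonstration (the values are arbitrary):

x = torch.tensor([[[[ 1.,  2.,  3.,  4.],
                    [ 5.,  6.,  7.,  8.],
                    [ 9., 10., 11., 12.],
                    [13., 14., 15., 16.]]]])
pooled, idx = F.max_pool2d(x, 2, stride=2, return_indices=True)
restored = F.max_unpool2d(pooled, idx, 2, stride=2, output_size=x.size())
print(pooled.squeeze())    # tensor([[ 6.,  8.], [14., 16.]])
print(restored.squeeze())  # zeros except at the four argmax positions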
class PSPDec(nn.Module):
    def __init__(self, in_features, out_features, downsize, upsize=60):
        super().__init__()
        self.features = nn.Sequential(
            nn.AvgPool2d(downsize, stride=downsize),
            nn.Conv2d(in_features, out_features, 1, bias=False),
            nn.BatchNorm2d(out_features, momentum=.95),
            nn.ReLU(inplace=True),
            # Every pyramid branch is resized back to a fixed upsize x upsize
            # grid so the branches can be concatenated with the backbone map.
            nn.UpsamplingBilinear2d(upsize),
        )

    def forward(self, x):
        return self.features(x)
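Each PSPDec branch average-pools the backbone map at a different grid size, squeezes channels with a 1x1 conv, and resizes back to the fixed 60x60 grid so the branches can be concatenated. A sketch assuming the 60x60, 2048-channel map that layer4 is expected to produce below:

feat = torch.randn(1, 2048, 60, 60)
for downsize in (60, 30, 20, 10):
    branch = PSPDec(2048, 512, downsize).eval()
    with torch.no_grad():
        print(downsize, branch(feat).size())  # always [1, 512, 60, 60]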
class PSPNet(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        '''
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, 3, stride=2, padding=1, bias=False),
            nn.BatchNorm2d(64, momentum=.95),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(64, momentum=.95),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 128, 3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(128, momentum=.95),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(3, stride=2, padding=1),
        )
        '''
        resnet = models.resnet101(pretrained=True)
        # Use the full ResNet stem (conv + bn + relu + maxpool) so that
        # layer1 sees the input distribution it was pretrained on.
        self.conv1 = nn.Sequential(resnet.conv1, resnet.bn1, resnet.relu,
                                   resnet.maxpool)
        self.layer1 = resnet.layer1
        self.layer2 = resnet.layer2
        self.layer3 = resnet.layer3
        self.layer4 = resnet.layer4
        # Crude stand-in for the dilated backbone of the paper: strip all
        # convolutional striding so the feature map stays large, and freeze
        # the pretrained weights (again via the parameters).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                m.stride = 1
                for p in m.parameters():
                    p.requires_grad = False
            if isinstance(m, nn.BatchNorm2d):
                for p in m.parameters():
                    p.requires_grad = False
        # With all conv strides removed, only the stem maxpool downsamples,
        # so the fixed 60x60 pyramid below assumes a 120x120 input.
        self.layer5a = PSPDec(2048, 512, 60)
        self.layer5b = PSPDec(2048, 512, 30)
        self.layer5c = PSPDec(2048, 512, 20)
        self.layer5d = PSPDec(2048, 512, 10)
        self.final = nn.Sequential(
            # 2048 backbone channels + 4 pyramid branches of 512 = 4096 in.
            nn.Conv2d(4096, 512, 3, padding=1, bias=False),
            nn.BatchNorm2d(512, momentum=.95),
            nn.ReLU(inplace=True),
            nn.Dropout(.1),
            nn.Conv2d(512, num_classes, 1),
        )

    def forward(self, x):
        size = x.size()[2:]
        x = self.conv1(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.final(torch.cat([
            x,
            self.layer5a(x),
            self.layer5b(x),
            self.layer5c(x),
            self.layer5d(x),
        ], 1))
        return F.upsample_bilinear(x, size)
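Since the freeze loops act on parameters, it is easy to confirm which weights will actually receive gradients; a quick check with a hypothetical helper (not from the original post):

def count_trainable(model):
    # Parameters with requires_grad=True are the only ones the optimizer
    # should update.
    return sum(p.numel() for p in model.parameters() if p.requires_grad)

net = PSPNet(num_classes=21)
print(count_trainable(net), 'of', sum(p.numel() for p in net.parameters()))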
To follow the slicing indices used throughout the code above, compare them against the VGG16 structure:
VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): ReLU(inplace)
(2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(3): ReLU(inplace)
(4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(6): ReLU(inplace)
(7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU(inplace)
(9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): ReLU(inplace)
(12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(13): ReLU(inplace)
(14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): ReLU(inplace)
(16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): ReLU(inplace)
(19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(20): ReLU(inplace)
(21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(22): ReLU(inplace)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): ReLU(inplace)
(26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(27): ReLU(inplace)
(28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(29): ReLU(inplace)
(30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace)
(2): Dropout(p=0.5)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace)
(5): Dropout(p=0.5)
(6): Linear(in_features=4096, out_features=1000, bias=True)
)
)
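This listing is just the repr of torchvision's pretrained VGG16; you can regenerate it, together with the index-to-layer mapping that the slices above rely on, with:

vgg16 = models.vgg16(pretrained=True)
print(vgg16)  # prints the structure above
for i, layer in enumerate(vgg16.features):
    print(i, layer)  # index -> layer, i.e. what feats[a:b] selects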
Now for understanding the network, taking SegNet as the example.
Network diagram: the figure is omitted here; see the encoder-decoder schematic in the SegNet paper.
class SegNetEnc(nn.Module):
    def __init__(self, in_channels, out_channels, num_layers):
        super().__init__()
        layers = [
            nn.Conv2d(in_channels, in_channels // 2, 3, padding=1),
            nn.BatchNorm2d(in_channels // 2),
            nn.ReLU(inplace=True),
        ]
        layers += [
            nn.Conv2d(in_channels // 2, in_channels // 2, 3, padding=1),
            nn.BatchNorm2d(in_channels // 2),
            nn.ReLU(inplace=True),
        ] * num_layers
        layers += [
            nn.Conv2d(in_channels // 2, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]
        self.encode = nn.Sequential(*layers)

    def forward(self, x):
        # 2 + num_layers rounds of conv + BatchNorm2d + ReLU, matching the
        # convolution stages on the decoder side of the diagram.
        return self.encode(x)
class SegNet(nn.Module):
    def __init__(self, classes):
        super().__init__()
        vgg16 = models.vgg16(pretrained=True)
        features = vgg16.features
        self.dec1 = features[0:4]    # first VGG16 conv block
        self.dec2 = features[5:9]    # second VGG16 conv block
        self.dec3 = features[10:16]  # third VGG16 conv block
        self.dec4 = features[17:23]  # fourth VGG16 conv block
        self.dec5 = features[24:30]  # fifth VGG16 conv block
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                for p in m.parameters():
                    p.requires_grad = False
        self.enc5 = SegNetEnc(512, 512, 1)
        self.enc4 = SegNetEnc(512, 256, 1)
        self.enc3 = SegNetEnc(256, 128, 1)
        self.enc2 = SegNetEnc(128, 64, 0)
        self.final = nn.Sequential(
            nn.Conv2d(64, classes, 3, padding=1),
            nn.BatchNorm2d(classes),
            nn.ReLU(inplace=True),
        )

    def forward(self, x):
        # First VGG16 conv block. Note there is no BatchNorm in these
        # blocks: torchvision's vgg16 is the plain variant (vgg16_bn is the
        # one with conv + BatchNorm + ReLU), so each stage is conv + ReLU.
        x1 = self.dec1(x)
        # d1 is the pooled map, m1 the argmax indices; the later stages
        # follow the same pattern.
        d1, m1 = F.max_pool2d(x1, kernel_size=2, stride=2, return_indices=True)
        x2 = self.dec2(d1)
        d2, m2 = F.max_pool2d(x2, kernel_size=2, stride=2, return_indices=True)
        x3 = self.dec3(d2)
        d3, m3 = F.max_pool2d(x3, kernel_size=2, stride=2, return_indices=True)
        x4 = self.dec4(d3)
        d4, m4 = F.max_pool2d(x4, kernel_size=2, stride=2, return_indices=True)
        x5 = self.dec5(d4)
        d5, m5 = F.max_pool2d(x5, kernel_size=2, stride=2, return_indices=True)

        # Upsampling path: unpool the smallest map first with its stored
        # indices, feed the result into the next stage, and end with a map
        # at the original resolution.
        def upsample(d):
            e5 = self.enc5(F.max_unpool2d(d, m5, kernel_size=2, stride=2, output_size=x5.size()))
            e4 = self.enc4(F.max_unpool2d(e5, m4, kernel_size=2, stride=2, output_size=x4.size()))
            e3 = self.enc3(F.max_unpool2d(e4, m3, kernel_size=2, stride=2, output_size=x3.size()))
            e2 = self.enc2(F.max_unpool2d(e3, m2, kernel_size=2, stride=2, output_size=x2.size()))
            e1 = F.max_unpool2d(e2, m1, kernel_size=2, stride=2, output_size=x1.size())
            return e1

        e = upsample(d5)
        return self.final(e)
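On the ReLU-versus-softmax remark above: in PyTorch the usual pattern is to return raw class scores and let nn.CrossEntropyLoss apply log-softmax internally, so no softmax layer belongs in the model at all. A minimal training-step sketch under that convention (batch and image sizes are illustrative):

model = SegNet(classes=21)
criterion = nn.CrossEntropyLoss()  # applies log-softmax + NLL internally
images = torch.randn(2, 3, 256, 256)
targets = torch.randint(0, 21, (2, 256, 256))  # per-pixel class labels
logits = model(images)  # [2, 21, 256, 256]
loss = criterion(logits, targets)
loss.backward()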