首先导入所需的 Python 包:
import math # 导入数学运算库
import numpy as np # 导入numpy库,用于数组操作
import torch.nn as nn # 导入PyTorch的nn模块,用于构建神经网络
from torchsummary import summary # 从torchsummary库中导入summary函数,用于打印模型摘要
import torchvision.models as models # 导入torchvision的models模块,用于获取预训练模型
接下来是 ResNet 网络的 Bottleneck 结构。Bottleneck 是 ResNet block 中常见的一种结构,由三层卷积组成,依次是 1x1、3x3 和 1x1 卷积:第一个 1x1 卷积负责降低通道维度,最后一个 1x1 卷积负责恢复(升高)通道维度,从而使中间的 3x3 卷积成为输入/输出维度较小的“瓶颈”。
Bottleneck 结构的优点是便于灵活地设计网络并减小计算量:借助 1x1 卷积可以方便地改变通道维度。
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 -> 3x3 -> 1x1 convolutions plus a shortcut.

    The first 1x1 convolution maps ``inplanes`` channels to ``planes``, the
    3x3 convolution keeps ``planes`` channels, and the last 1x1 convolution
    widens to ``planes * expansion`` channels. Each convolution is followed by
    batch normalisation. The block input (optionally transformed by
    ``downsample``) is added to the result before the final ReLU.

    Args:
        inplanes: Number of channels of the block input.
        planes: Number of channels used by the two inner convolutions.
        stride: Stride of the first 1x1 convolution.
        downsample: Optional module applied to the block input to build the
            shortcut. It must produce a tensor with the same shape as the
            main branch; when ``None`` the input itself is the shortcut.
    """

    # Ratio between the block's output channels and ``planes``.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super().__init__()
        # Derive the output width from the class attribute so subclasses that
        # override ``expansion`` stay consistent with the shortcut branch.
        out_planes = planes * self.expansion

        # NOTE(review): the stride is applied in the first 1x1 convolution;
        # torchvision's Bottleneck applies it in the 3x3 convolution instead.
        # Parameter shapes are identical either way -- confirm this placement
        # is intended when loading torchvision weights.
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, stride=stride, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, out_planes, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        """Run the three conv/BN stages, add the shortcut and apply ReLU."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        # Shortcut: the raw input, or its projection when a downsample module
        # was supplied.
        residual = x if self.downsample is None else self.downsample(x)

        out += residual
        return self.relu(out)
下面是 ResNet 网络的主体类定义:
class ResNet(nn.Module):
    """ResNet classifier: stem, four residual stages, global pooling and a
    fully connected head.

    Args:
        block: Residual block class. It is called as
            ``block(inplanes, planes, stride, downsample)`` and must expose an
            integer ``expansion`` class attribute.
        layers: Sequence of four integers giving the number of blocks in each
            stage.
        num_classes: Size of the output of the final linear layer.
    """

    def __init__(self, block, layers, num_classes=1000):
        super().__init__()
        # Channel count fed to the next residual block; _make_layer updates it
        # as stages are built.
        self.inplanes = 64

        # Stem: 7x7 stride-2 convolution followed by a stride-2 max pool.
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=0, ceil_mode=True)

        # Four residual stages; every stage after the first uses stride 2.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        # Adaptive pooling collapses any spatial size to 1x1, so the classifier
        # always receives exactly 512 * block.expansion features regardless of
        # the input resolution.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        self._init_weights()

    def _init_weights(self):
        """Initialise convolution and batch-norm parameters."""
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                # Zero-mean normal with std = sqrt(2 / (k_h * k_w * out_channels)).
                fan_out = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                nn.init.normal_(m.weight, mean=0.0, std=math.sqrt(2.0 / fan_out))
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one residual stage as an ``nn.Sequential``.

        Args:
            block: Residual block class (see the class docstring).
            planes: Base channel count passed to every block of the stage.
            blocks: Number of blocks in the stage.
            stride: Stride given to the first block of the stage.

        Returns:
            ``nn.Sequential`` holding ``blocks`` block instances.
        """
        out_planes = planes * block.expansion

        # The first block needs a projection shortcut whenever it changes the
        # spatial size (stride != 1) or the channel count.
        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, out_planes, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(out_planes),
            )

        stage = [block(self.inplanes, planes, stride, downsample)]
        # Every following block consumes the expanded channel count.
        self.inplanes = out_planes
        stage.extend(block(self.inplanes, planes) for _ in range(1, blocks))
        return nn.Sequential(*stage)

    def forward(self, x):
        """Return the ``(batch, num_classes)`` output of the linear head for *x*."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        # Flatten everything except the batch dimension.
        x = x.flatten(1)
        return self.fc(x)
模型构建函数
# Maps each supported ResNet variant name to a pair:
# (residual block class, number of blocks in each of the four stages).
arch_settings = dict(
    resnet50=(Bottleneck, (3, 4, 6, 3)),
    resnet101=(Bottleneck, (3, 4, 23, 3)),
    resnet152=(Bottleneck, (3, 8, 36, 3)),
)
def _load_matching_weights(model, pretrained_dict):
    """Copy into *model* every pretrained tensor whose key and shape match.

    Args:
        model: Module whose state dict receives the weights.
        pretrained_dict: Mapping of parameter name to tensor.

    Returns:
        Tuple ``(load_key, no_load_key)``: the names that were copied and the
        names that were skipped because the key is absent from *model* or the
        shapes differ.
    """
    model_dict = model.state_dict()
    load_key, no_load_key = [], []
    for name, tensor in pretrained_dict.items():
        if name in model_dict and model_dict[name].shape == tensor.shape:
            model_dict[name] = tensor
            load_key.append(name)
        else:
            no_load_key.append(name)
    model.load_state_dict(model_dict)
    return load_key, no_load_key


def resnet(depth, pretrained=False):
    """Build a ResNet variant, optionally initialised from torchvision weights.

    Args:
        depth: Key of ``arch_settings`` selecting the variant, e.g.
            ``"resnet50"``.
        pretrained: When true, build the torchvision model of the same name
            with ``pretrained=True`` and copy every weight whose name and
            shape match; a summary of loaded and skipped keys is printed.

    Returns:
        The constructed ``ResNet`` instance.

    Raises:
        KeyError: If *depth* is not in ``arch_settings``, or if pretrained
            weights are requested for a name torchvision does not provide.
    """
    if depth not in arch_settings:
        raise KeyError(f'invalid depth {depth} for resnet')
    block, stage_blocks = arch_settings[depth]
    model = ResNet(block, stage_blocks)

    if not pretrained:
        return model

    # The keys of arch_settings coincide with the names of the torchvision
    # constructors, so the reference model can be looked up by name.
    builder = getattr(models, depth, None)
    if builder is None:
        raise KeyError(f'no torchvision pretrained weights for {depth}')
    # NOTE(review): newer torchvision releases prefer the ``weights=``
    # argument; ``pretrained=True`` is kept so older releases keep working.
    reference = builder(pretrained=True)
    load_key, no_load_key = _load_matching_weights(model, reference.state_dict())

    # Report how much of the pretrained checkpoint was actually used.
    print("load over")
    print("\nSuccessful Load Key:", str(load_key)[:500], "……\nSuccessful Load Key Num:", len(load_key))
    print("\nFail To Load Key:", str(no_load_key)[:500], "……\nFail To Load Key num:", len(no_load_key))
    return model
# Script entry point: build a ResNet-101 without pretrained weights and move
# it to the GPU.
if __name__ == "__main__":
model = resnet("resnet101",pretrained=False)
# NOTE(review): .cuda() needs a CUDA-capable device -- confirm this script is
# only ever run on GPU machines.
model = model.cuda()