ResNet的亮点:引入批标准化,不再是之前的普通标准化,加速训练。
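ResNet的BasicBlock(18/34层)不使用1*1卷积;Bottleneck(50/101/152层)先用1*1卷积降维、最后再用1*1卷积升维,以减少参数。ResNeXt沿用这一结构,并把中间的3*3卷积换成分组卷积。这种1*1卷积也称为PW卷积(Pointwise Convolution,逐点卷积)。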
两种残差块:18层、34层使用第一种残差块(BasicBlock),50层、101层、152层使用第二种残差块(Bottleneck)。
用到的迁移学习
参数表
model.py
import torch.nn as nn
import torch
class BasicBlock(nn.Module):
    """Two-layer residual block used to build ResNet-18 and ResNet-34.

    The main path is conv3x3 -> BN -> ReLU -> conv3x3 -> BN. The shortcut is
    added element-wise to that result (not concatenated) and a final ReLU is
    applied.

    Args:
        in_channel: Number of channels of the input feature map.
        out_channel: Number of channels produced by both 3x3 convolutions.
        stride: Stride of the first 3x3 convolution.
        downsample: Optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is used as the shortcut.
        **kwargs: Accepted and ignored, so extra keyword arguments such as
            ``groups`` / ``width_per_group`` do not raise.
    """

    # Multiplier between ``out_channel`` and the block's real output width.
    expansion = 1

    def __init__(self, in_channel, out_channel, stride=1, downsample=None, **kwargs):
        super().__init__()
        # The convolutions have no bias term; each one is followed directly
        # by a BatchNorm layer.
        self.conv1 = nn.Conv2d(in_channel, out_channel, kernel_size=3,
                               stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.relu = nn.ReLU()
        self.conv2 = nn.Conv2d(out_channel, out_channel, kernel_size=3,
                               stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.bn2(self.conv2(y))

        # Residual connection: element-wise sum with the shortcut.
        y += shortcut
        return self.relu(y)
class Bottleneck(nn.Module):
    """Three-layer bottleneck residual block for ResNet-50/101/152 and ResNeXt.

    The main path is conv1x1 (squeeze) -> conv3x3 (optionally grouped) ->
    conv1x1 (expand to ``out_channel * expansion``), each followed by
    BatchNorm, with ReLU after the first two and after the residual sum.

    Note: in the original paper, on the main branch of the downsampling
    (dashed-shortcut) block the first 1x1 convolution has stride 2 and the
    3x3 convolution has stride 1. The official PyTorch implementation puts
    stride 1 on the first 1x1 convolution and stride 2 on the 3x3
    convolution instead, which reportedly improves top-1 accuracy by about
    0.5%. See ResNet v1.5:
    https://ngc.nvidia.com/catalog/model-scripts/nvidia:resnet_50_v1_5_for_pytorch

    Args:
        in_channel: Number of channels of the input feature map.
        out_channel: Base width of the block; the block outputs
            ``out_channel * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input to produce the
            shortcut; when ``None`` the input itself is used.
        groups: Number of groups of the 3x3 convolution (1 for plain ResNet).
        width_per_group: Width of each group (64 for plain ResNet).
    """

    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None,
                 groups=1, width_per_group=64):
        super().__init__()

        # Width of the two inner layers; equals ``out_channel`` when
        # groups == 1 and width_per_group == 64.
        width = int(out_channel * (width_per_group / 64.)) * groups
        expanded = out_channel * self.expansion

        # 1x1 convolution: squeeze channels.
        self.conv1 = nn.Conv2d(in_channel, width, kernel_size=1, stride=1, bias=False)
        self.bn1 = nn.BatchNorm2d(width)
        # 3x3 (grouped) convolution: carries the block's stride.
        self.conv2 = nn.Conv2d(width, width, kernel_size=3, stride=stride,
                               padding=1, groups=groups, bias=False)
        self.bn2 = nn.BatchNorm2d(width)
        # 1x1 convolution: expand channels.
        self.conv3 = nn.Conv2d(width, expanded, kernel_size=1, stride=1, bias=False)
        self.bn3 = nn.BatchNorm2d(expanded)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        shortcut = x if self.downsample is None else self.downsample(x)

        y = self.relu(self.bn1(self.conv1(x)))
        y = self.relu(self.bn2(self.conv2(y)))
        y = self.bn3(self.conv3(y))

        y += shortcut
        return self.relu(y)
class ResNet(nn.Module):
    """ResNet / ResNeXt network assembled from a configurable residual block.

    Args:
        block: Residual block class. It must expose an ``expansion``
            attribute and accept ``(in_channel, channel, stride=,
            downsample=, groups=, width_per_group=)``.
        blocks_num: Four integers, the number of blocks in each stage.
        num_classes: Output size of the final fully connected layer.
        include_top: When true, global average pooling and the fully
            connected classifier are built and applied; otherwise
            ``forward`` returns the last stage's feature map.
        groups: Group count forwarded to every block. This is what enables
            the ResNeXt variant; use 1 for plain ResNet.
        width_per_group: Per-group width forwarded to every block; 64 for
            plain ResNet.
    """

    def __init__(self,
                 block,
                 blocks_num,
                 num_classes=1000,
                 include_top=True,
                 groups=1,
                 width_per_group=64):
        super().__init__()
        self.include_top = include_top
        # Running input width; updated by _make_layer as stages are stacked.
        self.in_channel = 64
        self.groups = groups
        self.width_per_group = width_per_group

        # Stem. For a 224x224 input: conv1 -> 112x112x64, maxpool -> 56x56x64.
        self.conv1 = nn.Conv2d(3, self.in_channel, kernel_size=7, stride=2,
                               padding=3, bias=False)
        self.bn1 = nn.BatchNorm2d(self.in_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Four stages; every stage except the first halves the resolution.
        self.layer1 = self._make_layer(block, 64, blocks_num[0])
        self.layer2 = self._make_layer(block, 128, blocks_num[1], stride=2)
        self.layer3 = self._make_layer(block, 256, blocks_num[2], stride=2)
        self.layer4 = self._make_layer(block, 512, blocks_num[3], stride=2)

        if self.include_top:
            # Pool to 1x1 so the classifier sees 512 * expansion features.
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512 * block.expansion, num_classes)

        # Kaiming-normal initialisation for every convolution weight.
        for module in self.modules():
            if isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

    def _make_layer(self, block, channel, block_num, stride=1):
        """Build one stage made of ``block_num`` residual blocks.

        Only the first block of the stage receives ``stride`` and the
        projection shortcut; the remaining blocks keep the resolution and
        use an identity shortcut. ``self.in_channel`` is left at the
        stage's output width so the next stage can chain on to it.

        Args:
            block: Residual block class.
            channel: Base width of the stage; its output width is
                ``channel * block.expansion``.
            block_num: Number of blocks in the stage.
            stride: Stride of the first block.
        """
        stage_width = channel * block.expansion

        # A 1x1 conv + BN projection is needed on the shortcut whenever the
        # first block changes the resolution or the channel count.
        projection = None
        if stride != 1 or self.in_channel != stage_width:
            projection = nn.Sequential(
                nn.Conv2d(self.in_channel, stage_width, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(stage_width))

        shared = dict(groups=self.groups, width_per_group=self.width_per_group)
        stage = [block(self.in_channel, channel,
                       downsample=projection, stride=stride, **shared)]

        # From here on, inputs already have the stage's output width. The
        # attribute persists, so it is also the input width of the next stage.
        self.in_channel = stage_width
        stage.extend(block(self.in_channel, channel, **shared)
                     for _ in range(1, block_num))

        return nn.Sequential(*stage)

    def forward(self, x):
        """Run the stem and the four stages, then the classifier if enabled."""
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)

        if not self.include_top:
            return x

        # Pool, flatten, and map the features to class scores.
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        return self.fc(x)
def resnet34(num_classes=1000, include_top=True):
    """Build a ResNet from ``BasicBlock`` with stage depths [3, 4, 6, 3].

    Pretrained weights:
    https://download.pytorch.org/models/resnet34-333f7ec4.pth
    """
    stage_depths = [3, 4, 6, 3]
    model = ResNet(BasicBlock, stage_depths,
                   num_classes=num_classes,
                   include_top=include_top)
    return model
def resnet50(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths [3, 4, 6, 3].

    Pretrained weights:
    https://download.pytorch.org/models/resnet50-19c8e357.pth
    """
    stage_depths = [3, 4, 6, 3]
    model = ResNet(Bottleneck, stage_depths,
                   num_classes=num_classes,
                   include_top=include_top)
    return model
def resnet101(num_classes=1000, include_top=True):
    """Build a ResNet from ``Bottleneck`` with stage depths [3, 4, 23, 3].

    Pretrained weights:
    https://download.pytorch.org/models/resnet101-5d3b4d8f.pth
    """
    stage_depths = [3, 4, 23, 3]
    model = ResNet(Bottleneck, stage_depths,
                   num_classes=num_classes,
                   include_top=include_top)
    return model
def resnext50_32x4d(num_classes=1000, include_top=True):
    """Build a ResNeXt: ``Bottleneck`` blocks, depths [3, 4, 6, 3], 32 groups of width 4.

    Pretrained weights:
    https://download.pytorch.org/models/resnext50_32x4d-7cdf4587.pth
    """
    # "32x4d": 32 groups, 4 channels per group.
    cardinality = dict(groups=32, width_per_group=4)
    return ResNet(Bottleneck, [3, 4, 6, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  **cardinality)
def resnext101_32x8d(num_classes=1000, include_top=True):
    """Build a ResNeXt: ``Bottleneck`` blocks, depths [3, 4, 23, 3], 32 groups of width 8.

    Pretrained weights:
    https://download.pytorch.org/models/resnext101_32x8d-8ba56ff5.pth
    """
    # "32x8d": 32 groups, 8 channels per group.
    cardinality = dict(groups=32, width_per_group=8)
    return ResNet(Bottleneck, [3, 4, 23, 3],
                  num_classes=num_classes,
                  include_top=include_top,
                  **cardinality)
train.py
resnet训练脚本中要注意的是官方权重下载和加载,修改,冻结部分的代码
# for param in net.parameters():
# param.requires_grad = False
开启上述代码的注释后,将冻结所有参数,只训练后续代码新添加的适应自定义数据集的全连接层。
import os
import sys
import json
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import transforms, datasets
from tqdm import tqdm
from model import resnet34
def main():
    """Fine-tune a pretrained ResNet-34 on the flower dataset.

    Steps: build the train/val data loaders, write the index -> class-name
    mapping to ``class_indices.json``, load the pretrained checkpoint,
    replace the classifier head with one sized for the dataset, train for a
    fixed number of epochs and save the weights with the best validation
    accuracy to ``./resNet34.pth``.

    Raises:
        AssertionError: If the dataset directory or the pretrained weight
            file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("using {} device.".format(device))

    # Random crop + flip for training, deterministic resize + center crop
    # for validation; both use the same per-channel mean/std normalization.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),
                                     transforms.RandomHorizontalFlip(),
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),
                                   transforms.CenterCrop(224),
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    data_root = os.path.abspath(os.path.join(os.getcwd(), "../.."))  # get data root path
    image_path = os.path.join(data_root, "data_set", "flower_data")  # flower data set path
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
                                         transform=data_transform["train"])
    train_num = len(train_dataset)

    # e.g. {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
    flower_list = train_dataset.class_to_idx
    # Invert to index -> class name and persist it for the prediction scripts.
    cla_dict = {idx: name for name, idx in flower_list.items()}
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json.dumps(cla_dict, indent=4))

    batch_size = 16
    # os.cpu_count() may return None; fall back to 1 so min() cannot fail.
    nw = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])  # number of workers
    print('Using {} dataloader workers every process'.format(nw))

    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size, shuffle=True,
                                               num_workers=nw)

    validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
                                            transform=data_transform["val"])
    val_num = len(validate_dataset)
    validate_loader = torch.utils.data.DataLoader(validate_dataset,
                                                  batch_size=batch_size, shuffle=False,
                                                  num_workers=nw)

    print("using {} images for training, {} images for validation.".format(train_num,
                                                                           val_num))

    # Build the network with its default head so the checkpoint's fc weights
    # load without a shape mismatch; the head is replaced afterwards.
    net = resnet34()
    # download url: https://download.pytorch.org/models/resnet34-333f7ec4.pth
    model_weight_path = "./resnet34-pre.pth"
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'))

    # Optional: uncomment to freeze every pretrained parameter, so that only
    # the new fc layer created below is trained.
    # for param in net.parameters():
    #     param.requires_grad = False

    # Replace the classifier head; its size follows the dataset's class
    # count instead of a hard-coded number.
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, len(flower_list))
    net.to(device)

    loss_function = nn.CrossEntropyLoss()

    # Only optimize parameters that still require gradients (matters when
    # the freezing snippet above is enabled).
    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = './resNet34.pth'
    train_steps = len(train_loader)
    for epoch in range(epochs):
        # ---- train ----
        net.train()
        running_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)
        for images, labels in train_bar:
            optimizer.zero_grad()
            logits = net(images.to(device))
            loss = loss_function(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # Convert once to a Python float for both the running sum and
            # the progress-bar text.
            loss_value = loss.item()
            running_loss += loss_value

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
                                                                     epochs,
                                                                     loss_value)

        # ---- validate ----
        net.eval()
        acc = 0.0  # number of correct predictions in this epoch
        with torch.no_grad():
            val_bar = tqdm(validate_loader, file=sys.stdout)
            for val_images, val_labels in val_bar:
                outputs = net(val_images.to(device))
                _, predict_y = torch.max(outputs, dim=1)
                acc += torch.eq(predict_y, val_labels.to(device)).sum().item()

                val_bar.desc = "valid epoch[{}/{}]".format(epoch + 1,
                                                           epochs)

        val_accurate = acc / val_num
        print('[epoch %d] train_loss: %.3f  val_accuracy: %.3f' %
              (epoch + 1, running_loss / train_steps, val_accurate))

        # Keep only the checkpoint with the best validation accuracy so far.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    print('Finished Training')


if __name__ == '__main__':
    main()
不冻结官方权重的训练结果,这个运行过程的结果找不到了,下面是使用这个训练出来的权重进行单张照片的预测结果。
class: daisy prob: 0.000361
class: dandelion prob: 2.27e-05
class: roses prob: 8.15e-06
class: sunflowers prob: 5.71e-05
class: tulips prob: 1.0
冻结的话训练还是很快的,以下是训练3轮的运行过程:
using cuda:0 device.
Using 8 dataloader workers every process
using 3306 images for training, 364 images for validation.
train epoch[1/10] loss:1.181: 100%|██████████| 207/207 [00:39<00:00, 5.26it/s]
valid epoch[1/10]: 100%|██████████| 23/23 [00:18<00:00, 1.25it/s]
[epoch 1] train_loss: 1.351 val_accuracy: 0.585
train epoch[2/10] loss:1.218: 100%|██████████| 207/207 [00:28<00:00, 7.37it/s]
valid epoch[2/10]: 100%|██████████| 23/23 [00:20<00:00, 1.12it/s]
[epoch 2] train_loss: 1.025 val_accuracy: 0.755
train epoch[3/10] loss:0.806: 100%|██████████| 207/207 [00:27<00:00, 7.66it/s]
valid epoch[3/10]: 100%|██████████| 23/23 [00:14<00:00, 1.60it/s]
[epoch 3] train_loss: 0.860 val_accuracy: 0.783
0%| | 0/207 [00:13, ?it/s]
以下是用冻结参数训练得到的权重的对单张图片的预测结果:
class: daisy prob: 0.0452
class: dandelion prob: 0.0243
class: roses prob: 0.034
class: sunflowers prob: 0.0241
class: tulips prob: 0.872
冻结训练只跑了3个epoch,比不冻结的训练少7轮;从上面的单张图片预测结果也大致可以看出,冻结参数训练得到的权重不如不冻结参数训练得到的权重。在设备允许的情况下,针对不同数据集尽量不要冻结参数,模型效果会更好。
predict.py 单张图片的预测脚本
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import resnet34
def main():
    """Classify a single image with a trained ResNet-34 and show the result.

    Reads ``./test.jpg``, the index -> class-name mapping in
    ``./class_indices.json`` and the weights in
    ``./resNet34-freeze-pre.pth``; prints the probability of every class and
    displays the image titled with the top prediction.

    Raises:
        AssertionError: If the image, the class-index file or the weight
            file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # load image
    img_path = "./test.jpg"
    assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
    img = Image.open(img_path)
    plt.imshow(img)

    # Pre-process into the input the model accepts. Force three channels
    # first: grayscale, palette or RGBA images would otherwise not match the
    # 3-channel normalization and the network's first convolution.
    img = data_transform(img.convert("RGB"))
    # Add the batch dimension: [C, H, W] -> [1, C, H, W].
    img = torch.unsqueeze(img, dim=0)

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
    with open(json_path, "r") as f:
        class_indict = json.load(f)

    # Size the classifier from the class mapping rather than a fixed number.
    model = resnet34(num_classes=len(class_indict)).to(device)

    # load model weights
    weights_path = "./resNet34-freeze-pre.pth"
    assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    with torch.no_grad():
        # Drop the batch dimension and turn logits into probabilities.
        output = model(img.to(device)).squeeze(0).cpu()
        predict = torch.softmax(output, dim=0)
        # Plain Python int: safe both as a tensor index and as a JSON key.
        predict_cla = int(torch.argmax(predict).item())

    print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
                                               predict[predict_cla].item())
    plt.title(print_res)
    for i, prob in enumerate(predict):
        print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
                                                prob.item()))
    plt.show()


if __name__ == '__main__':
    main()
batch_predict.py
注意这段代码的成批打包图像传入模型中的代码实现
import os
import json
import torch
from PIL import Image
from torchvision import transforms
from model import resnet34
def main():
    """Classify every ``.jpg`` in a folder with a trained ResNet-34, in batches.

    Reads images from ``./batch_predict_data/``, the index -> class-name
    mapping from ``./class_indices.json`` and the weights from
    ``./resNet34.pth``; prints the predicted class and its probability for
    each image.

    Raises:
        AssertionError: If the image folder, an image, the class-index file
            or the weight file does not exist.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    data_transform = transforms.Compose(
        [transforms.Resize(256),
         transforms.CenterCrop(224),
         transforms.ToTensor(),
         transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])

    # Folder whose images are to be predicted.
    imgs_root = "./batch_predict_data/"
    assert os.path.exists(imgs_root), f"file: '{imgs_root}' dose not exist."
    # Collect the paths of all .jpg images in that folder.
    img_path_list = [os.path.join(imgs_root, i) for i in os.listdir(imgs_root) if i.endswith(".jpg")]

    # read class_indict
    json_path = './class_indices.json'
    assert os.path.exists(json_path), f"file: '{json_path}' dose not exist."
    # Context manager so the file handle is closed after loading.
    with open(json_path, "r") as json_file:
        class_indict = json.load(json_file)

    # Size the classifier from the class mapping rather than a fixed number.
    model = resnet34(num_classes=len(class_indict)).to(device)

    # load model weights
    weights_path = "./resNet34.pth"
    assert os.path.exists(weights_path), f"file: '{weights_path}' dose not exist."
    model.load_state_dict(torch.load(weights_path, map_location=device))

    # prediction
    model.eval()
    batch_size = 3  # number of images packed into one batch
    with torch.no_grad():
        # Step through the list batch_size paths at a time. Stepping over the
        # full length keeps the final, possibly shorter, batch, so no image
        # is skipped when the count is not a multiple of batch_size.
        for start in range(0, len(img_path_list), batch_size):
            batch_paths = img_path_list[start: start + batch_size]

            img_list = []
            for img_path in batch_paths:
                assert os.path.exists(img_path), f"file: '{img_path}' dose not exist."
                # Close the file once decoded, and force three channels so
                # grayscale / RGBA images match the 3-channel normalization.
                with Image.open(img_path) as img:
                    img_list.append(data_transform(img.convert("RGB")))

            # Pack all images of this batch into one [N, C, H, W] tensor.
            batch_img = torch.stack(img_list, dim=0)

            # predict class
            output = model(batch_img.to(device)).cpu()
            predict = torch.softmax(output, dim=1)
            probs, classes = torch.max(predict, dim=1)

            for img_path, pro, cla in zip(batch_paths, probs, classes):
                print("image: {} class: {} prob: {:.3}".format(img_path,
                                                               class_indict[str(cla.item())],
                                                               pro.item()))


if __name__ == '__main__':
    main()
预测结果:
image: ./batch_predict_data/chuju.jpg class: daisy prob: 1.0
image: ./batch_predict_data/meigui.jpg class: roses prob: 0.999
image: ./batch_predict_data/pugongying.jpg class: dandelion prob: 0.988
image: ./batch_predict_data/xiangrikui.jpg class: sunflowers prob: 0.993
image: ./batch_predict_data/xiangrikui2.jpg class: sunflowers prob: 1.0
image: ./batch_predict_data/yujinxiang.jpg class: tulips prob: 1.0