(1) 流程
goals :
读取datasets/train/cat,datasets/train/dog 文件里分类物体的类别及图片地址,
inputs :
datasets/train/cat,datasets/train/dog ,datasets/test/cat,datasets/test/dog
1. 分别遍历datasets里的train和test文件,获取cat和dog的文件名
2. 分别遍历train和test里的cat和dog文件,读取每个文件的图片名称,
注: PyTorch 中常见“两个括号”的写法:第一个括号传入构造参数、创建模块对象,第二个括号传入输入数据、调用该对象,例如 nn.ReLU(inplace=True)(x)。
(2) 代码
import os
classes = ['cat','dog']
sets = ['train','test']
def masklabels(classes,sets):
    """Write one label file per dataset split, listing each image with its class id.

    For every split name in ``sets`` the directory ``datasets/<split>`` is
    scanned. Each sub-directory whose name appears in ``classes`` is treated as
    a class folder, and every ``.jpg``/``.png``/``.jpeg`` file inside it yields
    one line ``<class_id>;<cwd>/datasets/<split>/<class>/<file>`` in
    ``LS<split>.txt`` (created in the current working directory).

    Args:
        classes: Class folder names; the position of a name in this list is
            its numeric class id.
        sets: Split folder names under ``datasets`` (e.g. ``'train'``).
    """
    wd = os.getcwd()
    image_suffixes = ('.jpg', '.png', '.jpeg')
    for split in sets:
        # The context manager guarantees the label file is flushed and closed.
        with open('LS' + split + '.txt', 'w') as list_file:
            # Entries may include non-class items such as '.DS_Store'.
            for type_name in os.listdir('datasets/' + split):
                if type_name not in classes:
                    continue
                cls_id = classes.index(type_name)  # e.g. 'cat' -> 0, 'dog' -> 1
                photos_path = os.path.join('datasets', split, type_name)  # e.g. 'datasets/train/cat'
                for photo_name in os.listdir(photos_path):
                    _, postfix = os.path.splitext(photo_name)  # 'cat.6.jpg' -> ('cat.6', '.jpg')
                    if postfix not in image_suffixes:
                        continue
                    # e.g. '0;/Users/LS/cls_LS/datasets/train/cat/cat.6.jpg'
                    list_file.write(
                        str(cls_id) + ';' + '%s/%s' % (wd, os.path.join(photos_path, photo_name)) + '\n'
                    )
if __name__ == '__main__':
    # Build the LS<split>.txt label files for the splits and classes declared above.
    masklabels(classes, sets)
(1) 流程
1. get_random_data()
(1) 对图像进行缩放并且进行长和宽的扭曲;
(2) 将图像多余的部分加上灰条。图像扭曲后,宽高发生变化,加上灰条后,图片的宽高仍是(224, 224)。
(3) 图像翻转
(4) 图像旋转
(5) 色域扭曲
(2) 代码
import cv2
import numpy as np
from PIL import Image
from random import shuffle
import torch.utils.data as data
from utils.utils_ls import letterbox_image
def _preprocess_input(x):
# 图像数据归一化到0~1
x /= 127.5
x -= 1.
return x
def rand(a=0,b=1):
    """Return one uniform random sample from the half-open interval [a, b)."""
    span = b - a
    return np.random.rand() * span + a
def get_random_data(image,input_shape,jitter=.3, hue=.1, sat=1.5, val=1.5):
    """Apply random augmentation to a PIL image and return it as a float RGB array.

    Steps: random rescale with aspect-ratio jitter, paste onto a gray canvas of
    the target size, random horizontal flip, random rotation, and random
    saturation/value scaling in HSV space.

    Args:
        image: PIL image to augment.
        input_shape: ``(height, width)`` of the output canvas.
        jitter: Maximum relative aspect-ratio distortion.
        hue: Currently unused; the hue shift is disabled in the body.
        sat: Upper bound of the random saturation scale factor.
        val: Upper bound of the random value (brightness) scale factor.

    Returns:
        ``float32`` array of shape ``(height, width, 3)`` in RGB order with
        values scaled back to the 0..255 range.
    """
    image = image.convert("RGB")
    h, w = input_shape

    # 1.1 Randomly rescale the image and distort its aspect ratio.
    new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
    scale = rand(.75, 1.25)
    if new_ar < 1:
        # Taller than wide: fix the height, derive the width.
        nh = int(scale*h)
        nw = int(nh*new_ar)
    else:
        # Wider than tall: fix the width, derive the height.
        nw = int(scale*w)
        nh = int(nw/new_ar)
    image = image.resize((nw,nh), Image.BICUBIC)

    # 1.2 Paste at a random offset onto a gray canvas so the output is always
    #     (w, h), whatever the resized size turned out to be.
    dx = int(rand(0, w-nw))
    dy = int(rand(0, h-nh))
    new_image = Image.new('RGB', (w,h), (128,128,128))
    new_image.paste(image, (dx, dy))
    image = new_image

    # Random horizontal flip.
    flip = rand()<.5
    if flip:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # Random rotation about the canvas centre; exposed corners are filled gray.
    # From here on `image` may be a NumPy array instead of a PIL image.
    rotate = rand()<.5
    if rotate:
        angle = np.random.randint(-15,15)
        a,b = w/2,h/2
        M = cv2.getRotationMatrix2D((a,b),angle,1)  # rotation matrix
        image = cv2.warpAffine(np.array(image),M,(w,h),borderValue=[128,128,128])  # affine warp

    # Colour jitter in HSV space (the hue shift is intentionally disabled).
    # hue = rand(-hue, hue)
    sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
    val = rand(1, val) if rand()<.5 else 1/rand(1, val)
    x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
    # x[..., 0] *= hue
    x[..., 1] *= sat
    x[..., 2] *= val
    # Clamp back into the ranges used for float HSV: H <= 360, S and V <= 1, all >= 0.
    x[x[:,:, 0]>360, 0] = 360
    x[:, :, 1:][x[:, :, 1:]>1] = 1
    x[x<0] = 0
    image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255
    return image_data
class DataGenerator(data.Dataset):
    """Dataset that loads ``<label>;<image path>`` annotation lines as (image, label) pairs."""

    def __init__(self, input_shape, lines, random=True):
        """Store the dataset configuration.

        Args:
            input_shape: Target shape; indices 0 and 1 are used as (height, width).
            lines: Annotation lines of the form ``'<label>;<image path>'``.
            random: If true, apply random augmentation; otherwise only letterbox.
        """
        self.input_shape = input_shape
        self.lines = lines
        self.random = random

    def __len__(self):
        """Return the number of annotation lines."""
        return len(self.lines)

    def get_len(self):
        """Return the number of annotation lines (same value as ``len(self)``)."""
        return len(self)

    def __getitem__(self, index):
        """Load, augment and normalise the sample at ``index``.

        Returns:
            Tuple ``(img, y)``: ``img`` is a float32 channel-first array, e.g.
            shape ``(3, 224, 224)``, and ``y`` is the integer class id.
        """
        if index == 0:
            # NOTE(review): the body of this branch is missing from the source;
            # reshuffling at the start of each pass is presumed, since `shuffle`
            # is imported and otherwise unused. It reorders `self.lines` in place.
            shuffle(self.lines)
        # Split on the first ';' only, so paths containing spaces or ';' survive.
        label, _, annotation_path = self.lines[index].strip().partition(';')
        img = Image.open(annotation_path)  # e.g. '/Users/LS/cls_LS/datasets/train/cat/cat.6.jpg'
        target_size = [self.input_shape[0], self.input_shape[1]]
        if self.random:
            img = get_random_data(img, target_size)
        else:
            # Presumably a resize-with-padding helper; see utils.utils_ls.
            img = letterbox_image(img, target_size)
        img = np.array(img).astype(np.float32)
        img = _preprocess_input(img)
        img = np.transpose(img,[2,0,1])  # HWC -> CHW
        y = int(label)
        return img, y  # e.g. img.shape == (3, 224, 224), y == 0
def detection_collate(batch):
    """Stack an iterable of ``(image, label)`` pairs into two NumPy arrays.

    Args:
        batch: Iterable yielding ``(img, y)`` pairs, e.g. a list of samples
            or a dataset object.

    Returns:
        Tuple ``(images, targets)``: the images stacked along a new leading
        axis and the labels as a 1-D array.
    """
    images = []
    targets = []
    for img, y in batch:
        images.append(img)
        targets.append(y)
    images = np.array(images)
    targets = np.array(targets)
    return images, targets
if __name__ == '__main__':
    # Smoke test: load a few annotated samples and check the batched shapes.
    # from torch.utils.data import DataLoader
    input_shape = [224,224,3]
    with open(r"./cls_train.txt","r") as f:
        lines = f.readlines()
    num_val = int(len(lines)*0.1)     # e.g. 6 when the file has 60 lines
    num_train = len(lines) - num_val  # e.g. 54
    train_dataset = DataGenerator(input_shape,lines[:6])
    images, targets = detection_collate(train_dataset)
    print(images.shape, targets.shape)
    # Expected output: (6, 3, 224, 224) (6,)
(1) 思路
''' model
1. VGG网络构架:
features(x) + avgpool(x) + flatten(x, 1)+ classifier(x)
2. 代码思路:
(1)features(x):features = make_layers(cfgs['D'])
[Conv2d(k=3,s=1) + (BN) + ReLU + MaxPool2d(k=2,s=2)] * 5
[b,3,224,224] -> [b,64,224,224]-> [b,64,112,112] -> [b,128,112,112] -> [b,128,56,56] -> [b,256,56,56]->
[b,256,28,28] -> [b,512,28,28] -> [b,512,14,14] -> [b,512,14,14] -> [b,512,7,7]
(2)avgpool(x): avgpool = AdaptiveAvgPool2d((7,7))
[b,512,7,7] -> [b,512,7,7]
(3)flatten(x, 1): [b,512,7,7] -> [b,25088]
(4)classifier(x): classifier = [Linear + ReLU + Dropout]*2 + Linear
[b,25088] -> [b,4096] -> [b,4096] -> [b,1000]
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU(inplace=True)
(13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(16): ReLU(inplace=True)
(17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): ReLU(inplace=True)
(20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(26): ReLU(inplace=True)
(27): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(29): ReLU(inplace=True)
(30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(32): ReLU(inplace=True)
(33): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(35): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(36): ReLU(inplace=True)
(37): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(38): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(39): ReLU(inplace=True)
(40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(42): ReLU(inplace=True)
(43): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace=True)
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace=True)
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=4096, out_features=10, bias=True)
(2) 代码
vgg16网络架构相对简单,但参数量非常大。它在刚出现时很有价值:用3x3的卷积核代替大的卷积核,两个3x3卷积的感受野相当于一个5x5卷积,既减少了参数量,也变相加深了网络深度。通过设置 cfgs 的方式,可以让模型细化成不同的版本。通过vgg16 的学习,可以掌握网络架构、模型初始化参数、冻结参数、加载参数、改变分类数目等方法。
import torch
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
model_urls = {'vgg16':'https://download.pytorch.org/models/vgg16-397923af.pth'}
# VGG layer layouts: an integer is the output channel count of a 3x3 conv,
# 'M' marks a 2x2 max-pool. 'D' is the VGG16 layout.
cfgs = {
    'D': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
}
def make_layers(cfg,batch_norm=True):
    """Build the VGG convolutional feature extractor described by ``cfg``.

    Args:
        cfg: Sequence in which an integer adds a 3x3 convolution with that many
            output channels (followed by ReLU) and ``'M'`` adds a 2x2 max-pool.
        batch_norm: If true, insert ``BatchNorm2d`` between each conv and ReLU.

    Returns:
        ``nn.Sequential`` expecting a 3-channel input.
    """
    layers = []
    in_channels = 3
    for v in cfg:
        if v == 'M':
            layers += [nn.MaxPool2d(kernel_size=2,stride=2)]
        else:
            conv2d = nn.Conv2d(in_channels,v,kernel_size=3,padding=1)
            if batch_norm:
                layers += [conv2d,nn.BatchNorm2d(v),nn.ReLU(True)]
            else:
                layers += [conv2d,nn.ReLU(True)]
            # The next conv consumes this conv's output channels.
            in_channels = v
    return nn.Sequential(*layers)
class VGG(nn.Module):
    """VGG classifier: conv features -> 7x7 adaptive average pool -> 3-layer MLP head."""

    def __init__(self,features,num_classes=1000,init_weights=True):
        """Assemble the network.

        Args:
            features: Convolutional backbone whose output has 512 channels
                (the classifier expects 512 * 7 * 7 inputs after pooling).
            num_classes: Size of the final linear layer.
            init_weights: If true, re-initialise all weights via
                ``_initialize_weights``.
        """
        super(VGG, self).__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7,7))
        self.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
        if init_weights:
            self._initialize_weights()

    def forward(self,x):
        """Return class logits for a batch of images."""
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)  # [b, C, 7, 7] -> [b, C*7*7]
        x = self.classifier(x)
        return x

    def _initialize_weights(self):
        """Initialise conv, batch-norm and linear layers (standard VGG scheme)."""
        for m in self.modules():
            if isinstance(m,nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
                if m.bias is not None:
                    nn.init.constant_(m.bias, 0)
            elif isinstance(m,nn.BatchNorm2d):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)
            elif isinstance(m,nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.constant_(m.bias, 0)

    def freeze_backbone(self):
        """Stop gradient updates for every parameter of ``self.features``."""
        for param in self.features.parameters():
            param.requires_grad = False

    def Unfreeze_backbone(self):
        """Re-enable gradient updates for every parameter of ``self.features``."""
        for param in self.features.parameters():
            param.requires_grad = True
def vgg16(pretrained= False,progress=True,num_classes=1000):
    """Build a VGG16 model, optionally loading ImageNet weights.

    Args:
        pretrained: If true, download and load the weights at
            ``model_urls['vgg16']``. That checkpoint is the plain (non
            batch-norm) VGG16, so the backbone is then built without
            batch-norm layers; its tensor shapes do not fit the batch-norm layout.
        progress: Show a download progress bar.
        num_classes: If not 1000, the classifier head is replaced by a freshly
            initialised one with this many outputs.

    Returns:
        The constructed ``VGG`` model.
    """
    model = VGG(make_layers(cfgs['D'], batch_norm=not pretrained))
    if pretrained:
        state_dict = torch.hub.load_state_dict_from_url(model_urls['vgg16'],
                                                        progress = progress)
        model.load_state_dict(state_dict)
    if num_classes != 1000:
        # Swap the 1000-way ImageNet head for one sized to the target task.
        model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        )
    return model
if __name__ == '__main__':
    # Smoke test: run a random batch of two 224x224 RGB images through VGG16.
    x = torch.rand([2,3,224,224])
    model = vgg16(num_classes=10)
    y = model(x)
(1) 思路
1. ResNet :
1.1 主要模块:ConvBlock + IdentityBlock
ConvBlock : downsample(x) + [(cnv(1x1)+bn+relu) + (cnv(3x3)+bn+relu) + (cnv(1x1)+bn)],相加后再 relu
IdentityBlock : x + [(cnv(1x1)+bn+relu) + (cnv(3x3)+bn+relu) + (cnv(1x1)+bn)],相加后再 relu
layer : ConvBlock + IdentityBlock * n
1.2 网络结构:
(cnv(7x7)+bn+relu+maxpool) + layer*4 + avgpool + fc
2. resnet50 :
2.1 流程:
model -> pretrained -> num_classes
2.2 网络结构:
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(2): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(layer2): Sequential(
(0): Bottleneck(
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(2): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(3): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(layer3): Sequential(
(0): Bottleneck(
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(2): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(3): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(4): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(5): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(layer4): Sequential(
(0): Bottleneck(
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(1): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(2): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=2048, out_features=2, bias=True)
(2) 代码
import torch
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
model_urls = {'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth'}
def conv3x3(in_planes,out_planes,stride=1,groups=1,dilation=1):
    """Return a bias-free 3x3 convolution.

    Padding equals ``dilation`` so that, at stride 1, the spatial size of the
    input is preserved.
    """
    return nn.Conv2d(in_planes,out_planes,kernel_size=3,stride=stride,
                     padding=dilation,groups=groups,bias=False,dilation=dilation)
def conv1x1(in_planes,out_planes,stride=1):
    """Return a bias-free 1x1 (pointwise) convolution with the given stride."""
    pointwise = nn.Conv2d(
        in_planes,
        out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
    return pointwise
class Bottleneck(nn.Module):
    """ResNet bottleneck block: 1x1 reduce -> 3x3 -> 1x1 expand, plus a shortcut."""

    # Output channels are ``planes * expansion``.
    expansion = 4

    def __init__(self,inplanes,planes,stride=1,downsample=None,groups=1,
                 base_width=64,dilation=1,norm_layer=None):
        """Build the block.

        Args:
            inplanes: Number of input channels.
            planes: Base channel count; the block outputs ``planes * expansion``.
            stride: Stride of the 3x3 convolution.
            downsample: Optional module applied to the input so the shortcut
                matches the main branch's shape; ``None`` means identity.
            groups: Groups of the 3x3 convolution.
            base_width: Per-group width; 64 gives ``width == planes * groups``.
            dilation: Dilation of the 3x3 convolution.
            norm_layer: Normalisation layer factory; defaults to ``nn.BatchNorm2d``.
        """
        super(Bottleneck, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        width = int(planes*(base_width/64.))*groups
        self.conv1 = conv1x1(inplanes,width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width,width,stride,groups,dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width,planes*self.expansion)
        self.bn3 = norm_layer(planes*self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self,x):
        """Return ``relu(main_branch(x) + shortcut(x))``."""
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        # Project the shortcut when the main branch changed shape.
        if self.downsample is not None:
            identity = self.downsample(x)

        out += identity
        out = self.relu(out)
        return out
class ResNet(nn.Module):
    """ResNet: 7x7 stem -> max-pool -> four residual stages -> global pool -> linear."""

    def __init__(self,block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        """Build the network.

        Args:
            block: Residual block class (must expose ``expansion``).
            layers: Number of blocks in each of the four stages.
            num_classes: Output size of the final linear layer.
            zero_init_residual: If true, zero the last norm layer of every
                ``Bottleneck`` so each block starts as an identity mapping.
            groups: Convolution groups forwarded to every block.
            width_per_group: Base width forwarded to every block.
            replace_stride_with_dilation: Three booleans, one per stage 2-4,
                selecting dilation instead of stride-2 downsampling.
            norm_layer: Normalisation layer factory; defaults to ``nn.BatchNorm2d``.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` is given but does
                not have exactly three elements.
        """
        super(ResNet, self).__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer

        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))

        self.block = block
        self.groups = groups
        self.base_width = width_per_group

        # Shape comments below trace a [1, 3, 214, 214] input.
        # [1, 3, 214, 214] --> [1, 64, 107, 107]
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        # [1, 64, 107, 107] --> [1, 64, 54, 54]
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        # [1, 64, 54, 54] --> [1, 256, 54, 54]
        self.layer1 = self._make_layer(block, 64, layers[0])
        # [1, 256, 54, 54] --> [1, 512, 27, 27]
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        # [1, 512, 27, 27] --> [1, 1024, 14, 14]
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        # [1, 1024, 14, 14] --> [1, 2048, 7, 7]
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])
        # [1, 2048, 7, 7] --> [1, 2048, 1, 1]
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        # [1, 2048, 1, 1] --> flatten [1, 2048] --> [1, num_classes]
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

        if zero_init_residual:
            for m in self.modules():
                if isinstance(m, Bottleneck):
                    nn.init.constant_(m.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage: a projecting first block followed by identity blocks.

        Args:
            block: Residual block class.
            planes: Base channel count of the stage.
            blocks: Number of blocks in the stage.
            stride: Stride of the first block.
            dilate: If true, trade the stride for an increased dilation.

        Returns:
            ``nn.Sequential`` holding the stage's blocks.
        """
        norm_layer = self._norm_layer
        downsample = None
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        # A projection shortcut is needed whenever the shape changes.
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, planes * block.expansion, stride),
                norm_layer(planes * block.expansion),
            )

        layers = []
        # Conv_block
        layers.append(block(self.inplanes, planes, stride, downsample, self.groups,
                            self.base_width, previous_dilation, norm_layer))
        self.inplanes = planes * block.expansion
        for _ in range(1, blocks):
            # identity_block
            layers.append(block(self.inplanes, planes, groups=self.groups,
                                base_width=self.base_width, dilation=self.dilation,
                                norm_layer=norm_layer))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits; shape comments trace a [1, 3, 214, 214] input."""
        x = self.conv1(x)        # [1, 3, 214, 214] --> [1, 64, 107, 107]
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)      # [1, 64, 107, 107] --> [1, 64, 54, 54]

        x = self.layer1(x)       # [1, 64, 54, 54] --> [1, 256, 54, 54]
        x = self.layer2(x)       # [1, 256, 54, 54] --> [1, 512, 27, 27]
        x = self.layer3(x)       # [1, 512, 27, 27] --> [1, 1024, 14, 14]
        x = self.layer4(x)       # [1, 1024, 14, 14] --> [1, 2048, 7, 7]

        x = self.avgpool(x)      # [1, 2048, 7, 7] --> [1, 2048, 1, 1]
        x = torch.flatten(x, 1)  # [1, 2048, 1, 1] --> [1, 2048]
        x = self.fc(x)           # [1, 2048] --> [1, num_classes]
        return x

    def freeze_backbone(self):
        """Stop gradient updates for the stem and all four residual stages."""
        backbone = [self.conv1, self.bn1, self.layer1, self.layer2, self.layer3, self.layer4]
        for module in backbone:
            for param in module.parameters():
                param.requires_grad = False

    def Unfreeze_backbone(self):
        """Re-enable gradient updates for the stem and all four residual stages."""
        backbone = [self.conv1, self.bn1, self.layer1, self.layer2, self.layer3, self.layer4]
        for module in backbone:
            for param in module.parameters():
                param.requires_grad = True
def resnet50(pretrained=False, progress=False, num_classes=1000):
    """Build a ResNet-50, optionally loading ImageNet weights.

    Args:
        pretrained: If true, download the weights at ``model_urls['resnet50']``
            (cached under ``./model_data``) and load them into the model.
        progress: Show a download progress bar.
        num_classes: If not 1000, the final linear layer is replaced by a
            freshly initialised one with this many outputs.

    Returns:
        The constructed ``ResNet`` model.
    """
    model = ResNet(Bottleneck, [3, 4, 6, 3])
    if pretrained:
        state_dict = torch.hub.load_state_dict_from_url(model_urls['resnet50'], model_dir='./model_data',
                                                        progress=progress)
        model.load_state_dict(state_dict)
    if num_classes != 1000:
        # Swap the 1000-way ImageNet head after loading so the checkpoint still fits.
        model.fc = nn.Linear(512 * model.block.expansion, num_classes)
    return model
if __name__ == '__main__':
    # Smoke test: run one random 214x214 RGB image through a 10-class ResNet-50.
    x = torch.rand([1,3,214,214])
    model = resnet50(num_classes=10)
    y = model(x)
    print(y.shape)  # one row of 10 logits for the single input image
torch.Size([1, 10])
Process finished with exit code 0
(1) 思路
1. MobileNetV2:
1.1 主要结构
InvertedResidual : (Conv(3x3)BNReLU --> Conv(1x1)BNReLU)
(Conv(3x3)BNReLU --> Conv(1x1)BNReLU + x)
(Conv(1x1)BNReLU --> Conv(3x3)BNReLU --> Conv(1x1)BNReLU )
(Conv(1x1)BNReLU --> Conv(3x3)BNReLU --> Conv(1x1)BNReLU + x)
1.2 网络构架
net : features(x) + x.mean + classifier(x)
features(x) : ConvBNReLU + InvertedResidual*7 + ConvBNReLU
x.mean : x.mean([2,3])
classifier(x) : Dropout + Linear
2. mobilenet_v2
2.1 流程
(features): Sequential(
(0): ConvBNReLU(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(4): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(7): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(8): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(10): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(11): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(13): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(14): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(576, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(16): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(17): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(2): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(18): ConvBNReLU(
(0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
(classifier): Sequential(
(0): Dropout(p=0.2, inplace=False)
(1): Linear(in_features=1280, out_features=10, bias=True)
Process finished with exit code 0
(2) 代码
import torch
from torch import nn
from torchvision.models.utils import load_state_dict_from_url
__all__ = ['MobileNetV2', 'mobilenet_v2']
# Download location of the ImageNet-pretrained MobileNetV2 weights.
model_urls = {
    'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
    """Round a channel count to a multiple of ``divisor``.

    Args:
        v: Requested (possibly fractional) channel count.
        divisor: The result is always a multiple of this value.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        The multiple of ``divisor`` nearest to ``v`` (halves round up), never
        below ``min_value`` and never more than 10% smaller than ``v``.
    """
    if min_value is None:
        min_value = divisor
    # Round to the nearest multiple of divisor, clamped from below.
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    # If rounding down lost more than 10% of v, step up one multiple.
    if new_v < 0.9 * v:
        new_v += divisor
    return new_v
class ConvBNReLU(nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d and ReLU6.

    Args:
        in_planes: Number of input channels.
        out_planes: Number of output channels.
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        groups: Convolution groups (``groups == in_planes`` gives a depthwise conv).
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        # "Same"-style padding for odd kernels: the spatial size only changes via stride.
        padding = (kernel_size - 1) // 2
        super(ConvBNReLU, self).__init__(
            nn.Conv2d(in_planes, out_planes, kernel_size, stride, padding, groups=groups, bias=False),
            nn.BatchNorm2d(out_planes),
            nn.ReLU6(inplace=True),
        )
class InvertedResidual(nn.Module):
    """MobileNetV2 inverted-residual block.

    Layout: optional 1x1 expansion (skipped when ``expand_ratio == 1``),
    3x3 depthwise conv, then a linear 1x1 projection followed by BatchNorm2d
    with no activation.

    Args:
        inp: Number of input channels.
        oup: Number of output channels.
        stride: Stride of the depthwise conv; must be 1 or 2.
        expand_ratio: Hidden width is ``round(inp * expand_ratio)``.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super(InvertedResidual, self).__init__()
        self.stride = stride
        assert stride in [1, 2]
        hidden_dim = int(round(inp * expand_ratio))
        # The skip connection is only valid when the output shape equals the input shape.
        self.use_res_connect = self.stride == 1 and inp == oup
        layers = []
        if expand_ratio != 1:
            # Pointwise expansion.
            layers.append(ConvBNReLU(inp, hidden_dim, kernel_size=1))
        layers.extend([
            # Depthwise 3x3 conv (one group per channel).
            ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
            # Linear pointwise projection: no activation after the BatchNorm.
            nn.Conv2d(hidden_dim, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ])
        self.conv = nn.Sequential(*layers)

    def forward(self, x):
        """Apply the block, adding the input back when the residual path is enabled."""
        if self.use_res_connect:
            return x + self.conv(x)
        return self.conv(x)
class MobileNetV2(nn.Module):
    """MobileNetV2 classifier: stem conv, inverted-residual stages, 1x1 conv, linear head.

    Args:
        num_classes: Output size of the final linear layer.
        width_mult: Multiplier applied to the channel count of every stage.
        inverted_residual_setting: Optional list of ``[t, c, n, s]`` rows
            (expansion ratio, output channels, number of repeats, stride of the
            first repeat). Defaults to the standard seven-row table below.
        round_nearest: Channel counts are rounded to a multiple of this value.

    Raises:
        ValueError: If ``inverted_residual_setting`` is empty or its first row
            does not contain four elements.
    """

    def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):
        super(MobileNetV2, self).__init__()
        block = InvertedResidual
        input_channel = 32
        last_channel = 1280
        if inverted_residual_setting is None:
            # Shape comments below are for a 224x224 input.
            inverted_residual_setting = [
                # t, c, n, s
                # 112, 112, 32 -> 112, 112, 16
                [1, 16, 1, 1],
                # 112, 112, 16 -> 56, 56, 24
                [6, 24, 2, 2],
                # 56, 56, 24 -> 28, 28, 32
                [6, 32, 3, 2],
                # 28, 28, 32 -> 14, 14, 64
                [6, 64, 4, 2],
                # 14, 14, 64 -> 14, 14, 96
                [6, 96, 3, 1],
                # 14, 14, 96 -> 7, 7, 160
                [6, 160, 3, 2],
                # 7, 7, 160 -> 7, 7, 320
                [6, 320, 1, 1],
            ]
        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        input_channel = _make_divisible(input_channel * width_mult, round_nearest)
        # The last feature width is never shrunk below 1280, even for width_mult < 1.
        self.last_channel = _make_divisible(last_channel * max(1.0, width_mult), round_nearest)

        # 224, 224, 3 -> 112, 112, 32
        features = [ConvBNReLU(3, input_channel, stride=2)]
        for t, c, n, s in inverted_residual_setting:
            output_channel = _make_divisible(c * width_mult, round_nearest)
            for i in range(n):
                # Only the first block of a stage may downsample.
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t))
                input_channel = output_channel
        # 7, 7, 320 -> 7, 7, 1280
        features.append(ConvBNReLU(input_channel, self.last_channel, kernel_size=1))
        self.features = nn.Sequential(*features)

        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
        )

        # Weight initialisation. The branch bodies follow the torchvision
        # MobileNetV2 reference implementation (zeros for biases, ones for
        # BatchNorm scales).
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out')
                if m.bias is not None:
                    nn.init.zeros_(m.bias)
            elif isinstance(m, nn.BatchNorm2d):
                nn.init.ones_(m.weight)
                nn.init.zeros_(m.bias)
            elif isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, 0, 0.01)
                nn.init.zeros_(m.bias)

    def forward(self, x):
        """Return class logits for a batch of images.

        Shape comments assume a batch of two 224x224 RGB images and a
        10-class head.
        """
        x = self.features(x)    # [2, 3, 224, 224] --> [2, 1280, 7, 7]
        x = x.mean([2, 3])      # global average pool: [2, 1280, 7, 7] --> [2, 1280]
        x = self.classifier(x)  # [2, 1280] --> [2, 10]
        return x

    def freeze_backbone(self):
        """Stop gradient updates for every parameter in ``self.features``."""
        for param in self.features.parameters():
            param.requires_grad = False

    def Unfreeze_backbone(self):
        """Re-enable gradient updates for every parameter in ``self.features``."""
        for param in self.features.parameters():
            param.requires_grad = True
def mobilenet_v2(pretrained=False, progress=True, num_classes=1000):
    """Build a MobileNetV2, optionally initialised from the downloaded weights.

    The network is always created with the default 1000-class head so the
    downloaded checkpoint fits; the head is replaced afterwards when a
    different ``num_classes`` is requested.

    Args:
        pretrained: If True, download (into ``./model_data``) and load the
            weights listed in ``model_urls``.
        progress: Passed to ``load_state_dict_from_url`` as its ``progress`` flag.
        num_classes: Output size of the classifier head.

    Returns:
        The constructed ``MobileNetV2`` model.
    """
    model = MobileNetV2()
    if pretrained:
        state_dict = load_state_dict_from_url(model_urls['mobilenet_v2'], model_dir='./model_data',
                                              progress=progress)
        model.load_state_dict(state_dict)

    if num_classes != 1000:
        # Fresh head with the same Dropout + Linear layout as the default classifier.
        model.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(model.last_channel, num_classes),
        )
    return model
if __name__ == '__main__':
    # Smoke test: push a random batch of two 224x224 RGB images through a 10-class model.
    x = torch.rand([2, 3, 224, 224])
    model = mobilenet_v2(num_classes=10)
    y = model(x)
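loss = (-(y_true*log(y_pred)).sum(dim=1)).mean()   # y_true 为 one-hot 标签,y_pred = softmax(x);先对类别求和,再对 batch 求平均
loss = (-x[class]+log(exp(x).sum(dim=1))).mean()   # 等价形式:x 为网络输出(logits),class 为真实类别的下标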
if __name__ == '__main__':
    # Four equivalent ways of computing the cross-entropy loss for the same batch.
    import torch
    import torch.nn as nn

    outputs = torch.tensor([[3.9383, 0.0983],
                            [0.0465, 5.9902]])
    targets = torch.Tensor([0, 1]).long()

    # method_1: built-in criterion applied to raw logits
    loss = nn.CrossEntropyLoss()(outputs, targets)
    print(loss)  # tensor(0.0119)

    # method_2: log-softmax followed by negative log-likelihood
    print(nn.NLLLoss()(nn.LogSoftmax(dim=1)(outputs), targets))

    # method_3: one-hot labels, loss = mean over batch of sum(-y_true * log(y_pred))
    y = torch.zeros_like(outputs)
    for i, j in enumerate(targets):
        y[i, j] = 1  # one_hot
    y_pred = torch.softmax(outputs, dim=1)
    print((-(y * torch.log(y_pred)).sum(dim=1)).mean())

    # method_4: loss = mean over batch of (-x[class] + log(sum(exp(x))))
    picked = outputs[torch.arange(len(targets)), targets]
    print((-picked + torch.log(torch.exp(outputs).sum(dim=1))).mean())
1. 设置参数
2. 加载模型
2.1 通过网页下载参数
2.2 如果不使用网页下载的预训练参数(pretrained = False),则对模型参数进行初始化。
2.3 迁移学习。a. 加载训练好的参数,取出未训练模型参数。b.取出模型参数和预训练模型参数shape相同的参数。c.把上一步取出的参数加载到未训练的模型上。
3. 读取数据及数据预处理
4. 设置优化器和学习率
5. 分批次训练数据
(1) 加载模型代码
assert backbone in ["mobilenet", "resnet50", "vgg16"]
# 1. pretrained = True: the model builder downloads the pretrained weights itself.
model = get_model_from_name[backbone](num_classes=num_classes,pretrained=pretrained)
# 2. pretrained = False: initialise the parameters instead.
if not pretrained:
    weights_init(model)
# 3. Transfer learning from a locally stored checkpoint.
model_path = 'model_data/mobilenet_catvsdog.pth'
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# a. Load the trained parameters and take the untrained model's parameters.
pretrained_dict = torch.load(model_path, map_location=device)
model_dict = model.state_dict()
# b. Keep only entries that exist in the model with an identical shape
#    (the membership test avoids a KeyError on unknown checkpoint keys).
pretrained_dict = {k: v for k, v in pretrained_dict.items()
                   if k in model_dict and np.shape(model_dict[k]) == np.shape(v)}
# c. Copy the matching parameters into the untrained model.
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
(2) 训练代码
import torch
import numpy as np
from torch import nn
from tqdm import tqdm
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from nets.mobilenet_ls import mobilenet_v2
from nets.resnet50_ls import resnet50
from nets.vgg16_ls import vgg16
from utils.utils_ls import weights_init
from utils.dataloader_ls import DataGenerator, detection_collate
# Maps a backbone name to the function that builds it.
get_model_from_name = {
    'mobilenet': mobilenet_v2,
    'resnet50': resnet50,
    'vgg16': vgg16,
}

# Per-backbone layer counts; not referenced by the training code in this file.
freeze_layers = {
    'mobilenet': 81,
    'resnet50': 173,
    'vgg16': 19,
}
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Args:
        optimizer: Object exposing ``param_groups``, an iterable of dicts that
            each contain an ``'lr'`` entry.

    Returns:
        The ``'lr'`` value of the first group, or ``None`` when there are no groups.
    """
    for param_group in optimizer.param_groups:
        return param_group['lr']
    return None
def get_classes(classes_path):
    """Read class names from a text file, one name per line.

    Args:
        classes_path: Path of the class-list file.

    Returns:
        List of the file's lines with surrounding whitespace removed.
    """
    with open(classes_path) as f:
        return [line.strip() for line in f]
def fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda):
    """Train for one epoch, run validation, and save a checkpoint.

    NOTE: this function reads the module-level names ``optimizer`` and
    ``model`` (both created in the ``__main__`` section of this script); they
    are not parameters.

    Args:
        net: Network to train (``model`` itself or a DataParallel wrapper).
        epoch: Zero-based index of the current epoch.
        epoch_size: Number of training batches to consume.
        epoch_size_val: Number of validation batches to consume.
        gen: Iterable yielding ``(images, targets)`` training batches; both are
            passed to ``torch.from_numpy``.
        genval: Iterable yielding validation batches in the same format.
        Epoch: Final epoch number, used only for progress display.
        cuda: If True, move each batch to the GPU.
    """
    total_loss = 0
    total_accuracy = 0
    val_total_loss = 0
    criterion = nn.CrossEntropyLoss()

    # Training: Dropout/BatchNorm must be in training mode.
    net.train()
    with tqdm(total=epoch_size, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            images, targets = batch
            with torch.no_grad():
                images = torch.from_numpy(images).type(torch.FloatTensor)
                targets = torch.from_numpy(targets).type(torch.FloatTensor).long()
                if cuda:
                    images = images.cuda()
                    targets = targets.cuda()

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            with torch.no_grad():
                accuracy = torch.mean((torch.argmax(F.softmax(outputs, dim=-1), dim=-1) == targets).type(torch.FloatTensor))
                total_accuracy += accuracy.item()

            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
                                'accuracy'  : total_accuracy / (iteration + 1),
                                'lr'        : get_lr(optimizer)})
            pbar.update(1)

    print('Start Validation')
    # Validation: eval mode so Dropout is disabled and BatchNorm statistics are not updated.
    net.eval()
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            images, targets = batch
            with torch.no_grad():
                images = torch.from_numpy(images).type(torch.FloatTensor)
                targets = torch.from_numpy(targets).type(torch.FloatTensor).long()
                if cuda:
                    images = images.cuda()
                    targets = targets.cuda()
                outputs = net(images)
                val_loss = criterion(outputs, targets)
                val_total_loss += val_loss.item()

            pbar.set_postfix(**{'total_loss': val_total_loss / (iteration + 1),
                                'lr'        : get_lr(optimizer)})
            pbar.update(1)
    print('Finish Validation')

    # Average over the number of batches actually requested (guarded against zero).
    mean_loss = total_loss / max(epoch_size, 1)
    mean_val_loss = val_total_loss / max(epoch_size_val, 1)
    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (mean_loss, mean_val_loss))
    print('Saving state, iter:', str(epoch + 1))
    torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' % ((epoch + 1), mean_loss, mean_val_loss))
if __name__ == '__main__':
    import os  # only needed by this entry point, to create the checkpoint directory

    log_dir = './logs/'
    backbone = 'mobilenet'
    input_shape = [224, 224, 3]
    Cuda = False
    pretrained = False

    classes_path = './model_data/cls_classes_ls.txt'
    class_names = get_classes(classes_path)  # e.g. ['cat', 'dog']
    num_classes = len(class_names)

    assert backbone in ["mobilenet", "resnet50", "vgg16"]
    model = get_model_from_name[backbone](num_classes=num_classes, pretrained=pretrained)
    if not pretrained:
        weights_init(model)

    # Optional: start from a local checkpoint to speed up training.
    # model_path = "model_data/Omniglot_vgg.pth"  # 'model_data/mobilenet_catvsdog.pth'
    # print('Loading weights into state dict...')
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # pretrained_dict = torch.load(model_path, map_location=device)
    # model_dict = model.state_dict()
    # pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
    # model_dict.update(pretrained_dict)
    # model.load_state_dict(model_dict)

    with open(r"./cls_train.txt", "r") as f:
        lines = f.readlines()
    # Shuffle with a fixed seed before splitting. If the annotation file is
    # grouped by class, the tail 10% used for validation would otherwise be a
    # single class.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * 0.1)
    num_train = len(lines) - num_val

    net = model.train()
    if Cuda:
        net = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        net = net.cuda()

    # fit_one_epoch saves its checkpoints under 'logs/'; make sure it exists.
    os.makedirs(log_dir, exist_ok=True)

    # Backbone features are generic, so training starts with the backbone
    # frozen: this is faster and protects the weights early on.
    #   Init_Epoch   - first epoch
    #   Freeze_Epoch - last epoch of the frozen stage
    #   Epoch        - total number of epochs
    # On OOM / insufficient GPU memory, reduce Batch_size.
    if True:
        # Batch_size should not be too small, or training quality suffers.
        lr = 1e-3
        Batch_size = 32    # 128
        Init_Epoch = 0     # 0
        Freeze_Epoch = 50  # 50

        optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
        lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

        train_dataset = DataGenerator(input_shape, lines[:num_train])
        val_dataset = DataGenerator(input_shape, lines[num_train:], False)
        gen = DataLoader(train_dataset, batch_size=Batch_size, num_workers=4, pin_memory=True,
                         drop_last=True, collate_fn=detection_collate)
        gen_val = DataLoader(val_dataset, batch_size=Batch_size, num_workers=4, pin_memory=True,
                             drop_last=True, collate_fn=detection_collate)

        epoch_size = train_dataset.get_len() // Batch_size
        epoch_size_val = val_dataset.get_len() // Batch_size
        if epoch_size == 0 or epoch_size_val == 0:
            raise ValueError("数据集过小,无法进行训练,请扩充数据集。")

        # Stage 1: train with the backbone frozen.
        # NOTE(review): freeze_backbone() is defined on MobileNetV2; confirm the
        # resnet50 / vgg16 models expose the same method before switching backbone.
        model.freeze_backbone()
        for epoch in range(Init_Epoch, Freeze_Epoch):
            fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, gen_val, Freeze_Epoch, Cuda)
            lr_scheduler.step()

    if True:
        # Batch_size should not be too small, or training quality suffers.
        lr = 1e-4
        Batch_size = 32    # 128
        Freeze_Epoch = 50  # 50
        Epoch = 100        # 100

        optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
        lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

        train_dataset = DataGenerator(input_shape, lines[:num_train])
        val_dataset = DataGenerator(input_shape, lines[num_train:], False)
        gen = DataLoader(train_dataset, batch_size=Batch_size, num_workers=2, pin_memory=True,
                         drop_last=True, collate_fn=detection_collate)
        gen_val = DataLoader(val_dataset, batch_size=Batch_size, num_workers=2, pin_memory=True,
                             drop_last=True, collate_fn=detection_collate)

        epoch_size = train_dataset.get_len() // Batch_size
        epoch_size_val = val_dataset.get_len() // Batch_size
        if epoch_size == 0 or epoch_size_val == 0:
            raise ValueError("数据集过小,无法进行训练,请扩充数据集。")

        # Stage 2: unfreeze the backbone and fine-tune the whole network.
        model.Unfreeze_backbone()
        for epoch in range(Freeze_Epoch, Epoch):
            fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, gen_val, Epoch, Cuda)
            lr_scheduler.step()
1. 分类实例化
2. 打开图片
3. 图片识别
from PIL import Image
from classification_ls import Classification
# 1. Instantiate the classifier once; it is reused for every image.
classification = Classification()

while True:
    img = input('Input image filename')
    # 2. Open the image; on failure report it and ask again.
    try:
        image = Image.open(img)
    except (OSError, ValueError):
        print('Open Error! Try again!')
        continue
    # 3. Classify the image and show the predicted class name.
    class_name = classification.detect_image(image)
    print(class_name)
1. 加载图片、加灰条、归一化
2. 加载模型,预测
3. 显示预测结果
import os, copy, torch
import numpy as np
from torch import nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
from nets.vgg16_ls import vgg16
from nets.resnet50_ls import resnet50
from nets.mobilenet_ls import mobilenet_v2
from utils.utils_ls import letterbox_image
# Maps a backbone name to the function that builds it.
get_model_from_name = {
    "vgg16": vgg16,
    "resnet50": resnet50,
    "mobilenet": mobilenet_v2,
}
def _preprocess_input(x):
    """Scale values in place with ``x / 127.5 - 1`` (maps 0..255 to -1..1).

    Args:
        x: Array supporting in-place float division and subtraction
            (e.g. a float32 NumPy array). It is modified in place.

    Returns:
        The same object ``x`` after scaling.
    """
    x /= 127.5
    x -= 1.
    return x
class Classification(object):
    """Loads a trained classifier and predicts the class of a single image."""

    # Default configuration; copied onto every instance by __init__.
    _defaults = {
        "cuda"         : False,
        "backbone"     : 'mobilenet',
        "input_shape"  : [224, 224, 3],
        "classes_path" : 'model_data/cls_classes.txt',
        "model_path"   : 'model_data/mobilenet_catvsdog.pth',
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default value named ``n``, or an error message string if unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Set up configuration, read the class names, and load the model.

        Args:
            **kwargs: Optional overrides for the entries of ``_defaults``.
        """
        self.__dict__.update(self._defaults)
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()

    def _get_class(self):
        """Return the class names listed, one per line, in ``self.classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def generate(self):
        """Build the backbone, load weights from ``self.model_path``, and switch to eval mode."""
        model_path = os.path.expanduser(self.model_path)
        self.num_classes = len(self.class_names)
        assert self.backbone in ["mobilenet", "resnet50", "vgg16"]
        self.model = get_model_from_name[self.backbone](num_classes=self.num_classes, pretrained=False)

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Load from the expanded path so a leading '~' in model_path works.
        state_dict = torch.load(model_path, map_location=device)
        self.model.load_state_dict(state_dict)
        # Inference only: disable Dropout and use BatchNorm running statistics.
        self.model = self.model.eval()

        if self.cuda:
            self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()
        print('{} model, and classes loaded.'.format(model_path))

    def detect_image(self, image):
        """Classify ``image``, display it with the prediction, and return the class name.

        Args:
            image: Image object accepted by ``letterbox_image`` (the callers in
                this file pass the result of ``PIL.Image.open``).

        Returns:
            The entry of ``self.class_names`` with the highest predicted probability.
        """
        # Keep an untouched copy for display.
        old_image = copy.deepcopy(image)
        # NOTE(review): letterbox_image is expected to resize/pad the image to
        # the model input size - confirm against utils_ls.
        crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
        photo = np.array(crop_img, dtype=np.float32)
        # Normalise, add a batch axis, then reorder NHWC -> NCHW.
        photo = np.reshape(_preprocess_input(photo), [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        photo = np.transpose(photo, (0, 3, 1, 2))

        with torch.no_grad():
            photo = torch.from_numpy(photo).type(torch.FloatTensor)
            if self.cuda:
                photo = photo.cuda()
            preds = torch.softmax(self.model(photo)[0], dim=-1).cpu().numpy()

        class_name = self.class_names[np.argmax(preds)]
        probability = np.max(preds)

        # Show the original image with the prediction as its title.
        plt.subplot(1, 1, 1)
        plt.imshow(np.array(old_image))
        plt.title('Class:%s Probability:%.3f' % (class_name, probability))
        plt.show()
        return class_name
if __name__ == '__main__':
    # Quick manual check: classify one sample image.
    from PIL import Image

    img = Image.open('img/cat.jpg')
    clas = Classification()
    class_name = clas.detect_image(img)
(1) 训练流程
1. 导入图片流,得到预测结果。
2. 根据预测值和真实值,计算正确预测的样本数。
3. Top1 = 正确预测的样本数/总样本
(2) 代码
''' 评价'''
import numpy as np
import torch
from PIL import Image
from torch.autograd import Variable
from classification_ls import Classification, _preprocess_input
from utils.utils_ls import letterbox_image
class top1_Classification(Classification):
    """Classifier variant whose ``detect_image`` returns the top-1 class index."""

    def detect_image(self, image):
        """Return the index of the most probable class for ``image``."""
        crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
        photo = np.array(crop_img, dtype=np.float32)
        # Normalise, add a batch axis, then reorder NHWC -> NCHW.
        photo = np.reshape(_preprocess_input(photo), [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        photo = np.transpose(photo, (0, 3, 1, 2))

        with torch.no_grad():
            photo = torch.from_numpy(photo).type(torch.FloatTensor)
            if self.cuda:
                photo = photo.cuda()
            preds = torch.softmax(self.model(photo)[0], dim=-1).cpu().numpy()

        arg_pred = np.argmax(preds)
        return arg_pred
def evaluteTop1(classfication, lines):
    """Compute top-1 accuracy over annotation lines.

    Args:
        classfication: Object whose ``detect_image(image)`` returns the
            predicted class index.
        lines: Annotation lines formatted as ``"<class_id>;<image_path>"``.

    Returns:
        Fraction of lines whose prediction equals the labelled class id, or
        ``0.0`` when ``lines`` is empty.
    """
    total = len(lines)
    if total == 0:
        return 0.0

    correct = 0
    for index, line in enumerate(lines):
        # Split once so the path may itself contain ';' or spaces.
        label_text, path_text = line.split(';', 1)
        y = int(label_text)
        with Image.open(path_text.strip()) as x:
            pred = classfication.detect_image(x)
        correct += int(pred == y)
        if index % 100 == 0:
            print("[%d/%d]" % (index, total))
    return correct / total
if __name__ == '__main__':
    # Evaluate top-1 accuracy on the test annotation file.
    classfication = top1_Classification()
    with open(r"./cls_test.txt", "r") as f:
        lines = f.readlines()
    top1 = evaluteTop1(classfication, lines)
    print("top-1 accuracy = %.2f%%" % (top1*100))
model_data/mobilenet_catvsdog.pth model, and classes loaded.
top-1 accuracy = 100.00%
Process finished with exit code 0
(1) 训练流程
1. 导入图片流,得到预测结果。按照概率对预测结果从大到小排列,取出前5个预测结果。
2. 如果前5个预测结果有预测正确的,作为预测正确,记录正确预测的样本数。
3. Top5 = 正确预测的样本数/总样本
(2) 代码
import numpy as np
import torch
from PIL import Image
from torch.autograd import Variable
from classification_ls import Classification, _preprocess_input
from utils.utils_ls import letterbox_image
class top5_Classification(Classification):
    """Classifier variant whose ``detect_image`` returns the five most probable class indices."""

    def detect_image(self, image):
        """Return up to five class indices for ``image``, most probable first."""
        crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
        photo = np.array(crop_img, dtype=np.float32)
        # Normalise, add a batch axis, then reorder NHWC -> NCHW.
        photo = np.reshape(_preprocess_input(photo), [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        photo = np.transpose(photo, (0, 3, 1, 2))

        with torch.no_grad():
            photo = torch.from_numpy(photo).type(torch.FloatTensor)
            if self.cuda:
                photo = photo.cuda()
            preds = torch.softmax(self.model(photo)[0], dim=-1).cpu().numpy()

        # argsort is ascending; reverse it to rank by descending probability.
        arg_pred = np.argsort(preds)[::-1]
        arg_pred_top5 = arg_pred[:5]
        return arg_pred_top5
def evaluteTop5(classfication, lines):
    """Compute top-5 accuracy over annotation lines.

    Args:
        classfication: Object whose ``detect_image(image)`` returns a
            collection of predicted class indices.
        lines: Annotation lines formatted as ``"<class_id>;<image_path>"``.

    Returns:
        Fraction of lines whose labelled class id is among the returned
        predictions, or ``0.0`` when ``lines`` is empty.
    """
    total = len(lines)
    if total == 0:
        return 0.0

    correct = 0
    for index, line in enumerate(lines):
        # Split once so the path may itself contain ';' or spaces.
        label_text, path_text = line.split(';', 1)
        y = int(label_text)
        with Image.open(path_text.strip()) as x:
            pred = classfication.detect_image(x)
        correct += int(y in pred)
        if index % 100 == 0:
            print("[%d/%d]" % (index, total))
    return correct / total
if __name__ == '__main__':
    # Evaluate top-5 accuracy on the test annotation file.
    classfication = top5_Classification()
    with open(r"./cls_test.txt", "r") as f:
        lines = f.readlines()
    top5 = evaluteTop5(classfication, lines)
    print("top-5 accuracy = %.2f%%" % (top5*100))
model_data/mobilenet_catvsdog.pth model, and classes loaded.
top-5 accuracy = 100.00%
Process finished with exit code 0