(1) 流程
文件夹:
datasets
    train
        cat
            cat0.jpg
            cat1.jpg
            ......
        dog
            dog0.jpg
            dog1.jpg
            ......
    test
        cat
            cat100.jpg
            cat345.jpg
            ......
        dog
            dog198.jpg
            dog209.jpg
            ......
'''
No1.
goals :
读取datasets/train/cat,datasets/train/dog 文件里分类物体的类别及图片地址,
并放在cls_train.txt中,对待test数据亦是如此。
inputs :
datasets/train/cat,datasets/train/dog ,datasets/test/cat,datasets/test/dog
outputs:
cls_train.txt,cls_test.txt.每个.txt文件里存放的是所有物体类别和图片地址。
eg:0;/Users/LS/cls_LS/datasets/test/cat/14.jpg
processes:
1. 分别遍历datasets里的train和test文件,获取cat和dog的文件名
2. 分别遍历train和test里的cat和dog文件,读取每个文件的图片名称,
如果是cat文件,cls_id为0,写入物体类别和图片名称。
注: PyTorch 的层(nn.Module)分两步使用:先用参数构造出层对象,再把输入张量传给该对象进行调用,因此会出现连续两个括号,例如 nn.Conv2d(3, 64, kernel_size=3)(x):第一个括号里写构造参数,第二个括号里写输入变量。
'''
(2) 代码
import os
# Class directory names; the position in this list is the integer label
# written to the label files (cat -> 0, dog -> 1).
classes = ["cat", "dog"]
# Dataset splits; each one is a sub-directory of datasets/.
sets = ["train", "test"]
def masklabels(classes, sets, prefix='cls_', root='datasets'):
    """Write one ``<prefix><split>.txt`` label file per dataset split.

    For every split in *sets*, scans ``<root>/<split>/<class>/`` for each
    class named in *classes* and writes one line per image in the form
    ``<class index>;<absolute image path>``. The class index is the position
    of the class name in *classes*.

    Directory entries that are not listed in *classes* (for example
    ``.DS_Store``) are skipped, as are files whose extension is not
    .jpg / .jpeg / .png (compared case-insensitively). Entries are processed
    in sorted order so the generated files are reproducible.

    Args:
        classes: Class directory names; list order defines the labels.
        sets: Split directory names, e.g. ``['train', 'test']``.
        prefix: Output file name prefix. The default produces
            ``cls_train.txt`` / ``cls_test.txt``.
        root: Dataset root directory, relative to the current directory.
    """
    wd = os.getcwd()
    image_exts = ('.jpg', '.png', '.jpeg')
    for split in sets:
        split_dir = os.path.join(root, split)
        # `with` closes the label file even if a directory listing fails.
        with open(prefix + split + '.txt', 'w') as list_file:
            for type_name in sorted(os.listdir(split_dir)):
                if type_name not in classes:
                    continue
                cls_id = classes.index(type_name)
                photos_path = os.path.join(split_dir, type_name)
                for photo_name in sorted(os.listdir(photos_path)):
                    postfix = os.path.splitext(photo_name)[1].lower()
                    if postfix not in image_exts:
                        continue
                    image_path = os.path.join(wd, photos_path, photo_name)
                    list_file.write('%d;%s\n' % (cls_id, image_path))
if __name__ == "__main__":
    # Script entry point: generate the label files for the module-level
    # class list and dataset splits.
    masklabels(classes, sets)
(1) 流程
'''
1. get_random_data()
(1) 对图像进行缩放并且进行长和宽的扭曲;
(2) 将图像多余的部分加上灰条。图像扭曲后,宽高发生变化,加上灰条后,图片的宽高仍是(224, 224)。
(3) 图像翻转
(4) 图像旋转
(5) 色域扭曲
'''
(2) 代码
import cv2
import numpy as np
from PIL import Image
from random import shuffle
import torch.utils.data as data
from utils.utils_ls import letterbox_image
def _preprocess_input(x):
# 图像数据归一化到0~1
x /= 127.5
x -= 1.
return x
def rand(a=0, b=1):
    """Return one random float drawn uniformly between *a* and *b*.

    Uses a single ``np.random.rand()`` draw, rescaled from [0, 1).
    """
    sample = np.random.rand()
    return sample * (b - a) + a
def get_random_data(image, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
    """Apply random training-time augmentation to one PIL image.

    Steps, in order:
      1. resize with a random aspect ratio and a random scale;
      2. paste the result at a random offset on a grey (128) canvas of the
         target size, so the output size is always ``input_shape``;
      3. horizontal flip with probability 0.5;
      4. with probability 0.5, rotate about the centre by an integer angle
         from ``np.random.randint(-15, 15)`` degrees, filling with grey;
      5. scale saturation and value in HSV space by a random gain.

    Args:
        image: PIL image; converted to RGB here.
        input_shape: ``(height, width)`` of the output.
        jitter: Both aspect-ratio factors are drawn from
            ``[1 - jitter, 1 + jitter)``.
        hue: Currently unused; the hue shift is disabled in the body.
        sat: Upper bound of the saturation gain (the gain or its reciprocal
            is applied, each with probability 0.5).
        val: Same as *sat*, for the value channel.

    Returns:
        float32 RGB array of shape ``(height, width, 3)``, scaled by 255.
    """
    image = image.convert("RGB")
    h, w = input_shape

    # 1. Random aspect ratio and scale. The order of the random draws is
    #    kept fixed so results stay reproducible under a given seed.
    ratio_num = rand(1 - jitter, 1 + jitter)
    ratio_den = rand(1 - jitter, 1 + jitter)
    new_ar = w / h * ratio_num / ratio_den
    scale = rand(.75, 1.25)
    if new_ar < 1:
        nh = int(scale * h)
        nw = int(nh * new_ar)
    else:
        nw = int(scale * w)
        nh = int(nw / new_ar)
    resized = image.resize((nw, nh), Image.BICUBIC)

    # 2. Paste at a random offset on a grey canvas of the target size.
    dx = int(rand(0, w - nw))
    dy = int(rand(0, h - nh))
    canvas = Image.new('RGB', (w, h), (128, 128, 128))
    canvas.paste(resized, (dx, dy))
    image = canvas

    # 3. Random horizontal flip.
    if rand() < .5:
        image = image.transpose(Image.FLIP_LEFT_RIGHT)

    # 4. Random rotation about the image centre; from here on `image` is a
    #    NumPy array instead of a PIL image.
    if rand() < .5:
        angle = np.random.randint(-15, 15)
        center = (w / 2, h / 2)
        rotation = cv2.getRotationMatrix2D(center, angle, 1)
        image = cv2.warpAffine(np.array(image), rotation, (w, h),
                               borderValue=[128, 128, 128])

    # 5. Colour jitter in HSV space (saturation and value only).
    sat_gain = rand(1, sat) if rand() < .5 else 1 / rand(1, sat)
    val_gain = rand(1, val) if rand() < .5 else 1 / rand(1, val)
    hsv = cv2.cvtColor(np.array(image, np.float32) / 255, cv2.COLOR_RGB2HSV)
    hsv[..., 1] *= sat_gain
    hsv[..., 2] *= val_gain
    # Clamp back into the valid ranges: H <= 360, S and V <= 1, all >= 0.
    hsv[hsv[:, :, 0] > 360, 0] = 360
    hsv[:, :, 1:][hsv[:, :, 1:] > 1] = 1
    hsv[hsv < 0] = 0
    return cv2.cvtColor(hsv, cv2.COLOR_HSV2RGB) * 255
class DataGenerator(data.Dataset):
    """Dataset over ``<label>;<image path>`` annotation lines.

    Each item is ``(image, label)``: the image is a float32 array in
    channel-first layout, rescaled by ``_preprocess_input``, and the label is
    the integer before the ``;``.
    """

    def __init__(self, input_shape, lines, random=True):
        """
        Args:
            input_shape: Sequence whose first two entries are the target
                height and width.
            lines: List of annotation strings. It is shuffled in place
                whenever item 0 is requested.
            random: If true, images go through ``get_random_data``
                (random augmentation); otherwise through ``letterbox_image``.
        """
        self.input_shape = input_shape
        self.lines = lines
        self.random = random

    def __len__(self):
        return len(self.lines)

    def get_len(self):
        """Return the number of annotation lines (same as ``len(self)``)."""
        return len(self.lines)

    @staticmethod
    def _parse_line(line):
        """Split one annotation line into ``(label, image_path)``.

        Everything after the first ``;`` is the path, with surrounding
        whitespace removed, so paths containing spaces stay intact.

        Raises:
            ValueError: If the line has no ``;`` separator or the label is
                not an integer. ValueError is used rather than IndexError
                because an IndexError escaping ``__getitem__`` would silently
                end iteration over the dataset.
        """
        label, sep, path = line.partition(';')
        if not sep:
            raise ValueError('annotation line has no ";" separator: %r' % (line,))
        return int(label), path.strip()

    def __getitem__(self, index):
        # Behaviour kept from the original: reshuffle the annotation list in
        # place every time the first item is requested.
        if index == 0:
            shuffle(self.lines)
        y, image_path = self._parse_line(self.lines[index])
        img = Image.open(image_path)
        target_hw = [self.input_shape[0], self.input_shape[1]]
        if self.random:
            img = get_random_data(img, target_hw)
        else:
            img = letterbox_image(img, target_hw)
        img = np.array(img).astype(np.float32)
        img = _preprocess_input(img)
        img = np.transpose(img, [2, 0, 1])  # HWC -> CHW
        return img, y
def detection_collate(batch):
    """Stack ``(image, label)`` samples into two NumPy arrays.

    Args:
        batch: Iterable of ``(image, label)`` pairs.

    Returns:
        ``(images, targets)``: the images and the labels, each converted
        with ``np.array`` in sample order.
    """
    # Walk `batch` exactly once: it may be a one-shot iterator, or an object
    # that produces a different sample every time an item is accessed.
    samples = [(img, y) for img, y in batch]
    images = np.array([img for img, _ in samples])
    targets = np.array([y for _, y in samples])
    return images, targets
if __name__ == "__main__":
    # Smoke test: build a dataset from the first six annotation lines and
    # collate it into arrays.
    input_shape = [224, 224, 3]
    with open(r"./cls_train.txt", "r") as f:
        lines = list(f)
    # 10% / 90% split sizes; computed for reference only, not used below.
    num_val = int(len(lines) * 0.1)
    num_train = len(lines) - num_val
    train_dataset = DataGenerator(input_shape, lines[:6])
    images, targets = detection_collate(train_dataset)
    print(images.shape, targets.shape)
    # Output recorded by the author:
    #   (6, 3, 224, 224) (6,)
(1) 思路
''' model
1. VGG网络构架:
features(x) + avgpool(x) + flatten(x, 1)+ classifier(x)
2. 代码思路:
(1)features(x):features = make_layers(cfgs['D'])
[Conv2d(k=3,s=1) + (BN) + ReLU + MaxPool2d(k=2,s=2)] * 5
[b,3,224,224] -> [b,64,224,224]-> [b,64,112,112] -> [b,128,112,112] -> [b,128,56,56] -> [b,256,56,56]->
[b,256,28,28] -> [b,512,28,28] -> [b,512,14,14] -> [b,512,14,14] -> [b,512,7,7]
(2)avgpool(x): avgpool = AdaptiveAvgPool2d((7,7))
[b,512,7,7] -> [b,512,7,7]
(3)flatten(x, 1): [b,512,7,7] -> [b,25088]
(4)classifier(x): classifier = [Linear + ReLU + Dropout]*2 + Linear
[b,25088] -> [b,4096] -> [b,4096] -> [b,1000]
(5)_initialize_weights
3.vgg16
下载模型参数后,又重新定义分类层,实现迁移学习,
可以在原有模型参数基础上,训练自有的数据。
VGG(
(features): Sequential(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU(inplace=True)
(3): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(4): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(5): ReLU(inplace=True)
(6): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(7): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(9): ReLU(inplace=True)
(10): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(11): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU(inplace=True)
(13): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(14): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(15): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(16): ReLU(inplace=True)
(17): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(18): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(19): ReLU(inplace=True)
(20): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(21): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(22): ReLU(inplace=True)
(23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(24): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(25): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(26): ReLU(inplace=True)
(27): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(28): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(29): ReLU(inplace=True)
(30): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(31): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(32): ReLU(inplace=True)
(33): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
(34): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(35): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(36): ReLU(inplace=True)
(37): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(38): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(39): ReLU(inplace=True)
(40): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(41): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(42): ReLU(inplace=True)
(43): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
)
(avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
(classifier): Sequential(
(0): Linear(in_features=25088, out_features=4096, bias=True)
(1): ReLU(inplace=True)
(2): Dropout(p=0.5, inplace=False)
(3): Linear(in_features=4096, out_features=4096, bias=True)
(4): ReLU(inplace=True)
(5): Dropout(p=0.5, inplace=False)
(6): Linear(in_features=4096, out_features=10, bias=True)
)
)
'''
(2) 代码
vgg16网络架构相对简单,但参数量非常大。它在刚出现时很有价值:用3x3的卷积核代替大的卷积核,两个3x3卷积堆叠的感受野相当于一个5x5的卷积核,既减少了参数量,也变相加深了网络深度。通过设置 cfgs 的方式,可以让模型细化成不同的版本。通过 vgg16 的学习,可以掌握网络架构、模型参数初始化、冻结参数、加载参数、改变分类数目等方法。
import torch
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
# URL of the vgg16 checkpoint file.
model_urls = dict(vgg16='https://download.pytorch.org/models/vgg16-397923af.pth')

# Layer plans consumed by make_layers: an int is the output channel count of
# a 3x3 convolution, 'M' is a 2x2 max-pool.
cfgs = {
    'D': [
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
}
def make_layers(cfg, batch_norm=True):
    """Build the convolutional feature extractor described by *cfg*.

    Args:
        cfg: Sequence in which an int adds a 3x3 convolution (padding 1) with
            that many output channels followed by ReLU, and ``'M'`` adds a
            2x2 max-pool with stride 2. The first convolution takes 3 input
            channels.
        batch_norm: If true, a BatchNorm2d layer is inserted between each
            convolution and its ReLU.

    Returns:
        ``nn.Sequential`` holding the layers in order.
    """
    modules = []
    channels = 3
    for item in cfg:
        if item == 'M':
            modules.append(nn.MaxPool2d(kernel_size=2, stride=2))
            continue
        modules.append(nn.Conv2d(channels, item, kernel_size=3, padding=1))
        if batch_norm:
            modules.append(nn.BatchNorm2d(item))
        modules.append(nn.ReLU(True))
        channels = item
    return nn.Sequential(*modules)
class VGG(nn.Module):
    """VGG classifier: feature extractor -> 7x7 adaptive average pool ->
    flatten -> three-layer fully connected head.
    """

    def __init__(self, features, num_classes=1000, init_weights=True):
        """
        Args:
            features: Module producing the convolutional feature maps; the
                head expects 512 channels (``512 * 7 * 7`` inputs after
                pooling).
            num_classes: Size of the final linear layer.
            init_weights: If true, run ``_initialize_weights`` on all layers.
        """
        super().__init__()
        self.features = features
        self.avgpool = nn.AdaptiveAvgPool2d((7, 7))
        head = [
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)
        if init_weights:
            self._initialize_weights()

    def forward(self, x):
        pooled = self.avgpool(self.features(x))
        flat = torch.flatten(pooled, 1)
        return self.classifier(flat)

    def _initialize_weights(self):
        """Kaiming-normal conv weights, unit BatchNorm scale, N(0, 0.01)
        linear weights; every bias is set to zero."""
        for module in self.modules():
            if isinstance(module, nn.Linear):
                nn.init.normal_(module.weight, 0, 0.01)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.BatchNorm2d):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')
                if module.bias is not None:
                    nn.init.constant_(module.bias, 0)

    def _set_features_trainable(self, trainable):
        # Shared implementation of freeze_backbone / Unfreeze_backbone.
        for param in self.features.parameters():
            param.requires_grad = trainable

    def freeze_backbone(self):
        """Stop gradient updates for every parameter of ``self.features``."""
        self._set_features_trainable(False)

    def Unfreeze_backbone(self):
        """Re-enable gradient updates for every parameter of ``self.features``."""
        self._set_features_trainable(True)
def vgg16(pretrained=False, progress=True, num_classes=1000, batch_norm=True):
    """Build a VGG16 (configuration ``'D'``) classifier.

    Args:
        pretrained: If true, download the matching checkpoint (cached under
            ``./model_data``) and load it into the model.
        progress: Show a progress bar while downloading.
        num_classes: Number of output classes. If it is not 1000, the whole
            classifier head is replaced by a new, default-initialised one
            after any checkpoint has been loaded, so only the convolutional
            features keep the loaded weights.
        batch_norm: Build the feature extractor with BatchNorm layers
            (the default, matching ``make_layers``) or without them.

    Returns:
        The constructed ``VGG`` model.
    """
    # Checkpoint for the BatchNorm variant. The 'vgg16' entry of model_urls
    # is the plain (no-BatchNorm) network, whose layer indices and tensor
    # shapes do not match a BatchNorm model; loading it there fails on a
    # size mismatch even with strict=False.
    vgg16_bn_url = 'https://download.pytorch.org/models/vgg16_bn-6c64b313.pth'

    model = VGG(make_layers(cfgs['D'], batch_norm=batch_norm))
    if pretrained:
        # Imported locally from torch.hub: newer torchvision releases no
        # longer provide torchvision.models.utils.
        from torch.hub import load_state_dict_from_url
        url = vgg16_bn_url if batch_norm else model_urls['vgg16']
        state_dict = load_state_dict_from_url(url,
                                              model_dir='./model_data',
                                              progress=progress)
        model.load_state_dict(state_dict, strict=False)
    if num_classes != 1000:
        model.classifier = nn.Sequential(
            nn.Linear(512 * 7 * 7, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(True),
            nn.Dropout(),
            nn.Linear(4096, num_classes))
    return model
if __name__ == "__main__":
    # Smoke test: push a batch of two random 224x224 RGB images through a
    # 10-class VGG16 and print the output shape.
    inputs = torch.rand([2, 3, 224, 224])
    net = vgg16(num_classes=10)
    logits = net(inputs)
    print(logits.shape)
(1) 思路
'''
1. ResNet :
1.1 主要模块:ConvBlock + IdentityBlock
ConvBlock : downsample(x) + [(cnv(1x1)+bn+relu) + (cnv(3x3)+bn+relu) + (cnv(1x1)+bn)],相加之后再做 relu
IdentityBlock : x + [(cnv(1x1)+bn+relu) + (cnv(3x3)+bn+relu) + (cnv(1x1)+bn)],相加之后再做 relu
layer : ConvBlock + IdentityBlock * n
1.2 网络结构:
(cnv(7x7)+bn+relu+maxpool) + layer*4 + avgpool + fc
2. resnet50 :
2.1 流程:
model -> pretrained -> num_classes
2.2 网络结构:
ResNet(
(conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
(layer1): Sequential(
(0): Bottleneck(
(conv1): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(256, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(64, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer2): Sequential(
(0): Bottleneck(
(conv1): Conv2d(256, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(3): Bottleneck(
(conv1): Conv2d(512, 128, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(128, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer3): Sequential(
(0): Bottleneck(
(conv1): Conv2d(512, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(512, 1024, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(3): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(4): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(5): Bottleneck(
(conv1): Conv2d(1024, 256, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(256, 1024, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(1024, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(layer4): Sequential(
(0): Bottleneck(
(conv1): Conv2d(1024, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
(downsample): Sequential(
(0): Conv2d(1024, 2048, kernel_size=(1, 1), stride=(2, 2), bias=False)
(1): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(1): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
(2): Bottleneck(
(conv1): Conv2d(2048, 512, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn1): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv2): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
(bn2): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(conv3): Conv2d(512, 2048, kernel_size=(1, 1), stride=(1, 1), bias=False)
(bn3): BatchNorm2d(2048, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(relu): ReLU(inplace=True)
)
)
(avgpool): AdaptiveAvgPool2d(output_size=(1, 1))
(fc): Linear(in_features=2048, out_features=2, bias=True)
)
'''
(2) 代码
import torch
import torch.nn as nn
from torchvision.models.utils import load_state_dict_from_url
# URL of the resnet50 checkpoint file.
model_urls = dict(resnet50='https://download.pytorch.org/models/resnet50-19c8e357.pth')
def conv3x3(in_planes, out_planes, stride=1, groups=1, dilation=1):
    """Return a bias-free 3x3 convolution whose padding equals its dilation."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=3,
        stride=stride,
        padding=dilation,
        dilation=dilation,
        groups=groups,
        bias=False,
    )
def conv1x1(in_planes, out_planes, stride=1):
    """Return a bias-free 1x1 convolution."""
    return nn.Conv2d(
        in_channels=in_planes,
        out_channels=out_planes,
        kernel_size=1,
        stride=stride,
        bias=False,
    )
class Bottleneck(nn.Module):
    """Residual bottleneck block: 1x1 conv -> 3x3 conv -> 1x1 conv, each
    followed by a norm layer, added to a shortcut and passed through ReLU.
    """

    # The block outputs ``planes * expansion`` channels.
    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None, groups=1,
                 base_width=64, dilation=1, norm_layer=None):
        """
        Args:
            inplanes: Input channel count.
            planes: Base channel count; the output has
                ``planes * expansion`` channels.
            stride: Stride of the 3x3 convolution.
            downsample: Optional module applied to the input to form the
                shortcut; the raw input is used when it is None.
            groups: Groups of the 3x3 convolution.
            base_width: Scales the inner width as ``planes * base_width / 64``.
            dilation: Dilation (and padding) of the 3x3 convolution.
            norm_layer: Norm layer factory; ``nn.BatchNorm2d`` when None.
        """
        super().__init__()
        norm_layer = nn.BatchNorm2d if norm_layer is None else norm_layer
        width = int(planes * (base_width / 64.)) * groups
        out_planes = planes * self.expansion
        self.conv1 = conv1x1(inplanes, width)
        self.bn1 = norm_layer(width)
        self.conv2 = conv3x3(width, width, stride, groups, dilation)
        self.bn2 = norm_layer(width)
        self.conv3 = conv1x1(width, out_planes)
        self.bn3 = norm_layer(out_planes)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        # No ReLU after the last norm: activation happens after the addition.
        out = self.bn3(self.conv3(out))
        shortcut = x if self.downsample is None else self.downsample(x)
        out += shortcut
        return self.relu(out)
class ResNet(nn.Module):
    """ResNet classifier: 7x7 stem conv + max-pool, four residual stages,
    global average pool and one linear layer.

    Shape trace for a ``[1, 3, 214, 214]`` input with Bottleneck blocks:
    stem conv ``[1, 64, 107, 107]`` -> max-pool ``[1, 64, 54, 54]`` ->
    layer1 ``[1, 256, 54, 54]`` -> layer2 ``[1, 512, 27, 27]`` ->
    layer3 ``[1, 1024, 14, 14]`` -> layer4 ``[1, 2048, 7, 7]`` ->
    avgpool ``[1, 2048, 1, 1]`` -> flatten ``[1, 2048]`` ->
    fc ``[1, num_classes]``.
    """

    def __init__(self, block, layers, num_classes=1000, zero_init_residual=False,
                 groups=1, width_per_group=64, replace_stride_with_dilation=None,
                 norm_layer=None):
        """
        Args:
            block: Residual block class; must expose ``expansion``.
            layers: Number of blocks in each of the four stages.
            num_classes: Output size of the final linear layer.
            zero_init_residual: If true, zero the last norm scale (``bn3``)
                of every Bottleneck.
            groups: Forwarded to each block.
            width_per_group: Forwarded to each block as ``base_width``.
            replace_stride_with_dilation: Three flags, one per stage 2-4;
                a true flag turns that stage's stride into dilation.
            norm_layer: Norm layer factory; ``nn.BatchNorm2d`` when None.

        Raises:
            ValueError: If ``replace_stride_with_dilation`` does not have
                exactly three elements.
        """
        super().__init__()
        if norm_layer is None:
            norm_layer = nn.BatchNorm2d
        self._norm_layer = norm_layer
        self.inplanes = 64
        self.dilation = 1
        if replace_stride_with_dilation is None:
            replace_stride_with_dilation = [False, False, False]
        if len(replace_stride_with_dilation) != 3:
            raise ValueError("replace_stride_with_dilation should be None "
                             "or a 3-element tuple, got {}".format(replace_stride_with_dilation))
        self.block = block
        self.groups = groups
        self.base_width = width_per_group

        # Stem.
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = norm_layer(self.inplanes)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Residual stages; stages 2-4 halve the resolution unless dilated.
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2,
                                       dilate=replace_stride_with_dilation[0])
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2,
                                       dilate=replace_stride_with_dilation[1])
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2,
                                       dilate=replace_stride_with_dilation[2])

        # Classification head.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for module in self.modules():
            if isinstance(module, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(module.weight, 1)
                nn.init.constant_(module.bias, 0)
            elif isinstance(module, nn.Conv2d):
                nn.init.kaiming_normal_(module.weight, mode='fan_out', nonlinearity='relu')

        if zero_init_residual:
            for module in self.modules():
                if isinstance(module, Bottleneck):
                    nn.init.constant_(module.bn3.weight, 0)

    def _make_layer(self, block, planes, blocks, stride=1, dilate=False):
        """Build one stage of ``blocks`` residual blocks.

        The first block applies ``stride`` and gets a 1x1-conv + norm
        shortcut when the stride or the channel count changes; the remaining
        blocks keep the shape and use the plain identity shortcut.
        """
        norm_layer = self._norm_layer
        previous_dilation = self.dilation
        if dilate:
            self.dilation *= stride
            stride = 1
        out_planes = planes * block.expansion

        downsample = None
        if stride != 1 or self.inplanes != out_planes:
            downsample = nn.Sequential(
                conv1x1(self.inplanes, out_planes, stride),
                norm_layer(out_planes),
            )

        # First block: may change stride / channel count.
        stage = [block(self.inplanes, planes, stride, downsample, self.groups,
                       self.base_width, previous_dilation, norm_layer)]
        self.inplanes = out_planes
        # Remaining blocks: identity shortcut.
        for _ in range(1, blocks):
            stage.append(block(self.inplanes, planes, groups=self.groups,
                               base_width=self.base_width, dilation=self.dilation,
                               norm_layer=norm_layer))
        return nn.Sequential(*stage)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))
        for stage in (self.layer1, self.layer2, self.layer3, self.layer4):
            x = stage(x)
        x = torch.flatten(self.avgpool(x), 1)
        return self.fc(x)

    def _set_backbone_trainable(self, trainable):
        # Shared implementation of freeze_backbone / Unfreeze_backbone.
        backbone = [self.conv1, self.bn1, self.layer1, self.layer2, self.layer3, self.layer4]
        for module in backbone:
            for param in module.parameters():
                param.requires_grad = trainable

    def freeze_backbone(self):
        """Disable gradients for the stem conv/norm and all four stages."""
        self._set_backbone_trainable(False)

    def Unfreeze_backbone(self):
        """Re-enable gradients for the stem conv/norm and all four stages."""
        self._set_backbone_trainable(True)
def resnet50(pretrained=False, progress=False, num_classes=1000):
    """Build a ResNet-50 (Bottleneck blocks, stage depths 3/4/6/3).

    Args:
        pretrained: When true, download the ``model_urls['resnet50']`` weights
            into ``./model_data`` and load them into the model.
        progress: Forwarded to ``load_state_dict_from_url``.
        num_classes: Size of the classification head. Any value other than
            1000 replaces ``fc`` with a freshly initialised linear layer
            (after the optional weight load).

    Returns:
        The constructed model.
    """
    net = ResNet(Bottleneck, [3, 4, 6, 3])

    if pretrained:
        weights = load_state_dict_from_url(
            model_urls['resnet50'],
            model_dir='./model_data',
            progress=progress,
        )
        net.load_state_dict(weights)

    if num_classes == 1000:
        return net

    # The stock head has 1000 outputs; swap it for one of the requested size.
    net.fc = nn.Linear(512 * net.block.expansion, num_classes)
    return net
if __name__ == '__main__':
    # Smoke test: push one random 214x214 RGB image through a 10-class ResNet-50.
    sample = torch.rand([1, 3, 214, 214])
    net = resnet50(num_classes=10)
    logits = net(sample)
'''
torch.Size([1, 10])
Process finished with exit code 0
'''
(1) 思路
'''
1. MobileNetV2:
1.1 主要结构
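InvertedResidual : (DWConv(3x3)BNReLU6 --> Conv(1x1)BN)                           # expand_ratio=1,无残差
(DWConv(3x3)BNReLU6 --> Conv(1x1)BN + x)                                          # expand_ratio=1,stride=1 且 inp==oup
(Conv(1x1)BNReLU6 --> DWConv(3x3)BNReLU6 --> Conv(1x1)BN)                         # expand_ratio!=1,无残差
(Conv(1x1)BNReLU6 --> DWConv(3x3)BNReLU6 --> Conv(1x1)BN + x)                     # expand_ratio!=1,stride=1 且 inp==oup
注: DWConv 表示 groups=通道数 的深度卷积;最后的 Conv(1x1) 只接 BN,不接激活函数(线性瓶颈)。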
1.2 网络构架
net : features(x) + x.mean + classifier(x)
features(x) : ConvBNReLU + InvertedResidual*17(共7组,每组重复次数依次为 1,2,3,4,3,3,1) + ConvBNReLU
x.mean : x.mean([2,3])
classifier(x) : Dropout + Linear
2. mobilenet_v2
2.1 流程
(1)导入模型。(2)导入参数。(3)修改检测类别数目。
MobileNetV2(
(features): Sequential(
(0): ConvBNReLU(
(0): Conv2d(3, 32, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=32, bias=False)
(1): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): Conv2d(32, 16, kernel_size=(1, 1), stride=(1, 1), bias=False)
(2): BatchNorm2d(16, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(2): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(16, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(96, 96, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=96, bias=False)
(1): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(96, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(3): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(144, 24, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(24, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(4): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(24, 144, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(144, 144, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=144, bias=False)
(1): BatchNorm2d(144, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(144, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(5): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(6): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(192, 32, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(32, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(7): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(32, 192, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(192, 192, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=192, bias=False)
(1): BatchNorm2d(192, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(8): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(9): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(10): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 64, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(11): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(64, 384, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(384, 384, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=384, bias=False)
(1): BatchNorm2d(384, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(384, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(12): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(13): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(576, 96, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(96, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(14): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(96, 576, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(576, 576, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1), groups=576, bias=False)
(1): BatchNorm2d(576, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(576, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(15): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(16): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 160, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(160, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(17): InvertedResidual(
(conv): Sequential(
(0): ConvBNReLU(
(0): Conv2d(160, 960, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(1): ConvBNReLU(
(0): Conv2d(960, 960, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), groups=960, bias=False)
(1): BatchNorm2d(960, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
(2): Conv2d(960, 320, kernel_size=(1, 1), stride=(1, 1), bias=False)
(3): BatchNorm2d(320, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
)
)
(18): ConvBNReLU(
(0): Conv2d(320, 1280, kernel_size=(1, 1), stride=(1, 1), bias=False)
(1): BatchNorm2d(1280, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): ReLU6(inplace=True)
)
)
(classifier): Sequential(
(0): Dropout(p=0.2, inplace=False)
(1): Linear(in_features=1280, out_features=10, bias=True)
)
)
Process finished with exit code 0
'''
(2) 代码
import torch
from torch import nn
from torchvision.models.utils import load_state_dict_from_url
# Public names exported by this module.
__all__ = ['MobileNetV2', 'mobilenet_v2']
# Download location of the pretrained weights, keyed by architecture name;
# read by mobilenet_v2() when pretrained=True.
model_urls = {
'mobilenet_v2': 'https://download.pytorch.org/models/mobilenet_v2-b0353104.pth',
}
def _make_divisible(v, divisor, min_value=None):
    """Round a channel count to a multiple of ``divisor``.

    Args:
        v: Requested (possibly fractional) channel count.
        divisor: The result is a multiple of this value.
        min_value: Lower bound for the result; defaults to ``divisor``.

    Returns:
        ``v`` rounded to the nearest multiple of ``divisor`` (never below
        ``min_value``), bumped up by one ``divisor`` if rounding would
        otherwise shrink ``v`` by more than 10%.
    """
    lower_bound = divisor if min_value is None else min_value
    nearest = int(v + divisor / 2) // divisor * divisor
    channels = max(lower_bound, nearest)
    # Guard against rounding down by more than 10% of the requested width.
    return channels + divisor if channels < 0.9 * v else channels
class ConvBNReLU(nn.Sequential):
    """Bias-free Conv2d followed by BatchNorm2d and an in-place ReLU6.

    The padding is ``(kernel_size - 1) // 2``, so odd kernels keep the spatial
    size at stride 1.
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        same_pad = (kernel_size - 1) // 2
        conv = nn.Conv2d(
            in_planes,
            out_planes,
            kernel_size=kernel_size,
            stride=stride,
            padding=same_pad,
            groups=groups,
            bias=False,
        )
        norm = nn.BatchNorm2d(out_planes)
        activation = nn.ReLU6(inplace=True)
        super().__init__(conv, norm, activation)
class InvertedResidual(nn.Module):
    """MobileNetV2 block: optional 1x1 expansion, 3x3 depthwise conv, 1x1 projection.

    The projection is a plain Conv2d + BatchNorm2d with no activation. A skip
    connection is added only when ``stride == 1`` and ``inp == oup``.

    Args:
        inp: Input channels.
        oup: Output channels.
        stride: Stride of the depthwise conv; must be 1 or 2.
        expand_ratio: Hidden width is ``round(inp * expand_ratio)``; a ratio
            of 1 skips the expansion conv.
    """

    def __init__(self, inp, oup, stride, expand_ratio):
        super().__init__()
        self.stride = stride
        assert stride in [1, 2]

        expanded = int(round(inp * expand_ratio))
        self.use_res_connect = self.stride == 1 and inp == oup

        # Expansion stage is omitted when it would not widen the tensor.
        stages = [] if expand_ratio == 1 else [ConvBNReLU(inp, expanded, kernel_size=1)]
        stages += [
            # depthwise: one group per channel
            ConvBNReLU(expanded, expanded, stride=stride, groups=expanded),
            # linear projection: no activation after the BatchNorm
            nn.Conv2d(expanded, oup, 1, 1, 0, bias=False),
            nn.BatchNorm2d(oup),
        ]
        self.conv = nn.Sequential(*stages)

    def forward(self, x):
        """Apply the block, adding the input back when the skip path is enabled."""
        if not self.use_res_connect:
            return self.conv(x)
        return x + self.conv(x)
class MobileNetV2(nn.Module):
    """MobileNetV2 classifier: conv stem, inverted-residual stack, 1x1 conv, pooled linear head.

    Args:
        num_classes: Output size of the classifier.
        width_mult: Channel multiplier applied to every stage width.
        inverted_residual_setting: Sequence of ``[t, c, n, s]`` rows
            (expansion ratio, output channels, repeats, stride of the first
            repeat). ``None`` selects the standard seven-stage layout.
        round_nearest: Channel counts are rounded to a multiple of this value.

    Raises:
        ValueError: If the setting is empty or its first row does not have
            four entries.
    """

    def __init__(self, num_classes=1000, width_mult=1.0, inverted_residual_setting=None, round_nearest=8):
        super().__init__()

        if inverted_residual_setting is None:
            # Rows are [t, c, n, s]; the trailing comments give the feature-map
            # size before -> after each stage for a 224x224 input.
            inverted_residual_setting = [
                [1, 16, 1, 1],   # 112,112,32 -> 112,112,16
                [6, 24, 2, 2],   # 112,112,16 -> 56,56,24
                [6, 32, 3, 2],   # 56,56,24   -> 28,28,32
                [6, 64, 4, 2],   # 28,28,32   -> 14,14,64
                [6, 96, 3, 1],   # 14,14,64   -> 14,14,96
                [6, 160, 3, 2],  # 14,14,96   -> 7,7,160
                [6, 320, 1, 1],  # 7,7,160    -> 7,7,320
            ]

        if len(inverted_residual_setting) == 0 or len(inverted_residual_setting[0]) != 4:
            raise ValueError("inverted_residual_setting should be non-empty "
                             "or a 4-element list, got {}".format(inverted_residual_setting))

        channels = _make_divisible(32 * width_mult, round_nearest)
        # The last conv is never narrower than 1280 channels, even for width_mult < 1.
        self.last_channel = _make_divisible(1280 * max(1.0, width_mult), round_nearest)

        # Stem: 224,224,3 -> 112,112,32
        stack = [ConvBNReLU(3, channels, stride=2)]
        for expand, width, repeats, first_stride in inverted_residual_setting:
            stage_out = _make_divisible(width * width_mult, round_nearest)
            for idx in range(repeats):
                # Only the first repeat of a stage may downsample.
                step = first_stride if idx == 0 else 1
                stack.append(InvertedResidual(channels, stage_out, step, expand_ratio=expand))
                channels = stage_out
        # 7,7,320 -> 7,7,1280
        stack.append(ConvBNReLU(channels, self.last_channel, kernel_size=1))

        self.features = nn.Sequential(*stack)
        self.classifier = nn.Sequential(
            nn.Dropout(0.2),
            nn.Linear(self.last_channel, num_classes),
        )
        self._reset_parameters()

    def _reset_parameters(self):
        """Initialise conv, batch-norm and linear layers in module-traversal order."""
        for layer in self.modules():
            if isinstance(layer, nn.Conv2d):
                nn.init.kaiming_normal_(layer.weight, mode='fan_out')
                if layer.bias is not None:
                    nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.BatchNorm2d):
                nn.init.ones_(layer.weight)
                nn.init.zeros_(layer.bias)
            elif isinstance(layer, nn.Linear):
                nn.init.normal_(layer.weight, 0, 0.01)
                nn.init.zeros_(layer.bias)

    def forward(self, x):
        """Return class scores for a batch of images.

        Author's shape trace for a [2, 3, 224, 224] input and 10 classes:
        features -> [2, 1280, 7, 7], spatial mean -> [2, 1280],
        classifier -> [2, 10].
        """
        feature_map = self.features(x)
        pooled = feature_map.mean([2, 3])  # global average over H and W
        return self.classifier(pooled)

    def freeze_backbone(self):
        """Disable gradients for every parameter in ``self.features``."""
        for weight in self.features.parameters():
            weight.requires_grad = False

    def Unfreeze_backbone(self):
        """Re-enable gradients for every parameter in ``self.features``."""
        for weight in self.features.parameters():
            weight.requires_grad = True
def mobilenet_v2(pretrained=False, progress=True, num_classes=1000):
    """Build a MobileNetV2 with the default 1000-class configuration.

    Args:
        pretrained: When true, download the ``model_urls['mobilenet_v2']``
            weights into ``./model_data`` and load them into the model.
        progress: Forwarded to ``load_state_dict_from_url``.
        num_classes: Any value other than 1000 replaces the classifier with a
            freshly initialised Dropout + Linear head of that size (after the
            optional weight load).

    Returns:
        The constructed model.
    """
    net = MobileNetV2()

    if pretrained:
        weights = load_state_dict_from_url(
            model_urls['mobilenet_v2'],
            model_dir='./model_data',
            progress=progress,
        )
        net.load_state_dict(weights)

    if num_classes == 1000:
        return net

    # Swap the 1000-way head for one matching the requested class count.
    net.classifier = nn.Sequential(
        nn.Dropout(0.2),
        nn.Linear(net.last_channel, num_classes),
    )
    return net
if __name__ == '__main__':
    # Smoke test: push two random 224x224 RGB images through a 10-class MobileNetV2.
    sample = torch.rand([2, 3, 224, 224])
    net = mobilenet_v2(num_classes=10)
    logits = net(sample)
多分类问题采用交叉熵作为损失函数。y_pred 的形状为 [batch_size, num_classes];y_true 为类别索引,形状为 [batch_size]。
'''
情况一: y_pred 为 softmax 之后的概率(取值在0~1之间),y_true 为 one-hot 编码
loss = (-(y_true*log(y_pred)).sum(dim=1)).mean()
情况二: y_pred 为未归一化的实数(logits),记为 x,class 为真实类别索引
loss = (-x[class]+log(exp(x).sum(dim=1))).mean()
'''
if __name__ == '__main__':
    # Demo: four equivalent ways of computing the mean cross-entropy loss.
    import torch
    import torch.nn as nn

    torch.random.seed()
    outputs = torch.tensor([[3.9383, 0.0983],
                            [0.0465, 5.9902]])   # logits, [batch, classes]
    targets = torch.Tensor([0, 1]).long()        # class indices, [batch]
    batch_size = len(outputs)

    # method_1: built-in loss on raw logits
    loss = nn.CrossEntropyLoss()(outputs, targets)
    print(loss)  # tensor(0.0119)

    # method_2: log-softmax followed by negative log-likelihood
    print(nn.NLLLoss()(nn.LogSoftmax(dim=1)(outputs), targets))

    # One-hot encoding of the targets, used by the two manual variants below.
    y = torch.zeros_like(outputs)
    y[torch.arange(batch_size), targets] = 1

    # Per-sample normaliser. keepdim=True keeps the shape [batch, 1] so it
    # broadcasts across the class axis; without it the row sums are spread
    # along the wrong axis, which is only correct when targets[i] == i.
    exp_sum = torch.exp(outputs).sum(1, keepdim=True)

    # method_3: -sum(one_hot * log(softmax(x))) / batch
    print(-(torch.log(torch.exp(outputs) / exp_sum) * y).sum() / batch_size)

    # method_4: sum(one_hot * (logsumexp(x) - x)) / batch
    print(((torch.log(exp_sum) - outputs) * y).sum() / batch_size)
'''
1. 设置参数
2. 加载模型
2.1 通过网页下载参数
2.2 上一步不成功,参数初始化。
2.3 迁移学习。a. 加载训练好的参数,取出未训练模型参数。b.取出模型参数和预训练模型参数shape相同的参数。c.把上一步取出的参数加载到未训练的模型上。
3. 读取数据及数据预处理
4. 设置优化器和学习率
5. 分批次训练数据
'''
(1) 加载模型代码
'''
assert backbone in ["mobilenet", "resnet50", "vgg16"]
# 1. pretrained = True ,则通过网页下载参数
model = get_model_from_name[backbone](num_classes=num_classes,pretrained=pretrained)
# 2. pretrained = False
if not pretrained:
weights_init(model)
# 3. 迁移学习
model_path = 'model_data/mobilenet_catvsdog.pth'
print('Loading weights into state dict...')
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
pretrained_dict = torch.load(model_path, map_location=device)
model_dict = model.state_dict()
pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
model_dict.update(pretrained_dict)
model.load_state_dict(model_dict)
'''
(2) 训练代码
import torch
import numpy as np
from torch import nn
from tqdm import tqdm
import torch.optim as optim
import torch.nn.functional as F
import torch.backends.cudnn as cudnn
from torch.utils.data import DataLoader
from nets.mobilenet_ls import mobilenet_v2
from nets.resnet50_ls import resnet50
from nets.vgg16_ls import vgg16
from utils.utils_ls import weights_init
from utils.dataloader_ls import DataGenerator, detection_collate
# Maps the backbone name selected in the training script to its model factory.
get_model_from_name = {
'mobilenet':mobilenet_v2,
'resnet50' :resnet50,
'vgg16' : vgg16}
# Per-backbone integer table; not referenced by the code visible in this
# listing (presumably the number of layers to freeze -- TODO confirm).
freeze_layers = {
'mobilenet': 81,
'resnet50' : 173,
'vgg16' : 19}
def get_lr(optimizer):
    """Return the learning rate of the optimizer's first parameter group.

    Returns ``None`` when the optimizer has no parameter groups.
    """
    return next((group['lr'] for group in optimizer.param_groups), None)
def get_classes(classes_path):
    """Read class names from a text file, one per line.

    Leading and trailing whitespace is stripped from every line; blank lines
    are kept as empty strings.
    """
    with open(classes_path) as handle:
        return [entry.strip() for entry in handle.readlines()]
def _batch_to_tensors(batch, cuda):
    """Convert one (images, targets) pair of numpy arrays to float / long tensors."""
    images, targets = batch
    images = torch.from_numpy(images).type(torch.FloatTensor)
    targets = torch.from_numpy(targets).long()
    if cuda:
        images = images.cuda()
        targets = targets.cuda()
    return images, targets


def fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda):
    """Train ``net`` for one epoch, validate it, and write a checkpoint.

    NOTE: the optimizer is not a parameter; this function uses the
    module-level ``optimizer`` created by the training script.

    Args:
        net: Model to train (optionally wrapped in ``nn.DataParallel``).
        epoch: Zero-based index of the current epoch (shown as ``epoch + 1``).
        epoch_size: Maximum number of training batches to consume.
        epoch_size_val: Maximum number of validation batches to consume.
        gen: Iterable of training batches, each ``(images, targets)`` as numpy arrays.
        genval: Iterable of validation batches in the same format.
        Epoch: Total number of epochs, used for display only.
        cuda: Move every batch to the GPU when true.

    Side effects:
        Updates the model weights, prints progress, and saves the model's
        ``state_dict`` under ``logs/``.
    """
    import os

    criterion = nn.CrossEntropyLoss()
    total_loss = 0
    total_accuracy = 0
    val_total_loss = 0
    train_batches = 0
    val_batches = 0

    # Validation below switches to eval mode, so re-enter train mode each epoch.
    net.train()
    with tqdm(total=epoch_size, desc=f'Epoch{epoch+1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
                break
            images, targets = _batch_to_tensors(batch, cuda)

            optimizer.zero_grad()
            outputs = net(images)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            train_batches += 1
            with torch.no_grad():
                # argmax of the logits equals argmax of their softmax
                accuracy = (outputs.argmax(dim=-1) == targets).float().mean()
                total_accuracy += accuracy.item()

            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1),
                                'accuracy': total_accuracy / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)

    print('Start Validation')
    # eval() freezes BatchNorm statistics and disables dropout; no_grad() avoids
    # building a graph that is never back-propagated.
    net.eval()
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}', postfix=dict, mininterval=0.3) as pbar:
        for iteration, batch in enumerate(genval):
            if iteration >= epoch_size_val:
                break
            images, targets = _batch_to_tensors(batch, cuda)
            with torch.no_grad():
                outputs = net(images)
                val_loss = criterion(outputs, targets)
            val_total_loss += val_loss.item()
            val_batches += 1

            pbar.set_postfix(**{'total_loss': val_total_loss / (iteration + 1),
                                'lr': get_lr(optimizer)})
            pbar.update(1)
    net.train()
    print('Finish Validation')

    # Average over the batches actually processed (the sums never include an
    # extra "+1" batch); max() guards against an empty loader.
    mean_train_loss = total_loss / max(train_batches, 1)
    mean_val_loss = val_total_loss / max(val_batches, 1)

    print('Epoch:' + str(epoch + 1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (mean_train_loss, mean_val_loss))
    print('Saving state, iter:', str(epoch + 1))

    # Save the bare module so checkpoint keys carry no "module." prefix even
    # when net is a DataParallel wrapper.
    to_save = net.module if isinstance(net, nn.DataParallel) else net
    os.makedirs('logs', exist_ok=True)
    torch.save(to_save.state_dict(),
               'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth' % ((epoch + 1), mean_train_loss, mean_val_loss))
if __name__ == '__main__':
    # Two-phase training script: first train with the backbone frozen, then
    # unfreeze it and continue at a lower learning rate.
    import os

    log_dir = './logs/'
    backbone = 'mobilenet'
    input_shape = [224, 224, 3]
    Cuda = False
    pretrained = False
    classes_path = './model_data/cls_classes_ls.txt'

    # fit_one_epoch writes its checkpoints under logs/; make sure it exists.
    os.makedirs(log_dir, exist_ok=True)

    class_names = get_classes(classes_path)  # e.g. ['cat', 'dog']
    num_classes = len(class_names)

    assert backbone in ["mobilenet", "resnet50", "vgg16"]
    model = get_model_from_name[backbone](num_classes=num_classes, pretrained=pretrained)
    if not pretrained:
        weights_init(model)

    # Optional: warm-start from an existing checkpoint to speed up training.
    # model_path = "model_data/Omniglot_vgg.pth"  # 'model_data/mobilenet_catvsdog.pth'
    # print('Loading weights into state dict...')
    # device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # pretrained_dict = torch.load(model_path, map_location=device)
    # model_dict = model.state_dict()
    # pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
    # model_dict.update(pretrained_dict)
    # model.load_state_dict(model_dict)

    with open(r"./cls_train.txt", "r") as f:
        lines = f.readlines()

    # Fixed seed so the train/validation split is reproducible; the global
    # seed is released again afterwards.
    np.random.seed(10101)
    np.random.shuffle(lines)
    np.random.seed(None)
    num_val = int(len(lines) * 0.1)
    num_train = len(lines) - num_val

    net = model.train()
    if Cuda:
        net = torch.nn.DataParallel(model)
        cudnn.benchmark = True
        net = net.cuda()

    # ------------------------------------------------------#
    #   Backbone features are generic, so training with the
    #   backbone frozen is faster and protects the weights
    #   early in training.
    #   Init_Epoch   : first epoch
    #   Freeze_Epoch : last epoch of the frozen phase
    #   Epoch        : total number of epochs
    #   Reduce Batch_size on OOM / insufficient GPU memory.
    # ------------------------------------------------------#
    if True:
        # Do not make the batch size too small; training quality suffers.
        lr = 1e-3
        Batch_size = 32  # 128
        Init_Epoch = 0  # 0
        Freeze_Epoch = 50  # 50

        # fit_one_epoch reads this module-level optimizer.
        optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
        lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

        train_dataset = DataGenerator(input_shape, lines[:num_train])
        val_dataset = DataGenerator(input_shape, lines[num_train:], False)
        gen = DataLoader(train_dataset, batch_size=Batch_size, num_workers=4, pin_memory=True,
                         drop_last=True, collate_fn=detection_collate)
        gen_val = DataLoader(val_dataset, batch_size=Batch_size, num_workers=4, pin_memory=True,
                             drop_last=True, collate_fn=detection_collate)

        epoch_size = train_dataset.get_len() // Batch_size
        epoch_size_val = val_dataset.get_len() // Batch_size
        if epoch_size == 0 or epoch_size_val == 0:
            raise ValueError("数据集过小,无法进行训练,请扩充数据集。")

        # Phase 1: train with the backbone frozen.
        model.freeze_backbone()
        for epoch in range(Init_Epoch, Freeze_Epoch):
            # Pass `net` (not `model`) so the DataParallel wrapper built above
            # is actually used when Cuda is enabled.
            fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, gen_val, Freeze_Epoch, Cuda)
            lr_scheduler.step()

    if True:
        # Do not make the batch size too small; training quality suffers.
        lr = 1e-4
        Batch_size = 32  # 128
        Freeze_Epoch = 50  # 50
        Epoch = 100  # 100

        optimizer = optim.Adam(net.parameters(), lr, weight_decay=5e-4)
        lr_scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)

        train_dataset = DataGenerator(input_shape, lines[:num_train])
        val_dataset = DataGenerator(input_shape, lines[num_train:], False)
        gen = DataLoader(train_dataset, batch_size=Batch_size, num_workers=2, pin_memory=True,
                         drop_last=True, collate_fn=detection_collate)
        gen_val = DataLoader(val_dataset, batch_size=Batch_size, num_workers=2, pin_memory=True,
                             drop_last=True, collate_fn=detection_collate)

        epoch_size = train_dataset.get_len() // Batch_size
        epoch_size_val = val_dataset.get_len() // Batch_size
        if epoch_size == 0 or epoch_size_val == 0:
            raise ValueError("数据集过小,无法进行训练,请扩充数据集。")

        # Phase 2: unfreeze the backbone and fine-tune the whole network.
        model.Unfreeze_backbone()
        for epoch in range(Freeze_Epoch, Epoch):
            fit_one_epoch(net, epoch, epoch_size, epoch_size_val, gen, gen_val, Epoch, Cuda)
            lr_scheduler.step()
(1)流程
'''
1. 分类实例化
2. 打开图片
3. 图片识别
'''
from PIL import Image
from classification_ls import Classification
# Interactive prediction loop: ask for an image path, classify it, print the label.
classification = Classification()
while True:
    img = input('Input image filename')
    try:
        image = Image.open(img)
    except Exception:
        # Catch only ordinary errors (missing file, unreadable image, ...);
        # a bare `except:` would also swallow KeyboardInterrupt / SystemExit.
        print('Open Error! Try again!')
        continue
    class_name = classification.detect_image(image)
    print(class_name)
(1)分类流程
'''
1. 加载图片、加灰条、归一化
2. 加载模型,预测
3. 显示预测结果
'''
(2)代码
import os, copy, torch
import numpy as np
from torch import nn
import matplotlib.pyplot as plt
from torch.autograd import Variable
from nets.vgg16_ls import vgg16
from nets.resnet50_ls import resnet50
from nets.mobilenet_ls import mobilenet_v2
from utils.utils_ls import letterbox_image
# Maps the configured backbone name to its model factory; used by
# Classification.generate() to build the network.
get_model_from_name = {"vgg16":vgg16,
"resnet50":resnet50,
"mobilenet":mobilenet_v2}
def _preprocess_input(x):
    """Scale ``x`` in place as ``x / 127.5 - 1`` and return it.

    For 8-bit pixel values (0..255) this yields values in [-1, 1]. The
    augmented assignments modify the caller's array.
    """
    x /= 127.5
    x -= 1.
    return x
class Classification(object):
    """Loads a trained classifier and predicts the class of a single PIL image.

    Settings come from ``_defaults`` and can be overridden per instance with
    keyword arguments, e.g. ``Classification(backbone='resnet50')``.
    """

    _defaults = {
        "cuda": False,                                       # run inference on the GPU
        "backbone": 'mobilenet',                             # 'mobilenet' | 'resnet50' | 'vgg16'
        "input_shape": [224, 224, 3],                        # height, width, channels
        "classes_path": 'model_data/cls_classes.txt',        # one class name per line
        "model_path": 'model_data/mobilenet_catvsdog.pth',   # trained state_dict
    }

    @classmethod
    def get_defaults(cls, n):
        """Return the default for setting ``n``, or an error string if ``n`` is unknown."""
        if n in cls._defaults:
            return cls._defaults[n]
        else:
            return "Unrecognized attribute name '" + n + "'"

    def __init__(self, **kwargs):
        """Apply defaults and overrides, then load the class names and the model."""
        self.__dict__.update(self._defaults)
        # Keyword arguments used to be accepted but silently ignored; apply
        # them before anything reads the settings.
        self.__dict__.update(kwargs)
        self.class_names = self._get_class()
        self.generate()

    def _get_class(self):
        """Read the class names (one per line) from ``self.classes_path``."""
        classes_path = os.path.expanduser(self.classes_path)
        with open(classes_path) as f:
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names

    def generate(self):
        """Build the backbone, load the trained weights and switch to eval mode."""
        model_path = os.path.expanduser(self.model_path)
        self.num_classes = len(self.class_names)

        assert self.backbone in ["mobilenet", "resnet50", "vgg16"]
        self.model = get_model_from_name[self.backbone](num_classes=self.num_classes, pretrained=False)
        self.model = self.model.eval()

        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        # Load from the expanded path so "~/..." model paths work.
        state_dict = torch.load(model_path, map_location=device)
        self.model.load_state_dict(state_dict)

        if self.cuda:
            self.model = nn.DataParallel(self.model)
            self.model = self.model.cuda()
        print('{} model, and classes loaded.'.format(model_path))

    def detect_image(self, image):
        """Classify ``image``, show it with the predicted label, and return the class name.

        Args:
            image: PIL image to classify.

        Returns:
            The name of the highest-probability class.
        """
        # Keep an untouched copy for display.
        old_image = copy.deepcopy(image)

        crop_img = letterbox_image(image, [self.input_shape[0], self.input_shape[1]])
        photo = np.array(crop_img, dtype=np.float32)
        # Normalise, add a batch axis, then reorder NHWC -> NCHW.
        photo = np.reshape(_preprocess_input(photo),
                           [1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
        photo = np.transpose(photo, (0, 3, 1, 2))

        with torch.no_grad():
            photo = torch.from_numpy(photo).type(torch.FloatTensor)
            if self.cuda:
                photo = photo.cuda()
            preds = torch.softmax(self.model(photo)[0], dim=-1).cpu().numpy()

        class_name = self.class_names[np.argmax(preds)]
        probability = np.max(preds)

        plt.subplot(1, 1, 1)
        plt.imshow(np.array(old_image))
        plt.title('Class:%s Probability:%.3f' % (class_name, probability))
        plt.show()
        return class_name
if __name__ == '__main__':
    # Classify a single sample image and display the result.
    from PIL import Image

    classifier = Classification()
    sample = Image.open('img/cat.jpg')
    class_name = classifier.detect_image(sample)
(1) 评估流程(Top-1)
'''
1. 导入图片流,得到预测结果。
2. 根据预测值和真实值,计算正确预测的样本数。
3. Top1 = 正确预测的样本数/总样本
'''
(2) 代码
''' 评价'''
import numpy as np
import torch
from PIL import Image
from torch.autograd import Variable
from classification_ls import Classification, _preprocess_input
from utils.utils_ls import letterbox_image
class top1_Classification(Classification):
    """Classification variant whose ``detect_image`` returns the top-1 class index."""

    def detect_image(self, image):
        """Return the index of the highest-probability class for ``image``."""
        height, width, channels = self.input_shape[0], self.input_shape[1], self.input_shape[2]

        boxed = letterbox_image(image, [height, width])
        pixels = _preprocess_input(np.array(boxed, dtype=np.float32))
        # Add a batch axis, then reorder NHWC -> NCHW.
        batch = np.transpose(np.reshape(pixels, [1, height, width, channels]), (0, 3, 1, 2))

        with torch.no_grad():
            tensor = Variable(torch.from_numpy(batch).type(torch.FloatTensor))
            if self.cuda:
                tensor = tensor.cuda()
            probs = torch.softmax(self.model(tensor)[0], dim=-1).cpu().numpy()

        return np.argmax(probs)
def evaluteTop1(classfication, lines):
    """Compute top-1 accuracy over annotation lines of the form ``<class_id>;<image_path>``.

    Args:
        classfication: Object whose ``detect_image(image)`` returns the
            predicted class index.
        lines: Iterable of annotation lines; blank lines are ignored.

    Returns:
        Fraction of samples predicted correctly, or 0.0 when there are no
        samples.
    """
    samples = [line.strip() for line in lines if line.strip()]
    total = len(samples)
    if total == 0:
        return 0.0

    correct = 0
    for index, sample in enumerate(samples):
        # Split on the first ';' only and keep the rest verbatim, so image
        # paths containing spaces are not truncated.
        label_text, image_path = sample.split(';', 1)
        y = int(label_text)
        # Context manager closes the file handle once the image has been used.
        with Image.open(image_path) as x:
            pred = classfication.detect_image(x)
        correct += int(pred == y)
        if index % 100 == 0:
            print("[%d/%d]" % (index, total))
    return correct / total
if __name__ == '__main__':
    # Report top-1 accuracy over every sample listed in cls_test.txt.
    with open(r"./cls_test.txt", "r") as handle:
        test_lines = handle.readlines()
    evaluator = top1_Classification()
    accuracy = evaluteTop1(evaluator, test_lines)
    print("top-1 accuracy = %.2f%%" % (accuracy * 100))
'''
model_data/mobilenet_catvsdog.pth model, and classes loaded.
[0/26]
top-1 accuracy = 100.00%
Process finished with exit code 0
'''
(1) 评估流程(Top-5)
'''
1. 导入图片流,得到预测结果。按照概率对预测结果从大到小排列,取出前5个预测结果。
2. 如果前5个预测结果有预测正确的,作为预测正确,记录正确预测的样本数。
3. Top5 = 正确预测的样本数/总样本
'''
(2) 代码
import numpy as np
import torch
from PIL import Image
from torch.autograd import Variable
from classification_ls import Classification, _preprocess_input
from utils.utils_ls import letterbox_image
class top5_Classification(Classification):
    """Classification variant whose ``detect_image`` returns the top-5 class indices."""

    def detect_image(self, image):
        """Return up to five class indices for ``image``, most probable first."""
        height, width, channels = self.input_shape[0], self.input_shape[1], self.input_shape[2]

        boxed = letterbox_image(image, [height, width])
        # Normalise the pixels, add a batch axis, then reorder NHWC -> NCHW.
        pixels = _preprocess_input(np.array(boxed, dtype=np.float32))
        batch = np.transpose(np.reshape(pixels, [1, height, width, channels]), (0, 3, 1, 2))

        with torch.no_grad():
            tensor = Variable(torch.from_numpy(batch).type(torch.FloatTensor))
            if self.cuda:
                tensor = tensor.cuda()
            probs = torch.softmax(self.model(tensor)[0], dim=-1).cpu().numpy()

        # argsort is ascending; reverse it for descending probability.
        ranked = np.argsort(probs)[::-1]
        return ranked[:5]
def evaluteTop5(classfication, lines):
    """Compute top-5 accuracy over annotation lines of the form ``<class_id>;<image_path>``.

    Args:
        classfication: Object whose ``detect_image(image)`` returns the
            predicted class indices, most probable first.
        lines: Iterable of annotation lines; blank lines are ignored.

    Returns:
        Fraction of samples whose true class is among the returned indices,
        or 0.0 when there are no samples.
    """
    samples = [line.strip() for line in lines if line.strip()]
    total = len(samples)
    if total == 0:
        return 0.0

    correct = 0
    for index, sample in enumerate(samples):
        # Split on the first ';' only and keep the rest verbatim, so image
        # paths containing spaces are not truncated.
        label_text, image_path = sample.split(';', 1)
        y = int(label_text)
        # Context manager closes the file handle once the image has been used.
        with Image.open(image_path) as x:
            pred = classfication.detect_image(x)
        correct += int(y in pred)
        if index % 100 == 0:
            print("[%d/%d]" % (index, total))
    return correct / total
if __name__ == '__main__':
    # Report top-5 accuracy over every sample listed in cls_test.txt.
    with open(r"./cls_test.txt", "r") as handle:
        test_lines = handle.readlines()
    evaluator = top5_Classification()
    accuracy = evaluteTop5(evaluator, test_lines)
    print("top-5 accuracy = %.2f%%" % (accuracy * 100))
'''
model_data/mobilenet_catvsdog.pth model, and classes loaded.
[0/26]
top-5 accuracy = 100.00%
Process finished with exit code 0
'''