Using VGG for transfer learning on 8×8 images is admittedly a bit of overkill, but I went ahead and did it anyway.
The idea: change the input size of VGG's first convolutional layer and the sizes of the final fully-connected layers, enlarge the 8×8 images to 224×224 with bilinear interpolation, and keep the labels unchanged. The pretrained VGG weights are frozen, and only the small head convolutional layer and tail fully-connected layers are trained, which helps preserve the model's strong generalization ability. A compact sketch of this freeze-and-replace idea follows.
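A minimal sketch of the pattern, assuming torchvision's vgg11 (for brevity only the last fc layer is swapped here; the full script below replaces all three):

import torch.nn as nn
from torchvision import models

vgg = models.vgg11(pretrained=True)
for p in vgg.parameters():
    p.requires_grad = False  # freeze every pretrained weight

# Newly constructed layers are trainable by default.
vgg.features[0] = nn.Conv2d(1, 64, kernel_size=3, padding=1)  # 1-channel input
vgg.classifier[6] = nn.Linear(4096, 10)                       # 10 digit classes

n_trainable = sum(p.numel() for p in vgg.parameters() if p.requires_grad)
print(n_trainable)  # only the replaced head/tail layers are updated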
The architecture of the modified model is as follows:
----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
================================================================
            Conv2d-1         [-1, 64, 224, 224]             640
              ReLU-2         [-1, 64, 224, 224]               0
         MaxPool2d-3         [-1, 64, 112, 112]               0
            Conv2d-4        [-1, 128, 112, 112]          73,856
              ReLU-5        [-1, 128, 112, 112]               0
         MaxPool2d-6          [-1, 128, 56, 56]               0
            Conv2d-7          [-1, 256, 56, 56]         295,168
              ReLU-8          [-1, 256, 56, 56]               0
            Conv2d-9          [-1, 256, 56, 56]         590,080
             ReLU-10          [-1, 256, 56, 56]               0
        MaxPool2d-11          [-1, 256, 28, 28]               0
           Conv2d-12          [-1, 512, 28, 28]       1,180,160
             ReLU-13          [-1, 512, 28, 28]               0
           Conv2d-14          [-1, 512, 28, 28]       2,359,808
             ReLU-15          [-1, 512, 28, 28]               0
        MaxPool2d-16          [-1, 512, 14, 14]               0
           Conv2d-17          [-1, 512, 14, 14]       2,359,808
             ReLU-18          [-1, 512, 14, 14]               0
           Conv2d-19          [-1, 512, 14, 14]       2,359,808
             ReLU-20          [-1, 512, 14, 14]               0
        MaxPool2d-21            [-1, 512, 7, 7]               0
AdaptiveAvgPool2d-22            [-1, 512, 7, 7]               0
           Linear-23                 [-1, 4096]     102,764,544
             ReLU-24                 [-1, 4096]               0
          Dropout-25                 [-1, 4096]               0
           Linear-26                 [-1, 1024]       4,195,328
             ReLU-27                 [-1, 1024]               0
          Dropout-28                 [-1, 1024]               0
           Linear-29                   [-1, 10]          10,250
================================================================
Total params: 116,189,450
Trainable params: 116,189,450
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.19
Forward/backward pass size (MB): 125.30
Params size (MB): 443.23
Estimated Total Size (MB): 568.72
----------------------------------------------------------------
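For reference, a per-layer summary like the one above can be produced with the torchsummary package; a call along these lines should reproduce it for the modified model (the device='cpu' argument is an assumption for machines without a GPU):

from torchsummary import summary

# Per-layer output shapes and parameter counts for a 1x224x224 input.
summary(model, input_size=(1, 224, 224), device='cpu')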
VGG(
  (features): Sequential(
    (0): Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU(inplace=True)
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (4): ReLU(inplace=True)
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (7): ReLU(inplace=True)
    (8): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (9): ReLU(inplace=True)
    (10): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (11): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (12): ReLU(inplace=True)
    (13): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (14): ReLU(inplace=True)
    (15): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (16): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (17): ReLU(inplace=True)
    (18): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (19): ReLU(inplace=True)
    (20): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (avgpool): AdaptiveAvgPool2d(output_size=(7, 7))
  (classifier): Sequential(
    (0): Linear(in_features=25088, out_features=4096, bias=True)
    (1): ReLU(inplace=True)
    (2): Dropout(p=0.5, inplace=False)
    (3): Linear(in_features=4096, out_features=1024, bias=True)
    (4): ReLU(inplace=True)
    (5): Dropout(p=0.5, inplace=False)
    (6): Linear(in_features=1024, out_features=10, bias=True)
  )
)
The full code is as follows:
import random

import numpy as np
import torch
import torch.nn as nn
from PIL import Image
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from torch.optim import Adam
from torchvision import models, transforms
class train_mini_mnist(torch.utils.data.Dataset):
    def __init__(self):
        self.X, self.y = load_digits(return_X_y=True)
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, random_state=0)

    def __getitem__(self, index):
        # load_digits pixels range from 0 to 16; float32 keeps ToPILImage/ToTensor
        # from rescaling them (the original `dtype=int` is not portable).
        img = np.array(self.X_train[index].reshape(8, 8), dtype=np.float32)
        target = int(self.y_train[index])
        img = transforms.ToPILImage()(img)
        # Upsample the 8x8 digit to 224x224 with bilinear interpolation.
        img = img.resize((224, 224), Image.BILINEAR)
        # Light augmentation: random rotation (borders fill with black by default).
        img = img.rotate(random.randint(-20, 20))
        img = transforms.ToTensor()(img)
        return img / 15., target

    def __len__(self):
        return len(self.y_train)
class test_mini_mnist(torch.utils.data.Dataset):
    def __init__(self):
        self.X, self.y = load_digits(return_X_y=True)
        self.X = self.X / 15.
        self.X_train, self.X_test, self.y_train, self.y_test = train_test_split(
            self.X, self.y, random_state=0)

    def __getitem__(self, index):
        img = torch.tensor(self.X_test[index].reshape(1, 1, 8, 8), dtype=torch.float32)
        # Upsample to 224x224 so test inputs match the training inputs
        # (the original returned raw 8x8 images, which the modified VGG cannot consume).
        img = nn.functional.interpolate(img, size=(224, 224),
                                        mode='bilinear', align_corners=False)
        return img.squeeze(0), self.y_test[index]

    def __len__(self):
        return len(self.y_test)
epochs = 1
lr = 0.0001
batch_size = 64
def build_model():
    vgg11 = models.vgg11(pretrained=True)
    # Freeze the pretrained weights. Note the attribute is `requires_grad`;
    # the original `require_grad` was a typo that silently froze nothing.
    for param in vgg11.parameters():
        param.requires_grad = False
    # The digits are single-channel while the original model expects 3 channels,
    # so only the first conv layer's input dimension is changed. Newly created
    # layers are trainable by default.
    vgg11.features[0] = nn.Conv2d(1, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    # Replace the last three fc layers (classifier indices 0, 3 and 6).
    in_channel = 25088
    out_channel = [4096, 1024, 10]
    for i in range(7):
        if i % 3 == 0:
            vgg11.classifier[i] = nn.Linear(in_channel, out_channel[i // 3])
            in_channel = out_channel[i // 3]
    return vgg11
model = build_model()
print(model)
train_loader = torch.utils.data.DataLoader(
    dataset=train_mini_mnist(), batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_mini_mnist(), batch_size=1, shuffle=True
)
# Optimize only the layers left trainable after freezing.
optim = Adam(filter(lambda p: p.requires_grad, model.parameters()), lr=lr, betas=(0.5, 0.99))
criterion = nn.CrossEntropyLoss()
for epoch in range(epochs):
    for i, (img, y) in enumerate(train_loader):
        print(i)
        pred_y = model(img)
        loss = criterion(pred_y, y)
        optim.zero_grad()
        loss.backward()
        optim.step()
        print("step")
        # Log the loss after every step.
        print('epoch:[{}/{}] loss:{}'.format(epoch, epochs, loss.item()))
torch.save(model.state_dict(), './vgg_mnist.pth')
Why no training results?! With a parameter update only every ten seconds or so, training a network with over a hundred million weights on my PC is really too much to ask. The model does build and run end to end, though, so anyone with a GPU is welcome to give it a try. Part of the output is shown below:
0
step
epoch:[0/1] loss:2.3032429218292236
1
step
epoch:[0/1] loss:2.2823166847229004
2
step
epoch:[0/1] loss:2.2737157344818115
3
step
epoch:[0/1] loss:2.2206296920776367
4
step
epoch:[0/1] loss:2.1061079502105713
5
step
epoch:[0/1] loss:1.9087493419647217
6
step
We can see that the loss is indeed decreasing, and the model's parameters are gradually being optimized.
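For anyone with a GPU who wants to try this, here is a minimal sketch of moving the model onto it and evaluating on the otherwise unused test_loader; the device handling and accuracy computation are my additions, not part of the original script:

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)  # inside the training loop, img and y would also need .to(device)

# Evaluation pass over the held-out split.
model.eval()
correct, total = 0, 0
with torch.no_grad():
    for img, y in test_loader:
        img, y = img.to(device), y.to(device)
        pred = model(img).argmax(dim=1)
        correct += (pred == y).sum().item()
        total += y.size(0)
print('test accuracy: {:.3f}'.format(correct / total))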