在学习猫狗二分类问题时,我主要参考的是这篇博客:http://t.csdn.cn/J7L0n
然后数据集下载的是:Dogs vs. Cats | Kaggle
下载的数据集一共有25000张,这里采用CPU训练速度非常慢,25000张图片训练一次要4h,所以我们仅选取了200张dog,200张cat用来train,200张dog,200张cat作为test。(从原数据集的train中复制出自己的训练集)。
数据集结构如下:
需要注意的是,在以下代码中,train 和 test 目录下必须按类别建立子文件夹(如 cat/、dog/),否则 ImageFolder 无法从目录结构中读取标签!
文件:data1
文件:dogs-vs-cats-迁移学习vgg16-train-small
import torch
import torchvision
from torchvision import datasets,transforms,models
import os
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
import time
# Root directory of the dataset; expects data1/train/<class>/ and data1/test/<class>/.
path = 'data1'

# Preprocessing: crop to VGG16's 224x224 input size, convert to a tensor,
# then normalize each RGB channel into the [-1, 1] range.
transform = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

# Build one ImageFolder dataset and one DataLoader per split.
data_image = {}
data_loader_image = {}
for split in ["train", "test"]:
    data_image[split] = datasets.ImageFolder(
        root=os.path.join(path, split),
        transform=transform,
    )
    data_loader_image[split] = torch.utils.data.DataLoader(
        dataset=data_image[split],
        batch_size=4,
        shuffle=True,
    )
# Record whether CUDA is available (used later to move model/tensors to GPU).
use_gpu = torch.cuda.is_available()
print(use_gpu)

# Class names come from the sub-folder names; class_to_idx maps name -> label index.
classes = data_image["train"].classes
classes_index = data_image["train"].class_to_idx
print(classes)
print(classes_index)

print("train data set:", len(data_image["train"]))
print("test data set:", len(data_image["test"]))

# Pull one mini-batch and display it as a de-normalized image grid.
x_train, y_train = next(iter(data_loader_image["train"]))
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
grid = torchvision.utils.make_grid(x_train)
grid = grid.numpy().transpose((1, 2, 0))
grid = grid * std + mean  # undo Normalize so the colors render correctly
print([classes[label] for label in y_train])
plt.imshow(grid)
plt.show()
# Load VGG16 pre-trained on ImageNet as a fixed feature extractor.
model = models.vgg16(pretrained=True)
print(model)

# Freeze the pretrained backbone: no gradient updates for its weights.
for parma in model.parameters():
    parma.requires_grad = False

# Replace the 1000-class ImageNet head with a fresh 2-class (cat/dog) head.
# Newly constructed layers default to requires_grad=True, so the classifier
# is trainable without any extra work.
# BUG FIX: the original looped over classifier parameters re-enabling grad
# on `index == 6`, but the new classifier has only 6 parameter tensors
# (indices 0-5), so that branch never executed -- it was dead code and is
# removed here.
model.classifier = torch.nn.Sequential(
    torch.nn.Linear(25088, 4096),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(4096, 4096),
    torch.nn.ReLU(),
    torch.nn.Dropout(p=0.5),
    torch.nn.Linear(4096, 2),
)

if use_gpu:
    model = model.cuda()

# Cross-entropy loss; Adam optimizes only the (trainable) classifier head.
cost = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.classifier.parameters())
print(model)
# Train and evaluate the model. Only the classifier head receives gradient
# updates; the frozen VGG16 features act as a fixed extractor.
n_epochs = 1
for epoch in range(n_epochs):
    since = time.time()
    print("Epoch{}/{}".format(epoch, n_epochs))
    print("-" * 10)
    for param in ["train", "test"]:
        # BUG FIX: the original did `model.train = True/False`, which
        # overwrites nn.Module.train (a method) with a bool and never
        # actually switched Dropout between train/eval behavior.
        if param == "train":
            model.train()
        else:
            model.eval()
        running_loss = 0.0
        running_correct = 0
        batch = 0
        # Disable autograd during evaluation to save memory and compute.
        grad_ctx = torch.enable_grad() if param == "train" else torch.no_grad()
        with grad_ctx:
            for data in data_loader_image[param]:
                batch += 1
                x, y = data  # Variable wrappers removed: deprecated since torch 0.4
                if use_gpu:
                    x, y = x.cuda(), y.cuda()
                optimizer.zero_grad()
                y_pred = model(x)
                _, pred = torch.max(y_pred.data, 1)
                loss = cost(y_pred, y)
                if param == "train":
                    loss.backward()
                    optimizer.step()
                running_loss += loss.item()
                # .item() keeps the running count a plain Python int instead
                # of a 0-dim tensor, so the percentages below format cleanly.
                running_correct += torch.sum(pred == y.data).item()
                if batch % 10 == 0 and param == "train":
                    print("Batch{},Train Loss:{:.4f},Train Acc:{:.4f}%".format(
                        batch, running_loss / (4 * batch),
                        100 * running_correct / (4 * batch)))
        epoch_loss = running_loss / len(data_image[param])
        epoch_correct = 100 * running_correct / len(data_image[param])
        print("{}Loss:{:.4f},Correct:{:.4f}%".format(param, epoch_loss, epoch_correct))
    now_time = time.time() - since
    print("Training time is:{:.0f}m {:.0f}s".format(now_time // 60, now_time % 60))

# Persist the whole model for the companion test script.
torch.save(model, 'model.pth')
#保存模型,备测试使用
输出结果:
False ['cat', 'dog'] {'cat': 0, 'dog': 1} train data set: 400 test data set: 400 ['cat', 'cat', 'dog', 'cat']VGG( (features): Sequential( (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU(inplace=True) (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3): ReLU(inplace=True) (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (6): ReLU(inplace=True) (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (8): ReLU(inplace=True) (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU(inplace=True) (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (13): ReLU(inplace=True) (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (15): ReLU(inplace=True) (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (18): ReLU(inplace=True) (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (20): ReLU(inplace=True) (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (22): ReLU(inplace=True) (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (25): ReLU(inplace=True) (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (27): ReLU(inplace=True) (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (29): ReLU(inplace=True) (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) ) (avgpool): AdaptiveAvgPool2d(output_size=(7, 7)) (classifier): Sequential( (0): Linear(in_features=25088, out_features=4096, bias=True) (1): ReLU(inplace=True) (2): Dropout(p=0.5, 
inplace=False) (3): Linear(in_features=4096, out_features=4096, bias=True) (4): ReLU(inplace=True) (5): Dropout(p=0.5, inplace=False) (6): Linear(in_features=4096, out_features=1000, bias=True) ) ) Parameter containing: tensor([-0.0110, -0.0124], requires_grad=True) VGG( (features): Sequential( (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (1): ReLU(inplace=True) (2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (3): ReLU(inplace=True) (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (5): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (6): ReLU(inplace=True) (7): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (8): ReLU(inplace=True) (9): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (10): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (11): ReLU(inplace=True) (12): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (13): ReLU(inplace=True) (14): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (15): ReLU(inplace=True) (16): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (17): Conv2d(256, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (18): ReLU(inplace=True) (19): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (20): ReLU(inplace=True) (21): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (22): ReLU(inplace=True) (23): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) (24): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (25): ReLU(inplace=True) (26): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (27): ReLU(inplace=True) (28): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1)) (29): ReLU(inplace=True) (30): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False) ) (avgpool): 
AdaptiveAvgPool2d(output_size=(7, 7)) (classifier): Sequential( (0): Linear(in_features=25088, out_features=4096, bias=True) (1): ReLU() (2): Dropout(p=0.5, inplace=False) (3): Linear(in_features=4096, out_features=4096, bias=True) (4): ReLU() (5): Dropout(p=0.5, inplace=False) (6): Linear(in_features=4096, out_features=2, bias=True) ) ) Epoch0/1 ---------- Batch10,Train Loss:1.3042,Train Acc:60.0000% Batch20,Train Loss:1.0106,Train Acc:68.7500% Batch30,Train Loss:1.1665,Train Acc:74.1667% Batch40,Train Loss:1.1059,Train Acc:78.1250% Batch50,Train Loss:0.9046,Train Acc:81.0000% Batch60,Train Loss:1.4522,Train Acc:79.5833% Batch70,Train Loss:1.8163,Train Acc:80.7143% Batch80,Train Loss:1.6358,Train Acc:82.1875% Batch90,Train Loss:1.5268,Train Acc:82.5000% Batch100,Train Loss:1.4596,Train Acc:83.2500% trainLoss:1.4596,Correct:83.2500% testLoss:0.4573,Correct:92.7500% Training time is:4m 33s
文件:dogs-vs-cats-迁移学习vgg16-test-small
import os
import torch
import torchvision
from torchvision import datasets,transforms,models
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
import time
# Load the model saved by the training script.
# BUG FIX: the original never called eval(), so Dropout stayed active at
# inference time and predictions were randomly perturbed.
model = torch.load('model.pth')
model.eval()

path = 'data1'
# Preprocessing must match what was used at training time.
transform = transforms.Compose([
    transforms.CenterCrop(224),
    transforms.ToTensor(),
    transforms.Normalize([0.5, 0.5, 0.5], [0.5, 0.5, 0.5]),
])

data_test_img = datasets.ImageFolder(
    root=os.path.join(path, "test"),  # was hard-coded "data1/test/"; reuse `path`
    transform=transform,
)
data_loader_test_img = torch.utils.data.DataLoader(
    dataset=data_test_img,
    batch_size=16,
    shuffle=True,
)
classes = data_test_img.classes

# Predict one batch; gradients are not needed at inference time.
image, label = next(iter(data_loader_test_img))
with torch.no_grad():
    y_pred = model(image)
_, pred = torch.max(y_pred.data, 1)
print(pred)

# De-normalize the batch and display it as a grid with predicted labels.
img = torchvision.utils.make_grid(image)
img = img.numpy().transpose(1, 2, 0)
mean = [0.5, 0.5, 0.5]
std = [0.5, 0.5, 0.5]
img = img * std + mean
print("Pred Label:", [classes[i] for i in pred])
plt.imshow(img)
plt.show()
输出:
tensor([1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1]) Pred Label: ['dog', 'cat', 'cat', 'cat', 'cat', 'cat', 'cat', 'cat', 'cat', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog', 'dog']
200张cat和200张dog的训练效果已经很可观,有GPU条件下,可以采用更多的数据,效果会非常理想!
2022/7/30