猫狗识别是学习CNN中最有趣的一个应用,本次我在Kaggle上的“Dogs vs. Cats Redux”进行了一次尝试。考虑到训练速度和实验的简易性,我使用了比较简单的AlexNet,并且还用了对应ImageNet的预训练模型(https://github.com/Cadene/pretrained-models.pytorch)。
因此,下文中将按照数据处理、模型预处理、训练和测试结果几个部分展开,实验代码如下:
https://github.com/Yannnnnnnnnnnn/learnPyTorch/tree/master/catVSdog
从Kaggle上下载完比赛数据后,首先要做两个处理:
1、将train数据中不同类别的数据放到不同的文件夹下,例如猫的数据放在“0”文件夹下,狗的数据放在“1”文件夹下,如下图所示:
2、完成train数据处理后,则需要进一步将其分成train数据和validation数据;本次实验时,我通过随机采样的方式,从train中选取了20%的数据用于验证,代码如下:
# -*- coding:utf-8 -*-
# Adapted from: https://blog.csdn.net/mdjxy63/article/details/78946455
__author__ = 'xuy'
import os
import shutil
import random


def move_random_subset(src_dir, dst_dir, fraction=0.2):
    """Move a random ``fraction`` of the files in ``src_dir`` into ``dst_dir``.

    Used to carve a validation split out of the Kaggle training images.
    Only regular files directly inside ``src_dir`` are considered.
    Returns the list of file names that were moved.
    """
    names = [n for n in os.listdir(src_dir)
             if os.path.isfile(os.path.join(src_dir, n))]
    # random.sample picks WITHOUT replacement, so exactly
    # round(fraction * n) distinct files are moved.  The original code
    # called random.choice in a loop, which can pick an already-moved
    # file again and then silently skip it (the isfile check fails),
    # ending up with fewer validation images than intended.
    chosen = random.sample(names, int(round(fraction * len(names))))
    for name in chosen:
        shutil.move(os.path.join(src_dir, name), dst_dir)
    return chosen


if __name__ == '__main__':
    root_dir = r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/train/0'
    output_dir = r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/validation/0'
    # Move 20% of the class-"0" training images into the validation folder.
    move_random_subset(root_dir, output_dir, fraction=0.2)
模型预处理的主要内容是读取ImageNet预训练的模型,然后对AlexNet网络最后一层进行修改,使得分类类别数量为2,代码如下:
import torch
import torch.nn as nn
from torch import optim
from torchvision.datasets import ImageFolder
from torchvision import transforms
# AlexNet网络结构
class MineAlexNet(nn.Module):
    """AlexNet backbone with a ``num_classes``-way classification head.

    Module names and layer order (features / avgpool / classifier) mirror
    torchvision's AlexNet, so ImageNet-pretrained weights load directly.
    The final LogSoftmax means ``forward`` returns log-probabilities.
    """

    def __init__(self, num_classes=2):
        super(MineAlexNet, self).__init__()
        # Convolutional feature extractor (five conv stages).
        conv_stack = [
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        ]
        self.features = nn.Sequential(*conv_stack)
        # Pools any spatial size down to 6x6 before the dense head.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        head = [
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        ]
        self.classifier = nn.Sequential(*head)
        # Converts raw scores to log-probabilities.
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        feats = self.avgpool(self.features(x))
        flat = feats.view(feats.size(0), 256 * 6 * 6)
        return self.logsoftmax(self.classifier(flat))
# Load the ImageNet-pretrained weights (downloaded beforehand) into a
# 1000-class copy of the network.
pretrained_weights = torch.load('alexnetImageNet.pth')
alexNet = MineAlexNet(1000)
alexNet.load_state_dict(pretrained_weights)
# classifier[6] is the final Linear(4096, 1000); swapping it for a fresh
# Linear(4096, 2) re-initialises only the head while keeping every
# pretrained layer below it.
alexNet.classifier[6] = nn.Linear(4096, 2)
# Persist the cat/dog AlexNet (pretrained body + new 2-way head).
torch.save(alexNet.state_dict(), 'begin.pth')
训练过程,目前我还没有明白炼丹的具体技巧,所以就随便训练了 30 个 epoch,代码如下:
import torch
import torch.nn as nn
from torch import optim
from torchvision.datasets import ImageFolder
from torchvision import transforms
# Input pipeline.  Normalising with the ImageNet channel statistics is
# essential when fine-tuning from ImageNet-pretrained weights -- without
# it the results can be very poor!
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),   # AlexNet input resolution
    transforms.RandomRotation(20),        # light augmentation
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),                # PIL image -> tensor in [0, 1]
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])

# ImageFolder assigns labels from the sub-directory names
# ("0" = cat, "1" = dog, per the split described above).
train_dataset = ImageFolder(r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/train', transform=transform)
trainloader = torch.utils.data.DataLoader(train_dataset, batch_size=512, shuffle=True)

validation_dataset = ImageFolder(r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/validation', transform=transform)
validationloader = torch.utils.data.DataLoader(validation_dataset, batch_size=512, shuffle=True)
# AlexNet
class MineAlexNet(nn.Module):
    """AlexNet with a ``num_classes``-way head and a LogSoftmax output.

    Module names and layer order (features / avgpool / classifier)
    mirror torchvision's AlexNet, so the state_dict saved by the
    conversion step ('begin.pth') loads here without key remapping.
    Because of the final LogSoftmax, ``forward`` returns
    log-probabilities, not raw logits.
    """

    def __init__(self, num_classes=2):
        super(MineAlexNet, self).__init__()
        # Five convolution stages of the classic AlexNet architecture.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Pools any spatial size down to 6x6 before the dense head.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        # Converts the head's raw scores to log-probabilities.
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten (N, 256, 6, 6) -> (N, 9216) for the classifier.
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        x = self.logsoftmax(x)
        return x
# Load the converted (2-class) AlexNet saved by the conversion step and
# fine-tune it on the cat/dog data.
state_dict = torch.load('begin.pth')
alexNet = MineAlexNet(2)
alexNet.load_state_dict(state_dict)
alexNet.cuda()

# The network ends in LogSoftmax, i.e. it already outputs
# log-probabilities, so the matching loss is NLLLoss.
# (The original used CrossEntropyLoss, which applies log_softmax itself
# and therefore effectively applied it twice.)
criterion = nn.NLLLoss()
optimizer = optim.SGD(alexNet.parameters(), lr=0.00005)

epochs = 30
train_losses, validation_losses = [], []
for e in range(epochs):
    # ---- training pass ----
    alexNet.train()  # make sure dropout is active while training
    running_loss = 0
    for images, labels in trainloader:
        images = images.cuda()
        labels = labels.cuda()
        optimizer.zero_grad()
        output = alexNet(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    # ---- validation pass (once per epoch) ----
    alexNet.eval()  # disable dropout so evaluation is deterministic
    validation_loss = 0
    accuracy = 0
    # Gradients are not needed for validation; saves memory and compute.
    with torch.no_grad():
        for images, labels in validationloader:
            images = images.cuda()
            labels = labels.cuda()
            log_ps = alexNet(images)
            # .item() keeps plain Python floats.  The original
            # accumulated CUDA tensors here, which then poisoned
            # validation_losses and made the matplotlib plot below fail.
            validation_loss += criterion(log_ps, labels).item()
            ps = torch.exp(log_ps)
            top_p, top_class = ps.topk(1, dim=1)
            equals = top_class == labels.view(*top_class.shape)
            accuracy += torch.mean(equals.type(torch.FloatTensor)).item()

    train_losses.append(running_loss / len(trainloader))
    validation_losses.append(validation_loss / len(validationloader))
    # NOTE(review): the +37 offset looks like it continues the numbering
    # of an earlier training run's checkpoints -- confirm before reuse.
    torch.save(alexNet.state_dict(), str(e + 1 + 37) + '.pth')
    print("Epoch: {}/{}.. ".format(e + 1, epochs),
          "Training Loss: {:.3f}.. ".format(running_loss / len(trainloader)),
          "Test Loss: {:.3f}.. ".format(validation_loss / len(validationloader)),
          "Test Accuracy: {:.3f}".format(accuracy / len(validationloader)))
# Plot the training / validation loss curves side by side.
import matplotlib.pyplot as plt

for curve, tag in ((train_losses, 'Training loss'),
                   (validation_losses, 'Validation loss')):
    plt.plot(curve, label=tag)
plt.legend(frameon=False)
最后我用test数据测试了一下训练的模型,在kaggle的得分如下,虽然结果不是特别好,但好歹也还行吧,排名大概在700左右。
预测的代码如下:
import torch
import torch.nn as nn
from torchvision import transforms
# Test-time preprocessing: resize only (no augmentation at inference),
# followed by the same ImageNet normalisation used during training.
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225]),
])
# AlexNet
class MineAlexNet(nn.Module):
    """AlexNet with a ``num_classes``-way head and a LogSoftmax output.

    Must match the architecture used during training exactly: module
    names and layer order determine the state_dict keys, so the
    checkpoint ('30.pth') loads without remapping.  ``forward`` returns
    log-probabilities because of the final LogSoftmax.
    """

    def __init__(self, num_classes=2):
        super(MineAlexNet, self).__init__()
        # Five convolution stages of the classic AlexNet architecture.
        self.features = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(64, 192, kernel_size=5, padding=2),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
            nn.Conv2d(192, 384, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(384, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.Conv2d(256, 256, kernel_size=3, padding=1),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )
        # Pools any spatial size down to 6x6 before the dense head.
        self.avgpool = nn.AdaptiveAvgPool2d((6, 6))
        self.classifier = nn.Sequential(
            nn.Dropout(),
            nn.Linear(256 * 6 * 6, 4096),
            nn.ReLU(inplace=True),
            nn.Dropout(),
            nn.Linear(4096, 4096),
            nn.ReLU(inplace=True),
            nn.Linear(4096, num_classes),
        )
        # Converts the head's raw scores to log-probabilities.
        self.logsoftmax = nn.LogSoftmax(dim=1)

    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        # Flatten (N, 256, 6, 6) -> (N, 9216) for the classifier.
        x = x.view(x.size(0), 256 * 6 * 6)
        x = self.classifier(x)
        x = self.logsoftmax(x)
        return x
# Load the fine-tuned checkpoint and write the Kaggle submission file.
state_dict = torch.load('30.pth')
alexNet = MineAlexNet(2)
alexNet.load_state_dict(state_dict)
# eval() disables dropout -- the original skipped this, so predictions
# were nondeterministic (dropout stayed active at inference time).
alexNet.eval()

from PIL import Image
import matplotlib.pyplot as plt
import os

test_dir = r'/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/test/'
# `with` guarantees the CSV is flushed and closed even on error
# (the original relied on a bare f.close() at the end).
with open('/home/yqs/Desktop/dogs-vs-cats-redux-kernels-edition/submission.csv', 'w') as f:
    f.write("id,label \n")
    # torch.no_grad() replaces the deprecated
    # Variable(tensor, requires_grad=False) wrapper.
    with torch.no_grad():
        for root, dirs, files in os.walk(test_dir, topdown=False):
            for name in files:
                # .convert('RGB') guards against grayscale/palette test
                # images, which would otherwise break the 3-channel
                # Normalize step.
                image = Image.open(os.path.join(root, name)).convert('RGB')
                tensor = transform(image).unsqueeze(0)  # add batch dim
                log_ps = alexNet(tensor)
                ps = torch.exp(log_ps)  # log-probabilities -> probabilities
                # Kaggle wants P(dog).  Class "1" is dog, and the two
                # probabilities sum to 1, so ps[0][1] equals the original
                # "1 - top_p if top_class == 0 else top_p" branch.
                prob = float(ps[0][1])
                file_id, _ = os.path.splitext(name)
                f.write(file_id + "," + str(prob) + " \n")
因为已经有代码了,所以整体写得比较简单,主要目的还是鼓励自己。