Cat-vs-dog classification is a classic hands-on deep learning project: given pictures of cats and dogs, the model has to decide which animal each picture shows. The dataset can be downloaded from the Heywhale (和鲸) community site. Without further ado, here is the code.
import torch
import torchvision.transforms as transforms
from torch.utils.data import Dataset
from torch import nn, optim
import numpy as np
import os
from PIL import Image
import matplotlib.pyplot as plt
import time
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
transformer = transforms.Compose([transforms.ToTensor()])  # defined here, but note it is never actually applied inside DealDataset below

'''Load the training set'''
DogTrainImageList = os.listdir("/home/Downloads/catsdogs/train/Dog")
CatTrainImageList = os.listdir("/home/Downloads/catsdogs/train/Cat")
train_label = []
train_data = []
dog_train_data_dir = "/home/Downloads/catsdogs/train/Dog/"
cat_train_data_dir = "/home/Downloads/catsdogs/train/Cat/"
for i in range(len(DogTrainImageList)):
    train_label.append(1)  # 1 = dog
    dog_img = Image.open(dog_train_data_dir + DogTrainImageList[i]).resize((128, 128)).convert('RGB')
    temp_data = np.asarray(dog_img) / 255  # scale pixel values to [0, 1]
    train_data.append(np.transpose(temp_data, (2, 0, 1)))  # HWC -> CHW; np.swapaxes(temp_data, 0, 2) would also spatially transpose the image
for i in range(len(CatTrainImageList)):
    train_label.append(0)  # 0 = cat
    cat_img = Image.open(cat_train_data_dir + CatTrainImageList[i]).resize((128, 128)).convert('RGB')
    temp_data = np.asarray(cat_img) / 255
    train_data.append(np.transpose(temp_data, (2, 0, 1)))
'''Load the test set'''
DogTestImageList = os.listdir("/home/Downloads/catsdogs/test/Dog")
CatTestImageList = os.listdir("/home/Downloads/catsdogs/test/Cat")
test_label = []
test_data = []
dog_test_data_dir = "/home/Downloads/catsdogs/test/Dog/"
cat_test_data_dir = "/home/Downloads/catsdogs/test/Cat/"
for i in range(len(DogTestImageList)):
    test_label.append(1)
    dog_img = Image.open(dog_test_data_dir + DogTestImageList[i]).resize((128, 128)).convert('RGB')  # some images are single-channel, so the RGB conversion must be explicit
    temp_data = np.asarray(dog_img) / 255
    test_data.append(np.transpose(temp_data, (2, 0, 1)))
for i in range(len(CatTestImageList)):
    test_label.append(0)
    cat_img = Image.open(cat_test_data_dir + CatTestImageList[i]).resize((128, 128)).convert('RGB')
    temp_data = np.asarray(cat_img) / 255
    test_data.append(np.transpose(temp_data, (2, 0, 1)))
class DealDataset(Dataset):
    def __init__(self, data, label, transform=None):
        self.data = data
        self.label = label
        self.transform = transform  # stored but unused: the arrays are already scaled CHW floats, and the DataLoader converts them to tensors

    def __getitem__(self, index):
        data, label = self.data[index], int(self.label[index])
        return data, label

    def __len__(self):
        return len(self.data)

TrainDataSet = DealDataset(train_data, train_label, transform=transformer)
TestDataSet = DealDataset(test_data, test_label, transform=transformer)
print(TrainDataSet[0][0].shape)  # expect (3, 128, 128)
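Preloading every decoded image into train_data/test_data keeps the entire dataset in RAM, which is fine for a few thousand 128x128 images but scales poorly. As a rough alternative sketch (the class LazyCatDogDataset and its arguments are hypothetical names of mine, not part of the original code), the dataset can store only file paths and decode images on demand:

class LazyCatDogDataset(Dataset):
    """Lazy variant: decode each image in __getitem__ instead of preloading."""
    def __init__(self, file_paths, labels):
        self.file_paths = file_paths  # list of image paths
        self.labels = labels          # 1 = dog, 0 = cat

    def __getitem__(self, index):
        img = Image.open(self.file_paths[index]).resize((128, 128)).convert('RGB')
        data = np.transpose(np.asarray(img) / 255, (2, 0, 1))  # HWC -> CHW, matching the loading code above
        return data.astype(np.float32), int(self.labels[index])

    def __len__(self):
        return len(self.file_paths)

It could be built from the lists above, e.g. paths = [dog_train_data_dir + f for f in DogTrainImageList] + [cat_train_data_dir + f for f in CatTrainImageList] with matching 1/0 labels.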
class AlexNet(nn.Module):
    def __init__(self):
        super(AlexNet, self).__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(3, 96, 11, 4),  # in_channels, out_channels, kernel_size, stride
            nn.ReLU(),
            nn.MaxPool2d(3, 2),  # kernel_size, stride
            nn.BatchNorm2d(96),
            # Smaller kernel; padding of 2 keeps the height and width unchanged, while the channel count grows
            nn.Conv2d(96, 256, 5, 1, 2),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            nn.BatchNorm2d(256),
            # Three consecutive conv layers with even smaller kernels. Except for the last one,
            # the output channel count keeps growing. No pooling after the first two layers,
            # so the height and width are preserved.
            nn.Conv2d(256, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 384, 3, 1, 1),
            nn.ReLU(),
            nn.Conv2d(384, 256, 3, 1, 1),
            nn.ReLU(),
            nn.MaxPool2d(3, 2),
            nn.BatchNorm2d(256)
        )
        # The fully connected layers here are several times wider than LeNet's; dropout mitigates overfitting
        self.fc = nn.Sequential(
            nn.Linear(1024, 4096),  # 256 channels * 2 * 2 spatial positions = 1024 for 128x128 inputs
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.BatchNorm1d(4096),
            nn.Dropout(0.5),
            nn.Linear(4096, 2)  # raw logits; no ReLU here, since CrossEntropyLoss applies log-softmax itself
        )

    def forward(self, img):
        feature = self.conv(img)
        output = self.fc(feature.view(img.shape[0], -1))
        return output
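To see where the 1024 in the first nn.Linear comes from, trace the shapes for a 3x128x128 input: conv1 gives 96x30x30, the first pool 96x14x14, the second pool 256x6x6, and the final pool 256x2x2, i.e. 256*2*2 = 1024 flattened features. A quick sanity check with a dummy batch (illustrative only, not part of the original script):

net_check = AlexNet()
net_check.eval()  # use running BatchNorm statistics; we only care about shapes here
with torch.no_grad():
    dummy = torch.randn(2, 3, 128, 128)
    feat = net_check.conv(dummy)
    print(feat.shape)              # torch.Size([2, 256, 2, 2])
    print(feat.view(2, -1).shape)  # torch.Size([2, 1024])
    print(net_check(dummy).shape)  # torch.Size([2, 2])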
train_iter = torch.utils.data.DataLoader(TrainDataSet, batch_size=10, shuffle=True, num_workers=4)
test_iter = torch.utils.data.DataLoader(TestDataSet, batch_size=10, shuffle=False, num_workers=4)
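A portability note: num_workers=4 spawns worker processes. On platforms whose default process start method is 'spawn' (Windows, and recent macOS), the training entry point must be guarded or each worker will re-execute the whole script, roughly:

if __name__ == '__main__':
    # build the model and optimizer, then call the training function here
    ...

On Linux, with the default 'fork' start method, the script runs as written.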
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # If no device was given, use the device the net's parameters live on
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode: disables dropout
                acc_sum += (net(X.float().to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # switch back to training mode
            else:
                if 'is_training' in net.__code__.co_varnames:  # if the function takes an is_training argument
                    # pass is_training=False
                    acc_sum += (net(X.float(), is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X.float()).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return acc_sum / n
def train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs):
    net = net.to(device)
    print("training on ", device)
    loss = torch.nn.CrossEntropyLoss()
    for epoch in range(num_epochs):
        train_l_sum, train_acc_sum, n, batch_count, start = 0.0, 0.0, 0, 0, time.time()
        for X, y in train_iter:
            X = X.float().to(device)
            y = y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y.long())  # CrossEntropyLoss requires integer (long) class indices, not floats
            optimizer.zero_grad()
            l.backward()
            optimizer.step()  # update the parameters
            train_l_sum += l.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1
        test_acc = evaluate_accuracy(test_iter, net)
        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_l_sum / batch_count, train_acc_sum / n, test_acc, time.time() - start))
net = AlexNet()
batch_size = 10
lr, num_epochs = 0.003, 40
optimizer = torch.optim.SGD(net.parameters(), lr=lr)
train_ch5(net, train_iter, test_iter, batch_size, optimizer, device, num_epochs)
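Once training finishes, it is worth saving the weights and running the model on a single image. A minimal sketch (the file name cat_dog_alexnet.pt and the helper predict_image are hypothetical names of mine, not from the original code):

torch.save(net.state_dict(), 'cat_dog_alexnet.pt')  # hypothetical file name

def predict_image(path, net, device):
    """Classify one image file; return 'dog' or 'cat'."""
    img = Image.open(path).resize((128, 128)).convert('RGB')
    x = np.transpose(np.asarray(img) / 255, (2, 0, 1))     # same HWC -> CHW preprocessing as training
    x = torch.tensor(x, dtype=torch.float32).unsqueeze(0)  # add a batch dimension
    net.eval()
    with torch.no_grad():
        pred = net(x.to(device)).argmax(dim=1).item()
    net.train()
    return 'dog' if pred == 1 else 'cat'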
The network here borrows the AlexNet architecture. One thing to watch out for: be sure to add the BatchNorm layers to the model, otherwise the test accuracy stays stuck at 0.5 (I was trapped in that pit for a long time).
With the batch size, learning rate, and number of epochs in the code above, you should reach roughly 0.74 test accuracy, which is not especially high; readers are encouraged to tune these hyperparameters and see whether they can do better.
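One low-effort direction is data augmentation. Since each image is decoded with PIL before being turned into an array, a random horizontal flip can be applied at load time; a rough sketch of the idea (the helper name load_image_augmented is mine, and whether it actually lifts the 0.74 figure here is untested):

import random

def load_image_augmented(path, flip_prob=0.5):
    """Decode, resize, optionally mirror, and convert to a CHW float array."""
    img = Image.open(path).resize((128, 128)).convert('RGB')
    if random.random() < flip_prob:
        img = img.transpose(Image.FLIP_LEFT_RIGHT)  # random horizontal flip
    return np.transpose(np.asarray(img) / 255, (2, 0, 1))

Swapping SGD for torch.optim.Adam(net.parameters(), lr=1e-4) is another common first experiment.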
I am not an expert in deep learning myself, so corrections and suggestions for anything incomplete here are welcome.