第一次做猫狗识别的任务,遇到点问题记录一下:
主要是数据集的问题: 下载完猫狗数据集后,打开有两个文件夹分别是train和test。这两个文件夹中并没有子文件夹。直接将其导入
train_dataset = torchvision.datasets.ImageFolder(root='/home/qqsdqt/桌面/cat/data/train',transform=transform)
出现了问题是:在文件夹中没有找到文件,真是大写的懵逼,明明是有照片的好不啦。
后来查了查 torchvision.datasets.ImageFolder 这个函数,发现它读取的是按类别划分的子文件夹:也就是说,传入的 root 路径下面应该是若干子文件夹(每个子文件夹对应一个类别),而不是直接放照片。因此需要把 train 下的文件分成 dog 和 cat 两个子文件夹,也就是说应该是下面这个样子(例如 train/cat/xxx.jpg、train/dog/xxx.jpg):
百度了一下ImageFolder函数的解释
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
from torch.autograd import Variable
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
class AlexNet(nn.Module):
    """AlexNet-style image classifier with batch normalisation after each convolution.

    The network consists of a convolutional feature extractor
    (``feature_extraction``) followed by a three-layer fully connected head
    (``classifier``).  The head expects ``256 * 6 * 6`` input features; the
    feature map is adaptively pooled to 6x6 in ``forward`` so that input
    resolutions other than 224/227 px also work.

    Args:
        num_classes: Number of output logits produced by the last linear layer.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Convolutions carry no bias because each one is immediately followed
        # by BatchNorm2d, which has its own learnable shift.
        self.feature_extraction = nn.Sequential(
            nn.Conv2d(in_channels=3, out_channels=96, kernel_size=11, stride=4, padding=2, bias=False),
            nn.BatchNorm2d(96),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(in_channels=96, out_channels=192, kernel_size=5, stride=1, padding=2, bias=False),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
            nn.Conv2d(in_channels=192, out_channels=384, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(384),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=384, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3, stride=1, padding=1, bias=False),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=3, stride=2, padding=0),
        )
        self.classifier = nn.Sequential(
            nn.Dropout(p=0.5),
            nn.Linear(in_features=256 * 6 * 6, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Dropout(p=0.5),
            nn.Linear(in_features=4096, out_features=4096),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=4096, out_features=num_classes),
        )

    def forward(self, x):
        """Return class logits of shape ``(batch, num_classes)`` for an image batch ``x``.

        ``x`` is a float tensor of shape ``(batch, 3, height, width)``.
        """
        x = self.feature_extraction(x)
        # Pool to the 6x6 grid the classifier was sized for.  When the map is
        # already 6x6 (224/227 px inputs) this is an exact identity.  The
        # functional form is used so no extra sub-module is registered and
        # existing checkpoints keep loading.
        x = nn.functional.adaptive_avg_pool2d(x, (6, 6))
        # flatten(1) keeps the batch dimension and, unlike a hard-coded
        # view(), does not require a contiguous tensor.
        x = x.flatten(1)
        x = self.classifier(x)
        return x
# ---------------------------------------------------------------------------
# Hyper-parameters and runtime configuration
# ---------------------------------------------------------------------------
BATCH_SIZE = 512
BARCH_SIZE = BATCH_SIZE  # original (misspelled) name, kept as an alias
LR = 0.001
EPOCH = 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---------------------------------------------------------------------------
# Pre-processing pipelines
# ---------------------------------------------------------------------------
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: resize plus random augmentation.
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),  # convert the image to a Tensor scaled to [0, 1]
    normalize,
])

# Evaluation pipeline: same resize/normalisation but no random augmentation,
# so evaluation results are deterministic.
eval_transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    normalize,
])

# ---------------------------------------------------------------------------
# Datasets and loaders
# ---------------------------------------------------------------------------
# Read the training data from disk.  ImageFolder expects one sub-folder per
# class directly under ``root``.
train_dataset = torchvision.datasets.ImageFolder(
    root='/home/qqsdqt/桌面/cat/data/train',
    transform=transform,
)
print(train_dataset.class_to_idx)
print(train_dataset.imgs)
train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Read the validation data from disk.
validation_dataset = torchvision.datasets.ImageFolder(
    root='/home/qqsdqt/桌面/cat/data/test',
    transform=eval_transform,
)
print(validation_dataset.class_to_idx)
# Evaluation order does not matter, so the loader is not shuffled.
test_loader = torch.utils.data.DataLoader(validation_dataset, batch_size=BATCH_SIZE, shuffle=False)

# ---------------------------------------------------------------------------
# Model, loss and optimizer
# ---------------------------------------------------------------------------
# NOTE(review): the network is built with 10 output logits; if the dataset
# only has the two classes cat/dog, the remaining logits are simply never
# used as targets -- confirm whether len(train_dataset.classes) was intended.
alexNet = AlexNet(10).to(device)
criterion = nn.CrossEntropyLoss()
opti = torch.optim.Adam(alexNet.parameters(), lr=LR)
def train_one_epoch(model, loader, loss_fn, optimizer, device, epoch, log_every=10):
    """Run one optimisation pass over ``loader`` and return epoch statistics.

    Args:
        model: Network to train; it is switched to training mode.
        loader: Iterable yielding ``(images, labels)`` batches.
        loss_fn: Callable ``loss_fn(logits, labels)`` returning a scalar tensor
            (assumed to be the mean loss over the batch).
        optimizer: Optimizer that updates ``model``'s parameters.
        device: Device the batches are moved to before the forward pass.
        epoch: Zero-based epoch index, used only for log output.
        log_every: Print the mean loss of the last ``log_every`` batches every
            ``log_every`` batches.

    Returns:
        ``(accuracy_percent, mean_loss)`` where ``mean_loss`` is the
        sample-weighted mean of the batch losses over the whole epoch.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()
    window_loss = 0.0  # sum of batch losses since the last log line
    loss_total = 0.0   # sum of per-sample losses over the epoch
    correct = 0
    seen = 0
    for step, (images, labels) in enumerate(loader):
        images = images.to(device)
        labels = labels.to(device)

        logits = model(images)
        loss = loss_fn(logits, labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_size = labels.size(0)
        batch_loss = loss.item()
        seen += batch_size
        # detach(): accuracy bookkeeping must not take part in autograd.
        correct += (logits.detach().argmax(dim=1) == labels).sum().item()
        loss_total += batch_loss * batch_size

        # Print the average loss of every `log_every` batches.
        window_loss += batch_loss
        if step % log_every == log_every - 1:
            print('[%d, %d] loss: %.03f'
                  % (epoch + 1, step + 1, window_loss / log_every))
            window_loss = 0.0

    if seen == 0:
        raise ValueError('loader produced no samples')
    return 100.0 * correct / seen, loss_total / seen


def plot_training_curves(accuracies, losses, epochs):
    """Plot per-epoch accuracy and loss, save the figure as a PNG and show it."""
    xs = range(0, epochs)
    plt.subplot(2, 1, 1)
    plt.plot(xs, accuracies, 'o-')
    plt.title('Train accuracy vs. epoches')
    plt.ylabel('Train accuracy')
    plt.subplot(2, 1, 2)
    plt.plot(xs, losses, '.-')
    plt.xlabel('Train loss vs. epoches')
    plt.ylabel('Train loss')
    plt.savefig("accuracy_epoch" + str(epochs) + ".png")
    plt.show()


def main():
    """Train ``alexNet`` for ``EPOCH`` epochs, checkpoint it and plot the curves."""
    accuracy_history = []
    loss_history = []
    for epoch in range(EPOCH):
        accuracy, mean_loss = train_one_epoch(
            alexNet, train_loader, criterion, opti, device, epoch)
        accuracy_history.append(accuracy)
        loss_history.append(mean_loss)
        print('accurary={}'.format(accuracy))
        # Saves the whole module object; overwriting the same file after
        # every epoch means an interrupted run keeps its latest weights.
        torch.save(alexNet, './alexNet.pth')
    plot_training_curves(accuracy_history, loss_history, EPOCH)


if __name__ == '__main__':
    main()