第一次做猫狗识别的任务,遇到点问题记录一下:
主要是数据集的问题:下载完猫狗数据集后,解压得到 train 和 test 两个文件夹,这两个文件夹里直接放着图片,并没有子文件夹。我直接用 ImageFolder 将其导入:
train_dataset = torchvision.datasets.ImageFolder(root='/home/qqsdqt/桌面/cat/data/train',transform=transform)
出现了问题是:在文件夹中没有找到文件,真是大写的懵逼,明明是有照片的好不啦。
后来查了查 torchvision.datasets.ImageFolder 这个函数才发现:它读取的是文件夹,也就是说 root 路径下应该是若干子文件夹,而不是直接放照片。因此需要把 train 下的图片分到 dog 和 cat 两个子文件夹中(test 同理),也就是说应该是下面这个样子:train/cat/xxx.jpg、train/dog/xxx.jpg。
百度了一下 ImageFolder 函数的解释:root 下的每个子文件夹对应一个类别,文件夹名就是类别名,类别标签按文件夹名排序后从 0 开始编号(这里 cat 为 0,dog 为 1)。完整代码如下:
import torch
import torch.nn as nn
import torchvision
from torch.utils.data import DataLoader
from torch.autograd import Variable
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
# Layer layouts for the supported VGG variants, keyed by variant name.
# Each entry is read left to right: an integer is the number of output
# channels of a 3x3 convolution, and 'M' marks a 2x2 max-pooling step.
cfg = dict(
    VGG11=[
        64, 'M',
        128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    VGG13=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 'M',
        512, 512, 'M',
        512, 512, 'M',
    ],
    VGG16=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 'M',
        512, 512, 512, 'M',
        512, 512, 512, 'M',
    ],
    VGG19=[
        64, 64, 'M',
        128, 128, 'M',
        256, 256, 256, 256, 'M',
        512, 512, 512, 512, 'M',
        512, 512, 512, 512, 'M',
    ],
)
class VGG(nn.Module):
    """VGG-style image classifier with a down-sampling front end.

    The input is first shrunk by an average pool (kernel 8, stride 7), then
    run through the convolutional stack described by ``cfg[vgg_name]`` and
    finally through a single linear layer.

    The linear layer expects exactly 512 features, i.e. a 1x1x512 map after
    the conv stack.  With the 227x227 images produced by this script's
    transform, the front-end pool yields 32x32 maps and the five max-pools
    in every ``cfg`` entry reduce that to 1x1.

    Args:
        vgg_name: Key into the module-level ``cfg`` table (e.g. 'VGG16').
        num_classes: Number of output logits.  Defaults to 2 (cat / dog),
            which is what the original hard-coded.

    Raises:
        KeyError: If ``vgg_name`` is not a key of ``cfg``.
    """

    def __init__(self, vgg_name, num_classes=2):
        super(VGG, self).__init__()
        # Despite the attribute name this is only a pooling stage: it
        # down-samples the image before the conv stack.
        self.conv = nn.Sequential(
            nn.AvgPool2d(8, 7)
        )
        self.features = self._make_layers(cfg[vgg_name])
        self.classifier = nn.Linear(512, num_classes)

    def forward(self, x):
        """Return the class logits, shape ``(batch, num_classes)``, for ``x``."""
        out = self.conv(x)
        out = self.features(out)
        # flatten() also works on non-contiguous tensors, where view() raises.
        out = torch.flatten(out, 1)
        out = self.classifier(out)
        return out

    def _make_layers(self, cfg):
        """Build the convolutional stack from a layer specification.

        Args:
            cfg: Sequence in which an integer adds a 3x3 convolution with
                that many output channels (followed by batch-norm and ReLU)
                and ``'M'`` adds a 2x2 max-pool.  Note that this parameter
                shadows the module-level ``cfg`` table inside the method.

        Returns:
            An ``nn.Sequential`` containing the layers in order.
        """
        layers = []
        in_channels = 3  # RGB input
        for x in cfg:
            if x == 'M':
                layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            else:
                layers.extend([
                    nn.Conv2d(in_channels, x, kernel_size=3, padding=1),
                    nn.BatchNorm2d(x),
                    nn.ReLU(inplace=True),
                ])
                in_channels = x
        # A 1x1/stride-1 average pool is an identity operation; it is kept so
        # the module layout stays the same as in previously saved models.
        layers.append(nn.AvgPool2d(kernel_size=1, stride=1))
        return nn.Sequential(*layers)
# ---- Hyper-parameters ------------------------------------------------------
BATCH_SIZE = 512
BARCH_SIZE = BATCH_SIZE  # original (misspelled) name, kept as an alias
LR = 0.001
EPOCH = 20
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# ---- Pre-processing --------------------------------------------------------
# Per-channel mean/std commonly used for ImageNet-style normalisation.
normalize = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Training pipeline: fixed-size resize plus random augmentation.
transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.RandomRotation(20),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    normalize,
])

# Evaluation pipeline: same resize and normalisation but no random
# augmentation, so results on the held-out set are reproducible.
eval_transform = transforms.Compose([
    transforms.Resize(size=(227, 227)),
    transforms.ToTensor(),
    normalize,
])

# ---- Data ------------------------------------------------------------------
# ImageFolder expects one sub-directory per class under ``root``
# (e.g. train/cat/..., train/dog/...), not images directly in ``root``.
train_dataset = torchvision.datasets.ImageFolder(
    root='/home/zhouchenglong/cat/data/train',
    transform=transform,
)
train_loader = torch.utils.data.DataLoader(
    train_dataset, batch_size=BATCH_SIZE, shuffle=True)

validation_dataset = torchvision.datasets.ImageFolder(
    root='/home/zhouchenglong/cat/data/test',
    transform=eval_transform,
)
# No shuffling for evaluation: order does not affect metrics and a fixed
# order makes individual predictions traceable.
test_loader = torch.utils.data.DataLoader(
    validation_dataset, batch_size=BATCH_SIZE, shuffle=False)

# ---- Model, loss, optimiser ------------------------------------------------
vggNet = VGG('VGG16').to(device)
criterion = nn.CrossEntropyLoss().to(device)
opti = torch.optim.Adam(vggNet.parameters(), lr=LR)
if __name__ == '__main__':
    # Train vggNet for EPOCH epochs, save it, and plot per-epoch training
    # accuracy and loss.
    Accuracy_list = []  # training accuracy per epoch, in percent
    Loss_list = []      # mean training loss per epoch
    log_every = 10      # number of batches between running-loss reports

    vggNet.train()  # make the training mode explicit (batch-norm statistics)
    for epoch in range(EPOCH):
        sum_loss = 0.0    # loss accumulated since the last report
        epoch_loss = 0.0  # loss accumulated over the whole epoch
        num_batches = 0
        correct1 = 0
        total1 = 0
        for i, (images, labels) in enumerate(train_loader):
            print(i)
            # Tensors carry autograd state themselves; the deprecated
            # Variable wrapper is not needed.
            images = images.to(device)
            labels = labels.to(device)

            out = vggNet(images)
            # detach() keeps the accuracy bookkeeping out of the graph.
            _, predicted = torch.max(out.detach(), 1)
            total1 += labels.size(0)
            print('predict={},labels={}'.format(predicted.size(), labels.size()))
            correct1 += (predicted == labels).sum().item()

            loss = criterion(out, labels)
            batch_loss = loss.item()
            sum_loss += batch_loss
            epoch_loss += batch_loss
            num_batches += 1

            opti.zero_grad()
            loss.backward()
            opti.step()

            if i % log_every == log_every - 1:
                # Average over the batches actually accumulated; the
                # original divided a 10-batch sum by 100.
                print('[%d, %d] loss: %.03f'
                      % (epoch + 1, i + 1, sum_loss / log_every))
                sum_loss = 0.0

        Accuracy_list.append(100.0 * correct1 / total1)
        print('accurary={}'.format(100.0 * correct1 / total1))
        # Record the epoch mean rather than only the last batch's loss, which
        # is a noisy stand-in for the epoch.
        Loss_list.append(epoch_loss / num_batches)

    # NOTE: the file name says "alexNet" but the object saved is the VGG
    # model; the path is left unchanged so existing loaders keep working.
    torch.save(vggNet, './alexNet.pth')

    x1 = range(0, EPOCH)
    x2 = range(0, EPOCH)
    y1 = Accuracy_list
    y2 = Loss_list

    plt.subplot(2, 1, 1)
    plt.plot(x1, y1, 'o-')
    plt.title('Train accuracy vs. epoches')
    plt.ylabel('Train accuracy')

    plt.subplot(2, 1, 2)
    plt.plot(x2, y2, '.-')
    # The caption goes in the x-label so it sits below the lower plot.
    plt.xlabel('Train loss vs. epoches')
    plt.ylabel('Train loss')

    plt.savefig("accuracy_epoch" + str(EPOCH) + ".png")
    plt.show()
欢迎大家看我的另一篇博文:pytorch alexNet 猫狗识别 https://blog.csdn.net/qq_38191717/article/details/97160761