CIFAR-10是一个常用的彩色图片数据集,它有10个类别的图片,分别是airplane、automobile、bird、cat、deer、dog、frog、horse、ship以及truck。每张图片的尺寸是3×32×32,即3通道彩色图片,分辨率为32×32。
导入程序包:
import torchvision as tv
import torch as t
import torch.utils.data as data
import torchvision.transforms as transforms
from torchvision.transforms import ToPILImage
# Converter from a Tensor to a PIL Image, used below to visualize samples
show = ToPILImage()
数据的预处理:
# Preprocessing: convert each image to a tensor, then normalize every channel
# with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
])
# Training set
trainset = tv.datasets.CIFAR10(root='/data/', train=True, download=True, transform=transform)
trainloader = data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)
# Test set
testset = tv.datasets.CIFAR10('/data/', train=False, download=True, transform=transform)
# Bound as `testloader` (not `testloaders`): that is the name the evaluation
# code later in this file iterates over.
testloader = data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)
classes = ('plane','car','bird','cat','deer','dog','frog','horse','ship','truck')
# Use `image` instead of `data` so the `torch.utils.data` module alias
# imported as `data` is not overwritten by a tensor.
(image, label) = trainset[100]
print(classes[label])
# (image + 1) / 2 undoes the normalization applied by `transform`
show((image + 1) / 2).resize((100, 100))
结果如下:
DataLoader是一个可迭代的对象,它将dataset返回的每一条数据样本拼接成一个batch,并提供多进程加速优化和数据打乱等操作。
# Fetch one mini-batch from the training loader and display it.
dataiter = iter(trainloader)
# Use the built-in next(): the iterator's Python-2-style .next() method is
# not available in newer PyTorch releases.
images, labels = next(dataiter)
print(' '.join('%11s' % classes[labels[j]] for j in range(4)))
# (images + 1) / 2 undoes the normalization before building the image grid
show(tv.utils.make_grid((images + 1) / 2)).resize((400, 100))
#定义网络
import torch.nn as nn
import torch.nn.functional as F
class Net(nn.Module):
    """Small LeNet-style CNN: two conv/pool stages followed by three linear layers.

    Args:
        num_classes: Size of the final output layer. Defaults to 10, the
            value that was previously hard-coded.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.conv1 = nn.Conv2d(3, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)
        # 16 channels * 5 * 5 spatial positions: this is the flattened size
        # produced by the two conv/pool stages for a 32x32 input.
        self.fc1 = nn.Linear(16 * 5 * 5, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, num_classes)

    def forward(self, x):
        """Return the raw (un-normalized) class scores for a batch ``x``."""
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        return self.fc3(x)
# Instantiate the network and print its layer structure
net = Net()
print(net)
# Define the loss function and the optimizer
from torch import optim
# Cross-entropy loss
criterion = nn.CrossEntropyLoss()
# SGD over all network parameters, learning rate 0.001, momentum 0.9
optimizer = optim.SGD(net.parameters(),lr=0.001,momentum=0.9)
#训练网络
from torch.autograd import Variable
# One full pass over the training data is called an epoch
for epoch in range(20):
    running_loss = 0.0
    # The loop variable is `batch`, not `data`, so the `torch.utils.data`
    # module alias imported as `data` is not overwritten.
    for i, batch in enumerate(trainloader, 0):
        # Tensors are fed to the network directly; wrapping them in
        # Variable is a deprecated no-op since PyTorch 0.4.
        inputs, labels = batch
        # Reset the gradients accumulated by the previous step
        optimizer.zero_grad()
        # Forward pass + backward pass
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
        # Report the mean loss over the last 2000 mini-batches
        if i % 2000 == 1999:
            print('[%d,%5d] loss:%.3f' % (epoch + 1, i + 1, running_loss / 2000))
            running_loss = 0.0
print('Finishing Training')
将测试图片输入网络,计算它的label,然后与实际的label进行比较
# Fetch one mini-batch from the test loader and compare labels with predictions.
dataiter = iter(testloader)
# Use the built-in next(): the iterator's Python-2-style .next() method is
# not available in newer PyTorch releases.
images, labels = next(dataiter)
print("实际的label:",''.join('%08s'%classes[labels[j]] for j in range(4)))
# Undo Normalize(mean=0.5, std=0.5): x * 0.5 + 0.5 == (x + 1) / 2.
# The previous `images/2-0.5` produced values in [-1, 0] instead of [0, 1].
show(tv.utils.make_grid((images + 1) / 2)).resize((400, 100))
# Score of each image for every class; inference needs no gradient tracking
with t.no_grad():
    outputs = net(images)
# Index of the highest score along dim 1 is the predicted class
_, predicted = t.max(outputs, 1)
print('预测结果:',' '.join('%5s'%classes[predicted[j]] for j in range(4)))
在整个测试集上的效果
# Count correct predictions over the whole test set.
correct = 0
total = 0
# Evaluation needs no gradients; no_grad() avoids building autograd graphs.
with t.no_grad():
    # The loop variable is `batch`, not `data`, so the `torch.utils.data`
    # module alias imported as `data` is not overwritten.
    for batch in testloader:
        images, labels = batch
        outputs = net(images)
        _, predicted = t.max(outputs, 1)
        total += labels.size(0)
        # .item() keeps `correct` a plain Python int instead of a tensor,
        # so the percentage below is ordinary number arithmetic.
        correct += (predicted == labels).sum().item()
print('10000张测试集中的准确率为:%d %%'%(100*correct/total))