比赛官方链接:CIFAR-10 - Object Recognition in Images | Kaggle
数据集:CIFAR-10 数据由 10 个类别的 60,000 个 32x32 彩色图像组成,每个类别包含 6000 个图像。官方数据中有50,000张训练图像和10,000张测试图像。Kaggle官方在测试集中放了290000张垃圾图像。类别:[airplane, automobile, bird, cat, deer, dog, frog, horse, ship,truck]
代码细节:
首先导入各种包
import torch
import numpy as np
import pandas as pd
import os
import torchvision
from torchvision import transforms,models
import copy
import time
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.nn.functional as F
from torchvision.datasets import CIFAR10
import torchvision.transforms as transforms
import cv2
%matplotlib inline
定义一个可视化图像函数先看一下图像:
#定义可视化图像函数
def look_img(img):
    """Display an OpenCV-loaded image with matplotlib.

    OpenCV reads images in BGR channel order while matplotlib expects
    RGB, so the channels are swapped before plotting.
    """
    rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb)
    plt.show()
# Load one training image (BGR) and preview it.
# NOTE(review): hard-coded local Windows path — adjust for your machine.
img1=cv2.imread("E:/wodezhuomian/competitions/kaggle/cifar-10/train/train/1.png")
look_img(img1)
# CIFAR-10 images are 32x32 RGB, so this prints (32, 32, 3).
img1.shape
图像形状:(32,32,3)
输出图像:
查看训练集的标签:
# Load the Kaggle training labels CSV (columns: id, label) and display it.
img_labels=pd.read_csv("E:/wodezhuomian/competitions/kaggle/cifar-10/trainLabels.csv")
img_labels
输出:
下载数据集,这里选择从torchvision中下载。transform负责对图片进行调整。
# Preprocessing: convert PIL images to tensors, then scale every channel
# from [0, 1] to [-1, 1] via (x - 0.5) / 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5, 0.5, 0.5), std=(0.5, 0.5, 0.5)),
])

# Official CIFAR-10 train/test splits from torchvision, downloaded into cwd.
train_dataset = torchvision.datasets.CIFAR10(
    root='.', train=True, transform=transform, download=True)
test_dataset = torchvision.datasets.CIFAR10(
    root='.', train=False, transform=transform)
接下来对标签进行编码
# Build a mapping from numeric class id to human-readable class name.
# (Both pickle files were extracted by the torchvision download above.)
data_batch_1 = pd.read_pickle(r'./cifar-10-batches-py/data_batch_1')
meta_data = pd.read_pickle(r'./cifar-10-batches-py/batches.meta')
# sorted() guarantees the ids line up positionally with label_names
# (which is ordered by class id).  The original iterated a bare set,
# whose ordering is an implementation detail and should not be relied on.
class_ids = sorted(set(data_batch_1['labels']))
# Class names, indexed by class id.
labels_name = meta_data['label_names']
label_dict = {k: v for k, v in zip(class_ids, labels_name)}
label_dict
输出:
接下来,创建DataLoader。先学习一下PyTorch的数据读取机制(DataLoader和Dataset)。
一个机器学习项目的步骤应该包括:数据处理==>模型建立==>损失函数==>优化器==>训练==>测试。数据处理是项目的开始,而数据读取是数据处理中的重要步骤。关于数据读取的学习参考以下:http://t.csdn.cn/MY6yF
# Create the DataLoaders used for training and validation.
batch_size = 64
# NOTE(review): shuffling the test loader is unconventional, though the
# aggregate metrics computed from it are order-independent.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=True)
模型建立:ResNet9
#resnet9
class BasicResidualBlock(nn.Module):
    """Plain residual block: returns x + F(x), where F = conv-relu-conv-relu.

    Generalized from the original fixed 3-channel version: ``channels``
    selects the number of in/out channels (default 3 preserves the old
    behavior).  Both convs are 3x3, stride 1, padding 1, so spatial size
    and channel count are preserved and the skip connection needs no
    projection.
    """

    def __init__(self, channels=3):
        super(BasicResidualBlock, self).__init__()
        self.conv1 = nn.Conv2d(in_channels=channels, out_channels=channels,
                               kernel_size=3, padding=1, stride=1)
        self.relu1 = nn.ReLU()
        self.conv2 = nn.Conv2d(in_channels=channels, out_channels=channels,
                               kernel_size=3, stride=1, padding=1)
        self.relu2 = nn.ReLU()

    def forward(self, x):
        residual = x  # identity branch
        out = self.conv1(x)
        out = self.relu1(out)
        out = self.conv2(out)
        out = self.relu2(out)
        out += residual  # skip connection
        return out
def conv_block(in_channels, out_channels, pool=False):
    """Return a 3x3 conv -> BatchNorm -> ReLU stage, plus an optional
    2x2 max-pool when ``pool`` is True (halves the spatial size)."""
    stage = [
        nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
    ] + ([nn.MaxPool2d(2)] if pool else [])
    return nn.Sequential(*stage)
class ResNet9(nn.Module):
    """ResNet9: four conv_block stems with two residual stages on top.

    Designed for 32x32 inputs (CIFAR-10): three pooled stages plus the
    classifier's 4x4 max-pool reduce 32 -> 1 spatially before the
    fully-connected head.
    """

    def __init__(self, in_channels, n_classes):
        super(ResNet9, self).__init__()
        self.conv1 = conv_block(in_channels, 64)
        self.conv2 = conv_block(64, 128, pool=True)   # 32 -> 16
        self.res1 = nn.Sequential(conv_block(128, 128), conv_block(128, 128))
        self.conv3 = conv_block(128, 256, pool=True)  # 16 -> 8
        self.conv4 = conv_block(256, 512, pool=True)  # 8 -> 4
        self.res2 = nn.Sequential(conv_block(512, 512), conv_block(512, 512))
        self.classifier = nn.Sequential(
            nn.MaxPool2d(4),                          # 4 -> 1
            nn.Flatten(),
            nn.Linear(512, 128),
            nn.Linear(128, n_classes),
        )

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = x + self.res1(x)  # first residual connection
        x = self.conv3(x)
        x = self.conv4(x)
        x = x + self.res2(x)  # second residual connection
        return self.classifier(x)
预测函数:
def prediction(data_loader, model, criterion, cuda=None):
    """Evaluate ``model`` over ``data_loader``.

    Returns ``(mean_batch_loss, error_rate)`` where error_rate is
    ``1 - accuracy``.  When ``cuda`` is not None, batches are moved to
    the GPU.  The caller is expected to wrap this in ``torch.no_grad()``.

    Fixes over the original:
    - the default was ``cuda=cuda`` — a reference to an undefined global
      evaluated at def time (NameError);
    - ``loss.to(device)`` used an undefined ``device`` and was a no-op;
    - ``len(list(data_loader))`` re-materialized every batch just to
      count them; a running counter avoids that.
    """
    correct = 0
    total = 0
    losses = 0.0
    n_batches = 0
    for images, labels in data_loader:
        if cuda is not None:
            images = images.cuda()
            labels = labels.cuda()
        outputs = model(images)
        loss = criterion(outputs, labels)
        # class prediction = index of the max logit
        _, predictions = torch.max(outputs, dim=1)
        correct += torch.sum(labels == predictions).item()
        total += labels.shape[0]
        losses += loss.item()
        n_batches += 1
    return losses / n_batches, 1 - correct / total
训练函数:
def get_lr(optimizer):
    """Return the current learning rate of the optimizer's first param
    group (None if, improbably, there are no param groups)."""
    groups = optimizer.param_groups
    return groups[0]['lr'] if groups else None
from tqdm.notebook import tqdm as tqdm
def fit(epochs, max_lr, model, train_loader, val_loader,
        criterion, weight_decay=0, grad_clip=None, opt_func=torch.optim.SGD,
        cuda=None):
    """Train ``model`` with a OneCycle LR schedule and validate each epoch.

    Returns ``history``: one dict per epoch with keys
    ``'train_loss'``, ``'val_loss'``, ``'val_acc'`` (percent) and
    ``'lrs'`` (per-batch learning rates).

    Fixes over the original:
    - the parameter was misspelled ``weight_deacy`` while the body read
      the *global* ``weight_decay``, so the argument was silently ignored;
    - ``train_losses`` was appended once per iteration AND once per
      epoch, corrupting the loss history;
    - ``'val_acc'`` (and the printed accuracy) were computed from the
      *training* error rate instead of the validation one;
    - the model is switched to eval mode for validation and back to
      train mode afterwards (matters for BatchNorm).
    """
    torch.cuda.empty_cache()  # release cached GPU memory
    history = []
    # Optimizer with the (now honored) weight decay.
    optimizer = opt_func(model.parameters(), max_lr, weight_decay=weight_decay)
    # One-cycle learning-rate schedule, stepped once per batch.
    sched = torch.optim.lr_scheduler.OneCycleLR(
        optimizer, max_lr, epochs=epochs, steps_per_epoch=len(train_loader))
    if cuda is not None:
        model.cuda()
    train_losses = []
    test_losses = []
    train_error_rates = []
    test_error_rates = []
    for epoch in range(epochs):
        train_loss = 0.0
        n_iter = 0
        total = 0
        correct = 0
        lrs = []
        # --- training phase ---
        model.train()
        with tqdm(enumerate(train_loader), total=len(train_loader),
                  desc=f'Epoch{epoch+1}/{epochs}', unit='it') as tbar:
            for i, (images, labels) in tbar:
                # Zero the gradients before computing new ones.
                optimizer.zero_grad()
                if cuda is not None:
                    images = images.cuda()
                    labels = labels.cuda()
                outputs = model(images)
                _, predictions = torch.max(outputs, 1)
                correct += torch.sum(labels == predictions).item()
                total += labels.shape[0]
                loss = criterion(outputs, labels)
                loss.backward()
                # Optional gradient clipping.
                if grad_clip is not None:
                    nn.utils.clip_grad_value_(model.parameters(), grad_clip)
                optimizer.step()
                lrs.append(get_lr(optimizer))
                sched.step()  # OneCycle advances per batch, not per epoch
                train_loss += loss.detach().item()
                n_iter += 1
                tbar.set_postfix(train_loss=train_loss / n_iter, lr=lrs[-1])
        train_error_rate = 1 - correct / total
        # --- validation phase ---
        model.eval()
        with torch.no_grad():
            test_loss, test_error_rate = prediction(val_loader, model,
                                                    criterion, cuda)
        train_error_rates.append(train_error_rate)
        test_error_rates.append(test_error_rate)
        train_losses.append(train_loss / n_iter)
        test_losses.append(test_loss)
        results = {'train_loss': train_loss / n_iter,
                   'val_loss': test_loss,
                   'val_acc': (1 - test_error_rate) * 100,
                   'lrs': lrs}
        history.append(results)
        print('Epoch:{}/{},last_lr:{:.5f},train_loss:{:.4f},val_loss:{:.4f},'
              'val_accuracy:{:.1f}%'.format(epoch + 1, epochs, lrs[-1],
                                            train_loss / n_iter, test_loss,
                                            (1 - test_error_rate) * 100))
    return history
# Training hyperparameters, optimizer and loss function.
epochs=20
max_lr=0.01  # peak learning rate for the OneCycle schedule
grad_clip=0.1  # gradient value-clipping threshold passed to fit()
weight_decay=1e-4  # L2 regularization strength
opt_func=torch.optim.Adam  # optimizer constructor handed to fit()
criterion=nn.CrossEntropyLoss()
训练:
# Run training on the ResNet9 model.
# NOTE(review): `resnet_model` and `cuda` are never defined in this
# snippet — both must exist before this call runs; verify upstream.
history_resnet=fit(epochs,max_lr,resnet_model,
train_loader,test_loader,criterion,weight_decay,
opt_func=opt_func,cuda=cuda)
输出预测结果:
# Predict labels for the Kaggle test images and write submission.csv.
test_dir = "E:/wodezhuomian/competitions/kaggle/cifar-10/test/"
test_set = torchvision.datasets.ImageFolder(test_dir, transform=transform)
batch_size = 64
# fixed: `DataLoader` was used unqualified but never imported
test_iter = torch.utils.data.DataLoader(test_set, batch_size=batch_size)

resnet = ResNet9(in_channels=3, n_classes=10)
# NOTE(review): PATH_RESNET is not defined in this snippet — set it to
# the saved checkpoint path before running.
resnet.load_state_dict(torch.load(PATH_RESNET))
resnet.eval()  # fixed: BatchNorm layers must be in eval mode at inference

preds = []
with torch.no_grad():  # fixed: no gradients needed at inference time
    for X, _ in tqdm(test_iter):
        y = resnet(X)
        preds.extend(y.argmax(dim=1).type(torch.int32).cpu().numpy())

# ImageFolder yields files in lexicographic (string) order, so sort the
# numeric ids the same way to line them up with `preds`.
# fixed: the original shadowed the builtin `sorted`
ids = list(range(1, 300001))
ids.sort(key=lambda x: str(x))
df = pd.DataFrame({'id': ids, 'label': preds})
df['label'] = df['label'].map(label_dict)
df = df.sort_values('id')
df.to_csv('submission.csv', index=False)  # fixed: index=False, not 0