记录学习pytorch的过程,从分类任务做起,就从最常见的cifar10下手,数据可在kaggle下载,具体步骤和代码请参考本文余下内容。在cifar10上能有98%的准确率。
1、文件及代码组织目录如下所示:
.
├── data
│ ├── class2idx.json
│ ├── test
│ ├── train
│ │ ├── airplane
│ │ ├── automobile
│ │ ├── bird
│ │ ├── cat
│ │ ├── deer
│ │ ├── dog
│ │ ├── frog
│ │ ├── horse
│ │ ├── ship
│ │ └── truck
│ └── trainLabels.csv
├── log
├── models
│ ├── best_model
│ └── last_model
├── sampleSubmission.csv
└── scripts
├── densenet.py
├── __init__.py
├── predict.py
├── preprocess.py
├── result.csv
└── train_model.py
16 directories, 13 files
2、解压并依据trainLabels.csv,将图片划分到10个文件夹下,方便后面步骤调用ImageFolder进行读取数据,preprocess.py如下所示。
import os
import shutil

try:
    from tqdm import tqdm
except ImportError:  # tqdm is only a progress bar; fall back to a plain iterator
    def tqdm(iterable, **kwargs):
        return iterable

# Default Kaggle CIFAR-10 layout: a flat folder of <id>.png plus a label CSV.
train_data = '../data/train'
train_labels = '../data/trainLabels.csv'


def split_data(labels, data_dir=train_data):
    """Sort the flat training images into one sub-folder per class.

    Reads the Kaggle ``trainLabels.csv`` file (``id,label`` rows after one
    header line) and moves every ``<id>.png`` from *data_dir* into
    ``<data_dir>/<label>/`` so ``torchvision.datasets.ImageFolder`` can
    consume the resulting tree later.

    :param labels: path to the CSV file mapping image id to class name
    :param data_dir: directory holding the raw ``<id>.png`` images; defaults
        to the module-level ``train_data`` path, so existing callers that
        pass only ``labels`` keep working
    """
    with open(labels) as f_labels:
        for line_no, line in enumerate(tqdm(f_labels.readlines())):
            line = line.strip()
            # Skip the header by position: the original `'id' in line` test
            # would also drop any data row whose class name contains "id".
            if line_no == 0 or not line:
                continue
            im_name, im_cls = line.split(',')
            dst_path = os.path.join(data_dir, im_cls)
            os.makedirs(dst_path, exist_ok=True)  # no race-prone exists() check
            shutil.move(os.path.join(data_dir, im_name + '.png'), dst_path)
3、数据处理完成之后,写训练网络的代码train_model.py,如下所示。未完成多卡训练的功能,使用多卡训练一个epoch后会报错,因此暂且放弃。代码不够优雅,等有空再去整理。函数参数中只需要一个dataloader就可以,代码中的两个是想做mixup加上去的,代码未完成。
import torch
import torchvision
from torchvision import transforms,datasets,models
import os
import numpy as np
import time
import json
from torch import nn,optim
from torch.autograd import Variable
from tqdm import tqdm
def train_model(model, criterion, optimizer, data_loaders1, data_loaders2, num_images,
                cuda_device=False, finetune=None, num_epochs=25, CUDA_ID=0):
    '''Train *model*, logging per-epoch metrics and checkpointing weights.

    :param model: model to train
    :param criterion: loss function
    :param optimizer: optimizer bound to ``model``'s parameters
    :param data_loaders1: DataLoader yielding (inputs, labels) batches
    :param data_loaders2: reserved for a planned mixup variant; currently unused
    :param num_images: total number of training images (metric denominator)
    :param cuda_device: True to run on GPU ``CUDA_ID``, False for CPU
    :param finetune: None trains from scratch; 'best' resumes from
        ``../models/best_model``; 'last' resumes from ``../models/last_model``
    :param num_epochs: upper bound (exclusive) on the epoch counter
    :param CUDA_ID: GPU ordinal used when ``cuda_device`` is True
    :return: None.  Side effects: a timestamped log file under ``../log`` and
        model checkpoints plus ``*_epoch.txt`` progress files under ``../models``.
    '''
    since = time.time()
    best_acc = 0.0
    begin_epoch = 0
    # Create the output directories up front so none of the writes below fail.
    os.makedirs('../log', exist_ok=True)
    os.makedirs('../models', exist_ok=True)
    now_time = time.strftime('%Y_%m_%d_%H_%M_%S', time.localtime(time.time()))
    with open('../log/' + str(now_time) + '.txt', 'w') as f_log:
        if finetune:
            txt_path = '../models/{}_epoch.txt'.format(finetune)
            model_path = '../models/{}_model'.format(finetune)
            if os.path.exists(txt_path):
                with open(txt_path, 'r') as f_epoch:
                    begin_epoch, best_acc = f_epoch.read().strip().split(',')
                # The txt file records the epoch that FINISHED; resume at the
                # next one instead of retraining it (off-by-one in original).
                begin_epoch, best_acc = int(float(begin_epoch)) + 1, float(best_acc)
            if not os.path.exists(model_path):
                print("Cannot find {} !!!".format(model_path))
                print('Train from scratch...')
            else:
                model.load_state_dict(torch.load(model_path))
                print('Finetuning ....')
        else:
            print('Train from scratch...')
        # Move the model to the GPU once, outside the epoch loop.
        if cuda_device:
            model = model.cuda(CUDA_ID)
        for epoch in range(begin_epoch, num_epochs):
            model.train()
            running_loss = 0.0
            running_acc = 0
            for inputs, labels in tqdm(data_loaders1):
                if cuda_device:
                    inputs = inputs.cuda(CUDA_ID)
                    labels = labels.cuda(CUDA_ID)
                optimizer.zero_grad()
                outputs = model(inputs)
                _, preds = torch.max(outputs, 1)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                # .item() works on CPU and GPU tensors alike; it replaces the
                # deprecated Variable wrapper and .data/.numpy() access.
                running_loss += loss.item()
                running_acc += (preds == labels).sum().item()
            # NOTE(review): epoch_loss averages the per-batch mean losses over
            # images, matching the original metric definition.
            epoch_loss = running_loss / num_images
            epoch_acc = running_acc / num_images
            logs = 'Epoach at {}/{}, Train loss: {}, Acc: {}'.format(str(epoch),
                                                                     str(num_epochs),
                                                                     str(epoch_loss),
                                                                     str(epoch_acc))
            print(logs)
            f_log.write(logs + '\n')  # newline was missing: log entries ran together
            # Checkpoint the best-accuracy weights...
            if epoch_acc > best_acc:
                best_acc = epoch_acc
                torch.save(model.state_dict(), '../models/best_model')
                with open('../models/best_epoch.txt', 'w') as f_best_epoch:
                    f_best_epoch.write(str(epoch) + ',' + str(best_acc))
            # ...and always the latest.  Save a state_dict (not the whole
            # pickled model) so the finetune='last' branch above can actually
            # restore it with load_state_dict().
            torch.save(model.state_dict(), '../models/last_model')
            with open('../models/last_epoch.txt', 'w') as f_last_epoch:
                f_last_epoch.write(str(epoch) + ',' + str(epoch_acc))
            now = time.time() - since
            print('Epoch {}/{}, time cost: {}s'.format(str(epoch), str(num_epochs), str(now)))
4、调用torchvision中的densenet做训练,具体代码为densenet.py,如下所示。时间比较紧迫,未完成可以带参数运行的功能,待有空再增加。
#-*- encoding:utf-8 -*-
import torch
import torchvision
from torchvision import transforms,datasets,models
import os
import matplotlib.pyplot as plt
import numpy as np
import time
import copy
import json
from torch import nn,optim
from train_model import train_model
# Augmentation pipeline for training: random crop / flip / small rotation on
# top of the standard ImageNet normalization statistics.
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize(256),
        transforms.RandomCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(5),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
        # Cutout(n_holes=1, length=8)
    ]),
}
data_dir = u'../data/train'  # one sub-folder per class (built by preprocess.py)
class2idx = u'../data/class2idx.json'  # class-name -> index map, consumed by predict.py
BATCH_SIZE = 48
train_datas = datasets.ImageFolder(root=data_dir, transform=data_transforms['train'])
class_names = train_datas.classes
num_classes = len(train_datas.class_to_idx)
num_images = len(train_datas)
# Print a quick overview of the dataset for sanity checking.
print(class_names)
print('num of images: {}'.format(str(num_images)))
print('num of classes: {}'.format(str(num_classes)))
# Persist the class -> index mapping once so inference can invert it later.
if not os.path.exists(class2idx):
    with open(class2idx, 'w') as f_json:
        f_json.write(json.dumps(train_datas.class_to_idx))
# Two loaders over the same data: the second is reserved for a future mixup
# implementation (see train_model's data_loaders2 parameter).
data_loaders1 = torch.utils.data.DataLoader(train_datas, batch_size=BATCH_SIZE, shuffle=True)
data_loaders2 = torch.utils.data.DataLoader(train_datas, batch_size=BATCH_SIZE, shuffle=True)
# Backbone: ImageNet-pretrained DenseNet-201 (the original comment wrongly
# said densenet169), with the classifier head replaced to match our classes.
# net = models.resnet152(pretrained=True,)
# num_in_features = net.fc.in_features
# net.fc = nn.Linear(num_in_features,num_classes)
net = models.densenet201(pretrained=True)
num_in_features = net.classifier.in_features
net.classifier = nn.Linear(num_in_features, num_classes)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(net.parameters(), lr=1e-5)
train_model(model=net,
            criterion=criterion,
            optimizer=optimizer,
            data_loaders1=data_loaders1,
            data_loaders2=data_loaders2,
            num_images=num_images,
            cuda_device=True,
            finetune='best',
            num_epochs=100)
5、最后一步就是进行预测了,predict.py代码如下。需要复写Dataset的实现方式,让前向推理过程支持batch操作,增加预测的速度。
import torch
import torch.nn as nn
import os
import cv2
import json
from tqdm import tqdm
from PIL import Image
from torchvision import models,transforms
from torch.utils import data
class Dataset(data.Dataset):
    """Test-set dataset yielding ``(image_tensor, image_id)`` pairs.

    Wraps a flat directory of ``<id>.png`` files so inference can run in
    batches through a DataLoader instead of image-by-image.
    """

    def __init__(self, image_path):
        """
        :param image_path: directory containing the test ``.png`` images
        """
        self.image_path = image_path
        self.images_list = self.get_images(self.image_path)
        # Deterministic eval transform: Resize + CenterCrop replaces the
        # original deprecated transforms.Scale + RandomCrop, so every run
        # yields the same prediction for a given image.
        self.data_transforms = {
            'val': transforms.Compose([
                transforms.Resize(256),
                transforms.CenterCrop(224),
                transforms.ToTensor(),
                transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
            ])}

    def __getitem__(self, index):
        """Return (normalized 3x224x224 tensor, file name without '.png')."""
        image_name = self.images_list[index]
        # Force 3-channel RGB: palette/alpha PNGs would otherwise break the
        # 3-channel Normalize step.
        input_image = Image.open(os.path.join(self.image_path, image_name)).convert('RGB')
        input_tensor = self.data_transforms['val'](input_image)
        return input_tensor, image_name.replace('.png', '')

    def __len__(self):
        return len(self.images_list)

    def get_images(self, image_path):
        """List the file names found directly under *image_path*."""
        return os.listdir(image_path)
test_path = '../data/test'
test_dataset = Dataset(test_path)
# Load the class-name -> index map written during training and invert it so a
# predicted index maps back to a class name for the submission file.
with open(u'../data/class2idx.json') as f_json:
    class_to_idx = json.load(f_json)
print(type(class_to_idx))
print(class_to_idx)
idx_to_class = {v: k for k, v in class_to_idx.items()}
num_classes = len(idx_to_class)
# Rebuild the exact training architecture before loading the checkpoint.
net = models.densenet201(pretrained=False)
num_in_features = net.classifier.in_features
net.classifier = nn.Linear(num_in_features, num_classes)  # fit densenet for our datasets
# net = models.resnet152(pretrained=False)
# num_in_features = net.fc.in_features
# net.fc = nn.Linear(num_in_features,num_classes)
USING_GPU = True
GPU_ID = 1
if not USING_GPU:
    # cpu model
    net.load_state_dict(torch.load('../models/best_model', map_location='cpu'))
else:
    # gpu model
    net.load_state_dict(torch.load('../models/best_model'))
    net.cuda(GPU_ID)
net = net.eval()
BATCH_SIZE = 16
test_dataloader = torch.utils.data.DataLoader(dataset=test_dataset,
                                              batch_size=BATCH_SIZE,
                                              shuffle=False)
with open('result.csv', 'w') as f_csv:
    f_csv.write('id,label\n')
    # Disable autograd for inference: no gradients are needed, which cuts
    # memory use and speeds up the forward pass.
    with torch.no_grad():
        for inputs, names in tqdm(test_dataloader):
            if USING_GPU:
                inputs = inputs.cuda(GPU_ID)
            outputs = net(inputs)
            _, preds = torch.max(outputs, 1)
            for name, pred in zip(names, preds):
                f_csv.write('{},{}\n'.format(str(name), str(idx_to_class[int(pred.cpu().numpy())])))
6、从头到尾折腾一遍,思路清晰很多,对框架的使用也有进一步的了解。暂时没空上传到github,等有空再更新。