I had done a few simple deep learning projects before, which in my view were mostly "library-caller engineer" work: applying existing models to whatever data the project had. As a newcomer to Kaggle, I decided to start with one of the simplest competitions as an entry point to raise my skill level.
Environment:
autokeras 0.4.0
torch 1.3.1
CUDA 10.0
cuDNN 7.5.1
GPU: RTX 2070
This post records the main workflow of the project. The main steps are:
1) Data preprocessing: fairly simple; resize the images and save them as numpy (.npy) arrays.
2) Initial model screening with autokeras: a short search run to find a reasonably suitable architecture.
3) Load the resulting pretrained model in pytorch and train it further.
4) Predict on the test data, generate the csv file, and submit it.
The code is uploaded at:
github/dogs_vs_cats
1. Data Preprocessing
Training data: store the images and label information as .npy files so that a large amount of time is not spent reloading the raw images on every run. The label is encoded in each file name: cat is recorded as 0, dog as 1.
Test data: sort the files by their numeric id (1-12500), load the images in that order, and store them as .npy as well.
import os
import cv2
import numpy as np
image_size = (64,64)
# your path to the dataset
data_dir = '/mnt/HDD/Datasets/kaggle/dogs-vs-cats/'
train_dir = data_dir + "train/"
test_dir = data_dir + 'test1/'
test_list = os.listdir(test_dir)
train_list = os.listdir(train_dir)
train_whole_images = []
train_whole_labels = []
test_whole_images = []
test_whole_ids = []
# sort the test list by the ids
for ind, name in enumerate(test_list):
    test_whole_ids.append(int(name.split('.')[0]))
test_whole_ids = sorted(test_whole_ids)
test_list = [str(id_int) + '.jpg' for id_int in test_whole_ids]
for ind, name in enumerate(test_list):
    test_whole_images.append(cv2.resize(cv2.imread(test_dir + name), image_size))
for ind, name in enumerate(train_list):
    print(ind)
    train_whole_images.append(cv2.resize(cv2.imread(train_dir + name), image_size))
    if 'cat' in name:
        train_whole_labels.append(0)
    else:
        train_whole_labels.append(1)
# store the datasets as numpy files
np.save('test_whole_images_'+str(image_size[0])+'.npy',np.asarray(test_whole_images))
np.save('train_whole_images_'+str(image_size[0])+'.npy',np.asarray(train_whole_images))
np.save('train_whole_labels_'+str(image_size[0])+'.npy',np.asarray(train_whole_labels))
# reload the datasets stored as numpy files (the test set has no labels, so only its images are saved)
#train_whole_images = np.load('train_whole_images_'+str(image_size[0])+'.npy')
#train_whole_labels = np.load('train_whole_labels_'+str(image_size[0])+'.npy')
#test_whole_images = np.load('test_whole_images_'+str(image_size[0])+'.npy')
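Before moving on, a quick sanity check of the saved arrays is useful: the later scripts assume NHWC uint8 images of shape (N, 64, 64, 3) and 0/1 labels. A minimal sketch, using the file names from the script above:
import numpy as np

image_size = (64, 64)
train_images = np.load('train_whole_images_' + str(image_size[0]) + '.npy')
train_labels = np.load('train_whole_labels_' + str(image_size[0]) + '.npy')
test_images = np.load('test_whole_images_' + str(image_size[0]) + '.npy')

# expected: (25000, 64, 64, 3) uint8 train images, 25000 labels, (12500, 64, 64, 3) test images
print(train_images.shape, train_images.dtype)
print(train_labels.shape, np.bincount(train_labels))  # class balance: cats (0) vs dogs (1)
print(test_images.shape)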
2. Initial Model Screening with Autokeras
The autokeras site currently offers two versions, 0.4.0 and 1.0. In practice, the 1.0 documentation turned out to be very sparse, and I could not get model export and further loading/training in other frameworks (TF, pytorch) to work with it, so I chose 0.4.0.
In 0.4.0, the search parameters (backend selection, maximum number of models to search, maximum model size, and so on) can be changed in the constant.py file under the autokeras installation path. After that, load the training .npy files generated above and run the model search.
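As an alternative to editing constant.py directly, the same settings can usually be overridden in code before calling fit. A rough sketch; the attribute names below are taken from autokeras 0.4.x's constant.py and are an assumption here, so check the file in your own install (the backend switch also lives there):
from autokeras.constant import Constant

# assumed attribute names from autokeras 0.4.x's constant.py; verify against your install
Constant.MAX_MODEL_NUM = 20        # try at most 20 candidate architectures
Constant.MAX_MODEL_SIZE = 1 << 25  # rough cap on the size of models considered in the search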
import os
import cv2
import numpy as np
image_size = (64,64)
#load numpy data file
train_images_npy = 'train_whole_images_'+str(image_size[0])+'.npy'
train_labels_npy = 'train_whole_labels_'+str(image_size[0])+'.npy'
train_whole_images = np.load(train_images_npy)
train_whole_labels = np.load(train_labels_npy)
import autokeras as ak
import torch
import torchvision
classifier = ak.ImageClassifier(verbose=True, path='autokeras_temp_train')
# time_limit is in seconds: 60*50*1 = 50 minutes of architecture search
classifier.fit(x=np.asarray(train_whole_images, dtype=np.uint8), y=np.asarray(train_whole_labels, dtype=np.float16), time_limit=60*50*1)
# the file used to store the model (both structure and parameters)
MODEL_DIR = 'model.h5'
# different ways to save the model; their availability may differ between autokeras versions
# 1)
#classifier.export_keras_model(MODEL_DIR)
# 2)
#from autokeras.utils import pickle_to_file,pickle_from_file
#pickle_to_file(classifier,MODEL_DIR)
# 3)
torch.save(classifier.cnn.best_model.produce_model(),MODEL_DIR)
model = torch.load(MODEL_DIR)
model.eval()
with torch.no_grad():
    # the number of samples chosen for a quick check of the searched model
    test_num = 100
    rand_ind = np.random.randint(0, len(train_whole_images) - 1, test_num)
    # the torch model produced by autokeras expects NCHW input, so swap the axes first
    sample = np.transpose(train_whole_images[rand_ind], (0, 3, 1, 2))
    predict = model(torch.Tensor(sample)).cpu()
    label = train_whole_labels[rand_ind]
    print(torch.argmax(predict, dim=1))
    print('label:', label)
    print(torch.argmax(predict, dim=1).numpy() == label)
    dog = np.sum(label)
    cat = test_num - np.sum(label)
    right = np.sum(torch.argmax(predict, dim=1).numpy() == label)
    print('accuracy:', float(right) / test_num)
    print('cat:', cat)
    print('dog:', dog)
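If the whole classifier was saved with pickle_to_file (method 2 above), it can also be reloaded and used through the autokeras API itself. A sketch, assuming autokeras 0.4.0's predict/evaluate methods and reusing the training arrays already in memory; the held-out slice is only for illustration, not a proper validation split:
from autokeras.utils import pickle_from_file

# reload a classifier previously saved with pickle_to_file(classifier, MODEL_DIR)
classifier = pickle_from_file(MODEL_DIR)

# evaluate on the last 1000 training samples (an arbitrary slice for illustration)
x_val = np.asarray(train_whole_images[-1000:], dtype=np.uint8)
y_val = np.asarray(train_whole_labels[-1000:])
print('accuracy:', classifier.evaluate(x_val, y_val))
print('predictions:', classifier.predict(x_val[:10]))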
The model found by the search is as follows:
(origin_model): TorchModel(
(0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(3): ReLU()
(4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(6): ReLU()
(7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(8): ReLU()
(9): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(10): TorchAdd()
(11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(12): ReLU()
(13): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(15): ReLU()
(16): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(17): ReLU()
(18): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
(19): TorchAdd()
(20): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(21): ReLU()
(22): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(23): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(24): ReLU()
(25): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(26): ReLU()
(27): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
(28): TorchAdd()
(29): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(30): ReLU()
(31): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(32): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(33): ReLU()
(34): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(35): ReLU()
(36): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
(37): TorchAdd()
(38): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(39): ReLU()
(40): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(41): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(42): ReLU()
(43): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(44): ReLU()
(45): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
(46): TorchAdd()
(47): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(48): ReLU()
(49): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(50): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(51): ReLU()
(52): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(53): ReLU()
(54): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
(55): TorchAdd()
(56): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(57): ReLU()
(58): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
(59): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(60): ReLU()
(61): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(62): ReLU()
(63): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
(64): TorchAdd()
(65): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(66): ReLU()
(67): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(68): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
(69): ReLU()
(70): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
(71): ReLU()
(72): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
(73): TorchAdd()
(74): GlobalAvgPool2d()
(75): Linear(in_features=512, out_features=2, bias=True)
)
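The printout is essentially a small ResNet-style network: stacks of 3x3 convolutions with 1x1 shortcut convolutions merged by TorchAdd (skip connections), followed by global average pooling and a 2-way linear layer. A quick way to gauge its size, assuming the model.h5 file saved above (autokeras must be installed so the pickled layers can be resolved):
import torch

model = torch.load('model.h5')
num_params = sum(p.numel() for p in model.parameters())
print('parameters: %.2fM' % (num_params / 1e6))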
3. Further Training with PyTorch
I had not used pytorch before; tensorflow was my usual framework. The reason for choosing pytorch is that the file exported by autokeras could not be loaded by keras with a TF backend, while it loads fine in pytorch (despite the .h5 extension, model.h5 above is really a pickled pytorch model written by torch.save).
There is one issue here: the model generated by autokeras outputs a length-2 vector that has not been passed through softmax. So I build a small pytorch module that wraps the loaded model and appends a Softmax layer to turn the output into class probabilities. (Note that nn.CrossEntropyLoss applies log-softmax internally, so the extra layer mainly matters for reading off probabilities at inference time.)
class TestModel(nn.Module):
    def __init__(self):
        super(TestModel, self).__init__()
        # load the model pretrained by autokeras
        self.origin_model = torch.load(MODEL_DIR)
        self.softmax = nn.Softmax(dim=1)
    def forward(self, x):
        x = self.origin_model(x)
        x = self.softmax(x)
        return x
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
image_size = (64, 64)
import cv2
import numpy as np
import autokeras as ak
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
class TestModel(nn.Module):
    def __init__(self):
        super(TestModel, self).__init__()
        # load the model pretrained by autokeras
        self.origin_model = torch.load(MODEL_DIR)
        self.softmax = nn.Softmax(dim=1)
    def forward(self, x):
        x = self.origin_model(x)
        x = self.softmax(x)
        return x
MODEL_DIR = 'model.h5'
test_model = TestModel()
test_model.train()
test_model = nn.DataParallel(test_model, device_ids=[0]).cuda()
torch.save(test_model,'test_model')
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(test_model.parameters(), lr=0.001, momentum=0.9)
train_whole_images = np.load('train_whole_images_'+str(image_size[0])+'.npy')
train_whole_labels = np.load('train_whole_labels_'+str(image_size[0])+'.npy')
#change the channel from NHWC to NCHW
train_whole_images = np.swapaxes(train_whole_images, 1, 3)
train_whole_images = np.swapaxes(train_whole_images, 2, 3)
batchsize = 128
for epoch in range(100):  # loop over the dataset multiple times
    for batch_ind in range(int(len(train_whole_images) / batchsize)):
        i = batch_ind
        # slice end must be (i+1)*batchsize, otherwise the last sample of every batch is dropped
        train_whole_images_batch = train_whole_images[i * batchsize:(i + 1) * batchsize]
        train_whole_labels_batch = train_whole_labels[i * batchsize:(i + 1) * batchsize]
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        inputs = torch.Tensor(train_whole_images_batch).cuda()
        outputs = test_model(inputs)
        loss = criterion(outputs.squeeze(), torch.Tensor(np.asarray(train_whole_labels_batch)).long().squeeze().cuda())
        loss.backward()
        optimizer.step()
        if i % 20 == 0:
            print('epoch:', epoch + 1, ', progress:', float(i / float(len(train_whole_images)) * batchsize) * 100, '%, loss:', loss.item())
    torch.save(test_model, 'test_model')
    # quick check on a random subset of the training data after each epoch
    with torch.no_grad():  # gradients are not tracked here, which saves GPU memory during evaluation
        test_model.eval()
        test_num = 100
        rand_ind = np.random.randint(0, len(train_whole_images) - 1, test_num)
        predict = test_model(torch.Tensor(train_whole_images[rand_ind])).cpu()
        print(torch.argmax(predict, dim=1))
        label = train_whole_labels[rand_ind]
        print('label:', label)
        dog = np.sum(label)
        cat = test_num - np.sum(label)
        right = np.sum(torch.argmax(predict, dim=1).numpy() == label)
        print(torch.argmax(predict, dim=1).numpy() == label)
        print('epoch', epoch + 1, 'accuracy:', float(right) / test_num)
        print('cat:', cat)
        print('dog:', dog)
        test_model.train()
    # clear the cuda cache
    torch.cuda.empty_cache()
print('Finished Training')
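The manual batch slicing above works, but PyTorch's data utilities make shuffling, batching, and a held-out validation split less error-prone. A sketch of an alternative input pipeline (this is not what the code above does; the 10% validation split is my own assumption), which would then feed the same training loop:
import numpy as np
import torch
from torch.utils.data import TensorDataset, DataLoader, random_split

image_size = (64, 64)
images = np.load('train_whole_images_' + str(image_size[0]) + '.npy')
labels = np.load('train_whole_labels_' + str(image_size[0]) + '.npy')

# NHWC uint8 -> NCHW float tensors, labels as int64 for CrossEntropyLoss
x = torch.from_numpy(images).permute(0, 3, 1, 2).float()
y = torch.from_numpy(labels).long()

dataset = TensorDataset(x, y)
n_val = len(dataset) // 10  # hold out 10% for validation
train_set, val_set = random_split(dataset, [len(dataset) - n_val, n_val])

train_loader = DataLoader(train_set, batch_size=128, shuffle=True)
val_loader = DataLoader(val_set, batch_size=128)

# the loaders then drop into the same loop structure as above:
# for inputs, targets in train_loader:
#     outputs = test_model(inputs.cuda())
#     loss = criterion(outputs, targets.cuda())
#     ...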
4. Predicting on the Test Data
Generate the submission file in the format required by the competition.
import os
import cv2
import numpy as np
import autokeras as ak
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
# the wrapper class must be defined here as well so that torch.load can unpickle the saved model
class TestModel(nn.Module):
    def __init__(self):
        super(TestModel, self).__init__()
        # load origin model
        self.origin_model = torch.load(MODEL_DIR)
        self.softmax = nn.Softmax(dim=1)
    def forward(self, x):
        x = self.origin_model(x)
        x = self.softmax(x)
        return x
image_size = (64, 64)
MODEL_DIR = 'test_model'
# 'test_model' already holds the trained, DataParallel-wrapped model saved during training,
# so it can be loaded directly instead of being wrapped again
test_model = torch.load(MODEL_DIR).cuda()
test_model.eval()
# reload the preprocessed test images; they were saved sorted by id (1..12500),
# so the id list can be regenerated from the array length
test_whole_images = np.load('test_whole_images_' + str(image_size[0]) + '.npy')
test_whole_ids = list(range(1, len(test_whole_images) + 1))
# change the layout from NHWC to NCHW, as in training
test_whole_images = np.swapaxes(test_whole_images, 1, 3)
test_whole_images = np.swapaxes(test_whole_images, 2, 3)
batchsize = 100
with open('submission.csv', 'w') as f:
    # header as in the competition's sample submission
    f.write('id,label\n')
    with torch.no_grad():
        for i in range(int(len(test_whole_images) / batchsize)):
            predict = test_model(torch.Tensor(test_whole_images[i * batchsize:(i + 1) * batchsize])).cpu()
            predict = torch.argmax(predict, dim=1).numpy()
            print(predict)
            ids = test_whole_ids[i * batchsize:(i + 1) * batchsize]
            for image_id, output in zip(ids, predict):
                f.write(str(image_id) + ',' + str(output) + '\n')
print('Finished Prediction')
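Before uploading, I would check that submission.csv has the header plus one row per test image (12500) and that every id appears exactly once. A small sketch:
import csv

with open('submission.csv') as f:
    rows = list(csv.reader(f))

header, body = rows[0], rows[1:]
print(header)              # expected: ['id', 'label']
print('rows:', len(body))  # expected: 12500, one per test image
ids = [int(r[0]) for r in body]
print('ids complete:', sorted(ids) == list(range(1, len(body) + 1)))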