[kaggle入门]Autokeras与Dogs vs Cats猫狗识别

之前做过一些简单的深度学习项目,在我看来主要是一些调包工程师的工作,应用现有的模型对一些项目进行训练。初入kaggle,打算以最简单的项目为切入点,提升自己的姿势水平。

环境:autokeras 0.4.0;
torch 1.3.1;
cuda10.0;
cudnn 7.5.1;
gpu rtx2070

本文记录了这一项目进行的主要逻辑流程,主要步骤如下:

1)数据预处理,比较简单:将图片resize为统一尺寸,并保存为numpy的.npy格式
2)使用autokeras进行模型的初筛,通过短时间的预训练搜索出较为合适的模型
3)用pytorch加载现有的预训练模型,进行进一步的训练
4)对测试数据进行预测生成csv文件,上传

代码上传至:
github/dogs_vs_cats

一、数据预处理
训练数据:将数据中的图片和标签信息储存为.npy格式,避免每次加载图像占用大量时间;数据名字中包含了标签信息,cat记为0,dog记为1。
测试数据:测试图片的文件名是编号(1-12500),按编号从小到大排序后依次加载图片,存储为npy格式。

import os 
import cv2
import numpy as np
# Preprocessing: resize every train/test image to `image_size` and store the
# results as .npy files so later steps do not have to decode the images again.

# Target size passed to cv2.resize for every image.
image_size = (64,64)
#your path of the datasets
data_dir = '/mnt/HDD/Datasets/kaggle/dogs-vs-cats/'
train_dir = data_dir + "train/"
test_dir = data_dir + 'test1/'
test_list = os.listdir(test_dir)
train_list = os.listdir(train_dir)


def _load_resized(path):
    """Read the image at `path` and return it resized to `image_size`.

    Raises IOError when the file cannot be read: cv2.imread signals failure
    by returning None, which would otherwise only surface as an unrelated
    error inside cv2.resize.
    """
    image = cv2.imread(path)
    if image is None:
        raise IOError('cannot read image: ' + path)
    return cv2.resize(image, image_size)


# sort the test list by the ids (numerically, so that 2.jpg comes before
# 10.jpg); os.listdir returns the names in arbitrary order
test_whole_ids = sorted(int(name.split('.')[0]) for name in test_list)
test_list = [str(id_int)+'.jpg' for id_int in test_whole_ids]
test_whole_images = [_load_resized(test_dir+name) for name in test_list]

train_whole_images = []
train_whole_labels = []
for ind,name in enumerate(train_list):
    # progress indicator, printed every 1000 images instead of once per image
    if ind % 1000 == 0:
        print(ind)
    train_whole_images.append(_load_resized(train_dir+name))
    # the label is taken from the file name: 'cat' -> 0, anything else -> 1
    train_whole_labels.append(0 if 'cat' in name else 1)

# store the datasets in numpy files; the edge length is part of the file name
size_tag = str(image_size[0])
np.save('test_whole_images_'+size_tag+'.npy',np.asarray(test_whole_images))
# the ids are stored as well so that row k of the test images can be mapped
# back to its image id without listing the directory again
np.save('test_whole_ids_'+size_tag+'.npy',np.asarray(test_whole_ids))
np.save('train_whole_images_'+size_tag+'.npy',np.asarray(train_whole_images))
np.save('train_whole_labels_'+size_tag+'.npy',np.asarray(train_whole_labels))
# reload the datasets stored in numpy files
#test_whole_images = np.load('test_whole_images_'+size_tag+'.npy')
#test_whole_ids = np.load('test_whole_ids_'+size_tag+'.npy')

二、Autokeras模型初筛
目前autokeras官网有两个版本,0.4.0和1.0。在实际使用中发现1.0版本的文档很少,模型导出以及在其他框架(TF、pytorch等)中加载和继续训练都无法实现,因此选择了0.4.0版本。
0.4.0版本中网络搜索的各种参数(比如backend选择、最大搜索次数、模型最大规模等)可以在Autokeras安装路径下的constant.py文件中进行更改;之后加载已经生成的训练npy文件,进行模型筛选。

import os 
import os
import cv2
import numpy as np
# Edge length used during preprocessing; it is part of the .npy file names.
image_size = (64,64)
# Locate the stored training arrays and read them back into memory.
train_images_npy = f'train_whole_images_{image_size[0]}.npy'
train_labels_npy = f'train_whole_labels_{image_size[0]}.npy'
train_whole_images, train_whole_labels = (
    np.load(train_images_npy),
    np.load(train_labels_npy),
)

import autokeras as ak
import torch
import torchvision
# Run the autokeras architecture search on the stored training arrays.
classifier = ak.ImageClassifier(verbose = True, path = 'autokeras_temp_train')
# time_limit = 60*50*1 = 3000 (presumably seconds, i.e. 50 minutes -- confirm
# against the autokeras 0.4.0 documentation)
classifier.fit(x=np.asarray(train_whole_images, dtype = np.uint8), y=np.asarray(train_whole_labels,dtype =np.float16),time_limit = 60*50*1)
# the file name to store the model both structure and parameters
MODEL_DIR = 'model.h5'
# different methods to save the model, some of which may differ between versions.
# 1)
#classifier.export_keras_model(MODEL_DIR)
# 2)
#from autokeras.utils import pickle_to_file,pickle_from_file
#pickle_to_file(classifier,MODEL_DIR) 
# 3)
torch.save(classifier.cnn.best_model.produce_model(),MODEL_DIR)
model = torch.load(MODEL_DIR)
model.eval()
# Quick sanity check of the exported model on a random sample of the
# training data (this is not a held-out evaluation).
with torch.no_grad():
    # the number of samples chosen to test the model
    test_num = 100
    # np.random.randint excludes the upper bound, so pass len(...) itself;
    # len(...)-1 would never select the last sample
    rand_ind = np.random.randint(0,len(train_whole_images),test_num)
    # The stored images are NHWC while the exported network starts with
    # Conv2d(3, 64, ...), i.e. it expects the channels on axis 1 (NCHW).
    # NOTE(review): autokeras may also normalise pixel values during fit;
    # confirm whether the exported model expects normalised input.
    sample = np.transpose(train_whole_images[rand_ind], (0, 3, 1, 2))
    predict = model(torch.Tensor(sample)).cpu()
    predicted = torch.argmax(predict,dim=1)
    label = train_whole_labels[rand_ind]
    print(predicted)
    print('label:', label)
    hits = predicted.numpy()==label
    print(hits)
    # labels are 0 (cat) / 1 (dog), so the sum is the number of dogs
    dog = np.sum(label)
    cat = test_num - dog
    right = np.sum(hits)
    print('accuracy:', float(right)/test_num)
    print('cat:', cat)
    print('dog:', dog)

筛选得到的模型如下:

(origin_model): TorchModel(
        (0): Conv2d(3, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (3): ReLU()
        (4): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (5): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (6): ReLU()
        (7): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (8): ReLU()
        (9): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        (10): TorchAdd()
        (11): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (12): ReLU()
        (13): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (14): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (15): ReLU()
        (16): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (17): ReLU()
        (18): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
        (19): TorchAdd()
        (20): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (21): ReLU()
        (22): Conv2d(64, 128, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (23): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (24): ReLU()
        (25): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (26): ReLU()
        (27): Conv2d(64, 128, kernel_size=(1, 1), stride=(2, 2))
        (28): TorchAdd()
        (29): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (30): ReLU()
        (31): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (32): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (33): ReLU()
        (34): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (35): ReLU()
        (36): Conv2d(128, 128, kernel_size=(1, 1), stride=(1, 1))
        (37): TorchAdd()
        (38): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (39): ReLU()
        (40): Conv2d(128, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (41): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (42): ReLU()
        (43): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (44): ReLU()
        (45): Conv2d(128, 256, kernel_size=(1, 1), stride=(2, 2))
        (46): TorchAdd()
        (47): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (48): ReLU()
        (49): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (50): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (51): ReLU()
        (52): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (53): ReLU()
        (54): Conv2d(256, 256, kernel_size=(1, 1), stride=(1, 1))
        (55): TorchAdd()
        (56): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (57): ReLU()
        (58): Conv2d(256, 512, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
        (59): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (60): ReLU()
        (61): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (62): ReLU()
        (63): Conv2d(256, 512, kernel_size=(1, 1), stride=(2, 2))
        (64): TorchAdd()
        (65): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (66): ReLU()
        (67): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (68): BatchNorm2d(512, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (69): ReLU()
        (70): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
        (71): ReLU()
        (72): Conv2d(512, 512, kernel_size=(1, 1), stride=(1, 1))
        (73): TorchAdd()
        (74): GlobalAvgPool2d()
        (75): Linear(in_features=512, out_features=2, bias=True)
      )

三、pytorch进一步训练
之前没有接触过pytorch,一直用的是tensorflow。之所以选择pytorch,是因为autokeras生成的h5文件在以TF为backend的keras中无法加载,而在pytorch中可以正常加载。
这里有一个问题:autokeras生成的模型最终输出是一个长度为2的向量,但该向量并未经过softmax。因此构造一个pytorch的model,在加载的模型之后增加一个softmax层,以输出各类别的概率。

class TestModel(nn.Module):
    """Wrap the model exported from autokeras and append a softmax layer.

    The wrapped model is read with torch.load from the path held in the
    module-level name ``MODEL_DIR`` when an instance is created.
    """
    def __init__(self):
        super().__init__()
        #load the model pretrained from autokeras
        self.origin_model = torch.load(MODEL_DIR)
        # Normalise over the class axis of the (batch, 2) output. Leaving
        # `dim` unset relies on a deprecated implicit choice and makes
        # PyTorch emit a warning on every forward pass.
        self.softmax = nn.Softmax(dim=1)
    def forward(self,x):
        """Return the softmax of the wrapped model's output for `x`."""
        return self.softmax(self.origin_model(x))
import os 
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

image_size = (64,64)
import os
import cv2
import numpy as np


import autokeras as ak
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
class TestModel(nn.Module):
    """Model exported from autokeras followed by a softmax layer.

    The exported model is loaded with torch.load from the path stored in the
    module-level ``MODEL_DIR``, which must be set before instantiation.
    """
    def __init__(self):
        super().__init__()
        #load the model pretrained from autokeras
        self.origin_model = torch.load(MODEL_DIR)
        # Softmax over axis 1, the class axis of the (batch, 2) output; an
        # explicit `dim` avoids the deprecated implicit-dimension behaviour
        # and the warning that comes with it.
        self.softmax = nn.Softmax(dim=1)
    def forward(self,x):
        """Run `x` through the loaded model and return softmax probabilities."""
        logits = self.origin_model(x)
        return self.softmax(logits)
# Fine-tune the model found by autokeras on the stored training arrays.
MODEL_DIR = 'model.h5'
test_model = TestModel()
test_model.train()
test_model = nn.DataParallel(test_model, device_ids=[0]).cuda()
torch.save(test_model,'test_model')
# TestModel already ends in a softmax, so the loss is computed as NLLLoss on
# the log of its output. nn.CrossEntropyLoss applies log-softmax itself and
# would therefore normalise the outputs a second time.
criterion = nn.NLLLoss()
optimizer = optim.SGD(test_model.parameters(), lr=0.001, momentum=0.9)
# not referenced anywhere below
index = [0,2,3,1]
train_whole_images = np.load('train_whole_images_'+str(image_size[0])+'.npy')
train_whole_labels = np.load('train_whole_labels_'+str(image_size[0])+'.npy')
#change the channel from NHWC to NCHW
train_whole_images = np.transpose(train_whole_images, (0, 3, 1, 2))
batchsize = 128
num_samples = len(train_whole_images)
for epoch in range(100):  # loop over the dataset multiple times
    # the check at the end of the previous epoch switched to eval mode
    test_model.train()
    # visit the samples in a new random order every epoch
    order = np.random.permutation(num_samples)
    # stepping by batchsize also covers the final, shorter batch
    for i, start in enumerate(range(0, num_samples, batchsize)):
        # slices exclude their end, so start+batchsize yields a full batch
        batch_ids = order[start:start+batchsize]
        inputs = torch.Tensor(train_whole_images[batch_ids]).cuda()
        targets = torch.as_tensor(train_whole_labels[batch_ids], dtype=torch.long).cuda()
        # zero the parameter gradients
        optimizer.zero_grad()
        # forward + backward + optimize
        outputs = test_model(inputs)
        # clamp before the log so a probability of exactly 0 cannot give -inf
        loss = criterion(torch.log(torch.clamp(outputs, min=1e-12)), targets)
        loss.backward()
        optimizer.step()
        if i % (20) == 0:
            print('epoch:',epoch+1,',batch_ind/total_batch:',float(i/float(num_samples)*batchsize)*100,'%')

    torch.save(test_model,'test_model')
    # Spot check on 100 random training samples (not a held-out set). Eval
    # mode keeps BatchNorm from using and updating batch statistics here.
    test_model.eval()
    with torch.no_grad():  # no gradients are recorded inside this block
        test_num = 100
        # randint excludes the upper bound, so num_samples covers every index
        rand_ind = np.random.randint(0,num_samples,test_num)
        predict = test_model(torch.Tensor(train_whole_images[rand_ind])).cpu()
        predicted = torch.argmax(predict,dim=1)
        print(predicted)
        label = train_whole_labels[rand_ind]
        print('label:', label)
        # labels are 0 (cat) / 1 (dog), so the sum is the number of dogs
        dog = np.sum(label)
        cat = test_num - dog
        hits = predicted.numpy()==label
        right = np.sum(hits)
        print(hits)
        print(epoch+1,':', float(right)/test_num)
        print('cat:', cat)
        print('dog:', dog)
        #clear the cuda cache
        torch.cuda.empty_cache()
print('Finished Training')

四、对测试数据进行预测
按照比赛官方要求的格式进行文件的生成。

import os 
import cv2
import numpy as np

import autokeras as ak
import torch
import torchvision
import torch.optim as optim
import torch.nn as nn
# create a new model to change the last 1 layer in the origin model generated from autokeras
class TestModel(nn.Module):
    """Loaded model followed by a softmax layer.

    The inner model is read with torch.load from the path in the module-level
    ``MODEL_DIR`` at construction time.
    """
    def __init__(self):
        super().__init__()
        # load origin model
        self.origin_model = torch.load(MODEL_DIR)
        # Explicit class axis (dim 1 of a (batch, classes) output); omitting
        # `dim` uses a deprecated implicit choice and triggers a warning.
        self.softmax = nn.Softmax(dim=1)
    def forward(self,x):
        """Return softmax-normalised outputs of the loaded model for `x`."""
        out = self.origin_model(x)
        return self.softmax(out)

# Predict a label for every test image and write the Kaggle submission file.
image_size = (64,64)
# NOTE: 'test_model' is the file written by torch.save(test_model,'test_model')
# in the training step, i.e. a DataParallel-wrapped TestModel. Wrapping it in
# TestModel again applies softmax twice, which does not change the argmax.
MODEL_DIR = 'test_model'
test_model = TestModel()
# run inference on the gpu
test_model = nn.DataParallel(test_model, device_ids=[0]).cuda()
test_model.eval()
# Load the test images written by the preprocessing step so this script does
# not depend on variables left over from another session.
test_whole_images = np.load('test_whole_images_'+str(image_size[0])+'.npy')
# The preprocessing step stores the test images sorted by numeric id but does
# not store the ids themselves.
# assumes the ids are exactly 1..N with none missing -- TODO confirm
test_whole_ids = np.arange(1, len(test_whole_images) + 1)
#change the channel from NHWC to NCHW
test_whole_images = np.transpose(test_whole_images, (0, 3, 1, 2))
batchsize = 100
# Open the file once in 'w' mode: 'a+' would append a second copy of every
# row each time the script is run again.
with torch.no_grad(), open('submission.csv', 'w') as f:
    # header row of the submission format
    f.write('id,label\n')
    # stepping by batchsize also covers a final, shorter batch
    for start in range(0, len(test_whole_images), batchsize):
        predict = test_model(torch.Tensor(test_whole_images[start:start+batchsize])).cpu()
        predict = torch.argmax(predict,dim=1).numpy()
        print(predict)

        ids = test_whole_ids[start:start+batchsize]
        for image_id, output in zip(ids, predict):
            # str(output) is written as is; ','.join(str(output)) would put
            # commas between the characters of a multi-character value
            f.write(str(image_id) + ',' + str(output) + '\n')
# this step predicts, it does not train
print('Finished Predicting')

你可能感兴趣的:(DL,kaggle)