Pytorch实战:Kaggle下基于Resnet猫狗识别具体实现代码

Pytorch实战:Kaggle下基于Resnet猫狗识别具体实现代码

    • 数据集下载
    • 数据处理
    • 加载训练数据
    • 训练函数
    • 训练完成后的测试
    • 完整代码
    • linux下命令
    • Tricks

数据集下载

数据集下载:https://pan.baidu.com/s/1SlNAPf3NbgPyf93XluM7Fg 密码: hpn4
一共包含12500张狗的照片,12500张猫的照片

数据处理

原始数据train文件夹里包含所有的图片,首先对其进行处理,生成一个图片名称与标签相对应的txt文件,以便进行索引。将猫的标签对应为0,狗的标签对应为1。

import os
def text_save(filename, data_dir, data_class):
    """Append one '<image_path> <label>' line per sample to `filename`.

    data_dir: list of image paths; data_class: parallel list of int labels.
    """
    # `with` guarantees the handle is closed even if a write fails
    # (the original opened without closing on error paths).
    # NOTE: append mode means running this twice duplicates every line —
    # delete the file before regenerating.
    with open(filename, 'a') as file:
        for path, label in zip(data_dir, data_class):
            file.write(path + ' ' + str(label) + '\n')
    print('文件保存成功')

def get_files(file_dir):
    """Scan `file_dir` and return (image_paths, labels) — cats (label 0) first, dogs (label 1) after."""
    cat_paths, cat_labels = [], []
    dog_paths, dog_labels = [], []
    for entry in os.listdir(file_dir):
        # Kaggle names training files 'cat.<n>.jpg' / 'dog.<n>.jpg';
        # anything whose prefix is not 'cat' is treated as a dog.
        if entry.split(sep='.')[0] == 'cat':
            cat_paths.append(file_dir + entry)
            cat_labels.append(0)
        else:
            dog_paths.append(file_dir + entry)
            dog_labels.append(1)
    print('There are %d cats and %d dogs' %(len(cat_paths), (len(dog_paths))))

    # Concatenate: all cat entries, then all dog entries, labels in lockstep.
    cat_paths.extend(dog_paths)
    cat_labels.extend(dog_labels)
    image_list = cat_paths
    label_list = cat_labels
    print(type(image_list))
    return image_list, label_list

def data_process():
    """Generate train.txt mapping each training image path to its label (CLI entry point)."""
    images, labels = get_files('train/')
    text_save('train.txt', images, labels)

加载训练数据

#重写dataset类,用于加载dataloader
class train_Dataset(Dataset):
    """Dataset over (image_path, int_label) pairs listed one per line in a txt file."""

    def __init__(self, txt_path, transform=None, target_transform=None):
        # `with` closes the file handle (the original leaked it).
        imgs = []
        with open(txt_path, 'r') as fh:
            for line in fh:
                words = line.rstrip().split()
                imgs.append((words[0], int(words[1])))
        # DataLoader hands __getitem__ an index into this list.
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        fn, label = self.imgs[index]
        # Force RGB so grayscale/CMYK jpgs still yield 3 channels.
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)  # e.g. resize + ToTensor (scales pixels to [0,1])
        return img, label

    def __len__(self):
        return len(self.imgs)

训练函数

def save_models(net, epoch, save_dir='/home/cat/'):
    """Save net's state_dict as mymodel_epoch_<epoch>.pth under save_dir.

    Bug fix: the filename template was 'mymodel_epoch_1{}' — the stray '1'
    produced names like mymodel_epoch_119.pth, which eval() (loading
    'mymodel_epoch_19.pth') never finds. save_dir is now a parameter; the
    default keeps the original location.
    """
    torch.save(net.state_dict(), '{}mymodel_epoch_{}.pth'.format(save_dir, epoch))
    print('model saved')


def train(dataloader, net, lr = 0.01, momentum = 0.9, epochs = 10 ):
    """Train `net` on `dataloader` with SGD + cross-entropy; log every 100 iterations.

    Note: CrossEntropyLoss takes integer class labels directly — no one-hot
    encoding needed.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr, momentum)
    print('开始训练')
    for epoch in range(epochs):
        net.train()
        # Count correct predictions and samples since the last report so the
        # printed accuracy is exact. The original divided by a hard-coded
        # 1600 (= 100 iters * batch 16), which was wrong at i == 0 and for
        # any other batch size.
        correct = 0
        seen = 0
        for i,(image,label) in tqdm(enumerate(dataloader)):
            image,label = image.cuda(),label.cuda()  # move batch to the GPU
            optimizer.zero_grad()  # gradients accumulate across batches otherwise
            output = net(image)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            _, prediction = torch.max(output.data, 1)
            correct += torch.sum(prediction == label.data).item()
            seen += label.size(0)
            if i % 100 == 0:
                accuracy = correct / seen * 100
                print("epoch: %d Iteration %d loss: %f accuracy: %f"%(epoch,i,loss,accuracy))
                correct = 0
                seen = 0
        if epoch % 3 == 0:
            save_models(net, epoch)  # checkpoint every third epoch
    print('训练完成')
    save_models(net, epoch)  # final checkpoint after the last epoch

def train_model():
    """Build the dataloader and ResNet-50, then run training (CLI entry point)."""
    train_dataloader = DataLoader(
        train_Dataset('train.txt', transform=transform),
        batch_size=16, shuffle=True, num_workers=0)
    # Pretrained ImageNet weights converge much faster than training from scratch.
    model = torchvision.models.resnet50(pretrained=True)
    # Swap the 1000-way ImageNet head for a 2-way cat/dog classifier.
    model.fc = nn.Sequential(nn.Linear(2048, 2))
    model = model.cuda()
    print('model construct finished')
    # NOTE(review): momentum=0.09 looks like a typo for 0.9 — confirm before reuse.
    train(net=model, lr=0.0001, momentum=0.09, epochs=19, dataloader=train_dataloader)

训练完成后的测试

def eval():
    """Classify every test image with the epoch-19 checkpoint and write result.csv (CLI entry point)."""
    csv_file = 'sample_submission.csv'
    test_dir = 'test/'  # directory holding the test jpgs
    test_data = test_dataset(csv_file, test_dir, transform)
    test_dataloader = DataLoader(test_data, batch_size= 1, shuffle = False, num_workers= 0)
    # Architecture must match training exactly: ResNet-50 with a 2-way head.
    model = torchvision.models.resnet50(pretrained=False)
    model.fc = nn.Sequential(
        nn.Linear(2048,2),
    )
    model.load_state_dict(torch.load("mymodel_epoch_19.pth"))
    model = model.cuda()
    print('model_load finished')
    result = []
    model.eval()
    # no_grad: pure inference — skips autograd bookkeeping and saves memory
    # (the original built a gradient graph for every test batch).
    with torch.no_grad():
        for i, image in tqdm(enumerate(test_dataloader)):
            image = image.cuda()
            output = model(image)
            _, prediction = torch.max(output.data, 1)
            result.append(prediction.item())
    # Write predictions; merge with sample_submission.csv before submitting.
    dataframe = pd.DataFrame({'label':result})
    dataframe.to_csv("result.csv",sep=',')

完整代码

from PIL import Image
from torch.utils.data import Dataset
import os
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn
from torch.utils.data import DataLoader
import torchvision
from torchvision import transforms
import torch.optim as optim
from tqdm import tqdm
import pandas as pd

def text_save(filename, data_dir, data_class):
    """Append one '<image_path> <label>' line per sample to `filename`.

    data_dir: list of image paths; data_class: parallel list of int labels.
    """
    # `with` guarantees the handle is closed even if a write fails
    # (the original opened without closing on error paths).
    # NOTE: append mode means running this twice duplicates every line —
    # delete the file before regenerating.
    with open(filename, 'a') as file:
        for path, label in zip(data_dir, data_class):
            file.write(path + ' ' + str(label) + '\n')
    print('文件保存成功')



def get_files(file_dir):
    """Scan `file_dir` and return (image_paths, labels) — cats (label 0) first, dogs (label 1) after."""
    cat_paths, cat_labels = [], []
    dog_paths, dog_labels = [], []
    for entry in os.listdir(file_dir):
        # Kaggle names training files 'cat.<n>.jpg' / 'dog.<n>.jpg';
        # anything whose prefix is not 'cat' is treated as a dog.
        if entry.split(sep='.')[0] == 'cat':
            cat_paths.append(file_dir + entry)
            cat_labels.append(0)
        else:
            dog_paths.append(file_dir + entry)
            dog_labels.append(1)
    print('There are %d cats and %d dogs' %(len(cat_paths), (len(dog_paths))))

    # Concatenate: all cat entries, then all dog entries, labels in lockstep.
    cat_paths.extend(dog_paths)
    cat_labels.extend(dog_labels)
    image_list = cat_paths
    label_list = cat_labels
    print(type(image_list))
    return image_list, label_list

def data_process():
    """Generate train.txt (one 'image_path label' line per sample) for train_model (CLI entry point).

    Bug fix: this version wrote '123.txt', but train_model() reads
    'train.txt' (and the snippet earlier in this article also uses
    'train.txt') — the filenames must agree.
    """
    image_list, label_list = get_files('train/')
    text_save('train.txt', image_list, label_list)


# Shared preprocessing pipeline: resize to ResNet's expected 224x224 input,
# convert to a float tensor in [0,1], then shift each channel to roughly [-1,1].
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]),
])

class train_Dataset(Dataset):
    """Dataset over (image_path, int_label) pairs listed one per line in a txt file."""

    def __init__(self, txt_path, transform=None, target_transform=None):
        # `with` closes the file handle (the original leaked it).
        imgs = []
        with open(txt_path, 'r') as fh:
            for line in fh:
                words = line.rstrip().split()
                imgs.append((words[0], int(words[1])))
        # DataLoader hands __getitem__ an index into this list.
        self.imgs = imgs
        self.transform = transform
        self.target_transform = target_transform

    def __getitem__(self, index):
        fn, label = self.imgs[index]
        # Force RGB so grayscale/CMYK jpgs still yield 3 channels.
        img = Image.open(fn).convert('RGB')
        if self.transform is not None:
            img = self.transform(img)  # e.g. resize + ToTensor (scales pixels to [0,1])
        return img, label

    def __len__(self):
        return len(self.imgs)

def save_models(net, epoch, save_dir='/home/songfl/cat/'):
    """Save net's state_dict as mymodel_epoch_<epoch>.pth under save_dir.

    Bug fix: the filename template was 'mymodel_epoch_1{}' — the stray '1'
    produced names like mymodel_epoch_119.pth, which eval() (loading
    'mymodel_epoch_19.pth') never finds. save_dir is now a parameter; the
    default keeps the original location.
    """
    torch.save(net.state_dict(), '{}mymodel_epoch_{}.pth'.format(save_dir, epoch))
    print('model saved')

def train(dataloader, net, lr = 0.01, momentum = 0.9, epochs = 10 ):
    """Train `net` on `dataloader` with SGD + cross-entropy; log every 100 iterations.

    Note: CrossEntropyLoss takes integer class labels directly — no one-hot
    encoding needed.
    """
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr, momentum)
    print('开始训练')
    for epoch in range(epochs):
        net.train()
        # Count correct predictions and samples since the last report so the
        # printed accuracy is exact. The original divided by a hard-coded
        # 1600 (= 100 iters * batch 16), which was wrong at i == 0 and for
        # any other batch size.
        correct = 0
        seen = 0
        for i,(image,label) in tqdm(enumerate(dataloader)):
            image,label = image.cuda(),label.cuda()  # move batch to the GPU
            optimizer.zero_grad()  # gradients accumulate across batches otherwise
            output = net(image)
            loss = criterion(output, label)
            loss.backward()
            optimizer.step()
            _, prediction = torch.max(output.data, 1)
            correct += torch.sum(prediction == label.data).item()
            seen += label.size(0)
            if i % 100 == 0:
                accuracy = correct / seen * 100
                print("epoch: %d Iteration %d loss: %f accuracy: %f"%(epoch,i,loss,accuracy))
                correct = 0
                seen = 0
        if epoch % 3 == 0:
            save_models(net, epoch)  # checkpoint every third epoch
    print('训练完成')
    save_models(net, epoch)  # final checkpoint after the last epoch

def train_model():
    """Build the dataloader and ResNet-50, then run training (CLI entry point)."""
    train_dataloader = DataLoader(
        train_Dataset('train.txt', transform=transform),
        batch_size=16, shuffle=True, num_workers=0)
    # Pretrained ImageNet weights converge much faster than training from scratch.
    model = torchvision.models.resnet50(pretrained=True)
    # Swap the 1000-way ImageNet head for a 2-way cat/dog classifier.
    model.fc = nn.Sequential(nn.Linear(2048, 2))
    model = model.cuda()
    print('model construct finished')
    # NOTE(review): momentum=0.09 looks like a typo for 0.9 — confirm before reuse.
    train(net=model, lr=0.0001, momentum=0.09, epochs=19, dataloader=train_dataloader)

class test_dataset(Dataset):
    """Dataset over the Kaggle test images, in the order of sample_submission.csv ids."""

    def __init__(self, csv_file, test_dir, transform = None, target_transform = None):
        # csv_file: path to sample_submission.csv (first column = image id).
        # test_dir: directory holding test jpgs named '<id>.jpg'.
        # (Also fixes the original's 8-space over-indented method body.)
        self.test_csv = pd.read_csv(csv_file)
        self.test_dir = test_dir
        self.transform = transform

    def __getitem__(self, index):
        # DataFrame.ix was deprecated and removed from pandas (1.0+);
        # .iloc is the positional-indexing equivalent.
        image_name = os.path.join(self.test_dir, str(int(self.test_csv.iloc[index, 0])))
        image_name = image_name + '.jpg'
        image = Image.open(image_name).convert('RGB')
        if self.transform is not None:
            image = self.transform(image)
        return image

    def __len__(self):
        return len(self.test_csv)

def eval():
    """Classify every test image with the epoch-19 checkpoint and write result.csv (CLI entry point)."""
    csv_file = 'sample_submission.csv'
    test_dir = 'test/'  # directory holding the test jpgs
    test_data = test_dataset(csv_file, test_dir, transform)
    test_dataloader = DataLoader(test_data, batch_size= 1, shuffle = False, num_workers= 0)
    # Architecture must match training exactly: ResNet-50 with a 2-way head.
    model = torchvision.models.resnet50(pretrained=False)
    model.fc = nn.Sequential(
        nn.Linear(2048,2),
    )
    model.load_state_dict(torch.load("mymodel_epoch_19.pth"))
    model = model.cuda()
    print('model_load finished')
    result = []
    model.eval()
    # no_grad: pure inference — skips autograd bookkeeping and saves memory
    # (the original built a gradient graph for every test batch).
    with torch.no_grad():
        for i, image in tqdm(enumerate(test_dataloader)):
            image = image.cuda()
            output = model(image)
            _, prediction = torch.max(output.data, 1)
            result.append(prediction.item())
    # Write predictions; merge with sample_submission.csv before submitting.
    dataframe = pd.DataFrame({'label':result})
    dataframe.to_csv("result.csv",sep=',')
  
if __name__ == "__main__":
    # fire exposes each top-level function as a CLI subcommand, e.g.
    # `python dog_vs_cat.py data_process` / `train_model` / `eval`.
    import fire
    fire.Fire()

linux下命令

训练时将以上代码保存在dog_vs_cat.py文件中。
在命令行输入以下文字进行数据预处理:

python dog_vs_cat.py data_process

在命令行输入以下文字进行模型训练:

python dog_vs_cat.py train_model

在命令行输入以下文字进行test,并将结果输出:

python dog_vs_cat.py eval

Tricks

如果直接将输出的结果上传,那么最后的scores为0.32881,排名为700+
但是如果将狗的标签预测值(标签为1)修改为0.995,将猫的标签预测值(标签为0)修改为0.005,就会得到scores为0.055,排位在70左右。具体原因是和结果评估采用的LogLoss有关:kaggle 官方的评估标准是 LogLoss,对于预测正确的样本,0.995 和 1 相差无几,但是对于预测错误的样本,0 和 0.005 的差距非常大,是 15 和 2 的差别。
Pytorch实战:Kaggle下基于Resnet猫狗识别具体实现代码_第1张图片

你可能感兴趣的:(深度学习,计算机,计算机视觉,pytorch)