利用神经网络对图像分类(对29种不同云层图像分类-pytorch)附代码

一、训练集

参加了一个气象比赛,记录一下训练过程
数据集是在比赛官网上下载的;点这里
利用神经网络对图像分类(对29种不同云层图像分类-pytorch)附代码_第1张图片
说明一下,数据集大部分应该是比赛主办方在网上爬的,所以下载不了数据集也没关系,自己写个爬虫程序去下载,效果也是一样的。

二、训练过程

我用的是pytorch框架写的,主要是用了个残差网络进行训练,具体代码如下:

# -*- encoding:utf-8 -*-
import torch
import os
from torchvision.models.resnet import resnet152
from torch.utils.data import Dataset,DataLoader
import PIL.Image as Image
import cv2
import torch.nn as nn
import argparse
import numpy as np
from torchvision.transforms.transforms import Compose,Resize,ToTensor,Normalize
from utils import plot_image, plot_curve, one_hot
# from tensorboardX import SummaryWriter

# Debugging aid: with CUDA_LAUNCH_BLOCKING=1 CUDA kernel launches run
# synchronously, so a CUDA error is reported at the call that caused it.
# NOTE(review): this slows training down; consider removing it for real runs.
os.environ['CUDA_LAUNCH_BLOCKING'] = "1"


def parse():
    """Parse the command-line options of the training script.

    Returns:
        argparse.Namespace with the attributes ``train_csv`` and ``val_csv``
        (strings, ``None`` when not given), ``h`` and ``w`` (ints, default
        256), ``lr`` (float, default 0.001), ``batchsize`` (int, default 64)
        and ``seed`` (int, default 42).
    """
    parser = argparse.ArgumentParser(description="jiao du fen lei")

    # CSV index files; both default to None when omitted.
    csv_options = (
        ("--train_csv", "please input train csv"),
        ("--val_csv", "please input val csv"),
    )
    for flag, help_text in csv_options:
        parser.add_argument(flag, type=str, help=help_text)

    # Target image size used by the resize transform.
    size_options = (
        ("--h", "image h"),
        ("--w", "image w"),
    )
    for flag, help_text in size_options:
        parser.add_argument(flag, type=int, default=256, help=help_text)

    # Optimisation hyper-parameters and the random seed.
    parser.add_argument("--lr", type=float, default=0.001, help="learning")
    parser.add_argument("--batchsize", type=int, default=64)
    parser.add_argument("--seed", type=int, default=42)

    options = parser.parse_args()
    return options


class fenleidata(Dataset):
    """Image classification dataset built from parallel path and label lists.

    Args:
        imagepaths: sequence of image file paths.
        labels: sequence of labels, one per path; each is converted with
            ``int`` when the sample is fetched.
        transfrom: optional callable applied to the RGB PIL image.
    """

    def __init__(self, imagepaths, labels, transfrom=None):
        super().__init__()
        self.imagepaths, self.labels = imagepaths, labels
        self.transfrom = transfrom

    def __len__(self):
        """Return the number of samples (the number of image paths)."""
        return len(self.imagepaths)

    def __getitem__(self, index):
        """Return the ``(image, label)`` pair stored at ``index``."""
        # The data mixes RGB and grayscale files, so every image is
        # converted to RGB before any transform is applied.
        rgb = Image.open(self.imagepaths[index]).convert('RGB')
        sample = self.transfrom(rgb) if self.transfrom else rgb
        return sample, int(self.labels[index])

def _read_label_csv(csv_path, image_dir):
    """Read a ``file name,label`` CSV and return ``(image_paths, labels)``.

    Each non-blank line is split on commas; the first field is joined onto
    ``image_dir`` and the second field is kept as the (string) label.
    Blank lines are skipped. The file is closed before returning.
    """
    image_paths, labels = [], []
    with open(csv_path, 'r') as csv_file:
        for line in csv_file:
            line = line.strip()
            if not line:
                continue
            fields = line.split(',')
            image_paths.append(os.path.join(image_dir, fields[0]))
            labels.append(fields[1])
    return image_paths, labels


def main():
    """Train a 29-class ResNet-152 and save the best weights to ``best.pth``.

    Reads the options from ``parse()``, seeds NumPy and torch, loads the
    train/val CSV index files, trains for 100 epochs with Adam and
    cross-entropy, evaluates on the validation set every 5th epoch
    (epochs 1, 6, 11, ...), saves the state dict whenever the validation
    accuracy improves, and finally plots the per-iteration training loss.
    """
    opt = parse()
    np.random.seed(opt.seed)
    torch.manual_seed(opt.seed)
    torch.cuda.manual_seed_all(opt.seed)

    # CSV file format, one sample per line: file name,label
    #     1.jpg,0
    #     2.jpg,1
    #     ......
    # NOTE(review): each label field is used as one integer class id; an
    # earlier, disabled variant split multi-character labels into several
    # single-label samples -- confirm which the data needs.
    path = '你的路径/train/'
    train_imagepaths, train_labels = _read_label_csv(opt.train_csv, path)
    # Validation file names are resolved against the same image directory.
    val_imagepaths, val_labels = _read_label_csv(opt.val_csv, path)

    # Identical preprocessing for both splits: resize, to tensor, then
    # normalise each channel with mean 0.5 and std 0.5.
    train_tranfrom = Compose([Resize((opt.h, opt.w)), ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
    val_tranfrom = Compose([Resize((opt.h, opt.w)), ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

    CUDA = torch.cuda.is_available()

    net = resnet152(pretrained=False, num_classes=29)
    net.avgpool = nn.AdaptiveAvgPool2d((1, 1))
    critem = nn.CrossEntropyLoss()
    if CUDA:
        net.cuda()
        critem = critem.cuda()

    # Adam is used here; it can be swapped for SGD.
    optim = torch.optim.Adam(net.parameters(), lr=opt.lr)

    trainfenlei = fenleidata(train_imagepaths, train_labels, train_tranfrom)
    valfenlei = fenleidata(val_imagepaths, val_labels, val_tranfrom)

    traindataset = DataLoader(trainfenlei, batch_size=opt.batchsize, shuffle=True, pin_memory=True)
    valdatset = DataLoader(valfenlei, batch_size=8)

    best_acc = 0.
    train_loss = []
    epochs = 100
    for i in range(epochs):
        net.train()
        for j, (image, label) in enumerate(traindataset):
            if CUDA:
                image = image.cuda()
                label = label.cuda()

            optim.zero_grad()
            out = net(image)
            loss = critem(out, label)
            loss.backward()
            optim.step()

            train_loss.append(loss.item())
            print('epoch:{},iter:{},loss:{}'.format(i+1, j+1, float(loss)))

        # Validate only on every 5th epoch, starting with the first one.
        if i % 5 != 0:
            continue

        correct = 0
        total = 0
        net.eval()
        with torch.no_grad():
            for image, label in valdatset:
                if CUDA:
                    image = image.cuda()
                    label = label.cuda()
                predict = net(image).argmax(dim=1)
                # Count samples rather than averaging per-batch accuracies,
                # so a smaller final batch is not over-weighted.
                correct += int((predict == label).sum())
                total += label.size(0)
        # An empty validation set yields 0.0 instead of dividing by zero.
        acc = correct / total if total else 0.0
        print("epoch{},val_acc: {}".format(i+1, float(acc)))

        if acc > best_acc:
            best_acc = acc
            torch.save(net.state_dict(), 'best.pth')

    plot_curve(train_loss)


# Start training only when this file is executed as a script, not on import.
if __name__=="__main__":
    main()

训练代码里用到了一个用于loss可视化的模块,放在utils.py中,内容如下:

import torch
from matplotlib import pyplot as plt


def plot_curve(data):
    """Draw ``data`` against its index as a blue line and show the figure.

    Args:
        data: sequence of numeric values (e.g. per-iteration losses); the
            x coordinate of each value is its position in the sequence.
    """
    plt.figure()
    steps = range(len(data))
    plt.plot(steps, data, color='blue')
    # Axis labels and the single-entry legend placed in the upper right.
    plt.xlabel('step')
    plt.ylabel('value')
    plt.legend(['value'], loc='upper right')
    plt.show()


def plot_image(img, label, name):
    """Show up to six images of a batch in a 2x3 grid, titled with labels.

    Args:
        img: indexable batch of images; ``img[i][0]`` (the first channel of
            image ``i``) is what gets drawn, in grayscale.
        label: indexable batch of labels whose items support ``.item()``.
        name: prefix used in each panel title, rendered as ``"name: label"``.

    Batches with fewer than six items are drawn partially instead of
    raising ``IndexError``.
    """
    plt.figure()
    shown = min(6, len(img), len(label))
    for i in range(shown):
        plt.subplot(2, 3, i + 1)
        # NOTE(review): 0.3081 / 0.1307 look like MNIST std / mean used to
        # undo normalisation -- confirm they match how ``img`` was normalised.
        plt.imshow(img[i][0]*0.3081+0.1307, cmap='gray', interpolation='none')
        plt.title("{}: {}".format(name, label[i].item()))
        plt.xticks([])
        plt.yticks([])
    # Lay out once, after every panel and title exists.
    plt.tight_layout()
    plt.show()


def one_hot(label, depth=10):
    """Return the one-hot encoding of a tensor of class indices.

    Args:
        label: tensor of class indices; its first dimension is the number
            of samples. Any integer dtype is accepted (values are cast to
            int64 for indexing).
        depth: number of classes, i.e. the width of the result.

    Returns:
        Float tensor of shape ``(label.size(0), depth)`` on the same device
        as ``label``, with a 1 at each sample's class index and 0 elsewhere.
    """
    # ``label.long()`` replaces the legacy ``torch.LongTensor(label)``
    # constructor, which rejects tensors that are not already int64.
    idx = label.long().reshape(-1, 1)
    out = torch.zeros(label.size(0), depth, device=label.device)
    out.scatter_(dim=1, index=idx, value=1)
    return out

因为主办方已经给了train的csv文件,接下来只需要再划分出一个val的csv文件即可,这一步很简单,就不详细说了。
利用神经网络对图像分类(对29种不同云层图像分类-pytorch)附代码_第2张图片
之后输入csv文件路径就可以进行训练了;得到了一个best.pth文件如下
利用神经网络对图像分类(对29种不同云层图像分类-pytorch)附代码_第3张图片
这个best.pth是网络训练出来保存的参数,也是使得验证集准确度最高时保存的参数
训练的loss曲线如下:
利用神经网络对图像分类(对29种不同云层图像分类-pytorch)附代码_第4张图片
之后用这个保存的参数进行测试,测试集用的主办方的,测试代码如下:

# -*- encoding:utf-8 -*-
import torch
from torchvision.models.resnet import resnet152
import torch.nn as nn
import os
import PIL.Image as Image
from torchvision.transforms.transforms import Compose,Resize,ToTensor,Normalize
import pandas as pd

# Build the model: ResNet-152 with a 29-class output layer.
net = resnet152(pretrained=False, num_classes=29)
net.avgpool = nn.AdaptiveAvgPool2d((1, 1))

# Load the trained weights. map_location='cpu' lets a checkpoint saved from a
# GPU run load on a CPU-only machine; the model is moved to the GPU below
# when one is available.
net.load_state_dict(torch.load('你的路径/best.pth', map_location='cpu'))

CUDA = torch.cuda.is_available()
if CUDA:
    net.cuda()

# Preprocessing, built once instead of once per image: resize to 256x256,
# convert to a CHW float tensor scaled to [0, 1] (ToTensor), then normalise
# every channel with mean 0.5 and std 0.5.
transform = Compose([Resize((256, 256)), ToTensor(), Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

path = '/home/mllabs/hl/qixiang/test/'
img_names = os.listdir(path)
list_1 = []

net.eval()
with torch.no_grad():  # no gradient tracking is needed for inference
    for img_name in img_names:
        img_path = os.path.join(path, img_name)
        img = Image.open(img_path).convert('RGB')
        img = transform(img).unsqueeze(0)  # add the batch dimension
        if CUDA:
            img = img.cuda()
        output = net(img)
        _, predict = output.topk(1, 1)  # index of the highest score
        predicted_class = int(predict)
        print('图片{}的分类结果为{}'.format(img_name, predicted_class))
        list_1.append([img_name, predicted_class])

# One row per image: [file name, predicted class].
pd.DataFrame(list_1).to_csv("knn_res1.csv")

把生成的csv文件上传上去就可以检测结果啦,因为只用到一个残差网络resnet152,对于简单的分类还可以,复杂的多标签的分类就要多花点功夫试下别的网络。

三、总结

目前最好的分数是0.59,我测试的结果在0.42左右,虽然排不上名,但加入多标签还是有很大空间往上爬滴!

你可能感兴趣的:(人工智能,#,pytorch,图像分类)