【打卡】苹果叶片病害分类和建筑物变化检测数据挖掘竞赛

【打卡】苹果叶片病害分类和建筑物变化检测数据挖掘竞赛

文章目录

  • 【打卡】苹果叶片病害分类和建筑物变化检测数据挖掘竞赛
    • Task 1两个赛题数据可视化
    • 任务2 苹果病害数据加载与数据增强
    • 任务三 苹果病害模型训练与预测
    • 任务4:苹果病害模型优化与多折训练

Task 1两个赛题数据可视化

在这个任务中,参赛选手需要对两个赛题的数据进行可视化。对于苹果病害数据,选手可以展示苹果叶片的病害图像以及它们所属的标签。对于建筑物检测数据,选手需要使用"吉林一号"高分辨率卫星遥感影像作为数据集。选手需要展示这些卫星影像,并可视化其中的建筑物变化。

import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

%matplotlib inline
import matplotlib.pyplot as plt

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
# Collect the apple-leaf image paths: training images sit one directory below
# ./train, test images directly below ./test.
train_path = glob.glob('./train/*/*')
test_path = glob.glob('./test/*')

# Shuffle in place so the preview grid below shows a random sample.
np.random.shuffle(train_path)
np.random.shuffle(test_path)

# Preview up to nine training images in a 3x3 grid. Slicing (instead of
# range(9)) keeps this working when fewer than nine images were found.
plt.figure(figsize=(7, 7))
for idx, path in enumerate(train_path[:9]):
    plt.subplot(3, 3, idx + 1)
    plt.imshow(Image.open(path))
    plt.xticks([])
    plt.yticks([])

    # Title each tile with the name of the image's parent directory.
    # os.path is used rather than split('/') so that the backslash-separated
    # paths glob returns on Windows are handled too.
    plt.title(os.path.basename(os.path.dirname(path)))

【打卡】苹果叶片病害分类和建筑物变化检测数据挖掘竞赛_第1张图片

import numpy as np
import glob
import cv2

import matplotlib.pyplot as plt

import os, sys, glob, argparse
import pandas as pd
import numpy as np
from tqdm import tqdm

import cv2
from PIL import Image
from sklearn.model_selection import train_test_split, StratifiedKFold, KFold

import torch
torch.manual_seed(0)
torch.backends.cudnn.deterministic = False
torch.backends.cudnn.benchmark = True

import torchvision.models as models
import torchvision.transforms as transforms
import torchvision.datasets as datasets
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data.dataset import Dataset
def _read_image(path):
    """Read *path* with cv2.imread, raising if OpenCV could not decode it."""
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('cv2.imread could not read {!r}'.format(path))
    return img


# Paths of the two image sets and of the label images used for training.
# NOTE(review): the images are globbed from './初赛训练集' but the labels from
# './train/label1' -- confirm both directories belong to the same data set.
train_tiff1 = glob.glob('./初赛训练集/Image1/*')
train_tiff2 = glob.glob('./初赛训练集/Image2/*')
train_label = glob.glob('./train/label1/*')

# Sort every list so that the same index is used consistently across them
# (presumably matching files share a file name -- verify against the data).
train_tiff1.sort()
train_tiff2.sort()
train_label.sort()

test_tiff1 = glob.glob('./初赛测试集/Image1/*')
test_tiff2 = glob.glob('./初赛测试集/Image2/*')

test_tiff1.sort()
test_tiff2.sort()

# Show one training sample: first image, second image, and its label.
idx = 20
# OpenCV decodes to BGR while matplotlib expects RGB, so convert before
# plotting; otherwise the red and blue channels appear swapped.
img1 = cv2.cvtColor(_read_image(train_tiff1[idx]), cv2.COLOR_BGR2RGB)
img2 = cv2.cvtColor(_read_image(train_tiff2[idx]), cv2.COLOR_BGR2RGB)
label = _read_image(train_label[idx])

plt.figure(dpi=200)
plt.subplot(131)
plt.imshow(img1)
plt.xticks([])
plt.yticks([])

plt.subplot(132)
plt.imshow(img2)
plt.xticks([])
plt.yticks([])

plt.subplot(133)
# Plot a single channel of the label image, scaled by 128
# (presumably the mask stores small class ids such as 0/1 -- TODO confirm).
plt.imshow(label[:, :, 1] * 128)
plt.xticks([])
plt.yticks([])

【打卡】苹果叶片病害分类和建筑物变化检测数据挖掘竞赛_第2张图片
方法很简单,总体来说就是先用 cv2.imread 或 Image.open 读取图像,再用 plt.imshow 显示(例如 plt.imshow(Image.open(train_path[idx]))),即可实现可视化。

任务2 苹果病害数据加载与数据增强

数据加载阶段,选手需要编写代码来读取和处理提供的图像数据。数据增强阶段,选手可以使用各种图像处理技术和方法,如旋转、缩放、翻转、亮度调整等,来增强数据集的多样性和数量。

步骤1:使用OpenCV或者PIL加载数据集(已经在任务一实现)
步骤2:使用torchvision或者OpenCV实现图像分类任务的数据增强

import torch
from torch.utils.data import Dataset
from PIL import Image
import numpy as np

DATA_CACHE = {}
import cv2


class XunFeiDataset(Dataset):
    """Image-classification dataset backed by a list of image file paths.

    Each item is ``(image, label)``: the image is read with ``cv2.imread``
    (memoised in the module-level ``DATA_CACHE``), optionally passed through
    ``transform`` and transposed from HWC to CHW; the label is the index of
    the image's parent directory name in ``CLASS_NAMES`` or ``-1`` when the
    directory is not a known class.

    NOTE(review): ``cv2.imread`` yields BGR channel order; confirm that the
    normalisation statistics in ``transform`` and any pretrained backbone
    expect that ordering.
    """

    # Class directory names; a label is the position of the name in this tuple.
    CLASS_NAMES = ('d1', 'd2', 'd3', 'd4', 'd5', 'd6', 'd7', 'd8', 'd9')

    def __init__(self, img_path, transform=None):
        """Store the path list and the optional transform.

        Args:
            img_path: sequence of image file paths.
            transform: optional callable invoked as ``transform(image=img)``
                that returns a mapping with an ``'image'`` entry.
        """
        self.img_path = img_path
        self.transform = transform

    @classmethod
    def _label_from_path(cls, path):
        """Return the class index encoded in *path*'s parent directory, or -1."""
        # Normalise Windows separators so the parent directory is found
        # regardless of which separator the path uses.
        parts = path.replace('\\', '/').split('/')
        if len(parts) >= 2 and parts[-2] in cls.CLASS_NAMES:
            return cls.CLASS_NAMES.index(parts[-2])
        return -1

    def __getitem__(self, index):
        """Return ``(image, label)`` for the path at *index*.

        Raises:
            FileNotFoundError: if OpenCV cannot read the image file.
        """
        path = self.img_path[index]

        img = DATA_CACHE.get(path)
        if img is None:
            img = cv2.imread(path)
            if img is None:
                # cv2.imread returns None on failure; fail here with the path
                # instead of caching None and crashing later in the transform.
                raise FileNotFoundError(
                    'cv2.imread could not read {!r}'.format(path))
            DATA_CACHE[path] = img

        if self.transform is not None:
            img = self.transform(image=img)['image']

        img = img.transpose([2, 0, 1])  # HWC -> CHW (numpy transpose)

        # Explicit int64 so the label dtype does not depend on the platform's
        # default numpy integer width.
        label = torch.tensor(self._label_from_path(path), dtype=torch.long)
        return img, label

    def __len__(self):
        """Return the number of image paths."""
        return len(self.img_path)
import argparse
import torch
import torchvision.transforms as transforms
from mydatasets.xunfeidataset import XunFeiDataset
import albumentations as A


def get_loader(args, train_path, test_path, val_size=1000):
    """Build the train, validation and test ``DataLoader`` objects.

    The last ``val_size`` entries of ``train_path`` are held out for
    validation; everything before them is used for training.

    Args:
        args: object providing ``batch_size`` and ``num_workers`` attributes.
        train_path: sequence of labelled image paths (train + validation).
        test_path: sequence of test image paths.
        val_size: number of trailing ``train_path`` entries to hold out.
            Defaults to 1000, the previously hard-coded value.

    Returns:
        Tuple ``(train_loader, val_loader, test_loader)``.

    Raises:
        ValueError: if ``val_size`` is negative or leaves no training images.
    """
    if not 0 <= val_size < len(train_path):
        raise ValueError(
            'val_size={} must be in [0, {}) so that the training split is '
            'not empty'.format(val_size, len(train_path)))

    # Use an explicit index: train_path[:-0] would be empty and
    # train_path[-0:] would be the whole list.
    split = len(train_path) - val_size

    # Mean/std passed to A.Normalize by all three pipelines.
    mean = (0.485, 0.456, 0.406)
    std = (0.229, 0.224, 0.225)

    # NOTE(review): A.RandomContrast is deprecated in newer albumentations
    # releases in favour of A.RandomBrightnessContrast -- confirm the
    # installed version still provides it.
    train_transform = A.Compose([
        A.RandomRotate90(),
        A.Resize(256, 256),
        A.RandomCrop(224, 224),
        A.HorizontalFlip(p=0.5),
        A.RandomContrast(p=0.5),
        A.RandomBrightnessContrast(p=0.5),
        A.Normalize(mean=mean, std=std),
    ])

    # Validation uses a deterministic centre crop so the reported accuracy is
    # reproducible from one evaluation to the next.
    val_transform = A.Compose([
        A.Resize(256, 256),
        A.CenterCrop(224, 224),
        A.Normalize(mean=mean, std=std),
    ])

    # NOTE(review): the test pipeline keeps its random crop/flip/contrast;
    # presumably the caller averages several predict() passes (test-time
    # augmentation) -- confirm, otherwise switch to val_transform.
    test_transform = A.Compose([
        A.Resize(256, 256),
        A.RandomCrop(224, 224),
        A.HorizontalFlip(p=0.5),
        A.RandomContrast(p=0.5),
        A.Normalize(mean=mean, std=std),
    ])

    train_loader = torch.utils.data.DataLoader(
        XunFeiDataset(train_path[:split], train_transform),
        batch_size=args.batch_size,
        shuffle=True,
        num_workers=args.num_workers,
        pin_memory=True)

    val_loader = torch.utils.data.DataLoader(
        XunFeiDataset(train_path[split:], val_transform),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=True)

    test_loader = torch.utils.data.DataLoader(
        XunFeiDataset(test_path, test_transform),
        batch_size=args.batch_size,
        shuffle=False,
        num_workers=args.num_workers,
        pin_memory=True)

    return train_loader, val_loader, test_loader

步骤4:实现Mixup数据增强。

def mixup_data(x, y, alpha=1.0, use_cuda=True):
    """Return mixed inputs, the two target sets, and the mixing weight.

    Every sample is blended with another sample of the same batch chosen by
    a random permutation: ``mixed_x = lam * x + (1 - lam) * x[index]``.

    Args:
        x: input batch tensor; the first dimension is the batch dimension.
        y: targets, indexable along their first dimension like ``x``.
        alpha: parameter of the ``Beta(alpha, alpha)`` distribution ``lam``
            is drawn from; when ``alpha <= 0`` no mixing happens (``lam = 1``).
        use_cuda: kept for backward compatibility and no longer consulted;
            the permutation index is always placed on ``x``'s device.

    Returns:
        Tuple ``(mixed_x, y_a, y_b, lam)`` where ``y_a`` is ``y`` and ``y_b``
        is ``y`` permuted with the same index used for ``x``.
    """
    lam = np.random.beta(alpha, alpha) if alpha > 0 else 1

    batch_size = x.size(0)
    # Draw the permutation on the CPU (same random stream as before), then
    # move it to wherever x lives. Previously the default use_cuda=True
    # forced the index onto CUDA, which failed for CPU tensors and on
    # machines without a GPU.
    index = torch.randperm(batch_size).to(x.device)

    mixed_x = lam * x + (1 - lam) * x[index]
    y_a, y_b = y, y[index]

    return mixed_x, y_a, y_b, lam

def mixup_criterion(criterion, pred, y_a, y_b, lam):
    """Return the ``lam``-weighted combination of the loss on both target sets.

    ``criterion`` is called once with ``(pred, y_a)`` and once with
    ``(pred, y_b)``; the results are weighted by ``lam`` and ``1 - lam``.
    """
    loss_a = criterion(pred, y_a)
    loss_b = criterion(pred, y_b)
    return lam * loss_a + (1 - lam) * loss_b

任务三 苹果病害模型训练与预测

参赛选手需要使用加载和增强后的苹果病害数据集,构建模型并进行训练和预测。选手可以选择适合的深度学习框架和模型架构,并使用训练集进行模型训练。然后,选手需要使用训练好的模型对测试集中的苹果叶片病害图像进行预测。

步骤1:自定义数据集读取
步骤2:自定义CNN模型
model.py

import torch.nn as nn
import torchvision.models as models


class XunFeiNet(nn.Module):
    """ResNet classifier built from a torchvision backbone.

    The chosen backbone gets an ``nn.AdaptiveAvgPool2d(1)`` pooling layer and
    a fresh ``nn.Linear`` head with ``num_classes`` outputs.

    Args:
        name: backbone key, one of ``'RN18'``, ``'RN34'``, ``'RN50'``,
            ``'RN101'``, ``'RN152'``.
        pretrained: forwarded as the first positional argument of the
            torchvision factory (the original code always passed ``True``).
        num_classes: width of the replacement ``fc`` layer (default 9).

    Raises:
        ValueError: if ``name`` is not a supported backbone key.
    """

    # Backbone key -> name of the factory function in torchvision.models.
    _BACKBONES = {
        'RN18': 'resnet18',
        'RN34': 'resnet34',
        'RN50': 'resnet50',
        'RN101': 'resnet101',
        'RN152': 'resnet152',
    }

    def __init__(self, name='RN18', pretrained=True, num_classes=9):
        super(XunFeiNet, self).__init__()
        if name not in self._BACKBONES:
            # Previously an unknown name fell through every branch and failed
            # later with an UnboundLocalError on `model`.
            raise ValueError('unknown backbone {!r}; expected one of {}'.format(
                name, sorted(self._BACKBONES)))

        factory = getattr(models, self._BACKBONES[name])
        model = factory(pretrained)
        model.avgpool = nn.AdaptiveAvgPool2d(1)
        # Read the head's input width from the backbone instead of
        # hard-coding 512 / 2048 per architecture.
        model.fc = nn.Linear(model.fc.in_features, num_classes)

        self.resnet = model

    def forward(self, img):
        """Return the backbone's output for the batch ``img``."""
        return self.resnet(img)

engine.py

import torch
import numpy as np
from utils import mixup_data, mixup_criterion

def train(train_loader, model, criterion, optimizer, args):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        train_loader: iterable of ``(input, target)`` batches supporting
            ``len()``.
        model: module to train; batches are moved to the device of its
            first parameter.
        criterion: loss callable invoked as ``criterion(output, target)``.
        optimizer: optimizer stepping ``model``'s parameters.
        args: object with ``mixup`` (enable mixup) and ``alpha`` (mixup Beta
            parameter) attributes.

    Returns:
        Sum of the batch losses divided by ``len(train_loader)``.
    """
    mix_up = args.mixup
    alpha = args.alpha

    # Follow the model's device instead of hard-coding .cuda(), so the loop
    # also runs on CPU-only machines and on non-default GPUs.
    device = next(model.parameters()).device

    model.train()
    train_loss = 0.0
    for i, (inputs, target) in enumerate(train_loader):
        inputs = inputs.to(device)
        target = target.to(device)

        if mix_up:
            inputs, targets_a, targets_b, lam = mixup_data(
                inputs, target, alpha, use_cuda=inputs.is_cuda)

        # compute output
        output = model(inputs)
        if mix_up:
            loss = mixup_criterion(criterion, output, targets_a, targets_b, lam)
        else:
            loss = criterion(output, target)

        # compute gradient and do an optimizer step
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        if i % 20 == 0:
            print('Train loss', batch_loss)

        train_loss += batch_loss

    return train_loss / len(train_loader)


def validate(val_loader, model, criterion):
    """Return the classification accuracy of ``model`` over ``val_loader``.

    Args:
        val_loader: loader yielding ``(input, target)`` batches and exposing
            a ``dataset`` attribute that supports ``len()``.
        model: module to evaluate; batches are moved to the device of its
            first parameter.
        criterion: unused; kept so existing callers need not change.

    Returns:
        Number of correct argmax predictions divided by
        ``len(val_loader.dataset)``.
    """
    model.eval()

    # Follow the model's device instead of hard-coding .cuda().
    device = next(model.parameters()).device

    correct = 0
    with torch.no_grad():
        for inputs, target in val_loader:
            inputs = inputs.to(device)
            target = target.to(device)

            output = model(inputs)
            correct += (output.argmax(1) == target).sum().item()

    return correct / len(val_loader.dataset)


def predict(test_loader, model, criterion):
    """Return the model outputs for every batch of ``test_loader``, stacked.

    Args:
        test_loader: iterable of ``(input, target)`` batches; the targets
            are ignored.
        model: module to evaluate; inputs are moved to the device of its
            first parameter.
        criterion: unused; kept so existing callers need not change.

    Returns:
        ``numpy.ndarray`` produced by ``np.vstack`` over the per-batch
        outputs, in loader order.
    """
    model.eval()

    # Follow the model's device instead of hard-coding .cuda().
    device = next(model.parameters()).device

    test_pred = []
    with torch.no_grad():
        for inputs, _ in test_loader:
            output = model(inputs.to(device))
            test_pred.append(output.cpu().numpy())

    return np.vstack(test_pred)

任务4:苹果病害模型优化与多折训练

参赛选手需要对苹果病害模型进行优化,并进行多折交叉验证训练。选手可以通过调整模型架构、超参数调优、正则化等方法来优化模型的性能。此外,选手还需要实现多折交叉验证来更准确地评估模型的性能和泛化能力。
模型优化已在上文的 model.py 中实现,本任务只考虑交叉验证。

你可能感兴趣的:(数据挖掘竞赛,数据挖掘,分类,python)