Pytorch上分之路—ShuffleNetv3(鸟群分类算法)

Pytorch上分之路—ShuffleNetv3(鸟群分类算法)

本次的内容是用 PyTorch 写一个简单的图像分类算法,数据集选用包含 200 种鸟类的 CUB-200-2011(Caltech-UCSD Birds-200-2011),可以自行到网上下载,很容易找到。


目录

  • **Pytorch上分之路—ShuffleNetv3(鸟群分类算法)**
  • 项目结构
  • 一、config
  • 二、datalist
  • 三、ShuffleNet
  • 四、train
  • 五、utils
  • 六、inference


项目结构

项目中所有的文件组成
Pytorch上分之路—ShuffleNetv3(鸟群分类算法)_第1张图片
config.py用于配置一些主要的参数
datalist.py用于数据的输出
inference.py为模型的推理部分
ShuffleNet.py模型
train.py训练模型
utils.py配合的一些小函数


以下为主要内容

一、config

import argparse

# Training settings.
#
# ``metavar`` only changes how an option's value placeholder is shown in the
# generated ``--help`` text.


def _str2bool(value):
    """Convert a command-line token into a real boolean.

    ``type=bool`` does not work with argparse: ``bool("False")`` is ``True``
    because every non-empty string is truthy, so ``--use_cuda False`` used to
    leave CUDA switched on.  This converter understands the usual spellings.

    Args:
        value: the raw command-line string (or an existing bool).

    Returns:
        The parsed boolean.

    Raises:
        argparse.ArgumentTypeError: if the text is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    # The empty string stays falsy, as it was under ``type=bool``.
    if text in ('false', 'f', 'no', 'n', '0', 'off', ''):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


parser = argparse.ArgumentParser(description='PyTorch Example for all')
# ``%(default)s`` lets argparse print the real default, so the help text can no
# longer drift away from the value actually used.
parser.add_argument('--train-batch-size', type=int, default=8, metavar='N',
                    help='input batch size for training (default: %(default)s)')
parser.add_argument('--test-batch-size', type=int, default=16, metavar='N',
                    help='input batch size for testing (default: %(default)s)')
parser.add_argument('--epochs', type=int, default=500, metavar='N',
                    help='number of epochs to train (default: %(default)s)')
parser.add_argument('--lr', type=float, default=0.001, metavar='LR',
                    help='learning rate (default: %(default)s)')
parser.add_argument('--momentum', type=float, default=0.5, metavar='M',
                    help='SGD momentum (default: 0.5)')
parser.add_argument('--seed', type=int, default=123, metavar='S',
                    help='random seed 设置种子的用意是一旦固定种子,后面依次生成的随机数其实都是固定的,有利于实验结果的产生与比较')
parser.add_argument('--use_cuda', type=_str2bool, default=True,
                    help='whether to use cuda to accerlate')
parser.add_argument('--base_data_path', type=str, default='G:/数据集/分类/',
                    help="total base data path for training")
parser.add_argument('--resume', type=_str2bool, default=True, metavar='R',
                    help="whether to use the pretrained model to start the train")
parser.add_argument('--saved_model', type=str, default="E:/完成工作/trained_model/",
                    help="the path to store the weight")
parser.add_argument('--val_num', type=float, default=0.3,
                    help="perecentage of validate data")
parser.add_argument('--pretrained_weight', type=str, default="E:/完成工作/trained_model/",
                    help="the path to load the pytorch weight")
parser.add_argument('--save', type=_str2bool, default=True,
                    help="whether to save the model weight")
parser.add_argument('--project_name', type=str, default='鸟群分类',
                    help="该项目的工程名称")
parser.add_argument('--use_aug', type=_str2bool, default=False,
                    help='使用数据增广,增加数据多样性')
parser.add_argument('--model_name', type=str, default="shuffle_net",
                    help='model name')

二、datalist

from random import shuffle

import numpy as np
from PIL import Image
from torch.utils.data import Dataset
from torchvision import transforms

from config import parser

# Parse the command line once, at import time.  The Dataset class in this
# module reads its settings (base_data_path, use_aug) from this namespace.
args = parser.parse_args()

# The bare string below is a note (in Chinese) listing the augmentation steps:
#   1. scale the image proportionally
#   2. crop the image at a random position
#   3. flip the image randomly, horizontally and vertically
#   4. rotate the image by a random angle
#   5. randomly change brightness, contrast and colour
'''
1. 对图片进行按比例缩放
2. 对图片进行随机位置的截取
3. 对图片进行随机的水平和竖直翻转
4. 对图片进行随机角度的旋转
5. 对图片进行亮度、对比度和颜色的随机变化
'''


# A custom dataset needs at least __init__, __len__ and __getitem__.
class Dataset(Dataset):
    """Map-style dataset over ``"<image path>*<label>"`` annotation strings.

    The class keeps the name ``Dataset`` (shadowing the imported torch base
    class) because other modules import it from here under that name.
    """

    def __init__(self, lines):
        """Store the annotation list.

        Args:
            lines: list of strings, each ``"<image path>*<label>"``.
        """
        super(Dataset, self).__init__()
        self.base_path = args.base_data_path
        self.annotation_lines = lines
        self.train_batches = len(self.annotation_lines)

    def __len__(self):
        """Return the number of annotation lines given at construction."""
        return self.train_batches

    def __getitem__(self, index):
        """Load one sample.

        Sample order is left entirely to the DataLoader.  The list used to be
        reshuffled in place whenever index 0 was requested; with a shuffling
        DataLoader that happens at an arbitrary point of the epoch and changes
        the index -> sample mapping underneath it, so some images were served
        twice and others skipped.

        Args:
            index: sample position; wrapped modulo the number of lines.

        Returns:
            ``(image, label)``: a float32 array with values scaled to [0, 1]
            and the integer label shifted down by one.
        """
        index = index % len(self.annotation_lines)
        img, y = self.collect_image_label(self.annotation_lines[index])

        if args.use_aug:
            img = self.img_augment(img)
        img = img.resize((32, 32), Image.BICUBIC)
        img = np.array(img, dtype=np.float32)
        # NOTE(review): a bare transpose reverses every axis, so the (H, W, C)
        # array becomes (C, W, H) rather than (C, H, W).  It is kept because the
        # inference script preprocesses images the same way; change both
        # together (and retrain) if this is ever corrected.
        temp_img = np.transpose(img / 255.0)
        # Shift the label down by one (the annotation file presumably numbers
        # classes from 1 -- confirm against the dataset).
        temp_y = int(y) - 1
        return temp_img, temp_y

    def collect_image_label(self, line):
        """Split an annotation line and load its image.

        Args:
            line: ``"<image path>*<label>"``.

        Returns:
            ``(image, label)``: an RGB PIL image and the label as a string.
        """
        # Split on the last '*' only, so the label is found even if the path
        # itself contains that character.
        image_path, label = line.rsplit('*', 1)
        # convert() returns an independent image, so the file handle can be
        # released right away instead of lingering until garbage collection.
        with Image.open(image_path) as handle:
            image = handle.convert("RGB")

        return image, label

    def rand(self, a=0, b=1):
        """Return a uniform random float in ``[a, b)``."""
        return np.random.rand() * (b - a) + a

    def img_augment(self, image):
        """Apply a random subset of augmentations to a PIL image.

        Each augmentation is switched on independently with probability 0.5.
        The switches are all drawn first, in the order below, and the
        transforms are then applied in a different, fixed order.
        """
        # crop at a random position
        random_crop = self.rand() < 0.5
        # centre crop
        center_crop = self.rand() < 0.5
        # pad, then crop at a random position
        random_crop_padding = self.rand() < 0.5
        # horizontal flip
        h_flip = self.rand() < 0.5
        # vertical flip
        v_flip = self.rand() < 0.5
        # brightness
        bright = self.rand() < 0.5
        # contrast
        contrast = self.rand() < 0.5
        # saturation
        saturation = self.rand() < 0.5
        # random hue shift
        color = self.rand() < 0.5
        # combined brightness / contrast / saturation jitter
        compose = self.rand() < 0.5
        # rotation (RandomRotation(30))
        rotate = self.rand() < 0.5

        if h_flip:
            image = transforms.RandomHorizontalFlip()(image)
        if v_flip:
            image = transforms.RandomVerticalFlip()(image)
        if rotate:
            image = transforms.RandomRotation(30)(image)
        if bright:
            image = transforms.ColorJitter(brightness=1)(image)
        if contrast:
            image = transforms.ColorJitter(contrast=1)(image)
        if saturation:
            image = transforms.ColorJitter(saturation=1)(image)
        if color:
            image = transforms.ColorJitter(hue=0.5)(image)
        if compose:
            image = transforms.ColorJitter(0.5, 0.5, 0.5)(image)
        # pad_if_needed keeps RandomCrop from raising on images that are
        # smaller than the 100-pixel crop window.
        if random_crop:
            image = transforms.RandomCrop(100, pad_if_needed=True)(image)
        if center_crop:
            image = transforms.CenterCrop(100)(image)
        if random_crop_padding:
            image = transforms.RandomCrop(100, padding=8, pad_if_needed=True)(image)

        return image


if __name__ == "__main__":
    # Smoke test.  ``lines`` is a required constructor argument, so the bare
    # ``Dataset()`` call used here before raised TypeError; build an empty
    # dataset and report its size instead.
    print(len(Dataset([])))

三、ShuffleNet

import torch
import torch.nn as nn
import torch.nn.functional as F


class ShuffleBlock(nn.Module):
    """Channel shuffle layer: interleaves the channels of ``groups`` groups."""

    def __init__(self, groups):
        super().__init__()
        self.groups = groups

    def forward(self, x):
        """Channel shuffle: [N,C,H,W] -> [N,g,C/g,H,W] -> [N,C/g,g,H,W] -> [N,C,H,W]."""
        batch, channels, height, width = x.size()
        per_group = int(channels / self.groups)
        grouped = x.view(batch, self.groups, per_group, height, width)
        # Swapping the two channel axes leaves the tensor non-contiguous in
        # memory; contiguous() is required before view() can flatten it again.
        interleaved = grouped.permute(0, 2, 1, 3, 4).contiguous()
        return interleaved.view(batch, channels, height, width)


class Bottleneck(nn.Module):
    """ShuffleNet unit: 1x1 group conv -> channel shuffle -> 3x3 depthwise conv -> 1x1 group conv.

    With ``stride == 2`` the shortcut is a 3x3 average pooling whose result is
    concatenated with the main branch; otherwise the shortcut is the identity
    and is added to the main branch.
    """

    def __init__(self, in_planes, out_planes, stride, groups):
        super().__init__()
        self.stride = stride

        # The middle of the bottleneck runs at a quarter of the output channels.
        mid_planes = int(out_planes / 4)

        # The authors note that the first pointwise layer of stage 2 is not
        # grouped, because its input has only 24 channels.
        if in_planes == 24:
            g = 1
        else:
            g = groups

        self.conv1 = nn.Conv2d(in_planes, mid_planes, kernel_size=1,
                               groups=g, bias=False)
        self.bn1 = nn.BatchNorm2d(mid_planes)
        self.shuffle1 = ShuffleBlock(groups=g)
        # groups == channels makes this 3x3 convolution depthwise.
        self.conv2 = nn.Conv2d(mid_planes, mid_planes, kernel_size=3,
                               stride=stride, padding=1,
                               groups=mid_planes, bias=False)
        self.bn2 = nn.BatchNorm2d(mid_planes)
        self.conv3 = nn.Conv2d(mid_planes, out_planes, kernel_size=1,
                               groups=groups, bias=False)
        self.bn3 = nn.BatchNorm2d(out_planes)

        if stride == 2:
            self.shortcut = nn.Sequential(nn.AvgPool2d(3, stride=2, padding=1))
        else:
            self.shortcut = nn.Sequential()

    def forward(self, x):
        """Run the unit; channels grow by concatenation only when ``stride == 2``."""
        branch = F.relu(self.bn1(self.conv1(x)))
        branch = self.shuffle1(branch)
        branch = F.relu(self.bn2(self.conv2(branch)))
        branch = self.bn3(self.conv3(branch))
        skip = self.shortcut(x)
        if self.stride == 2:
            return F.relu(torch.cat([branch, skip], 1))
        return F.relu(branch + skip)


class ShuffleNet(nn.Module):
    """ShuffleNet classifier: a 1x1 stem, three stages of Bottleneck units, a linear head.

    Args:
        cfg: dict with keys ``'out_planes'`` (output channels of the three
            stages), ``'num_blocks'`` (units per stage) and ``'groups'``
            (group count of the pointwise convolutions).
        num_class: number of output classes.
    """

    def __init__(self, cfg, num_class=200):
        super(ShuffleNet, self).__init__()
        out_planes = cfg['out_planes']
        num_blocks = cfg['num_blocks']
        groups = cfg['groups']

        self.conv1 = nn.Conv2d(3, 24, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(24)
        # Running channel count, advanced by _make_layer as stages are built.
        self.in_planes = 24
        self.layer1 = self._make_layer(out_planes[0], num_blocks[0], groups)
        self.layer2 = self._make_layer(out_planes[1], num_blocks[1], groups)
        self.layer3 = self._make_layer(out_planes[2], num_blocks[2], groups)
        self.linear = nn.Linear(out_planes[2], num_class)

    def _make_layer(self, out_planes, num_blocks, groups):
        """Build one stage of ``num_blocks`` units ending with ``out_planes`` channels."""
        layers = []
        for i in range(num_blocks):
            if i == 0:
                # The stride-2 unit concatenates its branch with the pooled
                # input, so the branch only has to produce the difference.
                layers.append(Bottleneck(self.in_planes,
                                         out_planes - self.in_planes,
                                         stride=2, groups=groups))
            else:
                layers.append(Bottleneck(self.in_planes,
                                         out_planes,
                                         stride=1, groups=groups))
            self.in_planes = out_planes
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return class logits of shape ``(batch, num_class)``."""
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        # Global average pooling.  The previous fixed 4x4 window only reduced
        # the feature map to 1x1 for 32x32 inputs; any other input size broke
        # the linear layer.  For 32x32 inputs the result is the same.
        out = F.adaptive_avg_pool2d(out, 1)
        out = out.view(out.size(0), -1)
        out = self.linear(out)
        return out


def ShuffleNetG2(num_class=200):
    """Build the ShuffleNet variant that uses 2 groups.

    Args:
        num_class: number of output classes; the default of 200 matches what
            the factory produced before this parameter existed.

    Returns:
        A freshly initialised ``ShuffleNet``.
    """
    cfg = {
        'out_planes': [200, 400, 800],
        'num_blocks': [4, 8, 4],
        'groups': 2
    }
    return ShuffleNet(cfg, num_class=num_class)


def ShuffleNetG3(num_class=200):
    """Build the ShuffleNet variant that uses 3 groups.

    Args:
        num_class: number of output classes; the default of 200 matches what
            the factory produced before this parameter existed.

    Returns:
        A freshly initialised ``ShuffleNet``.
    """
    cfg = {
        'out_planes': [240, 480, 960],
        'num_blocks': [4, 8, 4],
        'groups': 3
    }
    return ShuffleNet(cfg, num_class=num_class)

if __name__ == "__main__":
    # Print the layer structure of the 2-group network as a quick sanity check.
    print(ShuffleNetG2())


四、train

from __future__ import print_function

import os

import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from tqdm import tqdm

from config import parser
from datalist import Dataset

from ShuffleNet import ShuffleNetG2
from utils import Color_print
# import torchvision

# The bare string below is a note in Chinese: "fine-grained classification".
'''
细度分类
'''

# Best validation accuracy seen so far.  train.test() compares against it and
# overwrites it through ``global best_acc`` when a better epoch is found.
best_acc = 0


class train(object):
    """Train and validate ShuffleNetG2 on the CUB-200-2011 bird images.

    Constructing the object runs the whole pipeline: parse the arguments, build
    the data loaders, create the model (optionally warm-started from a
    checkpoint), then alternate one training and one validation pass per epoch.
    """

    def __init__(self):
        self.args = parser.parse_args()
        print(f"-----------{self.args.project_name}-------------")

        use_cuda = self.args.use_cuda and torch.cuda.is_available()
        # Seed the CPU generator unconditionally (it used to be skipped on GPU
        # runs), and the current GPU as well when CUDA is in use.
        torch.manual_seed(self.args.seed)
        if use_cuda:
            torch.cuda.manual_seed(self.args.seed)
        self.device = torch.device("cuda" if use_cuda else "cpu")
        # num_workers is kept at 0: other values tend to get in the way of debugging.
        kwargs = {'num_workers': 0, 'pin_memory': True} if use_cuda else {}

        # ---- data loaders ----
        self.images_path = os.path.join(self.args.base_data_path,
                                        "鸟_class_num=200/data/CUB_200_2011/images.txt")
        self.labels_path = os.path.join(self.args.base_data_path,
                                        "鸟_class_num=200/data/CUB_200_2011/image_class_labels.txt")
        self.annotation_lines = self.get_image_label()

        np.random.seed(10101)  # fixed seed so the train/val split is repeatable
        np.random.shuffle(self.annotation_lines)
        np.random.seed(None)
        self.num_val = int(len(self.annotation_lines) * self.args.val_num)
        self.num_train = len(self.annotation_lines) - self.num_val

        self.train_loader = DataLoader(
            Dataset(self.annotation_lines[:self.num_train]),
            batch_size=self.args.train_batch_size, shuffle=True, **kwargs)
        # The validation slice starts exactly where the training slice ends;
        # the former ``num_train + 1`` silently dropped one sample.
        self.test_loader = DataLoader(
            Dataset(self.annotation_lines[self.num_train:]),
            batch_size=self.args.test_batch_size, shuffle=False, **kwargs)

        # ---- model ----
        # A torchvision model with preset weights could be used instead:
        # self.model = torchvision.models.shufflenet_v2_x1_0(pretrained=True, num_classes=1000).to(self.device)
        # self.model.fc = nn.Linear(self.model.fc.in_features, 200)
        self.model = ShuffleNetG2().to(self.device)

        if self.args.resume:
            self._load_pretrained()

        # ---- CUDA acceleration ----
        if use_cuda:
            self.model = torch.nn.DataParallel(self.model,
                                               device_ids=range(torch.cuda.device_count()))  # parallel use GPU
            cudnn.benchmark = True  # speed up slightly

        # ---- loss, optimizer, learning-rate schedule ----
        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = optim.Adam(self.model.parameters(), lr=self.args.lr)
        # Only the cosine schedule is kept: a ReduceLROnPlateau scheduler used
        # to be created here and was overwritten on the very next line.
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(self.optimizer, T_max=5, eta_min=1e-5)

        # ---- training loop ----
        for epoch in range(1, self.args.epochs + 1):
            self.train(epoch)
            self.test(epoch)
        # release cached GPU memory that is no longer needed
        torch.cuda.empty_cache()
        Color_print("finish model training")

    def _load_pretrained(self):
        """Warm-start ``self.model`` from ``args.pretrained_weight`` (best effort).

        Accepts both a bare state dict and the wrapped checkpoint written by
        ``test()`` (weights under ``'model_state_dict'``), and strips the
        ``'module.'`` prefix that DataParallel adds to parameter names.  On any
        failure the model is re-initialised with ``weights_init`` and training
        starts from scratch.
        """
        try:
            print("load the weight from pretrained-weight file")
            checkpoint = torch.load(self.args.pretrained_weight, map_location=self.device)
            if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
                checkpoint = checkpoint['model_state_dict']

            model_dict = self.model.state_dict()
            matched = {}
            for key, value in checkpoint.items():
                if key.startswith('module.'):
                    key = key[len('module.'):]
                # Keep only tensors that exist in the model with the same shape.
                if key in model_dict and np.shape(model_dict[key]) == np.shape(value):
                    matched[key] = value
            if not matched:
                raise ValueError("no tensor in the checkpoint matches the model")

            model_dict.update(matched)
            self.model.load_state_dict(model_dict)
            print("Finished to load the weight")
        except Exception as err:  # deliberate best effort: fall back to a fresh init
            print("can not load weight \n train the model from stratch")
            print(f"reason: {err}")
            self.model.apply(self.weights_init)

    def train(self, epoch):
        """Run one training epoch, then advance the learning-rate schedule."""
        self.model.train()
        average_loss = []
        pbar = tqdm(self.train_loader,
                    desc=f'Train Epoch{epoch}/{self.args.epochs}')

        for data, target in pbar:
            data, target = data.to(self.device), target.to(self.device)
            self.optimizer.zero_grad()  # clear the gradients of the previous step
            output = self.model(data)
            loss = self.criterion(output, target)
            loss.backward()
            average_loss.append(loss.item())
            self.optimizer.step()
            pbar.set_description(f'Train Epoch:{epoch}/{self.args.epochs} train_loss:{round(np.mean(average_loss), 2)}')
        self.scheduler.step()

    def test(self, epoch):
        """Evaluate on the validation set and checkpoint the model on a new best accuracy."""
        global best_acc

        self.model.eval()
        # Plain Python counters work on every device (these used to be CUDA
        # tensors, which crashed CPU-only runs).
        correct = 0
        total = 0
        average_loss = []

        pbar = tqdm(self.test_loader,
                    desc=f'Test Epoch{epoch}/{self.args.epochs}',
                    mininterval=0.3)
        # The forward pass itself has to be inside no_grad, otherwise autograd
        # still records a graph for every validation batch.
        with torch.no_grad():
            for data, target in pbar:
                data, target = data.to(self.device), target.to(self.device)
                output = self.model(data)
                average_loss.append(self.criterion(output, target).item())
                pred = torch.argmax(output, 1)
                correct += (pred == target).sum().item()
                total += target.size(0)
                pbar.set_description(
                    f'Test Epoch:{epoch}/{self.args.epochs} test_loss:{round(np.mean(average_loss), 2)}')

        if total == 0:
            # Empty validation set: there is no accuracy to compare.
            return
        predict_acc = correct / total
        mean_loss = float(np.mean(average_loss))

        if self.args.save and predict_acc > best_acc:
            best_acc = predict_acc

            save_dir = self.args.saved_model + self.args.project_name
            os.makedirs(save_dir, exist_ok=True)
            save_path = save_dir + f'/Epoch-{epoch}-Test_loss-{round(mean_loss, 4)}.pth'
            torch.save({
                'epoch': epoch,
                'model_state_dict': self.model.state_dict(),
                'optimizer_state_dict': self.optimizer.state_dict(),
                # stored as a plain Python float rather than a numpy scalar
                'loss': round(mean_loss, 2)
            }, save_path)
            # The message reports the path that was actually written.
            Color_print(
                f"\n预测准确率:{round(predict_acc * 100, 2)}% "
                f"预测数量:{correct}/{total},"
                f"保存路径:{save_path}")

    def get_image_label(self):
        """Build the ``"<image path>*<label>"`` annotation list.

        The last whitespace-separated field of every non-blank line is read
        from ``self.images_path`` (relative image path) and
        ``self.labels_path`` (class label), and the two are paired line by
        line.  Image paths are resolved against the ``images`` directory next
        to ``images.txt``, so the dataset location follows
        ``--base_data_path`` instead of a hard-coded drive.

        Returns:
            list of annotation strings.

        Raises:
            ValueError: if the two files list a different number of entries.
        """
        images_dir = os.path.join(os.path.dirname(self.images_path), "images")
        with open(self.images_path) as f:
            images = [line.split()[-1] for line in f if line.strip()]
        with open(self.labels_path) as f:
            labels = [line.split()[-1] for line in f if line.strip()]
        if len(images) != len(labels):
            raise ValueError(
                f"{len(images)} images but {len(labels)} labels: annotation files do not match")

        return [os.path.join(images_dir, image) + '*' + label
                for image, label in zip(images, labels)]

    def weights_init(self, m):
        """Initialise one module; meant to be passed to ``model.apply``."""
        if isinstance(m, nn.Linear):
            nn.init.xavier_normal_(m.weight)
            # bias is None for layers built with bias=False
            if m.bias is not None:
                nn.init.constant_(m.bias, 0)
        # convolutions get Kaiming initialisation
        elif isinstance(m, nn.Conv2d):
            nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
        # batch-norm layers start as the identity transform
        elif isinstance(m, nn.BatchNorm2d):
            nn.init.constant_(m.weight, 1)
            nn.init.constant_(m.bias, 0)


if __name__ == "__main__":
    # Instantiating the class is the whole program: train.__init__ builds the
    # loaders and the model and then runs the training/validation loop.
    train()

五、utils

class bcolors:
    """ANSI escape sequences for styling terminal output.

    Put one of the codes in front of the text and ``ENDC`` after it.
    """
    HEADER = '\033[95m'     # bright magenta
    OKBLUE = '\033[94m'     # bright blue
    OKGREEN = '\033[92m'    # bright green
    WARNING = '\033[93m'    # bright yellow
    FAIL = '\033[91m'       # bright red
    ENDC = '\033[0m'        # reset all attributes
    BOLD = '\033[1m'        # bold
    UNDERLINE = '\033[4m'   # underline




def Color_print(line):
    """Print ``line`` in green on an ANSI-capable terminal.

    Args:
        line: the message.  Any object is accepted and rendered with ``str()``;
            plain concatenation used to raise TypeError for non-string values
            such as numbers.
    """
    print(f"{bcolors.OKGREEN}{line}{bcolors.ENDC}")



六、inference

from __future__ import print_function

import sys

import numpy as np
import torch
import torch.backends.cudnn as cudnn
from PIL import Image
from torch.autograd import Variable

from config import parser
from ShuffleNet import ShuffleNetG3

# The bare string below is a note in Chinese: "fine-grained classification,
# using BCNN".
# NOTE(review): the code in this file builds a ShuffleNet model, not a BCNN --
# the note looks stale; confirm and update.
'''
细度分类,用BCNN
'''


class inferencce(object):
    """Classify a single image with a trained ShuffleNet checkpoint.

    Constructing the object loads the class names, the model and the image,
    then prints the predicted class index and class name.
    """

    def __init__(self, image_path):
        """Load everything and run the prediction.

        Args:
            image_path: path of the image to classify.
        """
        # Imported locally because the module-level import only brings in
        # ShuffleNetG3, while train.py builds -- and therefore checkpoints --
        # ShuffleNetG2.  Building G3 here made almost every tensor fail the
        # shape filter below, so the prediction ran on random weights.
        from ShuffleNet import ShuffleNetG2

        self.args = parser.parse_args()

        self.classes = self.read_classes()

        use_cuda = self.args.use_cuda and torch.cuda.is_available()
        torch.manual_seed(self.args.seed)
        self.device = torch.device("cuda" if use_cuda else "cpu")

        # The model is no longer wrapped in DataParallel(...).cuda(): that call
        # crashed on machines without a GPU.  The 'module.' prefix it was
        # needed for is stripped from the checkpoint keys instead.
        self.model = ShuffleNetG2().to(self.device)

        if self.args.resume:
            try:
                print("load the weight from pretrained-weight file")
                checkpoint = torch.load(self.args.pretrained_weight + "鸟群分类/Epoch-26-Test_loss-4.6228.pth", map_location=self.device)
                if isinstance(checkpoint, dict) and 'model_state_dict' in checkpoint:
                    checkpoint = checkpoint['model_state_dict']

                model_dict = self.model.state_dict()
                matched = {}
                for key, value in checkpoint.items():
                    if key.startswith('module.'):
                        key = key[len('module.'):]
                    if key in model_dict and np.shape(model_dict[key]) == np.shape(value):
                        matched[key] = value
                if not matched:
                    raise ValueError("no tensor in the checkpoint matches the model")
                if len(matched) < len(model_dict):
                    # Make a partial load visible instead of silently
                    # predicting with partly random weights.
                    print(f"warning: only {len(matched)} of {len(model_dict)} tensors were loaded")

                model_dict.update(matched)
                self.model.load_state_dict(model_dict)

                print("Finished to load the weight")
            except Exception as err:
                print("can not load weight")
                print(f"reason: {err}")
                sys.exit(1)

        if use_cuda:
            cudnn.benchmark = True

        # Inference mode: batch norm must use its running statistics rather
        # than the statistics of this single image.
        self.model.eval()

        # convert() returns an independent image, so the file can be closed.
        with Image.open(image_path) as handle:
            self.image = handle.convert("RGB")
        self.image = self.image.resize((32, 32), Image.BICUBIC)
        self.image = np.array(self.image, dtype=np.float32)
        # Same preprocessing as the training Dataset (a bare transpose that
        # reverses all axes); the two must stay in sync.
        self.image = np.transpose(self.image / 255)

        self.predict()

    def predict(self):
        """Run the model on ``self.image`` and print the class index and name."""
        if isinstance(self.image, np.ndarray):
            # add the batch dimension
            self.image = torch.unsqueeze(torch.from_numpy(self.image), dim=0).float()
        self.image = self.image.to(self.device)

        # The forward pass belongs inside no_grad so no autograd graph is built.
        with torch.no_grad():
            output = self.model(self.image)
        pred = torch.argmax(output, 1)
        index = int(pred.item())
        print(index)

        # predicted class name
        print(self.classes[index])

    def read_classes(self, path=None):
        """Read the class names, one per line of ``classes.txt``.

        Args:
            path: file to read; defaults to the location used so far.

        Returns:
            list with the last whitespace-separated field of every non-blank line.
        """
        if path is None:
            path = "E:/Datasets2/Caltech-UCSD Birds-200-2011/data/CUB_200_2011/classes.txt"
        with open(path, 'r') as f:
            # Blank lines (e.g. a trailing newline) are skipped instead of
            # raising IndexError.
            return [line.split()[-1] for line in f if line.strip()]


if __name__ == "__main__":
    # Classify one sample image; the constructor prints the prediction.
    inferencce("C:/Users/lth/Desktop/0.jpg")

由于鸟类分类存在类间相似、类内差异大的特点,属于细粒度分类,所以训练不像 CIFAR、MNIST 那样容易得到好结果。我训练了大概 50 个 epoch 之后,勉强能猜对一半。欢迎大家指出我代码中的不足,或者给我更好的学习建议,谢谢!

你可能感兴趣的:(torch学习)