pytorch实现GoogLeNet-InceptionV1

GoogLeNet Inception v1 结构 及 pytorch、tensorflow、keras、paddle实现

 

环境

Python 3.6, torch 1.0.1, torchvision 0.4.0, torchsummary 1.5.1

 

代码

# -*- coding: utf-8 -*- 
# @Time : 2020/2/3 9:45 
# @Author : Zhao HL
# @File : InceptionV1-torch.py
import torch, torchvision
from torchvision import transforms
from torch import optim, argmax
from torch.nn import Conv2d, Linear, MaxPool2d, AvgPool2d, ReLU, Softmax, Dropout, Module, Sequential, CrossEntropyLoss
from torchsummary import summary
import sys, os
import numpy as np
from PIL import Image
import pandas as pd
from collections import OrderedDict
from my_utils import process_show, draw_loss_acc, dataInfo_show, dataset_divide

# region parameters
# region paths
# Image directory and the CSV that lists every file with its split label.
Data_path = "./data/"
Data_csv_path = "./data/split.txt"
# Checkpoint directory and one checkpoint file name per framework.
Model_path = 'model/'
Model_file_tf = "model/InceptionV1_tf.ckpt"
Model_file_keras = "model/InceptionV1_keras.h5"
Model_file_torch = "model/InceptionV1_torch.pth"
Model_file_paddle = "model/InceptionV1_paddle.model"
# endregion

# region image parameter
Img_size = 224
Img_chs = 3
Label_size = 1
# Class names; the position of a name in this list is its integer label.
Label_class = [
    'agricultural',
    'airplane',
    'baseballdiamond',
    'beach',
    'buildings',
    'chaparral',
    'denseresidential',
    'forest',
    'freeway',
    'golfcourse',
    'harbor',
    'intersection',
    'mediumresidential',
    'mobilehomepark',
    'overpass',
    'parkinglot',
    'river',
    'runway',
    'sparseresidential',
    'storagetanks',
    'tenniscourt',
]
Labels_nums = len(Label_class)
# endregion

# region net parameter
# Stem convolutions.
Conv1_kernel_size = 7
Conv1_chs = 64
Conv21_kernel_size = 1
Conv21_chs = 64
Conv2_kernel_size = 3
Conv2_chs = 192
# Per-block channel counts, unpacked by InceptionV1_Model in this order:
# (1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-projection).
Icp3a_size = (64, 96, 128, 16, 32, 32)
Icp3b_size = (128, 128, 192, 32, 96, 64)
Icp4a_size = (192, 96, 208, 16, 48, 64)
Icp4b_size = (160, 112, 224, 24, 64, 64)
Icp4c_size = (128, 128, 256, 24, 64, 64)
Icp4d_size = (112, 144, 288, 32, 64, 64)
Icp4e_size = (256, 160, 320, 32, 128, 128)
Icp5a_size = (256, 160, 320, 32, 128, 128)
Icp5b_size = (384, 192, 384, 48, 128, 128)
# Auxiliary head widths: 1x1 conv channels, then hidden Linear units.
Out_chs1 = 128
Out_chs2 = 1024
# endregion

# region hyperparameter
Learning_rate = 1e-3
Batch_size = 16
Buffer_size = 256
Infer_size = 1
Epochs = 20
# Sample counts per split and the number of full batches each one yields.
Train_num = 1470
Train_batch_num = Train_num // Batch_size
Val_num = 210
Val_batch_num = Val_num // Batch_size
Test_num = 420
Test_batch_num = Test_num // Batch_size
# endregion
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


# endregion

class MyDataset(torch.utils.data.Dataset):
    """Image dataset whose labels are encoded in the file names.

    Args:
        root_path: Directory that contains the image files.
        files_list: File names (relative to ``root_path``) to expose. When
            omitted or empty, every entry returned by ``os.listdir(root_path)``
            is used.
        transform: Optional callable applied to the opened PIL image.
    """

    def __init__(self, root_path, files_list=None, transform=None):
        self.root_path = root_path
        self.transform = transform
        self.files_list = files_list if files_list else os.listdir(root_path)
        # Measure the resolved list, not the raw argument: the argument may be
        # None (TypeError) or empty (size 0 despite the directory fallback).
        self.size = len(self.files_list)

    def __len__(self):
        """Return the number of files in the dataset."""
        return self.size

    def __getitem__(self, index):
        """Return ``(image, label)`` for the file at ``index``.

        The label is the position in ``Label_class`` of the file's base name
        with its last six characters removed. This assumes names shaped like
        ``<class><2 digits>.<3-letter ext>`` (e.g. ``beach00.tif``) -- confirm
        against the actual data. Raises ``ValueError`` if the remaining name is
        not in ``Label_class``.
        """
        img = Image.open(os.path.join(self.root_path, self.files_list[index]))
        if self.transform:
            img = self.transform(img)
        label_str = os.path.basename(self.files_list[index])[:-6]
        label = Label_class.index(label_str)
        return img, label


class InceptionV1_Model(Module):
    """One Inception block: four parallel branches joined along channels.

    Args:
        input_chs: Number of channels of the incoming feature map.
        model_size: Six channel counts in the order
            (1x1, 3x3-reduce, 3x3, 5x5-reduce, 5x5, pool-projection).

    The output has ``1x1 + 3x3 + 5x5 + pool-projection`` channels; every
    branch uses stride 1 with padding chosen to keep the spatial size.
    """

    def __init__(self, input_chs, model_size):
        super().__init__()
        (branch1_out, reduce3_out, branch3_out,
         reduce5_out, branch5_out, pool_proj_out) = model_size

        # Branch 1: plain 1x1 convolution.
        branch1_layers = [
            Conv2d(input_chs, branch1_out, kernel_size=1),
            ReLU(),
        ]
        self.conv1 = Sequential(*branch1_layers)

        # Branch 2: 1x1 channel reduction followed by a 3x3 convolution.
        branch3_layers = [
            Conv2d(input_chs, reduce3_out, kernel_size=1),
            ReLU(),
            Conv2d(reduce3_out, branch3_out, kernel_size=3, padding=1),
            ReLU(),
        ]
        self.conv3 = Sequential(*branch3_layers)

        # Branch 3: 1x1 channel reduction followed by a 5x5 convolution.
        branch5_layers = [
            Conv2d(input_chs, reduce5_out, kernel_size=1),
            ReLU(),
            Conv2d(reduce5_out, branch5_out, kernel_size=5, padding=2),
            ReLU(),
        ]
        self.conv5 = Sequential(*branch5_layers)

        # Branch 4: 3x3 max pooling followed by a 1x1 projection.
        pool_layers = [
            MaxPool2d(kernel_size=3, stride=1, padding=1),
            Conv2d(input_chs, pool_proj_out, kernel_size=1),
            ReLU(),
        ]
        self.pool1 = Sequential(*pool_layers)

    def forward(self, input):
        """Run all four branches on ``input`` and concatenate on dim 1."""
        branches = (self.conv1, self.conv3, self.conv5, self.pool1)
        return torch.cat([branch(input) for branch in branches], dim=1)


class InceptionV1_Out(Module):
    """Auxiliary classifier head attached to an intermediate feature map.

    Args:
        input_chs: Number of channels of the feature map fed to the head.

    The head returns raw class scores (no softmax) of width ``Labels_nums``.
    """

    def __init__(self, input_chs):
        super().__init__()
        self.pool1 = Sequential(AvgPool2d(kernel_size=5, stride=3))
        self.conv = Sequential(
            Conv2d(input_chs, Out_chs1, kernel_size=1),
            ReLU(),
        )
        # The first Linear layer expects a 4x4 map after pooling, which is what
        # a 14x14 input produces: (14 - 5) // 3 + 1 == 4.
        flat_features = Out_chs1 * 4 * 4
        classifier_layers = [
            Dropout(p=0.3),
            Linear(flat_features, Out_chs2),
            Dropout(p=0.3),
            Linear(Out_chs2, Labels_nums),
        ]
        self.fc = Sequential(*classifier_layers)

    def forward(self, input):
        """Pool, project, flatten per sample and classify ``input``."""
        features = self.conv(self.pool1(input))
        flat = features.view(features.size(0), -1)
        return self.fc(flat)


class InceptionV1(Module):
    """GoogLeNet (Inception v1) classifier with two auxiliary heads.

    In training mode ``forward`` returns ``(aux1, aux2, main)`` scores; in
    eval mode it returns only the main scores. All outputs are raw scores of
    width ``Labels_nums``.
    """

    def __init__(self):
        super().__init__()

        stem_layers = [
            Conv2d(Img_chs, Conv1_chs, kernel_size=Conv1_kernel_size, stride=2, padding=3),
            ReLU(),
            MaxPool2d(kernel_size=3, stride=2, padding=1),
            Conv2d(Conv1_chs, Conv21_chs, kernel_size=Conv21_kernel_size),
            ReLU(),
            Conv2d(Conv21_chs, Conv2_chs, kernel_size=Conv2_kernel_size, padding=1),
            ReLU(),
            MaxPool2d(kernel_size=3, stride=2, padding=1),
        ]
        self.conv = Sequential(*stem_layers)

        # Stage 3.
        self.inception3a = InceptionV1_Model(192, Icp3a_size)
        self.inception3b = InceptionV1_Model(256, Icp3b_size)
        self.pool1 = MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage 4. A freshly constructed Module is always in training mode, so
        # both auxiliary heads are always created here.
        self.inception4a = InceptionV1_Model(480, Icp4a_size)
        self.out1 = InceptionV1_Out(512)
        self.inception4b = InceptionV1_Model(512, Icp4b_size)
        self.inception4c = InceptionV1_Model(512, Icp4c_size)
        self.inception4d = InceptionV1_Model(512, Icp4d_size)
        self.out2 = InceptionV1_Out(528)
        self.inception4e = InceptionV1_Model(528, Icp4e_size)
        self.pool2 = MaxPool2d(kernel_size=3, stride=2, padding=1)

        # Stage 5 and the main classifier.
        self.inception5a = InceptionV1_Model(832, Icp5a_size)
        self.inception5b = InceptionV1_Model(832, Icp5b_size)
        self.pool3 = AvgPool2d(kernel_size=7, stride=1)
        self.linear = Sequential(
            Dropout(p=0.4),
            Linear(1024, Labels_nums),
        )

    def forward(self, input):
        """Classify a batch of images.

        Returns:
            ``(output1, output2, output)`` -- first auxiliary head, second
            auxiliary head, main head -- when the module is in training mode;
            only ``output`` otherwise.
        """
        with_aux = self.training

        x = self.conv(input)
        x = self.inception3b(self.inception3a(x))
        x = self.inception4a(self.pool1(x))
        if with_aux:
            output1 = self.out1(x)

        for block in (self.inception4b, self.inception4c, self.inception4d):
            x = block(x)
        if with_aux:
            output2 = self.out2(x)

        x = self.pool2(self.inception4e(x))
        x = self.inception5b(self.inception5a(x))
        x = self.pool3(x)
        output = self.linear(x.view(x.size(0), -1))

        if not with_aux:
            return output
        return output1, output2, output


def train(structShow=False):
    """Train InceptionV1 and keep the checkpoint with the lowest val loss.

    Reads the file list and split labels from ``Data_csv_path``, trains for
    ``Epochs`` epochs with Adam, validates after every epoch, saves the model
    state to ``Model_file_torch`` whenever the mean validation loss improves,
    and finally plots the loss/accuracy curves.

    Args:
        structShow: When True, print a torchsummary layer table before
            training starts.
    """
    transform = transforms.Compose([
        transforms.Resize((Img_size, Img_size)),
        transforms.ToTensor(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
    ])
    df = pd.read_csv(Data_csv_path, header=0, index_col=0)
    train_list = df[df['split'] == 'train']['filename'].tolist()
    val_list = df[df['split'] == 'val']['filename'].tolist()
    train_dataset = MyDataset(Data_path, files_list=train_list, transform=transform)
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=Batch_size, shuffle=True)
    val_dataset = MyDataset(Data_path, files_list=val_list, transform=transform)
    # Validation metrics do not depend on sample order, so no shuffling.
    val_loader = torch.utils.data.DataLoader(val_dataset, batch_size=Batch_size, shuffle=False)

    model = InceptionV1().to(device)
    model.train()
    if structShow:
        # summary() prints the table itself and returns None; it defaults to
        # device="cuda", so pass the device actually in use.
        summary(model, (Img_chs, Img_size, Img_size), device=device.type)
    # if os.path.exists(Model_file_torch):
    #     model.load_state_dict(torch.load(Model_file_torch))
    #     print('get model from',Model_file_torch)

    criterion = CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=Learning_rate)

    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(Model_file_torch) or '.', exist_ok=True)

    train_loss = np.ones(Epochs)
    train_acc = np.ones(Epochs)
    val_loss = np.ones(Epochs)
    val_acc = np.ones(Epochs)
    best_loss = float("inf")
    best_loss_epoch = 0
    for epoch in range(Epochs):
        print('Epoch %d/%d:' % (epoch + 1, Epochs))
        train_sum_loss = 0.0
        train_sum_acc = 0.0
        val_sum_loss = 0.0
        val_sum_acc = 0.0

        model.train()
        for batch_num, (images, labels) in enumerate(train_loader):
            images, labels = images.to(device), labels.to(device)

            optimizer.zero_grad()

            # In training mode the model returns (aux1, aux2, main); unpack in
            # that order so the main head gets the 0.6 weight and is the one
            # used for accuracy.
            output1, output2, output = model(images)
            loss = criterion(output, labels)
            loss1 = criterion(output1, labels)
            loss2 = criterion(output2, labels)
            total_loss = 0.6 * loss + 0.2 * loss1 + 0.2 * loss2
            train_sum_loss += total_loss.item()

            total_loss.backward()
            optimizer.step()

            # Keep metrics as Python floats so they can be stored in numpy
            # arrays regardless of the device the tensors live on.
            predicted = output.argmax(dim=1)
            acc = (predicted == labels).float().mean().item()
            train_sum_acc += acc

            process_show(batch_num + 1, len(train_loader), acc, loss.item(), prefix='train:')

        model.eval()
        with torch.no_grad():
            for batch_num, (images, labels) in enumerate(val_loader):
                images, labels = images.to(device), labels.to(device)

                # In eval mode the model returns only the main head.
                output = model(images)
                loss = criterion(output, labels)
                val_sum_loss += loss.item()

                predicted = output.argmax(dim=1)
                acc = (predicted == labels).float().mean().item()
                val_sum_acc += acc

                process_show(batch_num + 1, len(val_loader), acc, loss.item(), prefix='val:')

        # Per-epoch means over batches.
        train_sum_loss /= len(train_loader)
        train_sum_acc /= len(train_loader)
        val_sum_loss /= len(val_loader)
        val_sum_acc /= len(val_loader)

        train_loss[epoch] = train_sum_loss
        train_acc[epoch] = train_sum_acc
        val_loss[epoch] = val_sum_loss
        val_acc[epoch] = val_sum_acc

        print('average summary:\ntrain acc %.4f, loss %.4f ; val acc %.4f, loss %.4f'
              % (train_sum_acc, train_sum_loss, val_sum_acc, val_sum_loss))
        if val_sum_loss < best_loss:
            print('val_loss improve from %.4f to %.4f, model save to %s ! \n' % (
                best_loss, val_sum_loss, Model_file_torch))
            best_loss = val_sum_loss
            best_loss_epoch = epoch + 1
            torch.save(model.state_dict(), Model_file_torch)
        else:
            print('val_loss do not improve from %.4f \n' % (best_loss))
    print('best loss %.4f at epoch %d \n' % (best_loss, best_loss_epoch))
    draw_loss_acc(train_loss, train_acc, 'train')
    draw_loss_acc(val_loss, val_acc, 'val')


if __name__ == '__main__':
    # One-off step: uncomment to write the train/val/test 'split' column.
    # dataset_divide(r'E:\_Python\01_deeplearning\04_GoogLeNet\Inception1\data\split.txt')
    train(structShow=True)

my_utils.py

# -*- coding: utf-8 -*- 
# @Time : 2020/1/21 11:39 
# @Author : Zhao HL
# @File : my_utils.py
import sys,os,random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from PIL import Image
def process_show(num, nums, train_acc, train_loss, prefix='', suffix=''):
    """Redraw a one-line text progress bar on stdout.

    Args:
        num: 1-based index of the batch that just finished.
        nums: Total number of batches.
        train_acc: Accuracy to display (anything ``%.4f`` can format).
        train_loss: Loss to display (anything ``%.4f`` can format).
        prefix: Text printed before the batch counter.
        suffix: Text printed after the percentage.

    The line starts with a carriage return so successive calls overwrite each
    other; a newline is emitted once ``num`` reaches ``nums``.
    """
    # Guard against an empty loader instead of dividing by zero.
    rate = num / nums if nums else 1.0
    # Round after scaling: int(round(rate, 2) * 100) truncates float error,
    # e.g. 0.29 * 100 == 28.999999999999996 -> 28.
    ratenum = int(round(rate * 100))
    # Keep the 50-character bar within bounds even if num overshoots nums.
    filled = min(max(ratenum // 2, 0), 50)
    bar = '\r%s batch %3d/%d:train accuracy %.4f, train loss %00.4f [%s%s]%.1f%% %s; ' % (
        prefix, num, nums, train_acc, train_loss, '#' * filled, '_' * (50 - filled), ratenum, suffix)
    sys.stdout.write(bar)
    sys.stdout.flush()
    if num >= nums:
        print()

def dataInfo_show(data_path, csv_pth, cls_dic_path, shapesShow=True, classesShow=True):
    """Print class statistics and image-shape statistics for a dataset.

    Args:
        data_path: Directory whose entries are opened as images.
        csv_pth: CSV file that has a ``label`` column.
        cls_dic_path: Class-dictionary CSV understood by ``get_cls_dic``.
        shapesShow: When True, print how often each image array shape occurs.
        classesShow: When True, print the label-to-name mapping and the number
            of rows per class.
    """
    if classesShow:
        # The class dictionary is only needed for this section, so load it
        # here rather than unconditionally.
        cls_dict = get_cls_dic(cls_dic_path)
        print('\n'+'*'*50)
        df = pd.read_csv(csv_pth)
        labels = df['label'].unique()
        label_cls = {label: cls_dict[label] for label in labels}
        print(label_cls)
        cls_count = df['label'].value_counts()
        cls_count = {cls_dict[k]: v for k, v in cls_count.items()}
        for k, v in cls_count.items():
            print(k, v)

    if shapesShow:
        print('\n'+'*'*50)
        shapes = []
        for filename in os.listdir(data_path):
            # Close each file right after reading it; a directory scan would
            # otherwise keep one open handle per image until garbage collection.
            with Image.open(os.path.join(data_path, filename)) as img:
                shapes.append(np.array(img).shape)
        shapes = pd.Series(shapes)
        print(shapes.value_counts())

def get_cls_dic(cls_dic_path):
    """Load the class-label dictionary from a CSV file.

    The CSV must have ``info`` and ``other`` columns. For every row the first
    nine characters of ``info`` become the key and the text from the eleventh
    character onward becomes the value; ``other`` is discarded.

    Returns:
        dict mapping class id to label text.
    """
    table = pd.read_csv(cls_dic_path)
    info = table['info']
    table['cls'] = info.apply(lambda text: text[:9]).tolist()
    table['label'] = info.apply(lambda text: text[10:]).tolist()
    table = table.drop(columns=['info', 'other'])

    # After indexing by class id, the first remaining column holds the value.
    by_cls = table.set_index('cls')
    return by_cls.iloc[:, 0].to_dict()

def dataset_divide(csv_pth, seed=None):
    """Assign every file in a CSV to a train/val/test split, in place.

    The CSV is read with its first column as index and must have a
    ``filename`` column. File names are shuffled and split 70% / 10% / 20%
    into ``train`` / ``val`` / ``test``; the result is stored in a ``split``
    column and the file is overwritten.

    Args:
        csv_pth: Path of the CSV file to read and rewrite.
        seed: Optional seed for a reproducible shuffle. When None, the global
            ``random`` state is used.
    """
    cls_df = pd.read_csv(csv_pth, header=0, index_col=0)
    # insert() raises ValueError if the column already exists, which made it
    # impossible to re-split a file that had been divided before.
    if 'split' in cls_df.columns:
        cls_df['split'] = None
    else:
        cls_df.insert(1, 'split', None)

    filenames = list(cls_df['filename'])
    rng = random if seed is None else random.Random(seed)
    rng.shuffle(filenames)

    total = len(filenames)
    train_num, train_val_num = int(total * 0.7), int(total * 0.8)
    groups = {
        'train': filenames[:train_num],
        'val': filenames[train_num:train_val_num],
        'test': filenames[train_val_num:],
    }
    for split_name, names in groups.items():
        cls_df.loc[cls_df['filename'].isin(names), 'split'] = split_name
    cls_df.to_csv(csv_pth)

def draw_loss_acc(loss, acc, type='', save_path=None):
    """Plot accuracy and loss per epoch in two stacked subplots.

    Args:
        loss: Sequence of per-epoch loss values.
        acc: Sequence of per-epoch accuracy values, same length as ``loss``.
        type: Tag (e.g. ``'train'``) used in the labels and the file name.
        save_path: Optional directory; when given, the figure is also written
            to ``<save_path>/<type>_acc_loss.png``.

    Raises:
        AssertionError: If ``loss`` and ``acc`` differ in length.
    """
    assert len(acc) == len(loss)
    x = list(range(len(acc)))

    # Start a fresh figure so successive calls (train, then val) never draw
    # onto axes left over from a previous call.
    plt.figure()
    plt.subplot(2, 1, 1)
    plt.plot(x, acc, 'o-')
    plt.title(type+'  accuracy vs. epoches')
    plt.ylabel('accuracy')
    plt.subplot(2, 1, 2)
    plt.plot(x, loss, '.-')
    plt.xlabel(type+'  loss vs. epoches')
    plt.ylabel('loss')

    # Save before show(): once the shown window is closed the figure is gone
    # and savefig() would write an empty image.
    if save_path:
        plt.savefig(os.path.join(save_path, type+"_acc_loss.png"))
    plt.show()


# This module only provides helpers; running it directly does nothing.
if __name__ == '__main__':
    pass

 

你可能感兴趣的:(Pytorch,DL-Code)