街景字符编码识别-模型训练与验证

在模型训练前,我们完成了数据处理和模型分析。
经过模型分析决定先采用baseline+全连接层的网络先得到初步结果,再根据模型的问题调优。baseline选择resnet18(因为数据集简单一点,先用小网络快速得到结果),采用交叉熵函数作为损失函数,用Adam优化器进行训练。
下面我们将构建模型进行训练。首先定义训练和测试的数据载入,此处利用dataloader来载入数据。

# Root directory of the dataset; replace with the actual location.
# NOTE: paths below are built by plain concatenation, so the separator
# (or its absence) in dataPath is significant.
dataPath = 'your-data-path'

# Image folders and JSON annotation files for the train / validation splits.
trainImgPath = dataPath + 'mchar_train/mchar_train/'
ValImgPath = dataPath + 'mchar_val/mchar_val/'
trainLabelPath = dataPath + 'mchar_train.json'
ValLabelPath = dataPath + 'mchar_val.json'

class TrainDataLoader(Dataset):
    """Training split of the street-view character dataset.

    Image file names are read from a CSV file (first column) and per-image
    labels from a JSON file keyed by file name.  Each sample is an augmented
    image tensor plus a fixed-length 6-slot label tensor, padded with class
    10 ("no character"; digit classes are 0-9).
    """

    def __init__(self, root, csvPath, json_Dir):
        data = []
        self.root = root
        with open(csvPath, 'r') as csvfile:
            csv_reader = csv.reader(csvfile)
            for row in csv_reader:
                data.append(row[0])
        with open(json_Dir, 'r') as f:
            info = json.load(f)
        self.dataList = data
        self.InfoDict = info
        self.num = len(self.dataList)
        # Build the augmentation pipeline once here instead of on every
        # __getitem__ call (the original rebuilt it per sample).
        # ToTensor converts a PIL Image / ndarray to a C,H,W float tensor
        # normalized to [0, 1].
        self.transform = transforms.Compose([
            transforms.Resize((64, 128)),
            transforms.ColorJitter(0.3, 0.3, 0.2),
            # Random rotation in (-10, +10) degrees.
            transforms.RandomRotation(degrees=10, fill=None),
            transforms.ToTensor(),
        ])

    def __len__(self):
        return self.num

    def ImgProcess(self, img):
        """Apply the augmentation/normalization pipeline to a PIL image."""
        return self.transform(img).float()

    def __getitem__(self, idx):
        imgName = self.dataList[idx]
        img = Image.open(self.root + imgName)
        imgInfo = self.InfoDict[imgName]
        imgTensor = self.ImgProcess(img)
        # BUG FIX: copy before padding — the original did `label += ...`,
        # mutating the list stored inside self.InfoDict in place.
        label = list(imgInfo['label'])
        label += [10] * (6 - len(label))  # pad to 6 slots with "blank" class 10
        label = torch.Tensor(label).type(torch.LongTensor)
        return {'image': imgTensor, 'label': label}
    
class ValDataLoader(Dataset):
    """Validation split: same CSV/JSON layout as the training loader, but
    with a deterministic pipeline (resize + ToTensor, no augmentation).
    """

    def __init__(self, root, csvPath, json_Dir):
        self.root = root
        with open(csvPath, 'r') as csvfile:
            names = [row[0] for row in csv.reader(csvfile)]
        with open(json_Dir, 'r') as f:
            info = json.load(f)
        self.dataList = names
        self.InfoDict = info
        self.num = len(self.dataList)

    def __len__(self):
        return self.num

    def ImgProcess(self, img):
        """Resize to 64x128 and convert to a C,H,W float tensor in [0, 1]."""
        pipeline = transforms.Compose([
            transforms.Resize((64, 128)),
            transforms.ToTensor(),
        ])
        return pipeline(img).float()

    def __getitem__(self, idx):
        name = self.dataList[idx]
        image = Image.open(self.root + name)
        tensor = self.ImgProcess(image)
        raw = self.InfoDict[name]['label']
        # Pad the label to a fixed 6 slots with class 10 ("no character").
        padded = raw + [10] * (6 - len(raw))
        target = torch.Tensor(padded).type(torch.LongTensor)
        return {'image': tensor, 'label': target}

定义模型

class model_resnet18(nn.Module):
    """ResNet18 backbone with six parallel 11-way classification heads.

    Each head classifies one character slot of the (padded) 6-character
    label: classes 0-9 are digits, class 10 means "no character".
    Attribute names (cnn, fc1..fc6) are kept so existing checkpoints'
    state dicts still load.
    """

    def __init__(self):
        super(model_resnet18, self).__init__()

        backbone = models.resnet18()
        backbone.avgpool = nn.AdaptiveAvgPool2d(1)
        # Drop the final fully-connected layer, keeping conv stack + avgpool.
        # BUG FIX: the original called .cuda() here, which crashes on
        # CPU-only machines and is redundant — the caller already moves the
        # whole model with model.to(device).
        self.cnn = nn.Sequential(*list(backbone.children())[:-1])

        # One independent linear head per character slot (512-dim features).
        self.fc1 = nn.Linear(512, 11)
        self.fc2 = nn.Linear(512, 11)
        self.fc3 = nn.Linear(512, 11)
        self.fc4 = nn.Linear(512, 11)
        self.fc5 = nn.Linear(512, 11)
        self.fc6 = nn.Linear(512, 11)

    def forward(self, img):
        feat = self.cnn(img)
        # Flatten (N, 512, 1, 1) -> (N, 512) before the linear heads.
        feat = feat.view(feat.shape[0], -1)
        c1 = self.fc1(feat)
        c2 = self.fc2(feat)
        c3 = self.fc3(feat)
        c4 = self.fc4(feat)
        c5 = self.fc5(feat)
        c6 = self.fc6(feat)
        return c1, c2, c3, c4, c5, c6

验证和测试

def validate(val_loader, model, criterion):
    """Run one full pass over val_loader and return the mean batch loss.

    Uses the module-level `device` for tensor placement.  The loss of a
    batch is the sum of the six per-character-slot cross-entropies.
    """
    # Switch to inference mode (affects dropout / batch-norm layers).
    model.eval()
    losses = []

    with torch.no_grad():
        for step, sample in enumerate(val_loader):
            images = sample['image'].to(device)
            targets = sample['label'].to(device)

            heads = model(images)
            # Sum the cross-entropy of each head against its label column.
            loss = sum(criterion(logits, targets[:, i])
                       for i, logits in enumerate(heads))
            losses.append(loss.item())
            if step % 100 == 0:
                print('test step: [%d],step loss: [%.4f]' % (step, loss.item()))
    return np.mean(losses)

def predict(test_loader, model, criterion, use_gpu=True):
    """Run inference over test_loader and return the stacked head outputs.

    Returns an ndarray of shape (num_samples, 6 * 11): for each image the
    six 11-way logit vectors are concatenated along the feature axis.
    `criterion` and `use_gpu` are unused but kept for interface
    compatibility; device placement follows the module-level `device`.
    """
    # Switch to inference mode (affects dropout / batch-norm layers).
    model.eval()
    batch_outputs = []

    with torch.no_grad():
        # BUG FIX: the original iterated the global `val_loader` instead of
        # the `test_loader` argument.
        for step, sample in enumerate(test_loader):
            imgData = sample['image'].to(device)

            # BUG FIX: the original called model(input) — `input` is the
            # Python builtin, not the loaded batch.
            c1, c2, c3, c4, c5, c6 = model(imgData)
            per_image = torch.cat([c1, c2, c3, c4, c5, c6], dim=1)
            batch_outputs.append(per_image.cpu().numpy())
    # BUG FIX: the original overwrote `output` every iteration and returned
    # only the last batch; accumulate and stack all batches instead.
    return np.concatenate(batch_outputs, axis=0)

一些用到的函数

def mk_dir(dir_path):
    """Create dir_path (including parents) if it does not already exist.

    exist_ok=True makes the call idempotent and removes the
    check-then-create race of the original os.path.exists() guard.
    """
    os.makedirs(dir_path, exist_ok=True)

def weights_init_model(m):
    """Xavier-initialize conv weights; intended for use with model.apply().

    Modules whose class name does not contain 'Conv2d' keep their default
    initialization.
    """
    name = m.__class__.__name__
    if 'Conv2d' in name:
        # Alternative: init.kaiming_normal_(m.weight.data)
        init.xavier_uniform_(m.weight.data)
        
def Adjust_learning_rate(optimizer, epoch, base_lr=1e-4):
    """Step-decay learning-rate schedule (approximates exponential decay).

    Every `update_range` epochs the learning rate is reset to
    base_lr * decay_rate ** (epoch // update_range).  Epochs that are not a
    multiple of the period leave the optimizer untouched.
    """
    update_range = 100  # decay period, in epochs
    decay_rate = 0.25   # multiplicative decay per period
    if ((epoch % update_range) == 0):
        lr = base_lr * (decay_rate ** (epoch // update_range))
        print('epoch ', epoch, ' Update LR: ', optimizer.param_groups[0]['lr'], ' -> ', lr)
        # BUG FIX: update every param group — the original only wrote
        # param_groups[0], silently skipping any additional groups.
        for group in optimizer.param_groups:
            group['lr'] = lr

模型训练+验证,用tensorboard来记录训练情况,log文件被保存在log文件夹下。

## 预训练模型下载: https://blog.csdn.net/Jorbo_Li/article/details/106248808
import numpy as np
import os
import csv
import json
import math
from PIL import Image
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms, models 
import torch
from torch.nn import init
import torch.nn as nn
from tensorboardX import SummaryWriter
from torch.nn import functional as F

# Pick GPU when available; the model and every batch are moved here below.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DataRootPath = dataPath
log_path = dataPath+'log/'
SavePath = dataPath+'checkpoint/'
mk_dir(log_path)
mk_dir(SavePath)

# Hyper-parameters.
BATCH_SIZE = 16
EPOCH_TOTAL = 100
learning_rate = 1e-5
# use_gpu = True
train_mode = 1 # 0 = first run (init from pretrained backbone), 1 = resume from checkpoint

# NOTE(review): despite the names, `test_dataset` / `test_loader` wrap the
# *validation* split (mchar_val) — confirm before using them for real testing.
train_dataset = TrainDataLoader(DataRootPath+'mchar_train/mchar_train/', DataRootPath+'mchar_train/train.csv',DataRootPath+'mchar_train/mchar_train.json' )
train_num = len(train_dataset)
train_loader = DataLoader(dataset = train_dataset, batch_size = BATCH_SIZE, shuffle = True)
test_dataset = ValDataLoader(DataRootPath+'mchar_val/mchar_val/', DataRootPath+'mchar_val/test.csv',DataRootPath+'mchar_val/mchar_val.json')
test_loader = DataLoader(dataset = test_dataset, batch_size = BATCH_SIZE)

# Number of optimization steps per epoch (last batch may be smaller).
STEP_MAX = math.ceil(len(train_loader.dataset) / BATCH_SIZE)
model = model_resnet18()
##====== init/load weight ======##
if(train_mode):
    # Resume: restore network weights and the finished-epoch counter.
    print('continue train: ',train_mode)
    checkpoint = torch.load(SavePath+'model.pth')
    dict_trained = checkpoint['net']
    epochDone = checkpoint['epoch']
else:
    # Fresh run: Xavier-init conv layers, then overlay ImageNet-pretrained
    # ResNet18 weights for every key that exists in this model's state dict.
    print('first train',train_mode)
    dict_trained = torch.load(SavePath+'resnet18-5c106cde.pth')
    model.apply(weights_init_model)
    model_dict = model.state_dict()
    loaded_dict = {k: v for k, v in dict_trained.items() if k in model_dict}
    model_dict.update(loaded_dict)
    model.load_state_dict(model_dict)

model.to(device)
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, betas=(0.9, 0.999),eps=1e-08)
criterion = nn.CrossEntropyLoss() # PyTorch's CrossEntropyLoss already includes the softmax step
writer = SummaryWriter(log_path)
best_loss = 1000.0
modelFileName = 'model_resnet18'

for epoch in range(EPOCH_TOTAL):
    if (train_mode):
        # Shift the epoch index when resuming so the LR schedule and the
        # tensorboard x-axis continue where the previous run stopped.
        epoch += epochDone
    Adjust_learning_rate(optimizer, epoch, learning_rate)
    epoch_loss = 0
    # Validate before training: the val loss logged for epoch N reflects
    # the weights produced by epoch N-1 (or the loaded checkpoint).
    print('validate epoch %d' % epoch)
    val_loss = validate(test_loader, model, criterion)
    writer.add_scalar(modelFileName + '/data/Val_step_loss',val_loss.item(), epoch)
    if val_loss < best_loss:
        # Keep only the best-so-far checkpoint (by validation loss).
        best_loss = val_loss
        # print('Find better model in Epoch {0}, saving model.'.format(epoch))
        state = {'net':model.state_dict(), 'epoch':epoch }
        torch.save(state, SavePath+'/model.pth')
    for step, sample in enumerate(train_loader):
        imgData = sample['image'].to(device)
        label = sample['label'].to(device)
        
        model.train()    
        optimizer.zero_grad()  
        c1, c2, c3, c4, c5, c6 = model(imgData)
        # Total loss: sum of the six per-character-slot cross-entropies.
        step_loss = criterion(c1, label[:, 0]) + \
                    criterion(c2, label[:, 1]) + \
                    criterion(c3, label[:, 2]) + \
                    criterion(c4, label[:, 3]) + \
                    criterion(c5, label[:, 4]) + \
                    criterion(c6, label[:, 5])
        step_loss.backward()
        optimizer.step()
        epoch_loss += step_loss.item()
        if(step%100 == 0):
            print('epoch: [%d/%d], step: [%d/%d],step loss: [%.4f]' % ( \
                epoch, EPOCH_TOTAL, step, STEP_MAX, step_loss.item()))
        writer.add_scalar(modelFileName + '/data/step_loss',step_loss.item(), step+epoch*STEP_MAX)
    epoch_loss = epoch_loss / (step+1)
    writer.add_scalar(modelFileName + '/data/epoch_loss',epoch_loss,epoch)

writer.close()
print('Done!')

在预训练模型初始化情况下,学习率不要设置过大,否则容易过拟合。

你可能感兴趣的:(街景字符编码识别-模型训练与验证)