pytorch神经网络训练及测试流程&代码

神经网络的训练及测试其实是一个相对固定的流程,下面进行详细说明,内容包括:通过命令行设置基本参数、数据集路径等其他参数的设置、学习率的调整、损失函数、模型参数的保存与加载,以及最终 train.py 与 test.py 中 main() 函数的写法。

当你已经设计好了一个神经网络模型MyModel,它可以在model_my.py中封装成MyNet:

class MyModel(nn.Module):
	def __init__(self, variable1, variable2, ...):
		super(MyModel, self).__init__()
		self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3, bias=False)
		self.bn1 = nn.BatchNorm2d(64, affine=affine_par)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1, ceil_mode=True)
		......
	def forward(self, input1, input2, input3):
		......
		return ouput1, output2, output3, ...
		
def MyNet([needed_variables]):
	# Factory wrapper: builds a MyModel instance and returns it.
	# `[needed_variables]` and `variable1, variable2, ...` are tutorial
	# placeholders for whatever constructor arguments the model needs.
	model = MyModel(variable1, variable2, ...)
	return model

在 train.py 和 test.py 文件中,调用该函数以引入该网络:

# `xxx` is a placeholder for the module that defines MyNet.
from xxx import MyNet
......
# Build the network through the factory; `[needed_variables]` is a placeholder.
model = MyNet([needed_variables])
......

1、训练&测试的py文件都要有的函数

get_arguments():获取所需参数

def _str2bool(value):
    """Convert a command-line token such as ``"False"`` or ``"1"`` to a bool."""
    if isinstance(value, bool):
        return value
    token = value.strip().lower()
    if token in ("true", "t", "yes", "y", "1"):
        return True
    if token in ("false", "f", "no", "n", "0"):
        return False
    raise argparse.ArgumentTypeError("expected a boolean value, got %r" % (value,))


def get_arguments():
    """Parse the command-line options shared by the train and test scripts.

    Returns:
        argparse.Namespace: parsed options; any option not given on the
        command line keeps its default.
    """
    # `description` is the text shown at the top of `--help`.
    parser = argparse.ArgumentParser(description="myNet")

    # Each option takes a name, a type, a default and a help string.
    parser.add_argument("--learning-rate", type=float, default=0.001, help="Base learning rate for training.")
    # ...... (declare the remaining options here; later snippets read
    # args.random_seed, args.momentum, args.weight_decay and args.logFile)
    parser.add_argument("--dataset", type=str, default='davis16', help="duts, coco, or davis16.")

    # GPU settings.
    # Without an explicit converter argparse keeps the raw string, and the
    # non-empty string "False" is truthy, so `--cuda False` could never
    # switch CUDA off.
    parser.add_argument("--cuda", type=_str2bool, default=True, help="Run on CPU or GPU")
    # Device ids are counted from 0, so "1" selects the second GPU.
    parser.add_argument("--gpus", type=str, default="1", help="choose gpu device.")

    return parser.parse_args()

比如,当要运行mytrain.py文件时,进入到同一目录下,在终端命令行输入:python mytrain.py --learning-rate 0.002 --dataset duts --gpus 3,即可手动设置对应参数值,其余参数则使用默认值

configure_dataset_model(args):添加对应数据集的路径等参数

def configure_dataset_model(args):
	# Attach dataset-specific settings (paths, batch size, epochs, ...) to
	# `args` in place, selected by `args.dataset`. Nothing is returned.
	if args.dataset == 'davis16':
		args.batch_size = 5 # number of images fed to the network per step
		args.maxEpoches = 15 # maxIterations= maxEpoches*len(train_aug)/batch_size_per_gpu
		args.data_dir = 'mypath/dataset/DAVIS16' # dataset root directory
		args.data_list = 'mypath/dataset/DAVIS16/train_seqs.txt'
		# args.data_list = 'mypath/dataset/DAVIS16/test_seqs.txt' # prepared list of train/test video sequences; each line of the txt file is one sequence name (example below)
		args.input_size = '473,473' # common size the input images are resized to (optional)
		......
		args.restore_from = './pretrained/deep_labv3/deeplab_davis_12_0.pth' # pretrained weights to start from; change as needed (here the deeplabv3 model used for training)
		# args.restore_from = './snapshots/davis_iteration/mynet_555.pth' # use this one for testing: the weights produced by our own training
		# NOTE(review): no trailing separator here, unlike save_dir below; code
		# that builds paths as `args.snapshot_dir + name` will need one.
		args.snapshot_dir = './snapshots/davis_iteration' # where training checkpoints are saved; not needed for testing
		args.save_dir = './result/test/' # where test-time output images are saved
		
	elif args.dataset == 'duts':
		......
		# every other dataset is configured the same way
	
	else:
		print("dataset error") # report an unknown dataset name (execution continues afterwards)
# test_seqs.txt 的内容示例(每行是一个视频序列的名字):
blackswan
bmx-trees
breakdance
camel
car-roundabout
car-shadow
cows
dance-twirl
dog
drift-chicane
drift-straight
goat
horsejump-high
kite-surf
libby
motocross-jump
paragliding-launch
parkour
scooter-black
soapbox

1.1 初始化模型参数

这样一来,在main函数中我们就可以初始化所有模型参数

def main():
    """First part of main(): parse arguments, select the GPU, seed the RNGs."""
    args = get_arguments()
    print("=====> Configure dataset and model")
    configure_dataset_model(args)
    print(args)

    # Select the GPU(s) used for training.
    print("=====> Set GPU for training")
    if args.cuda:
        print("====> Use gpu id: '{}'".format(args.gpus))
        # Set before the first CUDA query below so that it takes effect.
        os.environ["CUDA_VISIBLE_DEVICES"] = args.gpus
        if not torch.cuda.is_available():
            # NOTE(review): `--cuda` is declared with default=True, so a CPU
            # run needs `--cuda False`, which only works if the parser
            # converts that string to a real bool.
            raise Exception("No GPU found or Wrong gpu id, please run without --cuda")

    # Needed for training: fix the random seed so that every run of this
    # script produces the same results.
    print("=====> Random Seed: ", args.random_seed)
    torch.manual_seed(args.random_seed)
    if args.cuda:
        torch.cuda.manual_seed(args.random_seed)
    ...  # the rest of main() follows in the later snippets

数据集的加载及预处理

为了模块化,我们新建一个pre_dataset.py文件作为数据预处理模块:

class PreData(Dataset):
    """Dataset pairing each frame with a random nearby frame of its sequence.

    Expects a DAVIS16-style layout under ``data_path``:
    ``JPEGImages/480p/<seq>/*.jpg``, ``Annotations/480p/<seq>/*.png``,
    ``davis_flow/<seq>/*.png`` and ``davis_bwflow/<seq>/*.png``.

    Args:
        data_path: dataset root directory.
        data_list: text file with one sequence name per line.
        input_size: ``(height, width)`` that frames and flow maps are resized
            to. The default matches the ``'473,473'`` used in the
            configuration snippet.
    """

    def __init__(self, data_path, data_list, input_size=(473, 473)):
        self.fwflow_list = []
        self.bwflow_list = []
        self.img_list = []
        self.label_list = []
        # Target size used by __getitem__ when resizing.
        self.H, self.W = input_size

        with open(data_list) as f:
            # Skip blank lines so that they do not turn into empty glob patterns.
            seqs = [line.strip() for line in f if line.strip()]
        print(seqs)

        # DAVIS16 as the example layout.
        for seq in seqs:
            # The last frame/label of every sequence is dropped; presumably the
            # flow folders hold one file fewer per sequence, which keeps the
            # four lists index-aligned (TODO confirm against the data).
            self.img_list += sorted(glob.glob(os.path.join(data_path, "JPEGImages/480p", seq, "*.jpg")))[:-1]
            self.label_list += sorted(glob.glob(os.path.join(data_path, "Annotations/480p", seq, "*.png")))[:-1]
            self.fwflow_list += sorted(glob.glob(os.path.join(data_path, "davis_flow", seq, "*.png")))
            self.bwflow_list += sorted(glob.glob(os.path.join(data_path, "davis_bwflow", seq, "*.png")))

        # Read by the partner search in __getitem__.
        self.dataset_len = len(self.img_list)

    def __len__(self):
        return len(self.img_list)

    @staticmethod
    def _sequence_of(path):
        """Return the name of the sequence folder that contains ``path``."""
        return os.path.basename(os.path.dirname(path))

    def _pick_partner(self, item, scope=10):
        """Pick a random index within ``scope`` frames of ``item``, same sequence."""
        seq = self._sequence_of(self.img_list[item])
        # Same offset range as np.random.randint(-scope, scope): [-scope, scope).
        candidates = [
            item + offset
            for offset in range(-scope, scope)
            if offset != 0
            and 0 <= item + offset < self.dataset_len
            and self._sequence_of(self.img_list[item + offset]) == seq
        ]
        if not candidates:
            # Rejection sampling would spin forever in this situation.
            raise RuntimeError("no partner frame available for index %d (sequence %r)" % (item, seq))
        return candidates[np.random.randint(len(candidates))]

    def _resize(self, arrays):
        """Stack HxWxC arrays into an NxCxHxW tensor resized to (self.H, self.W)."""
        batch = torch.from_numpy(np.stack(arrays, 0)).permute(0, 3, 1, 2)
        return F.interpolate(batch, (self.H, self.W), mode='bilinear', align_corners=True)

    def __getitem__(self, item):
        """Return the current frame plus one random partner frame.

        The network input in this example is two frames of a video sequence
        together with their optical-flow maps; adapt this method as needed.
        """
        # Current frame first, then the randomly chosen partner.
        frame = [item, self._pick_partner(item)]

        videos, labels, fwflows, bwflows = [], [], [], []
        for i in frame:
            video = imread(self.img_list[i])
            fw = imread(self.fwflow_list[i])
            bw = imread(self.bwflow_list[i])
            label = imread(self.label_list[i])
            if len(label.shape) == 3:
                label = label[:, :, 0]  # keep a single channel of the annotation
            label = label[:, :, np.newaxis]
            videos.append(img_normalize(video.astype(np.float32) / 255.))
            labels.append(label.astype(np.float32) / 255.)
            fwflows.append(img_normalize(fw.astype(np.float32) / 255.))
            bwflows.append(img_normalize(bw.astype(np.float32) / 255.))

        # Original resolution of the current frame's label.
        H, W = labels[0].shape[0], labels[0].shape[1]
        current = self.img_list[item]
        # "<sequence>/<file>"; always joined with "/" because the test script
        # splits the name on "/".
        name = self._sequence_of(current) + "/" + os.path.basename(current)
        return {'video': self._resize(videos),
                'fwflow': self._resize(fwflows),
                'bwflow': self._resize(bwflows),
                # Only the current frame's label is returned, at its original size.
                "label": torch.from_numpy(np.stack([labels[0]], 0)).permute(0, 3, 1, 2),
                "H": H, "W": W, 'name': name}

# 图像归一化函数(按ImageNet的均值/标准差做标准化;灰度图会先被复制成3个通道),在上面的__getitem__中有用到
def img_normalize(image):
    """Standardise an image with fixed per-channel mean/std constants.

    A 2-D (single-channel) input is standardised with the first channel's
    constants and replicated into three identical channels; any other input
    is standardised channel-wise against the three-channel constants.
    """
    if len(image.shape) != 2:
        mean = np.array([0.485, 0.456, 0.406], dtype=np.float32).reshape((1, 1, 3))
        std = np.array([0.229, 0.224, 0.225], dtype=np.float32).reshape((1, 1, 3))
        return (image - mean) / std

    plane = (image[:, :, np.newaxis] - 0.485) / 0.229
    return np.concatenate([plane] * 3, axis=2)

1.2 加载数据集

这样在训练或测试文件的main函数中就可以加载数据集了:

def main():
    """Dataset-loading part of main() (shared by train and test scripts)."""
    ...  # argument parsing / GPU setup shown earlier
    # Must use the same dataset key as get_arguments() and
    # configure_dataset_model(), i.e. 'davis16'.
    if args.dataset == 'davis16':
        h, w = map(int, args.input_size.split(','))
        input_size = (h, w)
        # Test split as the example. `db` is assumed to be the data module
        # (pre_dataset.py) that defines the PreData class shown earlier.
        db_test = db.PreData(data_path=args.data_dir, data_list=args.data_list)
        testloader = data.DataLoader(db_test, batch_size=1, shuffle=False, num_workers=0)

    elif args.dataset == 'duts':
        ...  # same pattern for the other datasets

2、训练还需要有的东西

设置学习率的自适应

有许多方法可用,下面只是一个例子

def adjust_learning_rate(optimizer, decay_count, decay_rate=.9, base_lr=5e-4, min_lr=1e-5):
    """Set every parameter group to an exponentially decayed learning rate.

    The new rate is ``max(min_lr, base_lr * decay_rate ** decay_count)``.
    It is written to all groups, so any per-group scaling configured when the
    optimizer was created is overwritten.

    Args:
        optimizer: object exposing ``param_groups`` (a list of dicts).
        decay_count: exponent applied to ``decay_rate``.
        decay_rate: multiplicative decay per step.
        base_lr: rate used when ``decay_count`` is 0.
        min_lr: lower bound for the resulting rate.
    """
    new_lr = max(min_lr, base_lr * pow(decay_rate, decay_count))
    for param_group in optimizer.param_groups:
        param_group['lr'] = new_lr
        print(param_group['lr'])

损失函数

举个例子,如果需要用到二值交叉熵损失(BCE, binary cross entropy),那么可以定义这样一个函数:

# The criterion needs its own name: if it shares the name `bce_loss` with the
# function below, the `def` rebinds that name and the function ends up calling
# itself forever.
_bce_criterion = nn.BCELoss(reduction='mean')


def bce_loss(pred, target):
    """Return the mean binary cross-entropy between ``pred`` and ``target``.

    Args:
        pred: predicted probabilities (values in [0, 1]).
        target: ground-truth tensor with the same shape as ``pred``.
    """
    return _bce_criterion(pred, target)

按需封装成相应的函数,后续使用直接调用就行

训练网络准备

设置优化器

def main():
	......#(the parameter-initialisation part shown earlier)
	# Two parameter groups with separate learning rates: the parameters from
	# get_lr_params use the base rate, those from get_last_lr_params use 10x.
	param_group = [{'params': get_lr_params(model), 'lr': 1*args.learning_rate },
                {'params': get_last_lr_params(model), 'lr': 10*args.learning_rate}] # learning rates targeted at specific layers
	optimizer = optim.SGD(param_group, lr=args.learning_rate, momentum=args.momentum, weight_decay=args.weight_decay) # SGD: stochastic gradient descent; swap in another optimizer if needed
	optimizer.zero_grad() # reset every parameter's gradient to zero (initialisation)
	......

其中:

def get_lr_params(model):
    """Yield the trainable encoder parameters that use the base learning rate.

    With exactly one visible GPU only ``encoder.layer3`` is trained (the
    original author's choice, motivated by limited memory); otherwise
    ``conv1``, ``bn1``, ``layer1``-``layer3`` and ``main_classifier`` of the
    encoder are used.

    Each parameter is yielded once. Walking ``block.modules()`` and calling
    ``.parameters()`` on every sub-module would return nested parameters
    repeatedly, so ``block.parameters()`` is iterated directly (the
    "append the parameters" variant of the original snippet).

    Args:
        model: the network, either plain or wrapped so that the real network
            is available as ``model.module`` (e.g. ``nn.DataParallel``).
    """
    # Unwrap DataParallel-style wrappers instead of guessing from the GPU count.
    net = getattr(model, 'module', model)
    encoder = net.encoder

    if torch.cuda.device_count() == 1:
        blocks = [encoder.layer3]
    else:
        blocks = [
            encoder.conv1,
            encoder.bn1,
            encoder.layer1,
            encoder.layer2,
            encoder.layer3,
            encoder.main_classifier,
        ]

    seen = set()  # guards against parameters shared between blocks
    for block in blocks:
        for param in block.parameters():
            if param.requires_grad and id(param) not in seen:
                seen.add(id(param))
                yield param
            
def get_last_lr_params(model):
	"""
    Return all parameters of the network's final classification layer.
    """
    ...... # same kind of traversal as in get_lr_params

创建网络

def main():
    """Network-creation part of the training main()."""
    ...  # (argument initialisation + optimizer setup + dataset loading)
    print("=====> Building network")
    # Load the pretrained deeplabv3 weights. Mapping to CPU first avoids
    # failures when the checkpoint was saved on a GPU id that is not visible
    # in this process; load_state_dict copies the values into the model.
    saved_state_dict = torch.load(args.restore_from, map_location='cpu')
    model = MyNet([needed_variables])  # build the network
    new_params = model.state_dict().copy()  # start from the network's own parameters
    for i in saved_state_dict["model"]:
        # Drop the first dotted component of each checkpoint key and file the
        # tensor under the 'encoder.' prefix of this model.
        i_parts = i.split('.')
        print('i_parts:  ', '.'.join(i_parts[1:-1]))
        new_params['encoder' + '.' + '.'.join(i_parts[1:])] = saved_state_dict["model"][i]

    print("=====> Loading init weights")
    model.load_state_dict(new_params)
    if args.cuda:
        if torch.cuda.device_count() > 1:
            print("torch.cuda.device_count()=", torch.cuda.device_count())
            model = torch.nn.DataParallel(model).cuda()
            print("more than 1 gpu")
        else:
            print("single GPU for training")
            model = model.cuda()

    model.train()  # switch the model to training mode
    cudnn.benchmark = True

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)  # create the checkpoint folder

    print('=====> Computing network parameters')
    total_paramters = netParams(model)
    print('Total network parameters: ' + str(total_paramters))

    # Log file. Joined with os.path.join because snapshot_dir is configured
    # without a trailing separator; plain concatenation would create the file
    # next to the snapshot folder rather than inside it.
    logFileLoc = os.path.join(args.snapshot_dir, args.logFile)
    if os.path.isfile(logFileLoc):
        logger = open(logFileLoc, 'a')
    else:
        logger = open(logFileLoc, 'w')
        logger.write("Parameters: %s" % (str(total_paramters)))
        logger.write("\n%s\t\t%s" % ('iter', 'Loss(train)\n'))
    logger.flush()

# 计算网络参数数量的函数(上面用到了)
def netParams(model):
    """Count the scalar parameters of a model.

    Args:
        model: object whose ``parameters()`` yields tensors.

    Returns:
        int: sum over all parameters of the product of their dimensions.
    """
    total = 0
    for tensor in model.parameters():
        count = 1
        for dim in tensor.size():
            count *= dim
        total += count
    return total

3、训练部分train.py的main函数

def main():
    """Training loop part of train.py's main()."""
    start = timeit.default_timer()  # start time
    ...  # (network creation part)
    print("=====> Begin to train")
    train_len = len(trainloader)
    max_epoch = int(args.maxEpoches)
    print("  iteration numbers  of per epoch: ", train_len)
    print("  epoch num: ", args.maxEpoches)
    print("  max iteration: ", args.maxEpoches*train_len)

    iter_num = 0  # global iteration counter across all epochs
    # Epochs are numbered from 1, so the upper bound must be inclusive to run
    # maxEpoches epochs.
    for epoch in range(1, max_epoch + 1):
        running_loss = 0.0
        ite_num_per = 0
        # NOTE(review): `datasampler` comes from the omitted setup code;
        # presumably a distributed sampler. Remove this call if none is used.
        datasampler.set_epoch(epoch)
        model.train()

        if epoch > 15:
            # NOTE(review): for epochs 16-19 the decay count is negative,
            # which raises the rate above its base value; confirm the offset.
            adjust_learning_rate(optimizer, (epoch-20))  # learning-rate schedule

        for i, data in enumerate(trainloader, start=1):
            ite_num_per = i
            iter_num = iter_num + 1
            img, fw_flow, bw_flow, label = data['video'].cuda(), \
                                           data['fwflow'].cuda(), \
                                           data['bwflow'].cuda(), \
                                           data['label'].cuda()
            B, Seq, C, H, W = img.size()

            # Network outputs; adapt this call to your own model.
            spatial_out, temporal_out = model(img, torch.cat((fw_flow, bw_flow), 2))

            # Uses the bce_loss function defined earlier.
            spatial_loss = bce_loss(spatial_out, label.view(B * Seq, 1, H, W))
            temporal_loss = bce_loss(temporal_out, label.view(B * Seq, 1, H, W))
            loss = spatial_loss + temporal_loss
            running_loss += loss.item()  # accumulated loss of this epoch
            loss.backward()  # back-propagate to compute gradients
            optimizer.step()  # update the parameters from the gradients
            optimizer.zero_grad()  # clear gradients for the next step

            current_lr = optimizer.param_groups[0]['lr']
            print("[epoch: {}/{}, iter: {}/{}, iter: {}] train loss: {:.5f}".format(epoch, max_epoch, i, train_len, iter_num, running_loss / ite_num_per))
            logger.write("Epoch[{}]({}/{}):     Loss: {:.10f}      lr: {:.5f}\n".format(epoch, i, train_len, loss.item(), current_lr))  # write to the log file
            logger.flush()  # flush the buffer

        # Save once per epoch; saving inside the batch loop would write a new
        # checkpoint file on every iteration.
        print("=====> saving model")
        snapshot_name = "epoch_{}_loss_{:.5f}.pth".format(epoch, running_loss / max(ite_num_per, 1))
        torch.save({'state_dict': model.state_dict(), 'optimizer': optimizer.state_dict()}, os.path.join(args.snapshot_dir, snapshot_name))  # save the current parameters

    end = timeit.default_timer()  # end time
    print(float(end-start)/3600, 'h')  # total training time
    logger.write("total training time: {:.2f} h\n".format(float(end-start)/3600))
    logger.close()

4、测试部分test.py的main函数

测试部分相对简单,不需要额外的函数了,可以直接写出测试部分的main函数代码:

def main():
    """Inference part of test.py's main()."""
    ...  # (argument initialisation + dataset loading)
    # Load the trained network weights.
    print("=====> Loading network")
    model = MyNet([needed_variables]).cuda()
    for param in model.parameters():
        param.requires_grad = False
    saved_state_dict = torch.load(args.restore_from)
    # The training script stores {'state_dict': ..., 'optimizer': ...};
    # accept that layout as well as a bare state dict.
    if 'state_dict' in saved_state_dict:
        saved_state_dict = saved_state_dict['state_dict']
    model_dict = model.state_dict()
    pretrained_dict = {}
    for k, v in saved_state_dict.items():
        # Only strip the DataParallel prefix when it is actually present.
        key = k[len('module.'):] if k.startswith('module.') else k
        if key in model_dict:
            pretrained_dict[key] = v
    if not pretrained_dict:
        # Otherwise the test would silently run with untrained weights.
        raise RuntimeError("no parameter in '{}' matches the model".format(args.restore_from))
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
    model.eval()

    start = timeit.default_timer()  # start time
    num = 0  # number of processed images
    for data in testloader:
        img, fw_flow, bw_flow = data['video'].cuda(), data['fwflow'].cuda(), data['bwflow'].cuda()
        # The output size must be plain ints (the loader uses batch_size=1).
        H, W = int(data["H"]), int(data["W"])
        flow = torch.cat((fw_flow, bw_flow), 2)
        with torch.no_grad():
            out, _ = model(img, flow)

        # Post-process the output; adapt to your own needs.
        out = F.interpolate(out[0], (H, W), mode='bilinear', align_corners=True)
        out = out[0, 0].cpu().numpy()
        # Min-max normalise to [0, 255]; the epsilon avoids division by zero.
        out = (out - np.min(out) + 1e-12) / (np.max(out) - np.min(out) + 1e-12) * 255.
        out = out.astype(np.uint8)

        seq_name, file_name = data['name'][0].split("/")[-2:]
        save_folder = os.path.join(args.save_dir, "davis16", seq_name)
        os.makedirs(save_folder, exist_ok=True)  # create the output folder
        imwrite(os.path.join(save_folder, file_name), out)  # save the output image
        print('save: ' + data['name'][0])
        num += 1
    end = timeit.default_timer()  # end time
    total_time = end - start
    # fps = frames per second, i.e. images processed per second.
    print('total_time:' + str(total_time) + ', fps:' + str(num / total_time))

参考于以下文献提供的代码:
[1] Wang W, Lu X, Shen J, et al. Zero-shot video object segmentation via attentive graph neural networks[C]//Proceedings of the IEEE/CVF International Conference on Computer Vision. 2019: 9236-9245.
代码链接:https://github.com/carrierlxk/AGNN
[2] Ren S, Liu W, Liu Y, et al. Reciprocal transformations for unsupervised video object segmentation[C]//Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition. 2021: 15455-15464.
代码链接:https://github.com/OliverRensu/RTNet

你可能感兴趣的:(视频对象分割笔记,python,深度学习,神经网络,pytorch,计算机视觉)