Part 1: pix2pixHD Code Walkthrough (train.py + test.py)

pix2pixHD Code Walkthrough

Part 1: pix2pixHD Code Walkthrough (train.py + test.py)
Part 2: pix2pixHD Code Walkthrough (options configuration)
Part 3: pix2pixHD Code Walkthrough (dataset processing)
Part 4: pix2pixHD Code Walkthrough (models construction)

Part 1: pix2pixHD Code Walkthrough (train.py + test.py)

train.py

import time
import os
import numpy as np
import torch
from torch.autograd import Variable
from collections import OrderedDict
from subprocess import call
import fractions

from options.train_options import TrainOptions
from data.data_loader import CreateDataLoader
from models.models import create_model
import util.util as util
from util.visualizer import Visualizer

opt = TrainOptions().parse()                                                           # parse the training options
iter_path = os.path.join(opt.checkpoints_dir, opt.name, 'iter.txt')                    # checkpoint path; opt.name defaults to label2city; iter.txt stores the (epoch, iteration) resume point
if opt.continue_train:                                                                 # resume training when continue_train is enabled
    try:
        start_epoch, epoch_iter = np.loadtxt(iter_path, delimiter=',', dtype=int)      # load the saved epoch and epoch_iter
    except:
        start_epoch, epoch_iter = 1, 0                                                 # if loading fails, fall back to epoch 1, iteration 0
    print('Resuming from epoch %d at iteration %d' % (start_epoch, epoch_iter))        # report the epoch and iteration training resumes from
else:
    start_epoch, epoch_iter = 1, 0                                                     # continue_train=False: start from epoch 1, iteration 0


def lcm(a, b): return abs(a * b)/fractions.gcd(a, b) if a and b else 0                 # fractions.gcd() returns the greatest common divisor of a and b; e.g. gcd(100, 3) = 1, so lcm(100, 3) = 100*3/1 = 300 and the print frequency becomes 300
opt.print_freq = lcm(opt.print_freq, opt.batchSize)                                    # make the print frequency a common multiple of print_freq and the batch size
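# Note (not part of the original repo): fractions.gcd was deprecated and removed in
# Python 3.9; on newer Python the same lcm can be written with math.gcd, e.g.:
#     import math
#     def lcm(a, b): return abs(a * b) // math.gcd(a, b) if a and b else 0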


if opt.debug:                                                                          # quick settings for debugging
    opt.display_freq = 1                                                               # display results every iteration
    opt.print_freq = 1                                                                 # print losses every iteration
    opt.niter = 1                                                                      # number of epochs at the initial learning rate
    opt.niter_decay = 0                                                                # number of epochs over which the learning rate linearly decays to zero
    opt.max_dataset_size = 10

### load the dataset
data_loader = CreateDataLoader(opt)                                                    # build the data loader
dataset = data_loader.load_data()                                                      # load the data; this calls load_data() of data.custom_dataset_data_loader
dataset_size = len(data_loader)                                                        # number of images in the dataset
print('#training images = %d' % dataset_size)                                          # print the number of training images; for the street-scene (Cityscapes) set used here it is 2975

model = create_model(opt)                                                              # create the model from the parsed options
visualizer = Visualizer(opt)                                                           # visualization utilities
if opt.fp16:                                                                           # mixed-precision training setup (NVIDIA apex/AMP)
    from apex import amp
    model, [optimizer_G, optimizer_D] = amp.initialize(model, [model.optimizer_G, model.optimizer_D], opt_level='O1')
    model = torch.nn.DataParallel(model, device_ids=opt.gpu_ids)
else:
    optimizer_G, optimizer_D = model.module.optimizer_G, model.module.optimizer_D

total_steps = (start_epoch-1) * dataset_size + epoch_iter                              # total number of steps taken so far

display_delta = total_steps % opt.display_freq                                         # remainder used by the if-check below, so the display schedule stays aligned when resuming mid-epoch
print_delta = total_steps % opt.print_freq                                             # same idea for the print check
save_delta = total_steps % opt.save_latest_freq                                        # same idea for the save-latest check

for epoch in range(start_epoch, opt.niter + opt.niter_decay + 1):
    epoch_start_time = time.time()
    if epoch != start_epoch:                                                           # for every epoch after the resumed one, recompute epoch_iter
        epoch_iter = epoch_iter % dataset_size                                         # remainder: current step count % dataset length
    for i, data in enumerate(dataset, start=epoch_iter):                               # epoch_iter is the starting index into the dataset
        if total_steps % opt.print_freq == print_delta:                                # record the start time of this print interval
            iter_start_time = time.time()
        total_steps += opt.batchSize                                                   # advance the total step counter
        epoch_iter += opt.batchSize                                                    # advance the within-epoch step counter

        # whether to collect output images
        save_fake = total_steps % opt.display_freq == display_delta                    # bool: whether to save the generated (fake) images this step

        ############## Forward Pass ######################
        # calls forward() of class Pix2PixHDModel(BaseModel) with the four prepared inputs (the author only uses label and image); returns the losses and the generated fake image
        losses, generated = model(Variable(data['label']), Variable(data['inst']), Variable(data['image']), Variable(data['feat']), infer=save_fake)
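        # Note (not in the original repo): torch.autograd.Variable has been a no-op wrapper
        # since PyTorch 0.4, so passing the tensors directly would behave the same way.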

        # sum per device losses
        ### isinstance() checks whether an object is an instance of a given type, similar to type().
        #     Difference between isinstance() and type():
        #         type() does not treat a subclass instance as an instance of the parent class (inheritance is ignored).
        #         isinstance() does treat a subclass instance as an instance of the parent class (inheritance is respected).
        #     When checking whether an object has a given type, isinstance() is the recommended choice.
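        # A tiny illustration of the difference (not from the original code):
        #     class Base: pass
        #     class Sub(Base): pass
        #     isinstance(Sub(), Base)   # True  -- inheritance is respected
        #     type(Sub()) == Base       # False -- only the exact type matches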
        losses = [torch.mean(x) if not isinstance(x, int) else x for x in losses]      # average x unless it is a plain int, in which case return it unchanged
        loss_dict = dict(zip(model.module.loss_names, losses))                         # zip() pairs each loss name with its value, and dict() turns the pairs into a dictionary
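        # e.g. (illustration only) dict(zip(['G_GAN', 'D_real'], [0.7, 0.3])) -> {'G_GAN': 0.7, 'D_real': 0.3}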

        # calculate final loss scalar
        loss_D = (loss_dict['D_fake'] + loss_dict['D_real']) * 0.5                     # discriminator loss: the average of the fake and real discrimination losses
        loss_G = loss_dict['G_GAN'] + loss_dict.get('G_GAN_Feat', 0) + loss_dict.get('G_VGG', 0)   # generator loss: GAN loss plus the optional feature-matching and VGG losses; the author disables the latter two, so only G_GAN remains

        ############### Backward Pass ####################
        # update generator weights
        optimizer_G.zero_grad()                                                        # zero the generator optimizer's gradients
        if opt.fp16:                                                                   # fp16/AMP is mixed-precision acceleration (NVIDIA's apex library)
            with amp.scale_loss(loss_G, optimizer_G) as scaled_loss: scaled_loss.backward()                
        else:
            loss_G.backward()          
        optimizer_G.step()                                                             # apply the gradient update to the generator parameters

        # update discriminator weights
        optimizer_D.zero_grad()                                                        # zero the discriminator optimizer's gradients
        if opt.fp16:                                
            with amp.scale_loss(loss_D, optimizer_D) as scaled_loss: scaled_loss.backward()                
        else:
            loss_D.backward()        
        optimizer_D.step()                                                             # apply the gradient update to the discriminator parameters

        ############## Display results and errors ##########
        ### print out errors
        if total_steps % opt.print_freq == print_delta:
            errors = {k: v.data.item() if not isinstance(v, int) else v for k, v in loss_dict.items()}     # a dict comprehension: shorthand for building a dict with a for loop
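            # e.g. (illustration only) {k: v * 2 for k, v in {'a': 1, 'b': 2}.items()} -> {'a': 2, 'b': 4}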
            t = (time.time() - iter_start_time) / opt.print_freq
            visualizer.print_current_errors(epoch, epoch_iter, errors, t)
            visualizer.plot_current_errors(errors, total_steps)
            #call(["nvidia-smi", "--format=csv", "--query-gpu=memory.used,memory.free"]) 

        ### display output images
        if save_fake:
            ### OrderedDict() is an ordered dictionary: https://www.cnblogs.com/gide/p/6370082.html
            # Python dictionaries are often described as unordered because they are stored by hash,
            # but the standard-library module collections provides the subclass
            # OrderedDict, which keeps its entries in insertion order.
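            # e.g. OrderedDict([('b', 2), ('a', 1)]) iterates as ('b', 2) then ('a', 1);
            # note that since Python 3.7 plain dicts also preserve insertion order.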
            visuals = OrderedDict([('input_label', util.tensor2label(data['label'][0], opt.label_nc)),
                                   ('synthesized_image', util.tensor2im(generated.data[0])),
                                   ('real_image', util.tensor2im(data['image'][0]))])
            visualizer.display_current_results(visuals, epoch, total_steps)            # save the images and refresh the HTML results page

        ### save latest model
        if total_steps % opt.save_latest_freq == save_delta:
            print('saving the latest model (epoch %d, total_steps %d)' % (epoch, total_steps))     # save the most recent model
            ### Saving and loading PyTorch models
            # Suppose we create a model: model = MyVggNet().
            # For multi-GPU training we wrap it: model = nn.DataParallel(model).cuda()
            # After this line, model is no longer the original module; DataParallel wraps it in a
            # shell that handles multi-GPU execution, and the real model object is: real_model = model.module.
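            # A minimal sketch of that wrapping (illustration only; MyVggNet is a hypothetical model class):
            #     net = torch.nn.DataParallel(MyVggNet()).cuda()
            #     real_model = net.module                               # the original, unwrapped model
            #     torch.save(real_model.state_dict(), 'latest_net.pth')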
            model.module.save('latest')                                                # call save() of Pix2PixHDModel to write the latest checkpoints: latest_net_G and latest_net_D
            np.savetxt(iter_path, (epoch, epoch_iter), delimiter=',', fmt='%d')

        if epoch_iter >= dataset_size:
            break
       
    # end of epoch 
    iter_end_time = time.time()
    print('End of epoch %d / %d \t Time Taken: %d sec' %
          (epoch, opt.niter + opt.niter_decay, time.time() - epoch_start_time))

    ### save model for this epoch
    if epoch % opt.save_epoch_freq == 0:
        print('saving the model at the end of epoch %d, iters %d' % (epoch, total_steps))        
        model.module.save('latest')                                                    # save the latest checkpoints: latest_net_G and latest_net_D
        model.module.save(epoch)                                                       # save epoch-numbered checkpoints, e.g. 10_net_G and 10_net_D
        np.savetxt(iter_path, (epoch+1, 0), delimiter=',', fmt='%d')

    ### instead of only training the local enhancer, train the entire network after certain iterations
    if (opt.niter_fix_global != 0) and (epoch == opt.niter_fix_global):
        model.module.update_fixed_params()

    ### linearly decay learning rate after certain iterations                          # linearly decay the learning rate once epoch exceeds opt.niter
    if epoch > opt.niter:
        model.module.update_learning_rate()
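
The iter.txt file used above is just the resume point saved as two integers (epoch, iteration). A minimal self-contained sketch of that round trip, matching what train.py does around continue_train (an illustration, not part of the repository):

import numpy as np

# write the resume point the way train.py does at the end of an epoch (here: epoch 12, iteration 300)
np.savetxt('iter.txt', (12, 300), delimiter=',', fmt='%d')
# read it back the way train.py does when resuming
start_epoch, epoch_iter = np.loadtxt('iter.txt', delimiter=',', dtype=int)
print(start_epoch, epoch_iter)  # 12 300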

test.py

import os
from collections import OrderedDict
from torch.autograd import Variable
from options.test_options import TestOptions
from data.data_loader import CreateDataLoader
from models.models import create_model
import util.util as util
from util.visualizer import Visualizer
from util import html
import torch

opt = TestOptions().parse(save=False)
opt.nThreads = 1   # test code only supports nThreads = 1
opt.batchSize = 1  # test code only supports batchSize = 1
opt.serial_batches = True  # no shuffle: load the data in its original order
opt.no_flip = True  # no flip

data_loader = CreateDataLoader(opt)
dataset = data_loader.load_data()
visualizer = Visualizer(opt)
# create website
web_dir = os.path.join(opt.results_dir, opt.name, '%s_%s' % (opt.phase, opt.which_epoch))          # e.g. './results/label2city/test_latest'
webpage = html.HTML(web_dir, 'Experiment = %s, Phase = %s, Epoch = %s' % (opt.name, opt.phase, opt.which_epoch))       # build an HTML page for browsing the test results

# test
if not opt.engine and not opt.onnx:
    model = create_model(opt)
    if opt.data_type == 16:
        model.half()
    elif opt.data_type == 8:
        model.type(torch.uint8)
            
    if opt.verbose:
        print(model)                                                                   # print the model architecture
else:
    from run_engine import run_trt_engine, run_onnx
    
for i, data in enumerate(dataset):
    if i >= opt.how_many:
        break
    if opt.data_type == 16:
        data['label'] = data['label'].half()
        data['inst']  = data['inst'].half()
    elif opt.data_type == 8:
        data['label'] = data['label'].uint8()
        data['inst']  = data['inst'].uint8()
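        # Note (not in the original repo): torch.Tensor may not expose a .uint8() method on
        # recent PyTorch releases; if this 8-bit branch is ever used, .to(torch.uint8)
        # (or .byte()) would be the standard conversion.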
    if opt.export_onnx:
        print ("Exporting to ONNX: ", opt.export_onnx)
        assert opt.export_onnx.endswith("onnx"), "Export model file should end with .onnx"
        torch.onnx.export(model, [data['label'], data['inst']],
                          opt.export_onnx, verbose=True)
        exit(0)
    minibatch = 1 
    if opt.engine:
        generated = run_trt_engine(opt.engine, minibatch, [data['label'], data['inst']])
    elif opt.onnx:
        generated = run_onnx(opt.onnx, opt.data_type, minibatch, [data['label'], data['inst']])
    else:        
        generated = model.inference(data['label'], data['inst'], data['image'])
        
    visuals = OrderedDict([('input_label', util.tensor2label(data['label'][0], opt.label_nc)),
                           ('synthesized_image', util.tensor2im(generated.data[0]))])
    img_path = data['path']
    print('process image... %s' % img_path)
    visualizer.save_images(webpage, visuals, img_path)

webpage.save()
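
For reference, a typical invocation of this test script looks roughly like the following (a sketch only; the available flags are defined in the options files covered in Part 2):

python test.py --name label2city --which_epoch latest --how_many 50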
