一、pix2pixHD代码解析(train.py + test.py)
二、pix2pixHD代码解析(options设置)
三、pix2pixHD代码解析(dataset处理)
四、pix2pixHD代码解析(models搭建)
base_options.py
import argparse
import os
from util import util
import torch
class BaseOptions():
    """Command-line options shared by the training and testing entry points.

    Subclasses extend ``initialize`` with their own arguments and are expected
    to set ``self.isTrain`` there, because ``parse`` copies that attribute
    onto the parsed namespace.
    """

    def __init__(self):
        self.parser = argparse.ArgumentParser()
        # Flipped to True by initialize() so parse() registers arguments once.
        self.initialized = False

    def initialize(self):
        """Register the shared arguments on ``self.parser``."""
        add = self.parser.add_argument

        # experiment specifics
        # The experiment name is joined with --checkpoints_dir in parse() to
        # form the directory that holds this run's files.
        add('--name', type=str, default='label2city',
            help='name of the experiment. It decides where to store samples and models')
        # Comma-separated GPU ids; the default selects GPUs 0, 1 and 2.
        add('--gpu_ids', type=str, default='0, 1, 2',
            help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU')
        add('--checkpoints_dir', type=str, default='./checkpoints',
            help='models are saved here')
        add('--model', type=str, default='pix2pixHD',
            help='which model to use')
        add('--norm', type=str, default='instance',
            help='instance normalization or batch normalization')
        add('--use_dropout', action='store_true',
            help='use dropout for the generator')
        add('--data_type', default=32, type=int, choices=[8, 16, 32],
            help="Supported data type i.e. 8, 16, 32 bit")
        add('--verbose', action='store_true', default=False,
            help='toggles verbose')
        add('--fp16', action='store_true', default=False,
            help='train with AMP')
        add('--local_rank', type=int, default=0,
            help='local rank for distributed training')

        # input/output sizes
        add('--batchSize', type=int, default=3,
            help='input batch size')
        add('--loadSize', type=int, default=512,
            help='scale images to this size')
        add('--fineSize', type=int, default=512,
            help='then crop to this size')
        add('--label_nc', type=int, default=0,
            help='# of input label channels')
        add('--input_nc', type=int, default=3,
            help='# of input image channels')
        add('--output_nc', type=int, default=3,
            help='# of output image channels')

        # for setting inputs
        add('--dataroot', type=str, default='./datasets/geometry/')
        add('--resize_or_crop', type=str, default='scale_width_and_crop',
            help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]')
        add('--serial_batches', action='store_true',
            help='if true, takes images in order to make batches, otherwise takes them randomly')
        add('--no_flip', action='store_true',
            help='if specified, do not flip the images for data argumentation')
        add('--nThreads', default=2, type=int,
            help='# threads for loading data')
        add('--max_dataset_size', type=int, default=float("inf"),
            help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.')

        # for displays
        add('--display_winsize', type=int, default=512,
            help='display window size')
        add('--tf_log', action='store_true',
            help='if specified, use tensorboard logging. Requires tensorflow installed')

        # for generator
        add('--netG', type=str, default='global',
            help='selects model to use for netG')
        add('--ngf', type=int, default=64,
            help='# of gen filters in first conv layer')
        add('--n_downsample_global', type=int, default=4,
            help='number of downsampling layers in netG')
        add('--n_blocks_global', type=int, default=9,
            help='number of residual blocks in the global generator network')
        add('--n_blocks_local', type=int, default=3,
            help='number of residual blocks in the local enhancer network')
        add('--n_local_enhancers', type=int, default=1,
            help='number of local enhancers to use')
        add('--niter_fix_global', type=int, default=0,
            help='number of epochs that we only train the outmost local enhancer')

        # for instance-wise features
        # NOTE(review): 'store_false' makes opt.no_instance default to True and
        # passing the flag sets it to False, which is the opposite of what the
        # help text says. Kept as-is because it looks like a deliberate default
        # for datasets without instance maps -- confirm before changing.
        add('--no_instance', action='store_false',
            help='if specified, do *not* add instance map as input')
        add('--instance_feat', action='store_true',
            help='if specified, add encoded instance features as input')
        add('--label_feat', action='store_true',
            help='if specified, add encoded label features as input')
        add('--feat_num', type=int, default=3,
            help='vector length for encoded features')
        add('--load_features', action='store_true',
            help='if specified, load precomputed feature maps')
        add('--n_downsample_E', type=int, default=4,
            help='# of downsampling layers in encoder')
        add('--nef', type=int, default=16,
            help='# of encoder filters in the first conv layer')
        add('--n_clusters', type=int, default=10,
            help='number of clusters for features')

        self.initialized = True

    @staticmethod
    def _parse_gpu_ids(raw_ids):
        """Turn a comma-separated id string into a list of non-negative ints.

        Empty tokens (an empty string or a trailing comma) are skipped and
        negative ids are dropped, so ``'-1'`` yields an empty list.

        Raises:
            ValueError: if a non-empty token is not an integer.
        """
        gpu_ids = []
        for token in raw_ids.split(','):
            token = token.strip()
            if not token:
                continue
            gpu_id = int(token)
            if gpu_id >= 0:
                gpu_ids.append(gpu_id)
        return gpu_ids

    def parse(self, save=True):
        """Parse the command line, print the options and optionally save them.

        Args:
            save: when true, write the options to ``opt.txt`` inside
                ``<checkpoints_dir>/<name>`` unless training is being resumed.

        Returns:
            The parsed namespace, with ``isTrain`` added and ``gpu_ids``
            replaced by a list of ints.
        """
        if not self.initialized:
            self.initialize()
        self.opt = self.parser.parse_args()
        # isTrain is assigned by the subclass's initialize().
        self.opt.isTrain = self.isTrain  # train or test

        self.opt.gpu_ids = self._parse_gpu_ids(self.opt.gpu_ids)

        # set gpu ids: the first listed id becomes the current CUDA device
        if self.opt.gpu_ids:
            torch.cuda.set_device(self.opt.gpu_ids[0])

        args = vars(self.opt)

        print('------------ Options -------------')
        for k, v in sorted(args.items()):
            print('%s: %s' % (str(k), str(v)))
        print('-------------- End ----------------')

        # save to the disk
        expr_dir = os.path.join(self.opt.checkpoints_dir, self.opt.name)
        util.mkdirs(expr_dir)
        # --continue_train is only registered by the training options, so fall
        # back to False instead of raising AttributeError for other subclasses.
        if save and not getattr(self.opt, 'continue_train', False):
            file_name = os.path.join(expr_dir, 'opt.txt')
            with open(file_name, 'wt') as opt_file:
                opt_file.write('------------ Options -------------\n')
                for k, v in sorted(args.items()):
                    opt_file.write('%s: %s\n' % (str(k), str(v)))
                opt_file.write('-------------- End ----------------\n')
        return self.opt
train_options.py
from .base_options import BaseOptions
# train参数选项
class TrainOptions(BaseOptions):
    """Options used by the training entry point, on top of the shared ones."""

    def initialize(self):
        """Register the shared arguments, then the training-only ones."""
        BaseOptions.initialize(self)

        # for displays
        display_args = [
            ('--display_freq', dict(type=int, default=100,
                                    help='frequency of showing training results on screen')),
            ('--print_freq', dict(type=int, default=100,
                                  help='frequency of showing training results on console')),
            ('--save_latest_freq', dict(type=int, default=1000,
                                        help='frequency of saving the latest results')),
            ('--save_epoch_freq', dict(type=int, default=10,
                                       help='frequency of saving checkpoints at the end of epochs')),
            ('--no_html', dict(action='store_true',
                               help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/')),
            ('--debug', dict(action='store_true',
                             help='only do one epoch and displays at each iteration')),
        ]

        # for training
        training_args = [
            ('--continue_train', dict(action='store_true',
                                      help='continue training: load the latest model')),
            ('--load_pretrain', dict(type=str, default='',
                                     help='load the pretrained model from the specified location')),
            ('--which_epoch', dict(type=str, default='latest',
                                   help='which epoch to load? set to latest to use latest cached model')),
            ('--phase', dict(type=str, default='train',
                             help='train, val, test, etc')),
            ('--niter', dict(type=int, default=100,
                             help='# of iter at starting learning rate')),
            ('--niter_decay', dict(type=int, default=100,
                                   help='# of iter to linearly decay learning rate to zero')),
            ('--beta1', dict(type=float, default=0.5,
                             help='momentum term of adam')),
            ('--lr', dict(type=float, default=0.0002,
                          help='initial learning rate for adam')),
        ]

        # for discriminators
        discriminator_args = [
            ('--num_D', dict(type=int, default=2,
                             help='number of discriminators to use')),
            ('--n_layers_D', dict(type=int, default=3,
                                  help='only used if which_model_netD==n_layers')),
            ('--ndf', dict(type=int, default=64,
                           help='# of discrim filters in first conv layer')),
            ('--lambda_feat', dict(type=float, default=10.0,
                                   help='weight for feature matching loss')),
            ('--no_ganFeat_loss', dict(action='store_true',
                                       help='if specified, do *not* use discriminator feature matching loss')),
            ('--no_vgg_loss', dict(action='store_true',
                                   help='if specified, do *not* use VGG feature matching loss')),
            ('--no_lsgan', dict(action='store_true',
                                help='do *not* use least square GAN, if false, use vanilla GAN')),
            ('--pool_size', dict(type=int, default=0,
                                 help='the size of image buffer that stores previously generated images')),
        ]

        # Register in the same order as the groups above so the generated
        # --help listing keeps its layout.
        for flag, spec in display_args + training_args + discriminator_args:
            self.parser.add_argument(flag, **spec)

        # Marks this option set as the training one.
        self.isTrain = True
test_options.py
from .base_options import BaseOptions
class TestOptions(BaseOptions):
    """Options used by the testing entry point, on top of the shared ones."""

    def initialize(self):
        """Register the shared arguments, then the test-only ones."""
        BaseOptions.initialize(self)
        register = self.parser.add_argument

        register('--ntest',
                 help='# of test examples.',
                 default=float("inf"),
                 type=int)
        register('--results_dir',
                 help='saves results here.',
                 default='./results/',
                 type=str)
        register('--aspect_ratio',
                 help='aspect ratio of result images',
                 default=1.0,
                 type=float)
        register('--phase',
                 help='train, val, test, etc',
                 default='test',
                 type=str)
        register('--which_epoch',
                 help='which epoch to load? set to latest to use latest cached model',
                 default='latest',
                 type=str)
        register('--how_many',
                 help='how many test images to run',
                 default=39,
                 type=int)
        register('--cluster_path',
                 help='the path for clustered results of encoded features',
                 default='features_clustered_010.npy',
                 type=str)
        register('--use_encoded_image',
                 help='if specified, encode the real image to get the feature map',
                 action='store_true')
        register("--export_onnx",
                 help="export ONNX model to a given file",
                 type=str)
        register("--engine",
                 help="run serialized TRT engine",
                 type=str)
        register("--onnx",
                 help="run ONNX model via TRT",
                 type=str)

        # Marks this option set as the non-training (inference) one.
        self.isTrain = False