pix2pixHD Summary
The DeepLab part is based on the source-code walkthrough of deeplab_demo.ipynb, with some modifications to fit this workflow.
Paper: pix2pixHD   Code: GitHub
1. Test on sample data with the downloaded generator network, following the scripts in ./scripts
There are a few sample Cityscapes test images in the datasets folder.
#!./scripts/test_1024p.sh
python test.py --name label2city_1024p --netG local --ngf 32 --resize_or_crop none
Test results are saved to an HTML file: ./results/label2city_1024p/test_latest/index.html
2. Train on sample data, following the scripts in ./scripts
Train a model at 1024x512 resolution:
#!./scripts/train_512p.sh
python train.py --name label2city_512p
To view training results, check the intermediate outputs in ./checkpoints/label2city_512p/web/index.html.
If TensorFlow is installed, add --tf_log to the training script to write TensorBoard logs to ./checkpoints/label2city_512p/logs;
then cd into that directory and run tensorboard --logdir=logs to open TensorBoard.
Multi-GPU training:
#!./scripts/train_512p_multigpu.sh
python train.py --name label2city_512p --batchSize 8 --gpu_ids 0,1,2,3,4,5,6,7
--batchSize should equal the number of GPUs listed in --gpu_ids, so each GPU processes one sample per step; see the sketch below.
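For intuition: pix2pixHD-style multi-GPU training splits each batch across devices with torch.nn.DataParallel. A minimal sketch of that splitting; the stand-in netG and tensor sizes here are illustrative, not the repo's actual networks:

# dataparallel_sketch.py (illustrative only)
import torch
import torch.nn as nn

netG = nn.Conv2d(3, 64, kernel_size=3, padding=1)  # stand-in generator

if torch.cuda.device_count() >= 2:
    # DataParallel scatters the batch dimension across device_ids, so a batch
    # of 2 on two GPUs puts exactly one sample on each GPU per forward pass.
    netG = nn.DataParallel(netG.cuda(), device_ids=[0, 1])
    out = netG(torch.randn(2, 3, 256, 256).cuda())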
3. Building a custom dataset (including semantic segmentation with DeepLab)
Code: GitHub
Models: checkpoints and frozen inference graphs.
Download the source code and models from the links above, then run:
# deeplab_demo_test.py
import os
from io import BytesIO
import tarfile
import tempfile
from six.moves import urllib
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import datetime
import tensorflow as tf
from deeplab_demo import *
LABEL_NAMES = np.asarray([
'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
])
FULL_LABEL_MAP = np.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)
FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)
pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_pascal_trainval/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_mnv2_dm05_pascal_trainval/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_mnv2_dm05_pascal_trainaug/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_mnv2_ade20k_train/frozen_inference_graph.pb'
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_xception_ade20k_train/frozen_inference_graph.pb'
MODEL = DeepLabModel(pb_path)
print('model loaded successfully!')
# global starttime
for num in range(0, 2):
    starttime = datetime.datetime.now()
    # IMAGE_PATH = 'E:/data/img/20190522/img/image%d.jpg' % num
    # OUT_PATH = 'E:/data/img/20190522/seg_img/seg_image%d.png' % num
    IMAGE_PATH = 'E:/data/img/test/img/image%d.jpg' % num
    OUT_PATH = 'E:/data/img/test/seg_map/seg_image%d.png' % num
    path = IMAGE_PATH
    try:
        original_im = Image.open(path)
        print('running deeplab on image %s...' % path)
        resized_im, seg_map = MODEL.run(original_im)
    except IOError:
        print('Cannot retrieve image. Please check path: ' + path)
        continue  # skip unreadable images so seg_map is never used undefined
    # endtime = datetime.datetime.now()
    # print(endtime - starttime)
    seg_image = label_to_color_image(seg_map).astype(np.uint8)  # colorized, for visualization only
    # save the raw label indices (not the colorized map) as the label input
    im = Image.fromarray(seg_map.astype(np.uint8))
    im.save(OUT_PATH)
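To sanity-check the saved maps, you can read one back and list which PASCAL classes it contains. A small sketch; the path follows the OUT_PATH pattern from the script above:

# check_seg_map.py (illustrative sanity check)
import numpy as np
from PIL import Image

LABEL_NAMES = np.asarray([
    'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
    'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
    'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
])

seg = np.asarray(Image.open('E:/data/img/test/seg_map/seg_image0.png'))
print(seg.shape, seg.dtype)         # e.g. (256, 512) uint8: one label index per pixel
print(LABEL_NAMES[np.unique(seg)])  # classes present in the image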
together with the helper module deeplab_demo.py:
# -*- coding: utf-8 -*-
"""
DeepLab Demo.ipynb
https://blog.csdn.net/lifengcai_/article/details/80270409
Automatically generated by Colaboratory.
Original file is located at
https://colab.research.google.com/github/tensorflow/models/blob/master/research/deeplab/deeplab_demo.ipynb
# DeepLab Demo
This demo will demonstrate the steps to run deeplab semantic segmentation model on sample input images.
"""
#@title Imports
import os
from io import BytesIO
import tarfile
import tempfile
from six.moves import urllib
from matplotlib import gridspec
from matplotlib import pyplot as plt
import numpy as np
from PIL import Image
import datetime
import tensorflow as tf
#@title Helper methods
global starttime
class DeepLabModel(object):
    """Class to load deeplab model and run inference."""

    INPUT_TENSOR_NAME = 'ImageTensor:0'
    OUTPUT_TENSOR_NAME = 'SemanticPredictions:0'
    INPUT_SIZE = 512
    FROZEN_GRAPH_NAME = 'frozen_inference_graph'

    def __init__(self, pb_path):
        """Creates and loads pretrained deeplab model."""
        self.graph = tf.Graph()
        # change 1: read frozen_inference_graph.pb directly from a path
        # instead of extracting it from the downloaded tar archive
        graph_def = tf.GraphDef.FromString(open(pb_path, 'rb').read())
        if graph_def is None:
            raise RuntimeError('Cannot find inference graph.')
        with self.graph.as_default():
            tf.import_graph_def(graph_def, name='')
        self.sess = tf.Session(graph=self.graph)

    def run(self, image):
        """
        Runs inference on a single image.

        Args:
            image: A PIL.Image object, raw input image.

        Returns:
            resized_image: RGB image resized from original input image.
            seg_map: Segmentation map of `resized_image`.
        """
        # no proportional resize (2019-05-21 09:43:40): use a fixed target size
        width, height = image.size
        resize_ratio = 1.0 * self.INPUT_SIZE / max(width, height)
        # target_size = (int(resize_ratio * width), int(resize_ratio * height))
        target_size = (512, 256)
        resized_image = image.convert('RGB').resize(target_size, Image.ANTIALIAS)
        batch_seg_map = self.sess.run(
            self.OUTPUT_TENSOR_NAME,
            feed_dict={self.INPUT_TENSOR_NAME: [np.asarray(resized_image)]})
        seg_map = batch_seg_map[0]
        return resized_image, seg_map
def create_pascal_label_colormap():
    """
    Creates a label colormap used in PASCAL VOC segmentation benchmark.

    Returns:
        A Colormap for visualizing segmentation results.
    """
    colormap = np.zeros((256, 3), dtype=int)
    ind = np.arange(256, dtype=int)
    for shift in reversed(range(8)):
        for channel in range(3):
            colormap[:, channel] |= ((ind >> channel) & 1) << shift
        ind >>= 3
    return colormap
def label_to_color_image(label):
    """
    Adds color defined by the dataset colormap to the label.

    Args:
        label: A 2D array with integer type, storing the segmentation label.

    Returns:
        result: A 2D array with floating type. The element of the array
            is the color indexed by the corresponding element in the input
            label to the PASCAL color map.

    Raises:
        ValueError: If label is not of rank 2 or its value is larger than
            color map maximum entry.
    """
    if label.ndim != 2:
        raise ValueError('Expect 2-D input label')
    colormap = create_pascal_label_colormap()
    if np.max(label) >= len(colormap):
        raise ValueError('label value too large.')
    return colormap[label]
def vis_segmentation(image, seg_map):
    """Visualizes input image, segmentation map and overlay view."""
    plt.figure(figsize=(15, 5))
    grid_spec = gridspec.GridSpec(1, 4, width_ratios=[6, 6, 6, 1])

    plt.subplot(grid_spec[0])
    plt.imshow(image)
    plt.axis('off')
    plt.title('input image')

    plt.subplot(grid_spec[1])
    seg_image = label_to_color_image(seg_map).astype(np.uint8)
    plt.imshow(seg_image)
    plt.axis('off')
    plt.title('segmentation map')

    plt.subplot(grid_spec[2])
    plt.imshow(image)
    plt.imshow(seg_image, alpha=0.7)
    plt.axis('off')
    plt.title('segmentation overlay')

    unique_labels = np.unique(seg_map)
    ax = plt.subplot(grid_spec[3])
    plt.imshow(FULL_COLOR_MAP[unique_labels].astype(np.uint8), interpolation='nearest')
    ax.yaxis.tick_right()
    plt.yticks(range(len(unique_labels)), LABEL_NAMES[unique_labels])
    plt.xticks([], [])
    ax.tick_params(width=0.0)
    plt.grid('off')  # turn off the grid
    plt.show()

    # image.save('C:/image1.png')
    im = Image.fromarray(seg_image)
    im.save('E:/data/img/seg_img/seg_image1.png')
# LABEL_NAMES = np.asarray([
# 'background', 'aeroplane', 'bicycle', 'bird', 'boat', 'bottle', 'bus',
# 'car', 'cat', 'chair', 'cow', 'diningtable', 'dog', 'horse', 'motorbike',
# 'person', 'pottedplant', 'sheep', 'sofa', 'train', 'tv'
# ])
# FULL_LABEL_MAP = np.arange(len(LABEL_NAMES)).reshape(len(LABEL_NAMES), 1)
# FULL_COLOR_MAP = label_to_color_image(FULL_LABEL_MAP)
# pb_path='D:/workspace/ygd/Documents/GitHub/models/research/model_zoo_download/deeplabv3_pascal_trainval/frozen_inference_graph.pb'
# MODEL = DeepLabModel(pb_path)
# print('model loaded successfully!')
"""
Run on sample images
Select one of sample images (leave `IMAGE_URL` empty) or feed any internet image url for inference.
Note that we are using single scale inference in the demo for fast computation, so the results may slightly differ from the visualizations in
[README](https://github.com/tensorflow/models/blob/master/research/deeplab/README.md),
which uses multi-scale and left-right flipped inputs.
"""
def run_visualization(path):
    """Inferences DeepLab model and visualizes result."""
    global starttime
    try:
        original_im = Image.open(path)
        print('running deeplab on image %s...' % path)
        # starttime = datetime.datetime.now()
        resized_im, seg_map = MODEL.run(original_im)
    except IOError:
        print('Cannot retrieve image. Please check path: ' + path)
        return
    vis_segmentation(resized_im, seg_map)
# IMAGE_PATH = 'E:/data/img/img/image126.jpg'
# run_visualization(IMAGE_PATH)
# endtime = datetime.datetime.now()
# print (endtime - starttime)
PS: You can also create an instance map to distinguish different instances within the same class; a rough sketch follows.
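One simple way to derive an instance map from the DeepLab output (an illustrative sketch, not the paper's procedure: touching objects of the same class will merge into one instance) is to label the connected components of each class and offset the IDs Cityscapes-style. The paths below follow the earlier script's pattern and are hypothetical:

# make_instance_map.py (illustrative sketch)
import numpy as np
from PIL import Image
from scipy import ndimage

def semantic_to_instance_map(seg_map):
    """Treat each connected component of a class as one instance."""
    inst_map = np.zeros(seg_map.shape, dtype=np.int32)
    for class_id in np.unique(seg_map):
        if class_id == 0:  # skip background
            continue
        components, _ = ndimage.label(seg_map == class_id)
        mask = components > 0
        # Cityscapes-style instance IDs: class_id * 1000 + running index
        inst_map[mask] = class_id * 1000 + components[mask]
    return inst_map

seg = np.asarray(Image.open('E:/data/img/test/seg_map/seg_image0.png'))
np.save('E:/data/img/test/inst_map/inst_image0.npy', semantic_to_instance_map(seg))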
4. Encode features (encode_features)
Precompute the encoded features and cluster them, producing a .npy file for later loading:
python encode_features.py --name butel_data20190516_feat_20190523 --dataroot /home/yangd/work/python/pix2pixHD_yangd/datasets/butel_data20190516_feat_20190523
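The resulting file can be inspected before training. A hedged sketch, assuming (as in the repo's test-time --cluster_path handling) that the file stores a dict mapping label index to an array of cluster centers, and that it lands in the experiment's checkpoints directory; both assumptions:

# inspect_clusters.py (illustrative; path and layout are assumptions)
import numpy as np

cluster_path = 'checkpoints/butel_data20190516_feat_20190523/features_clustered_010.npy'
features = np.load(cluster_path, allow_pickle=True, encoding='latin1').item()
for label, centers in features.items():
    # centers assumed shape: (n_clusters, feat_num), e.g. (10, 3) with the defaults
    print('label %d: %d centers of length %d' % (label, centers.shape[0], centers.shape[1]))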
5. Precompute feature maps (precompute_feature_maps)
Precompute the feature maps and save them:
python precompute_feature_maps.py --name butel_20190522_feat --dataroot ./datasets/train_20190520
6. Train and test on the custom dataset
Training:
python train.py --name butel_data20190522_feat_20190524 --instance_feat --dataroot /home/yangd/work/python/pix2pixHD_yangd/datasets/butel_data20190522_feat_20190524 --gpu_ids 0,1 --batchSize 2 --tf_log --load_pretrain /home/yangd/work/python/pix2pixHD_yangd/checkpoints/butel_data20190516_feat_20190523 --niter 300 --niter_decay 300
Testing:
python test.py --name butel_data20190522_feat_20190524 --instance_feat --dataroot /home/yangd/work/python/pix2pixHD_yangd/datasets/butel_data20190522_feat_20190524 --use_encoded_image
Commonly used options:
--name
--gpu_ids
--checkpoints_dir
--batchSize
--label_nc
--dataroot
--tf_log
--no_instance
--instance_feat
--results_dir
--how_many
--use_encoded_image
Appendix
Option summary:
base_options
# experiment specifics
'--name', type=str, default='label2city', help='name of the experiment. It decides where to store samples and models'
'--gpu_ids', type=str, default='0', help='gpu ids: e.g. 0 0,1,2, 0,2. use -1 for CPU'
'--checkpoints_dir', type=str, default='./checkpoints', help='models are saved here'
'--model', type=str, default='pix2pixHD', help='which model to use'
'--norm', type=str, default='instance', help='instance normalization or batch normalization'
'--use_dropout', action='store_true', help='use dropout for the generator'
'--data_type', default=32, type=int, choices=[8, 16, 32], help="Supported data type i.e. 8, 16, 32 bit"
'--verbose', action='store_true', default=False, help='toggles verbose'
'--fp16', action='store_true', default=False, help='train with AMP'
'--local_rank', type=int, default=0, help='local rank for distributed training'
# input/output sizes
'--batchSize', type=int, default=1, help='input batch size'
'--loadSize', type=int, default=1024, help='scale images to this size'
'--fineSize', type=int, default=512, help='then crop to this size'
'--label_nc', type=int, default=35, help='# of input label channels'
'--input_nc', type=int, default=3, help='# of input image channels'
'--output_nc', type=int, default=3, help='# of output image channels'
# for setting inputs
'--dataroot', type=str, default='./datasets/cityscapes/'
'--resize_or_crop', type=str, default='scale_width', help='scaling and cropping of images at load time [resize_and_crop|crop|scale_width|scale_width_and_crop]'
'--serial_batches', action='store_true', help='if true, takes images in order to make batches, otherwise takes them randomly'
'--no_flip', action='store_true', help='if specified, do not flip the images for data augmentation'
'--nThreads', default=2, type=int, help='# threads for loading data'
'--max_dataset_size', type=int, default=float("inf"), help='Maximum number of samples allowed per dataset. If the dataset directory contains more than max_dataset_size, only a subset is loaded.'
# for displays
'--display_winsize', type=int, default=512, help='display window size'
'--tf_log', action='store_true', help='if specified, use tensorboard logging. Requires tensorflow installed'
# for generator
'--netG', type=str, default='global', help='selects model to use for netG'
'--ngf', type=int, default=64, help='# of gen filters in first conv layer'
'--n_downsample_global', type=int, default=4, help='number of downsampling layers in netG'
'--n_blocks_global', type=int, default=9, help='number of residual blocks in the global generator network'
'--n_blocks_local', type=int, default=3, help='number of residual blocks in the local enhancer network'
'--n_local_enhancers', type=int, default=1, help='number of local enhancers to use'
'--niter_fix_global', type=int, default=0, help='number of epochs that we only train the outermost local enhancer'
# for instance-wise features
'--no_instance', action='store_true', help='if specified, do *not* add instance map as input'
'--instance_feat', action='store_true', help='if specified, add encoded instance features as input'
'--label_feat', action='store_true', help='if specified, add encoded label features as input'
'--feat_num', type=int, default=3, help='vector length for encoded features'
'--load_features', action='store_true', help='if specified, load precomputed feature maps'
'--n_downsample_E', type=int, default=4, help='# of downsampling layers in encoder'
'--nef', type=int, default=16, help='# of encoder filters in the first conv layer'
'--n_clusters', type=int, default=10, help='number of clusters for features'
test_options
'--ntest', type=int, default=float("inf"), help='# of test examples.'
'--results_dir', type=str, default='./results/', help='saves results here.'
'--aspect_ratio', type=float, default=1.0, help='aspect ratio of result images'
'--phase', type=str, default='test', help='train, val, test, etc'
'--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model'
'--how_many', type=int, default=50, help='how many test images to run'
'--cluster_path', type=str, default='features_clustered_010.npy', help='the path for clustered results of encoded features'
'--use_encoded_image', action='store_true', help='if specified, encode the real image to get the feature map'
"--export_onnx", type=str, help="export ONNX model to a given file"
"--engine", type=str, help="run serialized TRT engine"
"--onnx", type=str, help="run ONNX model via TRT"
isTrain = False
train_options
# for displays
'--display_freq', type=int, default=100, help='frequency of showing training results on screen'
'--print_freq', type=int, default=100, help='frequency of showing training results on console'
'--save_latest_freq', type=int, default=1000, help='frequency of saving the latest results'
'--save_epoch_freq', type=int, default=10, help='frequency of saving checkpoints at the end of epochs'
'--no_html', action='store_true', help='do not save intermediate training results to [opt.checkpoints_dir]/[opt.name]/web/'
'--debug', action='store_true', help='only do one epoch and displays at each iteration'
# for training
'--continue_train', action='store_true', help='continue training: load the latest model'
'--load_pretrain', type=str, default='', help='load the pretrained model from the specified location'
'--which_epoch', type=str, default='latest', help='which epoch to load? set to latest to use latest cached model'
'--phase', type=str, default='train', help='train, val, test, etc'
'--niter', type=int, default=100, help='# of iter at starting learning rate'
'--niter_decay', type=int, default=100, help='# of iter to linearly decay learning rate to zero'
'--beta1', type=float, default=0.5, help='momentum term of adam'
'--lr', type=float, default=0.0002, help='initial learning rate for adam'
# for discriminators
'--num_D', type=int, default=2, help='number of discriminators to use'
'--n_layers_D', type=int, default=3, help='only used if which_model_netD==n_layers'
'--ndf', type=int, default=64, help='# of discrim filters in first conv layer'
'--lambda_feat', type=float, default=10.0, help='weight for feature matching loss'
'--no_ganFeat_loss', action='store_true', help='if specified, do *not* use discriminator feature matching loss'
'--no_vgg_loss', action='store_true', help='if specified, do *not* use VGG feature matching loss'
'--no_lsgan', action='store_true', help='do *not* use least square GAN, if false, use vanilla GAN'
'--pool_size', type=int, default=0, help='the size of image buffer that stores previously generated images'
isTrain = True