The link below collects all of my notes on StyleGAN. If you spot any mistakes, please point them out and I will correct them right away. If you are interested, feel free to add me on WeChat (a944284742) to discuss the technology. And if this helped you in any way, please remember to give it a like, because that is the biggest encouragement for me.
Style Transfer 0-00: StyleGAN table of contents (the most complete collection): https://blog.csdn.net/weixin_43013761/article/details/100895333
In the post
The World of GANs 1-2: StyleGAN directory walkthrough and pre-trained model testing
I already explained how to run pretrained_example.py, so I will not repeat the procedure here. The file contains very little code; here it is, annotated:
# Imports needed to run this snippet on its own (they sit at the top of pretrained_example.py):
import os
import pickle
import numpy as np
import PIL.Image
import dnnlib
import dnnlib.tflib as tflib
import config

def main():
    # Initialize TensorFlow.
    tflib.init_tf()

    # Load the pre-trained network. If the download fails, see:
    # https://blog.csdn.net/weixin_43013761/article/details/100920995
    url = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ' # karras2019stylegan-ffhq-1024x1024.pkl
    with dnnlib.util.open_url(url, cache_dir=config.cache_dir) as f:
        _G, _D, Gs = pickle.load(f)
        # _G = Instantaneous snapshot of the generator. Mainly useful for resuming a previous training run.
        # _D = Instantaneous snapshot of the discriminator. Mainly useful for resuming a previous training run.
        # Gs = Long-term average of the generator. Yields higher-quality results than the instantaneous snapshot.

    # Print network details.
    Gs.print_layers()

    # Pick a latent vector from a fixed random seed, so the result is reproducible.
    rnd = np.random.RandomState(8)
    latents = rnd.randn(1, Gs.input_shape[1])  # shape (1, 512)
    print(latents.shape)

    # Generate image.
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    images = Gs.run(latents, None, truncation_psi=0.7, randomize_noise=True, output_transform=fmt)

    # Save image.
    os.makedirs(config.result_dir, exist_ok=True)
    png_filename = os.path.join(config.result_dir, 'example.png')
    PIL.Image.fromarray(images[0], 'RGB').save(png_filename)

if __name__ == "__main__":
    main()
This is a little awkward: the code really is so simple that there is hardly anything left to annotate. The one parameter worth playing with is truncation_psi; see the sketch below. After that, let's look at image blending.
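As a small exercise, the same latent can be rendered at several truncation strengths. This is a sketch of my own, not part of pretrained_example.py; it assumes Gs, tflib, config, np, os and PIL have been set up exactly as in the snippet above, and it only reuses the Gs.run() call that appears there:
    # Sketch: sweep truncation_psi for one fixed latent (my addition, not in the repo).
    fmt = dict(func=tflib.convert_images_to_uint8, nchw_to_nhwc=True)
    latents = np.random.RandomState(8).randn(1, Gs.input_shape[1])
    for psi in [1.0, 0.7, 0.5, 0.0]:
        # Lower psi pulls the sample toward the average face; psi=0 gives the average itself.
        images = Gs.run(latents, None, truncation_psi=psi, randomize_noise=False, output_transform=fmt)
        PIL.Image.fromarray(images[0], 'RGB').save(
            os.path.join(config.result_dir, 'example_psi_%.1f.png' % psi))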
The image-blending code is generate_figures.py; how to run it is also explained in detail in
The World of GANs 1-2: StyleGAN directory walkthrough and pre-trained model testing
Let's start from the main function:
def main():
    tflib.init_tf()
    os.makedirs(config.result_dir, exist_ok=True)
    # url_ffhq / url_bedrooms / url_cars / url_cats and load_Gs() are defined near the top of
    # generate_figures.py (a sketch of load_Gs follows right after this listing). Every figure
    # is generated at the model's native resolution first (1024x1024 for FFHQ) and then
    # resized down to the other resolutions.
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure02-uncurated-ffhq.png'), load_Gs(url_ffhq), cx=0, cy=0, cw=1024, ch=1024, rows=3, lods=[0,1,2,2,3,3], seed=5)
    draw_style_mixing_figure(os.path.join(config.result_dir, 'figure03-style-mixing.png'), load_Gs(url_ffhq), w=1024, h=1024, src_seeds=[639,701,687,615,2268], dst_seeds=[888,829,1898,1733,1614,845], style_ranges=[range(0,4)]*3+[range(4,8)]*2+[range(8,18)])
    draw_noise_detail_figure(os.path.join(config.result_dir, 'figure04-noise-detail.png'), load_Gs(url_ffhq), w=1024, h=1024, num_samples=100, seeds=[1157,1012])
    draw_noise_components_figure(os.path.join(config.result_dir, 'figure05-noise-components.png'), load_Gs(url_ffhq), w=1024, h=1024, seeds=[1967,1555], noise_ranges=[range(0, 18), range(0, 0), range(8, 18), range(0, 8)], flips=[1])
    draw_truncation_trick_figure(os.path.join(config.result_dir, 'figure08-truncation-trick.png'), load_Gs(url_ffhq), w=1024, h=1024, seeds=[91,388], psis=[1, 0.7, 0.5, 0, -0.5, -1])
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure10-uncurated-bedrooms.png'), load_Gs(url_bedrooms), cx=0, cy=0, cw=256, ch=256, rows=5, lods=[0,0,1,1,2,2,2], seed=0)
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure11-uncurated-cars.png'), load_Gs(url_cars), cx=0, cy=64, cw=512, ch=384, rows=4, lods=[0,1,2,2,3,3], seed=2)
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure12-uncurated-cats.png'), load_Gs(url_cats), cx=0, cy=0, cw=256, ch=256, rows=5, lods=[0,0,1,1,2,2,2], seed=1)
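Note that main() loads networks through load_Gs(url) instead of calling pickle.load directly. In essence it is a small caching wrapper around the same loading pattern we saw in pretrained_example.py, so a model downloaded once is reused by the later calls. A sketch of the idea (my reconstruction from that pattern, not copied verbatim from the repo):
_Gs_cache = dict()

def load_Gs(url):
    # Download the pickle (or reuse the local cache) and keep Gs in memory,
    # so repeated calls with the same url do not reload the network.
    if url not in _Gs_cache:
        with dnnlib.util.open_url(url, cache_dir=config.cache_dir) as f:
            _G, _D, Gs = pickle.load(f)
        _Gs_cache[url] = Gs
    return _Gs_cache[url]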
First, let's look at the first function it calls:
draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure02-uncurated-ffhq.png'), load_Gs(url_ffhq), cx=0, cy=0, cw=1024, ch=1024, rows=3, lods=[0,1,2,2,3,3], seed=5)
The annotated version:
# Figures 2, 3, 10, 11, 12: Multi-resolution grid of uncurated result images.
def draw_uncurated_result_figure(png, Gs, cx, cy, cw, ch, rows, lods, seed):
    # Path of the output figure.
    print(png)
    # Fix a random seed, then draw one latent per image. For rows=3 and lods=[0,1,2,2,3,3],
    # sum(rows * 2**lod) = 3+6+12+12+24+24 = 81, so latents has shape (81, 512).
    # Column `col` holds rows * 2**lods[col] images at resolution 1024 // 2**lods[col]:
    # 3 at 1024x1024, 6 at 512x512, 12+12 at 256x256, and 24+24 at 128x128.
    latents = np.random.RandomState(seed).randn(sum(rows * 2**lod for lod in lods), Gs.input_shape[1])
    # images has shape (81, 1024, 1024, 3): everything is generated at the full 1024 resolution.
    images = Gs.run(latents, None, **synthesis_kwargs) # [seed, y, x, rgb]
    canvas = PIL.Image.new('RGB', (sum(cw // 2**lod for lod in lods), ch * rows), 'white')
    image_iter = iter(list(images))
    for col, lod in enumerate(lods):
        for row in range(rows * 2**lod):
            image = PIL.Image.fromarray(next(image_iter), 'RGB')
            image = image.crop((cx, cy, cx + cw, cy + ch))
            # Downsample to this column's resolution.
            image = image.resize((cw // 2**lod, ch // 2**lod), PIL.Image.ANTIALIAS)
            canvas.paste(image, (sum(cw // 2**lod for lod in lods[:col]), row * ch // 2**lod))
    canvas.save(png)
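To make the grid geometry concrete, here is a tiny standalone check of the counts and canvas size for the FFHQ call above (pure Python, my own addition, runnable on its own):
# Sanity check of the grid geometry for rows=3, lods=[0,1,2,2,3,3] (not in the repo).
rows, lods, cw, ch = 3, [0, 1, 2, 2, 3, 3], 1024, 1024
counts = [rows * 2**lod for lod in lods]   # images per column
widths = [cw // 2**lod for lod in lods]    # column widths in pixels
print(counts, sum(counts))  # [3, 6, 12, 12, 24, 24] 81
print(widths, sum(widths))  # [1024, 512, 256, 256, 128, 128] 2304
print(ch * rows)            # canvas height: 3072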
Now for style mixing itself, which I suspect is what most readers care about:
draw_style_mixing_figure(os.path.join(config.result_dir, 'figure03-style-mixing.png'), load_Gs(url_ffhq), w=1024, h=1024, src_seeds=[639,701,687,615,2268], dst_seeds=[888,829,1898,1733,1614,845], style_ranges=[range(0,4)]*3+[range(4,8)]*2+[range(8,18)])
# Figure 3: Style mixing (image blending).
def draw_style_mixing_figure(png, Gs, w, h, src_seeds, dst_seeds, style_ranges):
    print(png)
    # Fix the seeds, then draw the src and dst latents (z vectors).
    src_latents = np.stack([np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in src_seeds])
    dst_latents = np.stack([np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in dst_seeds])
    # Map z to w through the mapping network: src_dlatents has shape (5, 18, 512).
    src_dlatents = Gs.components.mapping.run(src_latents, None) # [seed, layer, component]
    # Likewise, dst_dlatents has shape (6, 18, 512).
    dst_dlatents = Gs.components.mapping.run(dst_latents, None) # [seed, layer, component]
    # src_images: (5, 1024, 1024, 3)
    src_images = Gs.components.synthesis.run(src_dlatents, randomize_noise=False, **synthesis_kwargs)
    # dst_images: (6, 1024, 1024, 3)
    dst_images = Gs.components.synthesis.run(dst_dlatents, randomize_noise=False, **synthesis_kwargs)
    canvas = PIL.Image.new('RGB', (w * (len(src_seeds) + 1), h * (len(dst_seeds) + 1)), 'white')
    # Paste the unmixed source images along the top row.
    for col, src_image in enumerate(list(src_images)):
        canvas.paste(PIL.Image.fromarray(src_image, 'RGB'), ((col + 1) * w, 0))
    # One destination image per row.
    for row, dst_image in enumerate(list(dst_images)):
        # Paste the unmixed destination image in the leftmost column.
        canvas.paste(PIL.Image.fromarray(dst_image, 'RGB'), (0, (row + 1) * h))
        row_dlatents = np.stack([dst_dlatents[row]] * len(src_seeds))
        # The key step of the whole figure: for the layers in style_ranges[row],
        # replace the destination's w with the source's w.
        row_dlatents[:, style_ranges[row]] = src_dlatents[:, style_ranges[row]]
        # Synthesize the mixed images.
        row_images = Gs.components.synthesis.run(row_dlatents, randomize_noise=False, **synthesis_kwargs)
        # Fill in the rest of the row.
        for col, image in enumerate(list(row_images)):
            canvas.paste(PIL.Image.fromarray(image, 'RGB'), ((col + 1) * w, (row + 1) * h))
    canvas.save(png)
The comments above should be enough; the only line that really deserves attention is the fancy-index replacement, demonstrated in isolation below.
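To see exactly what row_dlatents[:, style_ranges[row]] = src_dlatents[:, style_ranges[row]] does, here is a minimal numpy sketch of my own, with tiny dummy shapes standing in for the real (batch, 18, 512) arrays:
import numpy as np

# 5 source styles and 5 copies of one destination style; 4 "layers" x 3 "components"
# here instead of the real 18 x 512, so the effect is easy to read off.
src_dlatents = np.ones((5, 4, 3))    # pretend source w vectors (all ones)
row_dlatents = np.zeros((5, 4, 3))   # pretend destination w vectors (all zeros)

style_range = range(0, 2)            # mix only the coarse layers 0 and 1
row_dlatents[:, style_range] = src_dlatents[:, style_range]

print(row_dlatents[0, :, 0])         # [1. 1. 0. 0.]: layers 0-1 now come from src,
                                     # layers 2-3 still come from dst
In main(), style_ranges runs from range(0,4) over range(4,8) to range(8,18): low layer indices carry coarse styles (pose, face shape), high indices carry fine styles (color scheme, micro-texture), which is exactly what Figure 3 of the paper demonstrates.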
The next function compares how different noise realizations affect image detail:
# Figure 4: Noise detail. Show how different noise realizations change fine details.
def draw_noise_detail_figure(png, Gs, w, h, num_samples, seeds):
    print(png)
    # len(seeds) == 2 here, i.e. two different faces are compared.
    canvas = PIL.Image.new('RGB', (w * 3, h * len(seeds)), 'white')
    for row, seed in enumerate(seeds):
        # The same latent repeated num_samples times: latents has shape (100, 512).
        latents = np.stack([np.random.RandomState(seed).randn(Gs.input_shape[1])] * num_samples)
        # images has shape (100, 1024, 1024, 3): 100 renderings of the same face that
        # differ only in the per-layer noise, which is randomized on every run.
        images = Gs.run(latents, None, truncation_psi=1, **synthesis_kwargs)
        print(images.shape)
        # Paste the first sample as the full-size reference image.
        canvas.paste(PIL.Image.fromarray(images[0], 'RGB'), (0, row * h))
        # Crop the same 256x256 patch from four other samples, enlarge each to half the
        # full image size (512x512), and tile them in a 2x2 grid.
        for i in range(4):
            crop = PIL.Image.fromarray(images[i + 1], 'RGB')
            crop = crop.crop((650, 180, 906, 436))
            crop = crop.resize((w//2, h//2), PIL.Image.NEAREST)
            canvas.paste(crop, (w + (i%2) * w//2, row * h + (i//2) * h//2))
        # Per-pixel standard deviation across the 100 samples (averaged over RGB,
        # scaled by 4), drawn as a grayscale map.
        diff = np.std(np.mean(images, axis=3), axis=0) * 4
        diff = np.clip(diff + 0.5, 0, 255).astype(np.uint8)
        canvas.paste(PIL.Image.fromarray(diff, 'L'), (w * 2, row * h))
    canvas.save(png)
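The standard-deviation map at the end deserves a second look. Here is a standalone numpy sketch of my own, with random data standing in for the 100 generated images:
import numpy as np

# Stand-in for the generator output: 100 "images" of 8x8 RGB pixels.
images = np.random.RandomState(0).uniform(0, 255, size=(100, 8, 8, 3))

gray = np.mean(images, axis=3)    # average over the RGB channels -> (100, 8, 8)
diff = np.std(gray, axis=0) * 4   # per-pixel std across samples -> (8, 8), scaled for visibility
diff = np.clip(diff + 0.5, 0, 255).astype(np.uint8)
print(diff.shape, diff.min(), diff.max())
In the real figure, bright pixels mark regions such as hair and background contours that change with the noise, while dark pixels mark regions the noise barely touches, such as overall pose and identity.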
Next comes the truncation trick:
# Figure 8: Truncation trick. psis = [1, 0.7, 0.5, 0, -0.5, -1]
def draw_truncation_trick_figure(png, Gs, w, h, seeds, psis):
    print(png)
    latents = np.stack([np.random.RandomState(seed).randn(Gs.input_shape[1]) for seed in seeds])
    dlatents = Gs.components.mapping.run(latents, None) # [seed, layer, component]
    # The running average of w kept by the network during training: the "average face".
    dlatent_avg = Gs.get_var('dlatent_avg') # [component]
    canvas = PIL.Image.new('RGB', (w * len(psis), h * len(seeds)), 'white')
    for row, dlatent in enumerate(list(dlatents)):
        # The core of the trick: scale w's offset from the average by each psi,
        # giving one entry per psi, shape (len(psis), 18, 512).
        row_dlatents = (dlatent[np.newaxis] - dlatent_avg) * np.reshape(psis, [-1, 1, 1]) + dlatent_avg
        row_images = Gs.components.synthesis.run(row_dlatents, randomize_noise=False, **synthesis_kwargs)
        for col, image in enumerate(list(row_images)):
            canvas.paste(PIL.Image.fromarray(image, 'RGB'), (col * w, row * h))
    canvas.save(png)
As mentioned earlier, the core of this function is the average face: when psi is 0, the formula collapses to dlatent_avg itself, so every seed produces the same image, the face shown in the red box of the figure, i.e. the so-called average face. Negative psi values mirror w to the opposite side of the average, which tends to invert attributes such as pose.
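The interpolation line is easy to verify on its own. A minimal numpy sketch of mine, with dummy shapes in place of the real (18, 512) dlatents:
import numpy as np

dlatent_avg = np.full((3,), 0.5)            # stand-in for the learned average w
dlatent = np.array([[0.0, 1.0, 2.0]] * 2)   # one w vector, repeated over 2 "layers"
psis = [1, 0.7, 0.5, 0, -0.5, -1]

row_dlatents = (dlatent[np.newaxis] - dlatent_avg) * np.reshape(psis, [-1, 1, 1]) + dlatent_avg
print(row_dlatents[0, 0])   # psi = 1:  [0. 1. 2.]      (unchanged)
print(row_dlatents[3, 0])   # psi = 0:  [0.5 0.5 0.5]   (exactly the average)
print(row_dlatents[5, 0])   # psi = -1: [ 1.  0. -1.]   (mirrored through the average)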
I do not know whether I will end up doing an image-blending project later. If I do, I will go through StyleGAN carefully one more time; if not, this is probably the concluding post. If you have read all the way to here, please give me a like. Yes, I really need your support. May you achieve much while young and never feel inferior; I think that is the best blessing one can give a young person. Goodbye, my friend!