风格迁移0-11:stylegan-源码无死角解读(7)-图片生成与融合

以下链接是个人关于stylegan所有见解,如有错误欢迎大家指出,我会第一时间纠正,如有兴趣可以加微信:a944284742相互讨论技术。若是帮助到了你什么,一定要记得点赞奥!因为这是对我最大的鼓励。
风格迁移0-00:stylegan-目录-史上最全:https://blog.csdn.net/weixin_43013761/article/details/100895333

图片生成


GANS的世界1-2:stylegan-目录讲解与预训练模型测试
这篇博客中,交代了如何去运行pretrained_example.py程序,这里流程就不讲解了,并且该文件的代码也很少,下面就是它的注释:

def main():
    """Generate one image with the pre-trained FFHQ generator and save it.

    Steps: initialize TensorFlow, load the pickled networks from the
    download URL (cached under ``config.cache_dir``), print the layer
    summary of ``Gs``, sample one latent vector from a fixed seed, run the
    generator, and write the first image to
    ``<config.result_dir>/example.png``.
    """
    # Initialize TensorFlow.
    tflib.init_tf()

    # Load the pre-trained network. If the download does not succeed, see
    # https://blog.csdn.net/weixin_43013761/article/details/100920995
    # NOTE(review): pickle.load can execute arbitrary code contained in the
    # pickle stream, so this is only safe while the URL below is trusted.
    pretrained_url = 'https://drive.google.com/uc?id=1MEGjdvVpUsu1jB4zrXZN7Y4kBBOzizDQ' # karras2019stylegan-ffhq-1024x1024.pkl
    with dnnlib.util.open_url(pretrained_url, cache_dir=config.cache_dir) as pkl_file:
        # _G = Instantaneous snapshot of the generator. Mainly useful for resuming a previous training run.
        # _D = Instantaneous snapshot of the discriminator. Mainly useful for resuming a previous training run.
        # Gs = Long-term average of the generator. Yields higher-quality results than the instantaneous snapshot.
        _G, _D, Gs = pickle.load(pkl_file)

    # Print network details.
    Gs.print_layers()

    # Pick the latent vector. The fixed seed makes the sampled latent (and
    # therefore the chosen identity) reproducible between runs.
    rng = np.random.RandomState(8)
    latent_dim = Gs.input_shape[1]
    # One row per image to generate; the original notes report shape (1, 512).
    latents = rng.randn(1, latent_dim)
    print(latents.shape)

    # Generate image.
    output_fmt = {'func': tflib.convert_images_to_uint8, 'nchw_to_nhwc': True}
    images = Gs.run(latents, None, truncation_psi=0.7, randomize_noise=True, output_transform=output_fmt)

    # Save image.
    os.makedirs(config.result_dir, exist_ok=True)
    out_path = os.path.join(config.result_dir, 'example.png')
    first_image = PIL.Image.fromarray(images[0], 'RGB')
    first_image.save(out_path)


if __name__ == "__main__":
    main()

这就比较尴尬了,真的太简单了,简单到不知道注释些什么东西。没关系,下面我们来看看图片的融合:

图片融合

图片融合代码为generate_figures.py,该程序如何运行,在
GANS的世界1-2:stylegan-目录讲解与预训练模型测试
中也有详细的讲解。首先我们从main函数看起:

def main():
    """Render all figure PNGs into ``config.result_dir``.

    Initializes TensorFlow, makes sure the result directory exists, then
    calls one ``draw_*`` function per figure. Each call receives the output
    path, a generator obtained from ``load_Gs(<url>)``, and the
    figure-specific layout parameters and seeds.
    """
    tflib.init_tf()
    os.makedirs(config.result_dir, exist_ok=True)
    # Multi-resolution grid of images: every image is first generated at
    # 1024 resolution and the smaller tiles are then obtained by resizing.
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure02-uncurated-ffhq.png'), load_Gs(url_ffhq), cx=0, cy=0, cw=1024, ch=1024, rows=3, lods=[0,1,2,2,3,3], seed=5)
    draw_style_mixing_figure(os.path.join(config.result_dir, 'figure03-style-mixing.png'), load_Gs(url_ffhq), w=1024, h=1024, src_seeds=[639,701,687,615,2268], dst_seeds=[888,829,1898,1733,1614,845], style_ranges=[range(0,4)]*3+[range(4,8)]*2+[range(8,18)])
    draw_noise_detail_figure(os.path.join(config.result_dir, 'figure04-noise-detail.png'), load_Gs(url_ffhq), w=1024, h=1024, num_samples=100, seeds=[1157,1012])
    draw_noise_components_figure(os.path.join(config.result_dir, 'figure05-noise-components.png'), load_Gs(url_ffhq), w=1024, h=1024, seeds=[1967,1555], noise_ranges=[range(0, 18), range(0, 0), range(8, 18), range(0, 8)], flips=[1])
    draw_truncation_trick_figure(os.path.join(config.result_dir, 'figure08-truncation-trick.png'), load_Gs(url_ffhq), w=1024, h=1024, seeds=[91,388], psis=[1, 0.7, 0.5, 0, -0.5, -1])
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure10-uncurated-bedrooms.png'), load_Gs(url_bedrooms), cx=0, cy=0, cw=256, ch=256, rows=5, lods=[0,0,1,1,2,2,2], seed=0)
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure11-uncurated-cars.png'), load_Gs(url_cars), cx=0, cy=64, cw=512, ch=384, rows=4, lods=[0,1,2,2,3,3], seed=2)
    draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure12-uncurated-cats.png'), load_Gs(url_cats), cx=0, cy=0, cw=256, ch=256, rows=5, lods=[0,0,1,1,2,2,2], seed=1)

figure02-uncurated-ffhq.png

首先我们来看看调用的第一个函数:

draw_uncurated_result_figure(os.path.join(config.result_dir, 'figure02-uncurated-ffhq.png'), load_Gs(url_ffhq), cx=0, cy=0, cw=1024, ch=1024, rows=3, lods=[0,1,2,2,3,3], seed=5)

注释如下:

# Draw the multi-resolution image grid.
# Figures 2, 3, 10, 11, 12: Multi-resolution grid of uncurated result images.
def draw_uncurated_result_figure(png, Gs, cx, cy, cw, ch, rows, lods, seed):
    """Draw a grid of generated samples with one column per entry of ``lods``.

    Args:
        png: Output path of the figure; it is printed and then written.
        Gs: Generator network; ``Gs.run`` produces the images and
            ``Gs.input_shape[1]`` gives the latent size.
        cx, cy: Top-left corner of the crop applied to every generated image.
        cw, ch: Width and height of that crop; also the tile size at lod 0.
        rows: Number of tiles in a column at lod 0.
        lods: One level-of-detail per column. A column with level ``lod``
            holds ``rows * 2**lod`` tiles, each scaled down by ``2**lod``.
        seed: Seed of the random state the latents are drawn from.
    """
    # Path of the figure being produced.
    print(png)

    # One latent per tile. Example for rows=3, lods=[0,1,2,2,3,3] with a
    # 1024x1024 crop: 3 tiles at 1024x1024, 6 at 512x512, 24 at 256x256
    # (two columns of 12) and 48 at 128x128 (two columns of 24) -> 81 latents.
    tiles_per_col = [rows * 2**lod for lod in lods]
    latents = np.random.RandomState(seed).randn(sum(tiles_per_col), Gs.input_shape[1])

    # Every sample is generated at the same size here; the smaller tiles are
    # obtained below by downsampling. The original notes report
    # (81, 1024, 1024, 3) for the FFHQ call.
    images = Gs.run(latents, None, **synthesis_kwargs) # [seed, y, x, rgb]

    col_widths = [cw // 2**lod for lod in lods]
    canvas = PIL.Image.new('RGB', (sum(col_widths), ch * rows), 'white')
    image_iter = iter(images)
    col_x = 0  # left edge of the current column: sum of the previous column widths
    for lod, n_tiles, col_w in zip(lods, tiles_per_col, col_widths):
        for row in range(n_tiles):
            image = PIL.Image.fromarray(next(image_iter), 'RGB')
            image = image.crop((cx, cy, cx + cw, cy + ch))
            # Downsample. LANCZOS is the filter formerly exposed as ANTIALIAS;
            # the ANTIALIAS alias no longer exists in Pillow >= 10.
            image = image.resize((col_w, ch // 2**lod), PIL.Image.LANCZOS)
            canvas.paste(image, (col_x, row * ch // 2**lod))
        col_x += col_w
    canvas.save(png)

figure03-style-mixing.png

下面我们来看看图片融合,相信大家对这个还是十分的关注的:

draw_style_mixing_figure(os.path.join(config.result_dir, 'figure03-style-mixing.png'), load_Gs(url_ffhq), w=1024, h=1024, src_seeds=[639,701,687,615,2268], dst_seeds=[888,829,1898,1733,1614,845], style_ranges=[range(0,4)]*3+[range(4,8)]*2+[range(8,18)])
# Figure 3: Style mixing.
# Style mixing, i.e. compositing images by swapping per-layer styles.
def draw_style_mixing_figure(png, Gs, w, h, src_seeds, dst_seeds, style_ranges):
    """Draw the style-mixing grid: sources on top, destinations on the left.

    Cell (row, col) shows destination ``row`` after the dlatent layers listed
    in ``style_ranges[row]`` have been replaced by those of source ``col``.

    Args:
        png: Output path of the figure; it is printed and then written.
        Gs: Generator network exposing ``components.mapping`` and
            ``components.synthesis``.
        w, h: Width and height of one grid cell in pixels.
        src_seeds: Seeds of the source latents, one per column.
        dst_seeds: Seeds of the destination latents, one per row.
        style_ranges: For each destination row, the layer indices that are
            copied from the source dlatents.
    """
    print(png)
    # One latent per seed. np.stack needs a real sequence: passing a generator
    # has been deprecated since NumPy 1.16 and is rejected with TypeError by
    # current releases, so build lists.
    latent_dim = Gs.input_shape[1]
    src_latents = np.stack([np.random.RandomState(seed).randn(latent_dim) for seed in src_seeds])
    dst_latents = np.stack([np.random.RandomState(seed).randn(latent_dim) for seed in dst_seeds])

    # Map the latents through the mapping network to get the dlatents (w).
    # The original notes report (5, 18, 512) / (6, 18, 512) for the FFHQ call.
    src_dlatents = Gs.components.mapping.run(src_latents, None) # [seed, layer, component]
    dst_dlatents = Gs.components.mapping.run(dst_latents, None) # [seed, layer, component]

    # Unmixed reference images for the sources and the destinations.
    src_images = Gs.components.synthesis.run(src_dlatents, randomize_noise=False, **synthesis_kwargs)
    dst_images = Gs.components.synthesis.run(dst_dlatents, randomize_noise=False, **synthesis_kwargs)

    # One extra column and one extra row hold the reference images; the
    # top-left cell stays white.
    canvas = PIL.Image.new('RGB', (w * (len(src_seeds) + 1), h * (len(dst_seeds) + 1)), 'white')

    # Top row: the unmixed source images.
    for col, src_image in enumerate(src_images):
        canvas.paste(PIL.Image.fromarray(src_image, 'RGB'), ((col + 1) * w, 0))

    # One grid row per destination image.
    for row, dst_image in enumerate(dst_images):
        # Left column: the unmixed destination image.
        canvas.paste(PIL.Image.fromarray(dst_image, 'RGB'), (0, (row + 1) * h))
        # Start from one copy of the destination dlatent per source ...
        row_dlatents = np.stack([dst_dlatents[row]] * len(src_seeds))
        # ... and overwrite the selected layers with the sources' values.
        # This replacement is the whole mixing operation.
        mixed_layers = style_ranges[row]
        row_dlatents[:, mixed_layers] = src_dlatents[:, mixed_layers]
        # Synthesize the mixed images and fill in the rest of the row.
        row_images = Gs.components.synthesis.run(row_dlatents, randomize_noise=False, **synthesis_kwargs)
        for col, image in enumerate(row_images):
            canvas.paste(PIL.Image.fromarray(image, 'RGB'), ((col + 1) * w, (row + 1) * h))
    canvas.save(png)

注释还算过得去,并且没有什么别的好讲解的。

figure04-noise-detail.png

下面的代码,是为了比较不同噪音对图片细节的影响:

# Figure 4: Noise detail. Compares how images generated from the same latent differ in detail.
def draw_noise_detail_figure(png, Gs, w, h, num_samples, seeds):
    """Draw one row per seed: full image, four zoomed crops, deviation map.

    For each seed the same latent vector is repeated ``num_samples`` times
    and run through ``Gs``, so the latent input is identical for all samples
    (presumably the remaining variation comes from the noise inputs; this
    depends on ``synthesis_kwargs``, which is defined elsewhere).

    Row layout, each column ``w`` pixels wide:
        column 0: the first generated image;
        column 1: a 2x2 grid of the same fixed region cut from samples 1..4;
        column 2: grayscale map of the per-pixel standard deviation across
                  all samples.
    """
    print(png)
    # One canvas row per seed (two seeds in the call shown in main).
    canvas = PIL.Image.new('RGB', (w * 3, h * len(seeds)), 'white')
    for row, seed in enumerate(seeds):
        top = row * h

        # The same latent repeated num_samples times, e.g. (100, 512).
        base_latent = np.random.RandomState(seed).randn(Gs.input_shape[1])
        latents = np.stack([base_latent] * num_samples)

        # One image per repeated latent, e.g. (100, 1024, 1024, 3).
        images = Gs.run(latents, None, truncation_psi=1, **synthesis_kwargs)
        print(images.shape)

        # Column 0: the first sample as an RGB image.
        full_image = PIL.Image.fromarray(images[0], 'RGB')
        canvas.paste(full_image, (0, top))

        # Column 1: cut the same fixed 256x256 region out of samples 1..4 and
        # enlarge each cut to half the cell width and height (a quarter of
        # the cell area), arranged as a 2x2 grid.
        for quadrant in range(4):
            grid_row, grid_col = divmod(quadrant, 2)
            region = PIL.Image.fromarray(images[quadrant + 1], 'RGB').crop((650, 180, 906, 436))
            region = region.resize((w//2, h//2), PIL.Image.NEAREST)
            canvas.paste(region, (w + grid_col * w//2, top + grid_row * h//2))

        # Column 2: standard deviation across samples of the channel-averaged
        # images, amplified by 4 and converted to 8-bit grayscale.
        gray = np.mean(images, axis=3)
        diff = np.std(gray, axis=0) * 4
        diff = np.clip(diff + 0.5, 0, 255).astype(np.uint8)
        canvas.paste(PIL.Image.fromarray(diff, 'L'), (w * 2, top))
    canvas.save(png)

figure08-truncation-trick.png

下面就是truncation-trick:

# Figure 8: Truncation trick. Called from main with psis=[1, 0.7, 0.5, 0, -0.5, -1].
def draw_truncation_trick_figure(png, Gs, w, h, seeds, psis):
    """Draw one row per seed and one column per truncation strength ``psi``.

    Each cell is synthesized from ``dlatent_avg + psi * (dlatent - dlatent_avg)``,
    so ``psi = 1`` keeps the sampled dlatent unchanged, ``psi = 0`` yields the
    image of the average dlatent, and negative values move to the opposite
    side of the average.

    Args:
        png: Output path of the figure; it is printed and then written.
        Gs: Generator network exposing ``components.mapping``,
            ``components.synthesis`` and the variable ``'dlatent_avg'``.
        w, h: Width and height of one grid cell in pixels.
        seeds: Seeds of the latents, one per row.
        psis: Truncation strengths, one per column.
    """
    print(png)
    # np.stack needs a real sequence: passing a generator has been deprecated
    # since NumPy 1.16 and is rejected with TypeError by current releases.
    latent_dim = Gs.input_shape[1]
    latents = np.stack([np.random.RandomState(seed).randn(latent_dim) for seed in seeds])
    dlatents = Gs.components.mapping.run(latents, None) # [seed, layer, component]
    dlatent_avg = Gs.get_var('dlatent_avg') # [component]

    # Shape [psi, 1, 1] so it broadcasts against [1, layer, component].
    psi_column = np.reshape(psis, [-1, 1, 1])

    canvas = PIL.Image.new('RGB', (w * len(psis), h * len(seeds)), 'white')
    for row, dlatent in enumerate(dlatents):
        # Core of the trick: scale the offset from the average dlatent by
        # every psi at once, giving one dlatent per column.
        row_dlatents = (dlatent[np.newaxis] - dlatent_avg) * psi_column + dlatent_avg
        row_images = Gs.components.synthesis.run(row_dlatents, randomize_noise=False, **synthesis_kwargs)
        for col, image in enumerate(row_images):
            canvas.paste(PIL.Image.fromarray(image, 'RGB'), (col * w, row * h))
    canvas.save(png)

前面提到过,其主要核心是使用到了平均脸,当psis为0的时候,得到的是下图中:
风格迁移0-11:stylegan-源码无死角解读(7)-图片生成与融合_第1张图片
红框中的脸 ,也就是所谓的平均脸。

章节结语

我不知道后面自己会不会做图片融合的项目,如果后面做了,我想我会把stylegan好好的再梳理一遍,如果没有的话,估计这个算是完成篇,如果你从头看到了这里,那么请给我点赞吧,是的,我很需要你的支持,愿你年少有为不自卑,我觉得这是对年轻人最好的祝福了!那再见了,朋友!

你可能感兴趣的:(风格迁移)