Completed the project design and part of the code (model.py).
Project file layout:
loss_model_ckpt holds the checkpoint of the loss model, namely the VGG_16.ckpt file
nets holds the VGG network definition
preprocessing holds the image preprocessing code that runs before images are fed into VGG
resImg holds the result images, i.e. the transformed images
srcImg holds the source images, i.e. the images to be transformed
styleImg holds the style images, i.e. the images whose style the model is trained on
train2014 holds the MS COCO 2014 dataset
transfer_model_ckpt holds the checkpoints of the trained style-transfer model
eval_model is the code for evaluating / using the model
model defines the model structure
train_model is the code that trains the model and is the core of the project
We put the style images used in the paper's experiments, together with a few style images found online, into the styleImg folder:
The images come in different sizes, so an image_size parameter is needed to normalize their size, and they also have to be preprocessed before being fed into the model; a minimal sketch of the resize step follows.
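As an illustration only (the project's real preprocessing code lives in the preprocessing folder), the resize step could look roughly like this; the function name resize_image and the use of tf.image.resize_images are assumptions made for this sketch:
# coding: utf-8
# Illustrative sketch only, assuming TensorFlow 1.x; not the project's actual
# preprocessing code. resize_image and image_size are names chosen for this example.
import tensorflow as tf

def resize_image(image, image_size):
    """Resize one HxWx3 image tensor to image_size x image_size, as float32."""
    image = tf.image.convert_image_dtype(image, tf.float32)
    return tf.image.resize_images(image, [image_size, image_size])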
The simplest part of the code is defining the model itself: it can be written directly from the model architecture given in the paper.
The code, with comments, is as follows:
# coding: utf-8
"""
定义生成图片的net,不包括后面的VGG
"""
import tensorflow as tf
import numpy as np
def conv2d(input, filter_size, in_channels, out_channels, strides, padding='VALID'):
    """
    Wraps the convolution operation.
    Output size: N = (W - F + 2P)/S + 1
    :param input: input tensor
    :param filter_size: size of the convolution kernel
    :param in_channels: number of channels of input
    :param out_channels: number of output feature maps
    :param strides: stride of the kernel
    :param padding: padding mode, 'VALID' by default
    :return: the convolved tensor
    """
    with tf.variable_scope("conv2d"):
        filter = tf.Variable(tf.truncated_normal([filter_size, filter_size, in_channels, out_channels], stddev=0.1), name='weight')
        # Reflect-padding by filter_size/2 on each side keeps the output the same size
        # as the input when the stride is 1 (e.g. W=256, F=3, P=1: (256-3+2)/1+1=256)
        input_pad = tf.pad(input, [[0, 0], [int(filter_size / 2), int(filter_size / 2)], [int(filter_size / 2), int(filter_size / 2)], [0, 0]], mode='REFLECT')
        # strides must be [1, s, s, 1] in NHWC format (batch and channel strides are 1)
        return tf.nn.conv2d(input_pad, filter, strides=[1, strides, strides, 1], padding=padding, name='conv')
def instance_norm(input):
    """
    IN (instance normalization) layer, used in place of a BN layer; see the blog post for the principle.
    For each sample and each channel: IN(x) = (x - mean) / sqrt(var + eps),
    with mean and var computed over the spatial dimensions (height, width).
    :param input: image tensor of shape [batch, height, width, channels]
    :return: the normalized image tensor
    """
    e = 1e-9  # epsilon, to avoid division by zero
    mean, v = tf.nn.moments(input, [1, 2], keep_dims=True)
    return tf.div(tf.subtract(input, mean), tf.sqrt(tf.add(v, e)))
def relu(input):
    """
    ReLU activation layer.
    :param input: tensor
    :return: activated tensor, with any NaN values replaced by 0
    """
    y = tf.nn.relu(input)
    # tf.equal(y, y) is False only where y is NaN, so NaN entries become 0
    return tf.where(tf.equal(y, y), y, tf.zeros_like(y))
def residual(input, filter_size, in_channels, out_channels, strides=1):
    """
    Residual block: two convolutions plus a skip connection.
    :param input:
    :param filter_size: 3
    :param in_channels: 128
    :param out_channels: 128
    :param strides: 1
    :return:
    """
    with tf.variable_scope("res"):
        conv1 = conv2d(input, filter_size, in_channels, out_channels, strides)
        conv2 = conv2d(relu(conv1), filter_size, in_channels, out_channels, strides)
        res = conv2 + input
        return res
def deconv2d(input, filter_size, in_channels, out_channels, strides, isTraining):
    """
    "Deconvolution" used for upsampling.
    The idea is to enlarge the feature map first (resize), then convolve.
    :param input:
    :param filter_size:
    :param in_channels:
    :param out_channels:
    :param strides:
    :param isTraining:
    :return:
    """
    with tf.variable_scope("deconv2d"):
        if isTraining:
            # during training the input size is fixed, so the static shape is known
            height = input.get_shape()[1].value
            width = input.get_shape()[2].value
        else:
            # at inference time the input size varies, so use the dynamic shape
            height = tf.shape(input)[1]
            width = tf.shape(input)[2]
        new_height = height * strides * 2
        new_width = width * strides * 2
        input_resized = tf.image.resize_images(input, [new_height, new_width], tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        return conv2d(input_resized, filter_size, in_channels, out_channels, strides)
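# Size check for the resize-then-convolve upsampling above (an illustrative trace,
# assuming filter_size=3, strides=2 and an input feature map of height H):
#   resize (NEAREST_NEIGHBOR):  H -> H * 2 * 2 = 4H
#   reflect pad by 1 per side:  4H -> 4H + 2
#   conv with stride 2, VALID:  (4H + 2 - 3) // 2 + 1 = 2H
# so deconv2d with strides=2 doubles the spatial size overall.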
def base_net(img, isTraining):
    """
    The style-transfer (image transform) network.
    The architecture follows the paper.
    :param img:
    :param isTraining:
    :return:
    """
    # Reflect-pad the input to reduce border artifacts
    image = tf.pad(img, [[0, 0], [10, 10], [10, 10], [0, 0]], mode='REFLECT')
    # downsampling convolutions
    with tf.variable_scope("conv"):
        conv1 = relu(instance_norm(conv2d(input=image, filter_size=9, in_channels=3, out_channels=32, strides=1)))
        conv2 = relu(instance_norm(conv2d(conv1, 3, 32, 64, 2)))
        conv3 = relu(instance_norm(conv2d(conv2, 3, 64, 128, 2)))
    with tf.variable_scope("res"):
        res1 = residual(conv3, filter_size=3, in_channels=128, out_channels=128)
        res2 = residual(res1, 3, 128, 128)
        res3 = residual(res2, 3, 128, 128)
        res4 = residual(res3, 3, 128, 128)
        res5 = residual(res4, 3, 128, 128)
    with tf.variable_scope('deconv'):
        deconv1 = relu(instance_norm(deconv2d(res5, 3, 128, 64, 2, isTraining)))
        deconv2 = relu(instance_norm(deconv2d(deconv1, 3, 64, 32, 2, isTraining)))
        deconv3 = tf.nn.tanh(instance_norm(conv2d(deconv2, 9, 32, 3, 1)))
    # tanh outputs values in [-1, 1]; rescale them to [0, 255]
    y = (deconv3 + 1) * 127.5
    # remove the padding added at the beginning
    height = tf.shape(y)[1]
    width = tf.shape(y)[2]
    y = tf.slice(y, [0, 10, 10, 0], tf.stack([-1, height - 20, width - 20, -1]))
    return y
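To make the interface concrete, here is a minimal usage sketch (not part of the original project code): it builds the graph for a single 256x256 RGB image and runs one forward pass on random data. The placeholder shape, the random test batch, and the import model line are assumptions made for this illustration, assuming the code above is saved as model.py:
# coding: utf-8
# Minimal usage sketch, not part of the original project code.
# Assumes the code above is saved as model.py; the 256x256 placeholder shape
# and the random test batch are chosen only for this illustration.
import numpy as np
import tensorflow as tf
import model

images = tf.placeholder(tf.float32, shape=[1, 256, 256, 3], name='input_images')
generated = model.base_net(images, isTraining=True)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    out = sess.run(generated, feed_dict={images: np.random.rand(1, 256, 256, 3) * 255.0})
    print(out.shape)  # expected: (1, 256, 256, 3)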