一、论文参考
这里使用的方法主要参考《A Neural Algorithm of Artistic Style》这篇论文。
简单来说,神经网络的低级层提取的是更为底层的信息,比如直线,拐角等,高级层提取的是更为复杂的内容,比如语义上的信息(图片里的是一只猫还是一条狗),将两者结合就可以将一张图片的风格迁移到另一张图片上。
具体内容可以参考论文。
这里使用预训练的 VGG19 模型(代码默认从当前目录加载权重文件 vgg19.npy)。
代码如下(大多我都加了注释):
import tensorflow as tf
import numpy as np
import os
import cv2
# Working resolution: both images are resized to IMAGE_H x IMAGE_W before
# they are fed to the network.
IMAGE_W = 600
IMAGE_H = 600

# Reserved slot for the content image's original shape; main() keeps its own
# local value, so this module-level name stays None.
Ratio = None

# Image whose content is kept.
CONTENT_IMG = './images/Taipei101.jpg'
# Image whose style is transferred onto the content image.
STYLE_IMG = './images/StarryNight.jpg'

# Share of random noise mixed into the initial picture; the remainder comes
# from the content image.
INI_NOISE_RATIO = 0.7

# Balance parameter between the style loss and the content loss.
STYLE_STRENGTH = 500

# Number of optimisation iterations to run.
ITERATION = 2000

# (layer name, weight) pairs contributing to the content loss.
CONTENT_LAYERS = [
    ('conv4_2', 1.),
]

# (layer name, weight) pairs contributing to the style loss.
STYLE_LAYERS = [
    ('conv1_1', 2.),
    ('conv2_1', 1.),
    ('conv3_1', 0.5),
    ('conv4_1', 0.25),
    ('conv5_1', 0.125),
]

# Names of all the convolution layers, grouped by block.
layers = [
    'conv1_1', 'conv1_2',
    'conv2_1', 'conv2_2',
    'conv3_1', 'conv3_2', 'conv3_3', 'conv3_4',
    'conv4_1', 'conv4_2', 'conv4_3', 'conv4_4',
    'conv5_1', 'conv5_2', 'conv5_3', 'conv5_4',
]
def vgg19(input, model_path=None):
    '''
    Build the VGG19 convolutional stack, initialized with the pre-trained VGG19 model.

    All weights and biases are loaded as non-trainable variables. Every
    convolution uses stride 1 with 'SAME' padding followed by a ReLU, and each
    of the five blocks ends with a 2x2 average pooling of stride 2.

    :param input: The input image tensor.
    :param model_path: Path where the VGG19 weight file is stored; defaults to 'vgg19.npy'.
    :return: A python dict mapping 'input', every 'convX_Y' and every 'poolX'
        to the corresponding tensor.
    :raises FileNotFoundError: If the weight file does not exist.
    '''
    if model_path is None:
        model_path = 'vgg19.npy'
    if not os.path.isfile(model_path):
        # Report the path that was actually looked up, not just the default name.
        raise FileNotFoundError('%s cannot be found!!!' % model_path)
    # The file stores a pickled dict, which NumPy >= 1.16.3 refuses to load
    # unless allow_pickle=True is passed.
    # NOTE(security): unpickling executes arbitrary code; only load weight
    # files obtained from a trusted source.
    wDict = np.load(model_path, encoding="bytes", allow_pickle=True).item()

    net = {'input': input}
    current = input
    # Number of convolution layers in each of the five VGG19 blocks.
    convs_per_block = (2, 2, 4, 4, 4)
    for block_idx, n_convs in enumerate(convs_per_block, start=1):
        for conv_idx in range(1, n_convs + 1):
            name = 'conv%d_%d' % (block_idx, conv_idx)
            # Each entry holds [kernel, bias]. Deriving both lookups from the
            # same layer name prevents pairing a kernel with another layer's
            # bias (conv2_1 previously used the conv2_2 bias).
            weight = tf.Variable(wDict[name][0], trainable=False)
            bias = tf.Variable(wDict[name][1], trainable=False)
            current = tf.nn.relu(tf.nn.conv2d(current, weight, [1, 1, 1, 1], 'SAME') + bias)
            net[name] = current
        current = tf.nn.avg_pool(current, [1, 2, 2, 1], [1, 2, 2, 1], 'SAME')
        net['pool%d' % block_idx] = current
    return net
def gram_matrix(tensor, length, depth):
    '''
    Compute the Gram matrix of a feature map.

    The input is reshaped to a (length, depth) matrix F and the product
    transpose(F) x F is returned.

    :param tensor: The tensor to convert, which could be a numpy array or a TensorFlow tensor.
    :param length: Number of rows of the reshaped matrix.
    :param depth: Number of columns of the reshaped matrix.
    :return: A tensor of shape (depth, depth).
    '''
    features = tf.reshape(tensor, (length, depth))
    features_t = tf.transpose(features)
    return tf.matmul(features_t, features)
def build_content_loss(combination, content):
    '''
    Accumulate the weighted content loss over every layer in CONTENT_LAYERS.

    For each layer the sum of squared differences between the two networks'
    activations is scaled by weight * 0.25 / (sqrt(M) + sqrt(N)), where M is
    the product of the activation's dimensions 1 and 2 and N is dimension 3.

    :param combination: Layer dict of the network fed with the combination
        (style-transferred) image.
    :param content: Layer dict of the network fed with the content image.
    :return: The loss between the combination and the content.
    '''
    total = 0.0
    for layer_name, layer_weight in CONTENT_LAYERS:
        generated = combination[layer_name]
        dims = generated.get_shape()
        spatial_size = dims[1].value * dims[2].value
        channels = dims[3].value
        scale = layer_weight * 0.25 / (spatial_size ** 0.5 + channels ** 0.5)
        squared_error = tf.reduce_sum(tf.pow(generated - content[layer_name], 2))
        total += scale * squared_error
    return total
def build_style_loss(combination, style):
    '''
    Accumulate the weighted style loss over every layer in STYLE_LAYERS.

    For each layer the squared difference between the Gram matrices of the two
    networks' activations is summed and scaled by weight / (4 * N^2 * M^2),
    where M is the product of the activation's dimensions 1 and 2 and N is
    dimension 3.

    :param combination: Layer dict of the network fed with the combination
        (style-transferred) image.
    :param style: Layer dict of the network fed with the style image.
    :return: The loss between the combination and the style.
    '''
    total = 0.0
    for layer_name, layer_weight in STYLE_LAYERS:
        generated = combination[layer_name]
        reference = style[layer_name]
        dims = generated.get_shape()
        spatial_size = dims[1].value * dims[2].value
        channels = dims[3].value
        gram_diff = (gram_matrix(generated, spatial_size, channels)
                     - gram_matrix(reference, spatial_size, channels))
        squared_error = tf.reduce_sum(tf.pow(gram_diff, 2))
        scale = layer_weight * 1.0 / (4 * channels ** 2 * spatial_size ** 2)
        total += tf.multiply(scale, squared_error)
    return total
# Per-channel means in B, G, R order, because cv2.imread returns BGR images.
# NOTE(review): assumes the VGG19 weights expect mean-centred BGR input;
# confirm against the origin of the weight file.
_BGR_MEAN = np.array([104, 117, 123], dtype=np.float32)


def _load_image(path):
    '''
    Read an image from disk and prepare it as a network input batch.

    :param path: Path of the image file.
    :return: A tuple (batch, original_shape). batch is a float32 array of shape
        (1, IMAGE_H, IMAGE_W, 3) with the channel means subtracted;
        original_shape is the shape of the image before resizing.
    :raises FileNotFoundError: If OpenCV cannot read the file.
    '''
    raw = cv2.imread(path)
    if raw is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError('%s cannot be read as an image' % path)
    original_shape = raw.shape
    # cv2.resize takes the target size as (width, height).
    resized = cv2.resize(raw, (IMAGE_W, IMAGE_H))
    # Convert to float before subtracting: uint8 arithmetic would wrap around.
    batch = np.expand_dims(resized.astype(np.float32), 0)
    # Broadcasts over the last (channel) axis.
    batch -= _BGR_MEAN
    return batch, original_shape


def main():
    '''
    Run the style transfer and write one result image per iteration.

    Builds three VGG19 networks (style, content, combination), minimizes the
    weighted sum of the style and content losses with Adam over the combination
    image, prints the loss of every iteration and saves the current
    combination image to 'results/<iteration>.jpg' at the content image's
    original size.

    :raises FileNotFoundError: If the style or content image cannot be read.
    '''
    # Define a placeholder
    myinput = tf.placeholder(dtype=tf.float32, shape=[1, IMAGE_H, IMAGE_W, 3])

    style_array, _ = _load_image(STYLE_IMG)
    # Keep the original shape of the content image to restore the output size.
    content_array, content_shape = _load_image(CONTENT_IMG)

    # The initial combination image is a blend of noise and the content image.
    # It is computed from the numpy array so that its initializer does not
    # depend on another, possibly uninitialized, TensorFlow variable.
    noise = np.random.uniform(-20, 20, (1, IMAGE_H, IMAGE_W, 3)).astype('float32')
    initial = INI_NOISE_RATIO * noise + (1. - INI_NOISE_RATIO) * content_array

    styleimg = tf.Variable(style_array, dtype=tf.float32, trainable=False)
    contentimg = tf.Variable(content_array, dtype=tf.float32, trainable=False)
    combination = tf.Variable(initial, dtype=tf.float32, trainable=True)

    # Build all the networks. Each image is multiplied by the placeholder,
    # which is fed with ones, so the values reach the network unchanged.
    stylenet = vgg19(myinput * styleimg)
    contentnet = vgg19(myinput * contentimg)
    combinationnet = vgg19(myinput * combination)

    # Define the loss function
    loss = (STYLE_STRENGTH * build_style_loss(combinationnet, stylenet)
            + build_content_loss(combinationnet, contentnet))
    # Here, AdamOptimizer is used, and the learning rate is 2.0
    train = tf.train.AdamOptimizer(2).minimize(loss)

    # Input image, consisting of 1s.
    img = np.ones(dtype=np.float32, shape=[1, IMAGE_H, IMAGE_W, 3])
    feed = {myinput: img}

    # cv2.imwrite fails silently when the target directory is missing.
    os.makedirs('results', exist_ok=True)

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        for i in range(ITERATION):
            # One run both reports the loss and applies the update, instead of
            # a separate forward pass just for printing.
            _, loss_value = sess.run([train, loss], feed_dict=feed)
            print(loss_value)
            # Actually, the COMBINATION is the final output.
            pic = sess.run(combination, feed_dict=feed)[0]
            # Undo the mean subtraction per channel and bring the values back
            # into the valid 8-bit range before encoding.
            pic = np.clip(pic + _BGR_MEAN, 0, 255).astype(np.uint8)
            cv2.imwrite('results/%d.jpg' % i,
                        cv2.resize(pic, (content_shape[1], content_shape[0])))


if __name__ == '__main__':
    main()