tensorflow使用vgg19网络模型的神经网络风格迁移

tensorflow 安装

cuda安装

cuDNN安装

vgg19模型下载!

理解得快不快取决于参考的代码结构简不简单、清不清晰,这里拿Honlan大佬的项目内容来做参考,这个方向其实论文不少,比如这个。

开始读代码了!首先是加载vgg19的网络模型,关于模型结构的拆解可以参考这篇文章,模型结构的介绍文章更多,不举例了

import tensorflow as tf
import numpy as np
import scipy.io
import scipy.misc
import os
import time

def the_current_time():
	"""Print the current local time as ``YYYY-MM-DD HH:MM:SS`` on one line."""
	# Drop the fractional part first; the format has one-second resolution.
	whole_seconds = int(time.time())
	stamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(whole_seconds))
	print(stamp)

# --- Files -----------------------------------------------------------------
# Content/style inputs and the directory that receives intermediate outputs.
CONTENT_IMG = 'content.jpg'
STYLE_IMG = 'style5.jpg'
OUTPUT_DIR = 'neural_style_transfer_tensorflow/'

# Create the output directory on first run.
if not os.path.exists(OUTPUT_DIR):
	os.mkdir(OUTPUT_DIR)

# --- Working resolution ----------------------------------------------------
# Images are resized to IMAGE_H x IMAGE_W with COLOR_C channels.
IMAGE_W = 800
IMAGE_H = 600
COLOR_C = 3

# --- Optimisation settings -------------------------------------------------
# Share of random noise blended into the content image for the start image.
NOISE_RATIO = 0.7
# Loss weights as used by the training script below:
# BETA scales the content loss, ALPHA scales the style loss.
BETA = 5
ALPHA = 100

# --- Pretrained network ----------------------------------------------------
VGG_MODEL = 'imagenet-vgg-verydeep-19.mat'
# Per-channel mean, subtracted on load and added back on save; shaped
# (1, 1, 1, 3) so it broadcasts over a (1, H, W, 3) image batch.
MEAN_VALUES = np.reshape(np.array([123.68, 116.779, 103.939]), (1, 1, 1, 3))

起手式,不用解释 

def load_vgg_model(path):
	"""Build a truncated VGG-19 feature-extraction graph from a ``.mat`` weight file.

	The file is read with ``scipy.io.loadmat`` and its ``'layers'`` entry supplies
	the convolution kernels and biases, which are embedded as TensorFlow
	constants. The only ``tf.Variable`` created here is ``graph['input']``
	(float32 zeros of shape ``(1, IMAGE_H, IMAGE_W, COLOR_C)``), so it is the
	only thing an optimizer can update. Eight convolution layers are left
	commented out below; each active layer is wired directly to the previous
	active layer or pooling output.

	Args:
		path: Path of the ``.mat`` file handed to ``scipy.io.loadmat``.

	Returns:
		dict mapping layer names ('input', 'conv1_1', 'avgpool1', ...,
		'avgpool5') to the corresponding TensorFlow variable / tensors.

	Raises:
		AssertionError: if the layer name stored at a given index does not
			match the name expected for that index.
	"""
	vgg = scipy.io.loadmat(path)
	vgg_layers = vgg['layers']

	def _weights(layer, expected_layer_name):
		"""Return ``(W, b)`` stored at index ``layer``, checking the stored layer name."""
		# The index chains below follow the nesting of the loadmat result:
		# [...][2][0][0] is used as the kernel, [...][2][0][1] as the bias and
		# [...][0][0] as the layer's name.
		W = vgg_layers[0][layer][0][0][2][0][0]
		b = vgg_layers[0][layer][0][0][2][0][1]
		layer_name = vgg_layers[0][layer][0][0][0][0]
		# Guards against a layer index that points at a different layer.
		assert layer_name == expected_layer_name
		return W, b

	def _conv2d_relu(prev_layer, layer, layer_name):
		"""Apply the stored convolution (stride 1, SAME padding), add the bias, then ReLU."""
		W, b = _weights(layer, layer_name)
		W = tf.constant(W)
		# Flatten the bias to 1-D so the addition broadcasts over the last axis.
		b = tf.constant(np.reshape(b, (b.size)))
		# NOTE(review): `filter=` is the TF 1.x keyword name; TF 2.x spells this
		# argument `filters` -- confirm against the installed TensorFlow version.
		return tf.nn.relu(tf.nn.conv2d(prev_layer, filter=W, strides=[1, 1, 1, 1], padding='SAME') + b)

	def _avgpool(prev_layer):
		"""Average-pool with ksize and strides [1, 2, 2, 1] and SAME padding."""
		return tf.nn.avg_pool(prev_layer, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')

	# The integer passed to _conv2d_relu is the layer's index inside vgg_layers.
	# Commented-out lines are the layers that were dropped from the network;
	# they are kept to show where each one would be re-inserted.
	graph = {}
	graph['input']    = tf.Variable(np.zeros((1, IMAGE_H, IMAGE_W, COLOR_C)), dtype='float32')
	graph['conv1_1']  = _conv2d_relu(graph['input'], 0, 'conv1_1')
	# graph['conv1_2']  = _conv2d_relu(graph['conv1_1'], 2, 'conv1_2')
	graph['avgpool1'] = _avgpool(graph['conv1_1'])
	graph['conv2_1']  = _conv2d_relu(graph['avgpool1'], 5, 'conv2_1')
	# graph['conv2_2']  = _conv2d_relu(graph['conv2_1'], 7, 'conv2_2')
	graph['avgpool2'] = _avgpool(graph['conv2_1'])
	graph['conv3_1']  = _conv2d_relu(graph['avgpool2'], 10, 'conv3_1')
	graph['conv3_2']  = _conv2d_relu(graph['conv3_1'], 12, 'conv3_2')
	# graph['conv3_3']  = _conv2d_relu(graph['conv3_2'], 14, 'conv3_3')
	# graph['conv3_4']  = _conv2d_relu(graph['conv3_3'], 16, 'conv3_4')
	graph['avgpool3'] = _avgpool(graph['conv3_2'])
	graph['conv4_1']  = _conv2d_relu(graph['avgpool3'], 19, 'conv4_1')
	graph['conv4_2']  = _conv2d_relu(graph['conv4_1'], 21, 'conv4_2')
	# graph['conv4_3']  = _conv2d_relu(graph['conv4_2'], 23, 'conv4_3')
	# graph['conv4_4']  = _conv2d_relu(graph['conv4_3'], 25, 'conv4_4')
	graph['avgpool4'] = _avgpool(graph['conv4_2'])
	graph['conv5_1']  = _conv2d_relu(graph['avgpool4'], 28, 'conv5_1')
	graph['conv5_2']  = _conv2d_relu(graph['conv5_1'], 30, 'conv5_2')
	# graph['conv5_3']  = _conv2d_relu(graph['conv5_2'], 32, 'conv5_3')
	# graph['conv5_4']  = _conv2d_relu(graph['conv5_3'], 34, 'conv5_4')
	graph['avgpool5'] = _avgpool(graph['conv5_2'])
	return graph

这里和项目原来的代码不大一样,我去掉了8层卷积层,我也很绝望,捡垃圾捡来的1050只有2g显存,用完整个模型显存就爆了,这部分代码是把那个500+M的模型读回来。

def content_loss_func(sess, model):
	"""Build the content-loss tensor against the image currently held in ``model['input']``.

	The 'conv4_2' activations are evaluated immediately with ``sess.run`` and
	frozen as the target ``p``. The returned tensor compares that fixed target
	with the symbolic 'conv4_2' output ``x``, which keeps following
	``model['input']`` when the variable is changed later.

	Args:
		sess: Session used to evaluate the target activations.
		model: Layer dict; only the 'conv4_2' entry is read.

	Returns:
		Scalar tensor ``sum((x - p) ** 2) / (4 * N * M)``, where ``N`` is the
		size of the last axis of ``p`` and ``M`` the product of axes 1 and 2.
	"""
	def _content_loss(p, x):
		N = p.shape[3]
		M = p.shape[1] * p.shape[2]
		# Float literal on purpose: N and M are plain ints, so under Python 2
		# `1 / (4 * N * M)` is integer division and evaluates to 0, which would
		# silently zero the whole loss.
		scale = 1.0 / (4 * N * M)
		return scale * tf.reduce_sum(tf.pow(x - p, 2))

	# First argument: evaluated now (fixed target). Second: left symbolic.
	return _content_loss(sess.run(model['conv4_2']), model['conv4_2'])

计算内容图片(content image)的损失函数,很好理解,其中 _content_loss(p, x) 就是损失函数的定义,论文里的内容。注意两个参数的区别:sess.run(model['conv4_2']) 会立刻求值,返回当前输入(调用之前已经把内容图片 assign 进了 model['input'])经过 conv4_2 卷积层后的输出,这是一个固定的 numpy 数组,作为目标 p;而后一个参数 model['conv4_2'] 只是图里的张量,在这里并不会求值,之后训练时它会随 model['input'](也就是生成图片)的变化而变化,对应 x。

# (layer name, weight) pairs: the layers whose outputs feed the style loss and
# the factor applied to each layer's contribution.
STYLE_LAYERS = [
	('conv1_1', 0.5),
	('conv2_1', 1.0),
	('conv3_1', 1.5),
	('conv4_1', 3.0),
	('conv5_1', 4.0),
]

这里指定了计算style图片特征的五层网络和所对应的权重。

def style_loss_func(sess, model):
	"""Build the style-loss tensor against the image currently held in ``model['input']``.

	For every ``(layer_name, weight)`` pair in ``STYLE_LAYERS`` the layer's
	activations are evaluated now and frozen as the target; the loss compares
	the Gram matrix of that target with the Gram matrix of the still-symbolic
	layer output, and the per-layer terms are combined as a weighted sum.

	Args:
		sess: Session used to evaluate the target activations.
		model: Layer dict; must contain every layer named in ``STYLE_LAYERS``.

	Returns:
		Tensor holding the weighted sum of the per-layer style losses
		(the plain int 0 if ``STYLE_LAYERS`` is empty).
	"""
	def _gram_matrix(F, N, M):
		# Flatten to (M, N) and return the (N, N) matrix Ft^T . Ft.
		Ft = tf.reshape(F, (M, N))
		return tf.matmul(tf.transpose(Ft), Ft)

	def _style_loss(a, x):
		N = a.shape[3]
		M = a.shape[1] * a.shape[2]
		A = _gram_matrix(a, N, M)
		G = _gram_matrix(x, N, M)
		# Float literal on purpose: with plain ints, Python 2 would evaluate
		# `1 / (4 * N ** 2 * M ** 2)` as integer division, i.e. 0.
		scale = 1.0 / (4 * N ** 2 * M ** 2)
		return scale * tf.reduce_sum(tf.pow(G - A, 2))

	# Fetch every target activation in one run() call (a single forward pass)
	# instead of running the network once per style layer.
	targets = sess.run([model[layer_name] for layer_name, _ in STYLE_LAYERS])
	return sum(
		_style_loss(target, model[layer_name]) * w
		for target, (layer_name, w) in zip(targets, STYLE_LAYERS)
	)

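style图片的损失函数,但是这里使用的是Gram矩阵表示style图片的特征。关于Gram矩阵,这里就不细讲了(我还没搞懂),只说代码里做的事:把某一层的输出 reshape 成 M×N 的矩阵 F(M 是空间位置数,N 是通道数),Gram 矩阵就是 FᵀF,形状是 N×N,第 (i, j) 个元素是第 i 个通道和第 j 个通道特征图的内积;位置信息在求和时被抹掉了,只留下通道之间的相关性,所以可以用来表示“风格”。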

 

def generate_noise_image(content_image, noise_ratio=NOISE_RATIO):
	"""Blend uniform random noise into ``content_image`` to form the start image.

	Args:
		content_image: Array combined with noise of shape
			``(1, IMAGE_H, IMAGE_W, COLOR_C)``.
		noise_ratio: Weight of the noise; the content image gets
			``1 - noise_ratio``.

	Returns:
		``noise * noise_ratio + content_image * (1 - noise_ratio)``.
	"""
	shape = (1, IMAGE_H, IMAGE_W, COLOR_C)
	# Noise is drawn uniformly from [-20, 20) and stored as float32.
	noise = np.random.uniform(-20, 20, shape).astype('float32')
	content_share = 1 - noise_ratio
	return noise * noise_ratio + content_image * content_share

输入图片加入噪声

def load_image(path):
	"""Read an image, resize it to the working resolution and mean-centre it.

	Args:
		path: Image file to read.

	Returns:
		Array of shape ``(1, IMAGE_H, IMAGE_W, 3)`` with ``MEAN_VALUES``
		subtracted.
	"""
	# NOTE: scipy.misc.imread / imresize were deprecated in SciPy 1.0 and are
	# absent from current releases; this function needs an older SciPy (with
	# Pillow installed) until it is ported to another image library.
	#
	# mode='RGB' forces three channels. Without it a grayscale or RGBA file
	# yields a 2-D or 4-channel array and the MEAN_VALUES subtraction below
	# fails to broadcast.
	image = scipy.misc.imread(path, mode='RGB')
	# The size tuple is (height, width).
	image = scipy.misc.imresize(image, (IMAGE_H, IMAGE_W))
	# Prepend the batch axis expected by the network input.
	image = np.reshape(image, ((1, ) + image.shape))
	image = image - MEAN_VALUES
	return image

def save_image(path, image):
	"""Undo the mean-centring of a one-image batch and write it to ``path``.

	Args:
		path: Destination file.
		image: Batch array; only its first element is saved.
	"""
	# Add the mean back, then drop the batch axis.
	restored = (image + MEAN_VALUES)[0]
	# Limit to the 0..255 range before converting to 8-bit pixels.
	pixels = np.clip(restored, 0, 255).astype('uint8')
	scipy.misc.imsave(path, pixels)

加载和保存图片,只是要注意一下tensorflow的shape表示方式和其他框架的不大一样,MEAN_VALUES 是 vgg19 模型里 'normalization' 的值,shape=(1,1,1,3)

with tf.Session() as sess:
	# Load both images and build the graph; model['input'] is the variable
	# that will be optimised.
	content_image = load_image(CONTENT_IMG)
	style_image = load_image(STYLE_IMG)
	model = load_vgg_model(VGG_MODEL)

	input_image = generate_noise_image(content_image)
	sess.run(tf.global_variables_initializer())

	# Each loss function freezes the activations of whatever image is in
	# model['input'] at call time, so assign the matching image first.
	sess.run(model['input'].assign(content_image))
	content_loss = content_loss_func(sess, model)

	sess.run(model['input'].assign(style_image))
	style_loss = style_loss_func(sess, model)

	total_loss = BETA * content_loss + ALPHA * style_loss
	optimizer = tf.train.AdamOptimizer(2.0)
	train = optimizer.minimize(total_loss)

	# minimize() adds the optimizer's own variables, so initialise again. That
	# also resets model['input'], hence the assign of the start image after it.
	sess.run(tf.global_variables_initializer())
	sess.run(model['input'].assign(input_image))

	ITERATIONS = 2000
	for i in range(ITERATIONS):
		sess.run(train)
		# Report every 100th step and also the very last one; otherwise the
		# result of the final steps (1901..1999) would never be written out.
		if i % 100 == 0 or i == ITERATIONS - 1:
			# One run() call returns the image and its cost from the same state.
			output_image, cost = sess.run([model['input'], total_loss])
			the_current_time()
			print('Iteration %d' % i)
			print('Cost: ', cost)

			save_image(os.path.join(OUTPUT_DIR, 'output_%d.jpg' % i), output_image)

这个有点长,慢慢来 

# Walkthrough excerpt from the session body: load both images, build the
# graph, create the noisy start image and run the variable initializer.
content_image = load_image(CONTENT_IMG)
style_image = load_image(STYLE_IMG)
model = load_vgg_model(VGG_MODEL)
input_image = generate_noise_image(content_image)
sess.run(tf.global_variables_initializer())

把需要的数据都加载一下,不过要注意的是tensorflow是先把图(Graph)搭好,然后放到会话(Session)里,最后run();也就是说,搭图的代码只是定义运算,并不会在写下的那一行立即执行,真正的计算发生在 sess.run() 的时候。像这里,只有执行了 sess.run(tf.global_variables_initializer()) 之后,图里的变量(这里就是 model['input'])才会真正被初始化。

# Walkthrough excerpt: put the content image into the input variable, then
# build the content loss while that image is the current input.
sess.run(model['input'].assign(content_image))
content_loss = content_loss_func(sess, model)

将内容图片(content image)丢进模型里,计算损失函数

# Walkthrough excerpt: put the style image into the input variable, then
# build the style loss while that image is the current input.
sess.run(model['input'].assign(style_image))
style_loss = style_loss_func(sess, model)

同理,计算风格图片(style image)的损失函数

# Walkthrough excerpt: weighted sum of the two losses
# (BETA scales the content term, ALPHA the style term).
total_loss = BETA * content_loss + ALPHA * style_loss

定义两个图片的损失函数对总损失函数的影响

# Walkthrough excerpt: create an Adam optimizer (argument 2.0) and the op
# that minimises total_loss.
optimizer = tf.train.AdamOptimizer(2.0)
train = optimizer.minimize(total_loss)

定义优化函数

# Walkthrough excerpt: run the initializer a second time (the optimizer was
# created after the first run), then load the noisy start image into the
# input variable.
sess.run(tf.global_variables_initializer())
sess.run(model['input'].assign(input_image))

minimize() 会给优化器(Adam)新建内部变量,所以这里要再执行一次变量初始化;这次初始化也会把 model['input'] 重置为 0,因此紧接着把加了噪声的内容图片 assign 进去,作为优化的起点。

# Walkthrough excerpt of the training loop, dedented like the other excerpts
# (the loop body was previously one level deeper than its first line).
ITERATIONS = 2000
for i in range(ITERATIONS):
	# One optimisation step on model['input'].
	sess.run(train)
	# Every 100th step: report the cost and save the current image.
	if i % 100 == 0:
		output_image = sess.run(model['input'])
		the_current_time()
		print('Iteration %d' % i)
		print('Cost: ', sess.run(total_loss))

		save_image(os.path.join(OUTPUT_DIR, 'output_%d.jpg' % i), output_image)

迭代2000次,每次都做一次学习,每一次的结果都作为下一次的输入,优化器会不断更新数据(直接写进原来的变量中)。

大概原理感觉不难理解,但是搞出这个论文的人真的厉害。

另外,tensorflow这个框架也是厉害,画个图,搭起来,run,简单无脑,一气呵成。

 

你可能感兴趣的:(深度学习)