from __future__ import print_function
import os
from io import BytesIO
import numpy as np
from functools import partial
import PIL.Image
import scipy.misc
import tensorflow as tf
'''创建图和会话'''
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)
'''导入模型'''
model_fn = 'tensorflow_inception_graph.pb'#导入Inception网络
# tensorflow_inception_graph.pb文件的下载:
# https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip
#with tf.gfile.FastGFile(model_fn, 'rb') as f:
with tf.gfile.GFile(model_fn, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
使用的图像数据格式通常是(height,width,channel),只能表示一张图像; 而Inception模型要求的输入格式却是(batch,height, width, channel),即同时将多张图像送入网络
tf.expand_dims(input, dim, name=None)
# 定义输入图像的占位符
t_input = tf.placeholder(np.float32, name='input')
#图像预处理——减均值
imagenet_mean = 117.0 #在训练Inception模型时做了减均值预处理,此处也需减同样的均值以保持一致
#图像预处理——增加维度
# 图像数据格式一般是(height,width,channels),为同时将多张图片输入网络而在前面增加一维
# 变为(batch,height,width,channel)
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
# 导入模型并将经预处理的图像送入网络中
tf.import_graph_def(graph_def, {'input': t_preprocessed})
layers = [op.name for op in graph.get_operations() if op.type == 'Conv2D']
# 输出卷积层层数
print('Number of layers', len(layers))
# 输出所有卷积层名称
print(layers)
# 还可输出指定卷积层的参数
name1 = 'mixed4d_3x3_bottleneck_pre_relu'
print('shape of %s: %s' % (name1, str(graph.get_tensor_by_name('import/' + name1 + ':0').get_shape())))
name2 = 'mixed4e_5x5_bottleneck_pre_relu'
print('shape of %s: %s' % (name2, str(graph.get_tensor_by_name('import/' + name2 + ':0').get_shape())))
# 把一个numpy.ndarray保存成图像文件
def savearray(img_array, img_name):
scipy.misc.toimage(img_array).save(img_name)
print('img saved: %s' % img_name)
# 渲染函数
def render_naive(t_obj, img0, iter_n=20, step=1.0):
# t_obj:是layer_output[:, :, :, channel],即卷积层某个通道的值
# img0:初始图像(噪声图像)
# iter_n:迭代次数
# step:用于控制每次迭代步长,可以看作学习率
t_score = tf.reduce_mean(t_obj)
# t_score是t_obj的平均值
# 由于我们的目标是调整输入图像使卷积层激活值尽可能大
# 即最大化t_score
# 为达到此目标,可使用梯度下降
# 计算t_score对t_input的梯度
t_grad = tf.gradients(t_score, t_input)[0]
img = img0.copy()#复制新图像可避免影响原图像的值
for i in range(iter_n):
# 在sess中计算梯度,以及当前的t_score
g, score = sess.run([t_grad, t_score], {t_input: img})
# 对img应用梯度
# 首先对梯度进行归一化处理
g /= g.std() + 1e-8
# 将正规化处理后的梯度应用在图像上,step用于控制每次迭代步长,此处为1.0
img += g * step
#print('score(mean)=%f' % (score))
print('iter:%d' %(i+1), 'score(mean)=%f' % score)
# 保存图片
savearray(img, 'naive_deepdream.jpg')
它的核心就在于渲染函数。
# 定义卷积层、通道数,并取出对应的tensor
name = 'mixed4d_3x3_bottleneck_pre_relu'# (?, ?, ?, 144)
channel = 139
# 'mixed4d_3x3_bottleneck_pre_relu'共144个通道
# 此处可选任意通道(0~143之间任意整数)进行最大化
layer_output = graph.get_tensor_by_name("import/%s:0" % name)
# layer_output[:, :, :, channel]即可表示该卷积层的第140个通道
# 定义图像噪声
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
# 调用render_naive函数渲染
render_naive(layer_output[:, :, :, channel], img_noise, iter_n=20)
# 保存并显示图片
im = PIL.Image.open('naive_deepdream.jpg')
im.show()
im.save('naive_single_chn.jpg')
# 定义卷积层、通道数,并取出对应的tensor
name3 = 'mixed3a_3x3_bottleneck_pre_relu'
layer_output = graph.get_tensor_by_name("import/%s:0" % name3)
print('shape of %s: %s' % (name3, str(graph.get_tensor_by_name('import/' + name3 + ':0').get_shape())))
# 定义噪声图像
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
# 调用render_naive函数渲染
channel = 86 # (?, ?, ?, 96)
render_naive(layer_output[:, :, :, channel], img_noise, iter_n=20)
# 保存并显示图片
im = PIL.Image.open('naive_deepdream.jpg')
im.show()
im.save('shallow_single_chn.jpg')
除了对较低层的卷积特征进行可视化,若还想知道高层的卷积特征是什么样的,也可以通过同样的方法对它进行可视化。
# 定义卷积层、通道数,并取出对应的tensor
name4 = 'mixed5b_5x5_pre_relu'
layer_output = graph.get_tensor_by_name("import/%s:0" % name4)
print('shape of %s: %s' % (name4, str(graph.get_tensor_by_name('import/' + name4 + ':0').get_shape())))
# 定义噪声图像
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
# 调用render_naive函数渲染
channel =118 # (?, ?, ?, 128)
render_naive(layer_output[:, :, :, channel], img_noise, iter_n=20)
# 保存并显示图片
im = PIL.Image.open('naive_deepdream.jpg')
im.show()
im.save('deep_single_chn.jpg')
# 定义卷积层、通道数,并取出对应的tensor
name1 = 'mixed4d_3x3_bottleneck_pre_relu' #(?, ?, ?, 144)
name2= 'mixed4e_5x5_bottleneck_pre_relu' # (?, ?, ?, 32)
channel1 = 139 #因为共144通道,此处可选择0~143之间任意整数
channel2 = 28 # 因为共32通道,此处可选择0~31之间任意整数
layer_output1= graph.get_tensor_by_name("import/%s:0" % name1)
layer_output2= graph.get_tensor_by_name("import/%s:0" % name2)
# 定义噪声图像
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
# 调用render_naive函数渲染
render_naive(layer_output1[:, :, :, channel1]+layer_output2[:, :, :, channel2], img_noise, iter_n=20)
# 保存并显示图片
im = PIL.Image.open('naive_deepdream.jpg')
im.show()
im.save('multi_chn.jpg')
# 定义卷积层,并取出对应的tensor
name = 'mixed4d_3x3_bottleneck_pre_relu'
layer_output= graph.get_tensor_by_name("import/%s:0" % name)
# 定义噪声图像
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
# 调用render_naive函数渲染
render_naive(layer_output, img_noise, iter_n=20) # 不指定特定通道,即表示利用所有通道特征
# 单通道时:layer_output[:, :, :, channel]
# 保存并显示图片
im = PIL.Image.open('naive_deepdream.jpg')
#im = PIL.Image.open('deepdream.jpg')
im.show()
im.save('all_chn.jpg')
from __future__ import print_function
import os
from io import BytesIO
import numpy as np
from functools import partial
import PIL.Image
import scipy.misc
import tensorflow as tf
graph = tf.Graph()
model_fn = 'tensorflow_inception_graph.pb'
sess = tf.InteractiveSession(graph=graph)
with tf.gfile.FastGFile(model_fn, 'rb') as f:
graph_def = tf.GraphDef()
graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})
# 保存图像
def savearray(img_array, img_name):
scipy.misc.toimage(img_array).save(img_name)
print('img saved: %s' % img_name)
# 将图像放大ratio倍
def resize_ratio(img, ratio):
min = img.min()
max = img.max()
img = (img - min) / (max - min) * 255
img = np.float32(scipy.misc.imresize(img, ratio))
img = img / 255 * (max - min) + min
return img
# 调整图像尺寸
def resize(img, hw):
min = img.min()
max = img.max()
img = (img - min) / (max - min) * 255
img = np.float32(scipy.misc.imresize(img, hw))
img = img / 255 * (max - min) + min
return img
# 原始图像尺寸可能很大,从而导致内存耗尽问题
# 每次只对 tile_size * tile_size 大小的图像计算梯度,避免内存问题
def calc_grad_tiled(img, t_grad, tile_size=512):
sz = tile_size
h, w = img.shape[:2]
sx, sy = np.random.randint(sz, size=2)
img_shift = np.roll(np.roll(img, sx, 1), sy, 0) # 先在行上做整体移动,再在列上做整体移动
grad = np.zeros_like(img)
for y in range(0, max(h - sz // 2, sz), sz):
for x in range(0, max(w - sz // 2, sz), sz):
sub = img_shift[y:y + sz, x:x + sz]
g = sess.run(t_grad, {t_input: sub})
grad[y:y + sz, x:x + sz] = g
return np.roll(np.roll(grad, -sx, 1), -sy, 0)
def render_deepdream(t_obj, img0,
iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
t_score = tf.reduce_mean(t_obj)
t_grad = tf.gradients(t_score, t_input)[0]
img = img0.copy()
# 将图像进行金字塔分解
# 从而分为高频、低频部分
octaves = []
for i in range(octave_n - 1):
hw = img.shape[:2]
lo = resize(img, np.int32(np.float32(hw) / octave_scale))
hi = img - resize(lo, hw)
img = lo
octaves.append(hi)
# 首先生成低频的图像,再依次放大并加上高频
for octave in range(octave_n):
if octave > 0:
hi = octaves[-octave]
img = resize(img, hi.shape[:2]) + hi
for i in range(iter_n):
g = calc_grad_tiled(img, t_grad)
img += g * (step / (np.abs(g).mean() + 1e-7))
img = img.clip(0, 255)
savearray(img, 'mountain_deepdream.jpg')
im = PIL.Image.open('mountain_deepdream.jpg').show()
name = 'mixed4c'
layer_output = graph.get_tensor_by_name("import/%s:0" % name)
img0 = PIL.Image.open('cat.png') # 自己找一张图片
img0 = np.float32(img0)
render_deepdream(tf.square(layer_output), img0)
我找了两张图片测试了一下。
下图的原图是WLOP画的K/DA阿卡丽,我可太爱了哈哈哈,渲染后倒是有点克苏鲁,当然还可以再调试一下背景图像、通道数、卷积层得到各种各样的生成图像。