最近在GitHub上看代码时,偶然发现了一个把输入经过网络前向传播后的输出(即“embedding”)可视化的小细节,在此写下来加深记忆:
Git原链接:https://github.com/ywpkwon/siamese_tf_mnist
首先是创建网络(Siamese 网络):
import tensorflow as tf
class siamese:
    """Siamese network that embeds 784-D inputs into 2-D points.

    Two input placeholders are pushed through the same fully connected
    network (the second branch reuses the variables of the first), and a
    contrastive loss is built on the distance between the two outputs.

    Attributes set by the constructor:
        x1, x2: float32 placeholders of shape [None, 784], one per branch.
        o1, o2: outputs of the shared network for x1 and x2.
        y_: float32 placeholder of shape [None]; the losses weight the
            "pull together" term by y_ and the "push apart" term by 1 - y_.
        loss: scalar tensor returned by loss_with_spring().
    """

    # Create model
    def __init__(self):
        self.x1 = tf.placeholder(tf.float32, [None, 784])
        self.x2 = tf.placeholder(tf.float32, [None, 784])

        with tf.variable_scope("siamese") as scope:
            self.o1 = self.network(self.x1)
            # The second branch must share the weights created just above.
            scope.reuse_variables()
            self.o2 = self.network(self.x2)

        # Create loss
        self.y_ = tf.placeholder(tf.float32, [None])
        self.loss = self.loss_with_spring()

    def network(self, x):
        """Build one branch: fc1(1024) -> ReLU -> fc2(1024) -> ReLU -> fc3(2).

        Args:
            x: 2-D input tensor (batch of flattened samples).

        Returns:
            The output tensor of the last fully connected layer (no activation).
        """
        fc1 = self.fc_layer(x, 1024, "fc1")
        ac1 = tf.nn.relu(fc1)
        fc2 = self.fc_layer(ac1, 1024, "fc2")
        ac2 = tf.nn.relu(fc2)
        fc3 = self.fc_layer(ac2, 2, "fc3")
        return fc3

    def fc_layer(self, bottom, n_weight, name):
        """Create (or reuse) a fully connected layer ``bottom @ W + b``.

        Args:
            bottom: rank-2 input tensor; its second dimension sets W's rows.
            n_weight: number of output units.
            name: prefix for the variable names (``name + 'W'``, ``name + 'b'``).

        Returns:
            The pre-activation output tensor of the layer.
        """
        assert len(bottom.get_shape()) == 2
        n_prev_weight = bottom.get_shape()[1]
        initer = tf.truncated_normal_initializer(stddev=0.01)
        W = tf.get_variable(name+'W', dtype=tf.float32, shape=[n_prev_weight, n_weight], initializer=initer)
        b = tf.get_variable(name+'b', dtype=tf.float32, initializer=tf.constant(0.01, shape=[n_weight], dtype=tf.float32))
        fc = tf.nn.bias_add(tf.matmul(bottom, W), b)
        return fc

    def _pair_distances(self):
        """Return (squared distance, distance) between o1 and o2 for each pair."""
        eucd2 = tf.pow(tf.subtract(self.o1, self.o2), 2)
        eucd2 = tf.reduce_sum(eucd2, 1)
        # Epsilon under the sqrt: presumably guards against the unbounded
        # gradient of sqrt at 0 when both outputs coincide.
        eucd = tf.sqrt(eucd2+1e-6, name="eucd")
        return eucd2, eucd

    def loss_with_spring(self, margin=5.0):
        """Contrastive loss with squared terms.

        Per pair: ``y * d^2 + (1 - y) * max(C - d, 0)^2`` where ``d`` is the
        Euclidean distance between o1 and o2 and ``C`` is the margin.

        Args:
            margin: distance beyond which pairs with y = 0 stop contributing.

        Returns:
            Scalar tensor: the mean of the per-pair losses.
        """
        labels_t = self.y_
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")  # labels_ = !labels;
        eucd2, eucd = self._pair_distances()
        C = tf.constant(margin, dtype=tf.float32, name="C")
        # yi*d^2 + (1-yi)*max(0, C-d)^2, with d = ||CNN(p1i)-CNN(p2i)||
        pos = tf.multiply(labels_t, eucd2, name="yi_x_eucd2")
        neg = tf.multiply(labels_f, tf.pow(tf.maximum(tf.subtract(C, eucd), 0), 2), name="Nyi_x_C-eucd_xx_2")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss

    def loss_with_step(self, margin=5.0):
        """Contrastive loss with linear (non-squared) terms.

        Per pair: ``y * d + (1 - y) * max(0, C - d)`` where ``d`` is the
        Euclidean distance between o1 and o2 and ``C`` is the margin.

        Args:
            margin: distance beyond which pairs with y = 0 stop contributing.

        Returns:
            Scalar tensor: the mean of the per-pair losses.
        """
        labels_t = self.y_
        labels_f = tf.subtract(1.0, self.y_, name="1-yi")  # labels_ = !labels;
        _, eucd = self._pair_distances()
        C = tf.constant(margin, dtype=tf.float32, name="C")
        pos = tf.multiply(labels_t, eucd, name="y_x_eucd")
        neg = tf.multiply(labels_f, tf.maximum(0.0, tf.subtract(C, eucd)), name="Ny_C-eucd")
        losses = tf.add(pos, neg, name="losses")
        loss = tf.reduce_mean(losses, name="loss")
        return loss
作者定义了siamese类。要注意的是,使用时需要先创建该类的一个实例,再通过实例来访问:构造函数中有self.o1 = self.network(self.x1),所以后面run.py中可以用siamese.o1取到网络的输出:
""" Siamese implementation using Tensorflow with MNIST example.
This siamese network embeds a 28x28 image (a point in 784D)
into a point in 2D.
By Youngwook Paul Kwon (young at berkeley.edu)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from builtins import input
#import system things
from tensorflow.examples.tutorials.mnist import input_data # for data
import tensorflow as tf
import numpy as np
import os
#import helpers
import inference
import visualize
# prepare data and tf.session
mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
sess = tf.InteractiveSession()

# setup siamese network
siamese = inference.siamese()
train_step = tf.train.GradientDescentOptimizer(0.01).minimize(siamese.loss)
saver = tf.train.Saver()
# tf.initialize_all_variables() is deprecated; this is its direct replacement.
tf.global_variables_initializer().run()

# If checkpoint files already exist, ask whether to resume from them.
load = False
model_ckpt = './model.meta'
if os.path.isfile(model_ckpt):
    input_var = None
    while input_var not in ['yes', 'no']:
        input_var = input("We found model files. Do you want to load it and continue training [yes/no]?")
    if input_var == 'yes':
        load = True

# start training
if load:
    saver.restore(sess, './model')

for step in range(50000):
    # Two independent batches form the pairs; a pair is labelled 1.0 when
    # both samples carry the same digit label and 0.0 otherwise.
    batch_x1, batch_y1 = mnist.train.next_batch(128)
    batch_x2, batch_y2 = mnist.train.next_batch(128)
    batch_y = (batch_y1 == batch_y2).astype('float')

    _, loss_v = sess.run([train_step, siamese.loss], feed_dict={
        siamese.x1: batch_x1,
        siamese.x2: batch_x2,
        siamese.y_: batch_y})

    if np.isnan(loss_v):
        print('Model diverged with loss = NaN')
        quit()

    if step % 10 == 0:
        print ('step %d: loss %.3f' % (step, loss_v))

    if step % 1000 == 0 and step > 0:
        # Checkpoint the model and dump the current test-set embedding so
        # visualize.py can be run on its own later.
        saver.save(sess, './model')
        embed = siamese.o1.eval({siamese.x1: mnist.test.images})
        embed.tofile('embed.txt')

# visualize result
x_test = mnist.test.images.reshape([-1, 28, 28])
y_test = mnist.test.labels
visualize.visualize(embed, x_test, y_test)
在这里我们可以看到,网络的输出是siamese.o1,这是一个张量;再调用TensorFlow中张量自带的方法eval()(即代码中的
embed = siamese.o1.eval({siamese.x1: mnist.test.images})
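),得到的就是一个可以存储的NumPy数组(np.ndarray)啦,这样embed就可以直接调用np.ndarray.tofile()方法存入文件。注意numpy官方文档中说tofile可以写“text”或“binary”文件:不指定sep时(默认sep="")写入的是原始二进制数据,所以这里的embed.txt虽然后缀是.txt,内容其实是二进制的float32数据,而且不保存shape和dtype信息;存入文件后,后面可视化的时候就可以用numpy.fromfile()函数(指定dtype=np.float32)把这部分数据加载回来,再reshape成[-1, 2]。且看visualize.py文件: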
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import offsetbox
def visualize(embed, x_test, y_test):
    """Draw test images at their embedding coordinates.

    Every displayed sample is rendered as its image, tinted with a colour
    picked from the 'tab10' colormap by its label. A sample is skipped when
    it lies too close to one that is already displayed, so thumbnails do not
    pile up on top of each other.

    Args:
        embed: array where embed[i] is the 2-D coordinate of sample i.
        x_test: array where x_test[i] is the 2-D image of sample i
            (pixel values are assumed to lie in [0, 1] -- TODO confirm).
        y_test: array where y_test[i] is the label of sample i; it is
            divided by 10 to select the colour.
    """
    # two ways of visualization: scale to fit [0,1] scale
    # feat = embed - np.min(embed, 0)
    # feat /= np.max(feat, 0)

    # two ways of visualization: leave with original scale
    feat = embed
    ax_min = np.min(embed, 0)
    ax_max = np.max(embed, 0)
    ax_dist_sq = np.sum((ax_max - ax_min) ** 2)
    # Minimum squared gap between displayed points, relative to the
    # squared diagonal of the embedding's bounding box.
    min_dist_sq = 3e-4 * ax_dist_sq

    plt.figure()
    ax = plt.subplot(111)
    colormap = plt.get_cmap('tab10')

    # Start with no displayed points. Seeding this list with a dummy point
    # would suppress real samples that happen to fall next to it when the
    # embedding is left in its original scale.
    shown_images = np.empty((0, feat.shape[1]), dtype=feat.dtype)
    for i in range(feat.shape[0]):
        if shown_images.shape[0]:
            dist = np.sum((feat[i] - shown_images) ** 2, 1)
            if np.min(dist) < min_dist_sq:  # don't show points that are too close
                continue
        shown_images = np.r_[shown_images, [feat[i]]]

        # Blend white background with the label colour, weighted per pixel
        # by the image intensity.
        patch_to_color = np.expand_dims(x_test[i], -1)
        patch_to_color = np.tile(patch_to_color, (1, 1, 3))
        patch_to_color = (1-patch_to_color) * (1,1,1) + patch_to_color * colormap(y_test[i]/10.)[:3]
        imagebox = offsetbox.AnnotationBbox(
            offsetbox.OffsetImage(patch_to_color, zoom=0.5, cmap=plt.cm.gray_r),
            xy=feat[i], frameon=False
        )
        ax.add_artist(imagebox)

    plt.axis([ax_min[0], ax_max[0], ax_min[1], ax_max[1]])
    # plt.xticks([]), plt.yticks([])
    plt.title('Embedding from the last layer of the network')
    plt.show()
if __name__ == "__main__":
    # Stand-alone mode: reload the MNIST test split and the embedding that
    # was previously dumped to disk, then plot them together.
    mnist = input_data.read_data_sets('MNIST_data', one_hot=False)
    y_test = mnist.test.labels
    x_test = mnist.test.images.reshape([-1, 28, 28])

    # The file is read back as a flat float32 buffer; restore two columns.
    embed = np.fromfile('embed.txt', dtype=np.float32).reshape([-1, 2])
    visualize(embed, x_test, y_test)
注意embed是根据test数据得到的,可视化的时候要做的就是把embed和原来的test数据一一对上,然后在embed给出的坐标处画出原来的图片;但是对于有些距离已显示图片很近的图片,为了可视化效果就不显示了。numpy.r_:Translates slice objects to concatenation along the first axis.
np.r_用于沿第一个轴串接两个数组或矩阵(注意它是用方括号来使用的,如np.r_[a, b])。plt.axis()用来设置图的绘图区间,如ax_min[0]表示横轴的最小值,ax_max[0]表示横轴的最大值;同理,ax_min[1]表示纵轴的最小值,ax_max[1]表示纵轴的最大值。ax是之前通过plt.subplot(111)得到的子图(Axes)对象,调用它的add_artist(imagebox)就表示把这张图片画到子图上了,中间过程先不用管。