结构源自ImageNet比赛。
一般的神经网络:卷积层+全连接层;
全卷积网络:没有全连接层。(全连接层需要的参数很多)
特点:
人脸识别:通过观察人脸确定对应的身份,在应用中更多的是确认(verification)。
通过卷积神经网络生成特殊类型的图片:图片的所有像素(pixel)都需要生成,属于多目标回归。
以下是一个VGG16的TensorFlow模型。
utils.py
:import skimage
import skimage.io
import skimage.transform
import numpy as np
# synset = [l.strip() for l in open('synset.txt').readlines()]
# returns image of shape [224, 224, 3]
# [height, width, depth]
def load_image(path):
    """Load an image, crop the centered square and resize it to 224x224.

    The pixel values read from ``path`` are divided by 255.0 and asserted to
    lie within [0, 1] before any cropping happens.

    :param path: location of the image, handed to ``skimage.io.imread``.
    :return: the output of ``skimage.transform.resize`` for the cropped image
        with a target shape of (224, 224).
    """
    pixels = skimage.io.imread(path) / 255.0
    assert (0 <= pixels).all() and (pixels <= 1.0).all()

    # The shorter of the two leading axes fixes the side of the square crop;
    # the longer axis is trimmed by the same amount on both ends.
    dims = pixels.shape
    side = min(dims[:2])
    top = int((dims[0] - side) / 2)
    left = int((dims[1] - side) / 2)
    square = pixels[top: top + side, left: left + side]

    return skimage.transform.resize(square, (224, 224))
# returns the top1 string
def print_prob(prob, file_path):
    """Print the top-1 and top-5 predictions and return the top-1 label.

    :param prob: class scores (assumed to be one-dimensional); the score at
        position ``i`` belongs to the label on line ``i`` of the label file.
    :param file_path: path of a text file holding one class label per line.
    :return: the label whose score is the highest.
    """
    # Read the labels inside a context manager so the file handle is closed
    # as soon as the labels are in memory.
    with open(file_path) as label_file:
        synset = [line.strip() for line in label_file]

    # Class indices ordered from the highest score to the lowest.
    pred = np.argsort(prob)[::-1]

    # Get top1 label
    top1 = synset[pred[0]]
    print(("Top1: ", top1, prob[pred[0]]))

    # Get top5 label. Slicing instead of indexing 0..4 keeps this working
    # when fewer than five classes are available.
    top5 = [(synset[idx], prob[idx]) for idx in pred[:5]]
    print(("Top5: ", top5))
    return top1
def load_image2(path, height=None, width=None):
    """Load an image and resize it, deriving a missing dimension from the
    aspect ratio of the source image.

    The pixel values read from ``path`` are divided by 255.0 before resizing.

    :param path: location of the image, handed to ``skimage.io.imread``.
    :param height: target number of rows, or ``None`` to derive it.
    :param width: target number of columns, or ``None`` to derive it.
    :return: the output of ``skimage.transform.resize`` for the target shape.
        With both ``height`` and ``width`` omitted the source shape is used.
    """
    # load image
    img = skimage.io.imread(path)
    img = img / 255.0

    src_height = img.shape[0]
    src_width = img.shape[1]

    if height is not None and width is not None:
        ny = height
        nx = width
    elif height is not None:
        ny = height
        # Floor division keeps the derived size an integer; true division
        # would pass a float dimension to resize under Python 3.
        nx = int(src_width * ny // src_height)
    elif width is not None:
        nx = width
        ny = int(src_height * nx // src_width)
    else:
        ny = src_height
        nx = src_width
    return skimage.transform.resize(img, (ny, nx))
def test():
    """Resize the sample image to a height of 300 rows and save the result.

    Reads ``./test_data/starry_night.jpg``, scales the width by the same
    factor as the height and writes ``./test_data/test/output.jpg``.
    """
    img = skimage.io.imread("./test_data/starry_night.jpg")
    ny = 300
    # Floor division keeps the target width an integer; true division would
    # pass a float dimension to resize under Python 3.
    nx = img.shape[1] * ny // img.shape[0]
    img = skimage.transform.resize(img, (ny, nx))
    skimage.io.imsave("./test_data/test/output.jpg", img)


# Run the resize smoke test only when this file is executed as a script.
if __name__ == "__main__":
    test()
vgg16.py
:import inspect
import os
import numpy as np
import tensorflow as tf
import time
# Per-channel offsets subtracted in Vgg16.build from the input after it has
# been multiplied by 255.0. Index 0 is applied to the blue channel, index 1 to
# green and index 2 to red.
VGG_MEAN = [103.939, 116.779, 123.68]
class Vgg16:
    """VGG16 graph assembled from weights stored in an ``.npy`` file.

    The weights are read once in ``__init__`` and embedded into the graph as
    ``tf.constant`` nodes by ``build``. After ``build`` the intermediate
    activations are available as attributes (``conv1_1`` ... ``pool5``,
    ``fc6`` ... ``fc8``, ``prob``).
    """

    def __init__(self, vgg16_npy_path=None):
        """Load the weight dictionary.

        :param vgg16_npy_path: path of the ``.npy`` weight file. When omitted,
            ``vgg16.npy`` in the directory of the file defining this class is
            used and the resolved path is printed.
        """
        if vgg16_npy_path is None:
            path = inspect.getfile(Vgg16)
            path = os.path.abspath(os.path.join(path, os.pardir))
            path = os.path.join(path, "vgg16.npy")
            vgg16_npy_path = path
            print(path)

        # The file holds a pickled dict wrapped in a 0-d object array, which
        # ``.item()`` unwraps. NumPy >= 1.16.3 refuses to unpickle unless
        # allow_pickle=True is passed explicitly.
        # NOTE: unpickling can execute arbitrary code -- only load weight
        # files that come from a trusted source.
        self.data_dict = np.load(
            vgg16_npy_path, encoding='latin1', allow_pickle=True).item()
        print("npy file loaded")

    def build(self, rgb):
        """
        load variable from npy to build the VGG

        Stores every layer output as an attribute of the instance and sets
        ``data_dict`` to ``None`` once the graph is complete.

        :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1]
        """
        start_time = time.time()
        print("build model started")
        rgb_scaled = rgb * 255.0

        # Convert RGB to BGR
        red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb_scaled)
        assert red.get_shape().as_list()[1:] == [224, 224, 1]
        assert green.get_shape().as_list()[1:] == [224, 224, 1]
        assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        bgr = tf.concat(axis=3, values=[
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2],
        ])
        assert bgr.get_shape().as_list()[1:] == [224, 224, 3]

        self.conv1_1 = self.conv_layer(bgr, "conv1_1")
        self.conv1_2 = self.conv_layer(self.conv1_1, "conv1_2")
        self.pool1 = self.max_pool(self.conv1_2, 'pool1')

        self.conv2_1 = self.conv_layer(self.pool1, "conv2_1")
        self.conv2_2 = self.conv_layer(self.conv2_1, "conv2_2")
        self.pool2 = self.max_pool(self.conv2_2, 'pool2')

        self.conv3_1 = self.conv_layer(self.pool2, "conv3_1")
        self.conv3_2 = self.conv_layer(self.conv3_1, "conv3_2")
        self.conv3_3 = self.conv_layer(self.conv3_2, "conv3_3")
        self.pool3 = self.max_pool(self.conv3_3, 'pool3')

        self.conv4_1 = self.conv_layer(self.pool3, "conv4_1")
        self.conv4_2 = self.conv_layer(self.conv4_1, "conv4_2")
        self.conv4_3 = self.conv_layer(self.conv4_2, "conv4_3")
        self.pool4 = self.max_pool(self.conv4_3, 'pool4')

        self.conv5_1 = self.conv_layer(self.pool4, "conv5_1")
        self.conv5_2 = self.conv_layer(self.conv5_1, "conv5_2")
        self.conv5_3 = self.conv_layer(self.conv5_2, "conv5_3")
        self.pool5 = self.max_pool(self.conv5_3, 'pool5')

        self.fc6 = self.fc_layer(self.pool5, "fc6")
        assert self.fc6.get_shape().as_list()[1:] == [4096]
        self.relu6 = tf.nn.relu(self.fc6)

        self.fc7 = self.fc_layer(self.relu6, "fc7")
        self.relu7 = tf.nn.relu(self.fc7)

        self.fc8 = self.fc_layer(self.relu7, "fc8")

        self.prob = tf.nn.softmax(self.fc8, name="prob")

        # The weights now live in the graph as constants; drop the dict
        # reference (presumably so its memory can be reclaimed). The get_*
        # helpers can no longer be used on this instance afterwards.
        self.data_dict = None
        print(("build model finished: %ds" % (time.time() - start_time)))

    def avg_pool(self, bottom, name):
        """Return a 2x2, stride-2, SAME-padded average pooling of ``bottom``."""
        return tf.nn.avg_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def max_pool(self, bottom, name):
        """Return a 2x2, stride-2, SAME-padded max pooling of ``bottom``."""
        return tf.nn.max_pool(bottom, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name=name)

    def conv_layer(self, bottom, name):
        """Apply the stored convolution ``name`` followed by bias and ReLU.

        :param bottom: input tensor of the layer.
        :param name: key of the layer in ``data_dict``; also used as the
            variable scope name.
        :return: the ReLU activation of the stride-1, SAME-padded convolution.
        """
        with tf.variable_scope(name):
            filt = self.get_conv_filter(name)
            conv = tf.nn.conv2d(bottom, filt, [1, 1, 1, 1], padding='SAME')

            conv_biases = self.get_bias(name)
            bias = tf.nn.bias_add(conv, conv_biases)

            relu = tf.nn.relu(bias)
            return relu

    def fc_layer(self, bottom, name):
        """Apply the stored fully connected layer ``name`` (no activation).

        :param bottom: input tensor; every axis after the first is flattened.
        :param name: key of the layer in ``data_dict``; also used as the
            variable scope name.
        :return: ``matmul(flattened input, weights)`` with the biases added.
        """
        with tf.variable_scope(name):
            # Flatten everything but the leading axis into a single axis.
            shape = bottom.get_shape().as_list()
            dim = 1
            for d in shape[1:]:
                dim *= d
            x = tf.reshape(bottom, [-1, dim])

            weights = self.get_fc_weight(name)
            biases = self.get_bias(name)

            # Fully connected layer: bias_add adds the bias vector to every
            # row of the matmul result.
            fc = tf.nn.bias_add(tf.matmul(x, weights), biases)

            return fc

    def get_conv_filter(self, name):
        """Return entry 0 of ``data_dict[name]`` as a constant named "filter"."""
        return tf.constant(self.data_dict[name][0], name="filter")

    def get_bias(self, name):
        """Return entry 1 of ``data_dict[name]`` as a constant named "biases"."""
        return tf.constant(self.data_dict[name][1], name="biases")

    def get_fc_weight(self, name):
        """Return entry 0 of ``data_dict[name]`` as a constant named "weights"."""
        return tf.constant(self.data_dict[name][0], name="weights")
vgg16_test.py
:import numpy as np
import tensorflow as tf
# import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import skimage
import vgg16
import utils
# Load the sample image and wrap it in a batch of one for the network input.
img1 = utils.load_image("./test_data/dog.png")
print(img1.shape)
batch = img1.reshape((1, 224, 224, 3))

# plot the image
# imgshow1=plt.imshow(img1)

# with tf.Session(config=tf.ConfigProto(gpu_options=(tf.GPUOptions(per_process_gpu_memory_fraction=0.7)))) as sess:
with tf.device('/cpu:0'):
    with tf.Session() as sess:
        images = tf.placeholder("float", [1, 224, 224, 3])
        feed_dict = {images: batch}

        vgg = vgg16.Vgg16()
        with tf.name_scope("content_vgg"):
            vgg.build(images)

        prob = sess.run(vgg.prob, feed_dict=feed_dict)
        # Indices of the five highest-scoring classes, best first.
        top5 = np.argsort(prob[0])[-1:-6:-1]
        for label in top5:
            print(label)

        pool1 = sess.run(vgg.pool1, feed_dict=feed_dict)
        print(pool1.shape)
        conv3_3 = sess.run(vgg.conv3_3, feed_dict=feed_dict)
        print(conv3_3.shape)

# now let's plot the model filters
# build() sets data_dict to None on the instance above, so the weights are
# loaded again into a fresh instance.
vgg = vgg16.Vgg16()
# get the saved parameter dict keys
print(vgg.data_dict.keys())
# show the first conv layer
filter_conv1 = vgg.get_conv_filter("conv1_1")
print('filter_conv1', filter_conv1.shape)
show_conv1 = tf.Print(filter_conv1[:, :, :, :5], [filter_conv1[:, :, :, :5]])

filter_conv3 = vgg.get_conv_filter("conv3_3")
print('filter_conv3', filter_conv3.shape)
show_conv3 = tf.Print(filter_conv3[:, :, :3, :5], [filter_conv3[:, :, :3, :5]])

# tf.Print only emits its output when the op it returns is evaluated, so the
# two print ops are run explicitly instead of being discarded.
with tf.Session() as print_sess:
    print_sess.run([show_conv1, show_conv3])