本文是深度学习实战系列文章,主要是利用官方提供的 VGG 19 层网络预训练模型中的 weight 和 bias 数值,对输入的任意一张图像进行前向传播(只做推理,不做训练),从而得到各层的特征图。
一. 代码
# coding: utf-8
import scipy.io
import numpy as np
import os
import scipy.misc
import matplotlib.pyplot as plt
import tensorflow as tf
def _conv_layer(input, weights, bias):
    """Apply a stride-1 2-D convolution with fixed kernels, then add the bias.

    Args:
        input: Tensor passed to ``tf.nn.conv2d``.
        weights: Kernel values; wrapped in ``tf.constant`` so they enter the
            graph as constants rather than trainable variables.
        bias: Bias values added to the convolution output.

    Returns:
        The tensor produced by ``tf.nn.bias_add`` on the convolution result.
    """
    # ``padding`` is a required argument of tf.nn.conv2d; the call in the
    # source was cut off before it.  'SAME' keeps the spatial size unchanged
    # at stride 1 -- NOTE(review): confirm this matches the intended model.
    conv = tf.nn.conv2d(input, tf.constant(weights), strides=(1, 1, 1, 1),
                        padding='SAME')
    return tf.nn.bias_add(conv, bias)
def _pool_layer(input):
    """Apply 2x2 max pooling with stride 2 to ``input``.

    Args:
        input: Tensor passed to ``tf.nn.max_pool``.

    Returns:
        The pooled tensor.
    """
    # ``padding`` is a required argument of tf.nn.max_pool; the call in the
    # source was cut off before it.  NOTE(review): 'SAME' is assumed here to
    # match the convolution layers -- confirm against the intended model.
    return tf.nn.max_pool(input, ksize=(1, 2, 2, 1), strides=(1, 2, 2, 1),
                          padding='SAME')
def preprocess(image, mean_pixel):
    """Return ``image`` with ``mean_pixel`` subtracted from it."""
    centered = image - mean_pixel
    return centered
def unprocess(image, mean_pixel):
    """Return ``image`` with ``mean_pixel`` added back (inverse of preprocess)."""
    restored = image + mean_pixel
    return restored
def imread(path):
    """Read the image at ``path`` and return it as a float64 array.

    Args:
        path: Location of the image file handed to ``scipy.misc.imread``.

    Returns:
        The decoded image converted with ``astype(np.float64)``.
    """
    # ``np.float`` was only an alias of the builtin ``float`` (i.e. float64)
    # and no longer exists in current NumPy, so the dtype is spelled out.
    # NOTE(review): scipy.misc.imread is missing from recent SciPy releases --
    # confirm the installed version still provides it.
    return scipy.misc.imread(path).astype(np.float64)
def imsave(path, img):
    """Clamp ``img`` to [0, 255], convert it to uint8 and write it to ``path``.

    Args:
        path: Destination handed to ``scipy.misc.imsave``.
        img: Array of pixel values; values outside [0, 255] are clipped.
    """
    clipped = np.clip(img, 0, 255)
    scipy.misc.imsave(path, clipped.astype(np.uint8))
# Progress banner: every helper above has been defined.
print("Functions for VGG ready")
def net(data_path, input_image):
    """Build the VGG-19 convolutional stack from a MatConvNet ``.mat`` file.

    Only the conv/relu/pool layers listed in ``layers`` are built; the
    tensors are chained so that each layer consumes the previous output.

    Args:
        data_path: Path of the ``.mat`` file loaded with ``scipy.io.loadmat``.
        input_image: Tensor fed to the first layer.

    Returns:
        A tuple ``(net, mean_pixel, layers)`` where ``net`` is a dict mapping
        every layer name to its output tensor, ``mean_pixel`` is the stored
        normalization image averaged over its first two axes, and ``layers``
        is the tuple of layer names in build order.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',

        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',

        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',

        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',

        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4',
    )

    data = scipy.io.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    weights = data['layers'][0]

    # Named ``outputs`` rather than ``net`` so the dict does not shadow this
    # function's own name.
    outputs = {}
    current = input_image
    for i, name in enumerate(layers):
        # The first four characters of a layer name identify its type.
        kind = name[:4]
        if kind == 'conv':
            kernels, bias = weights[i][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            # Flatten the bias to 1-D before it is added to the conv output.
            bias = bias.reshape(-1)
            current = _conv_layer(current, kernels, bias)
        elif kind == 'relu':
            current = tf.nn.relu(current)
        elif kind == 'pool':
            current = _pool_layer(current)
        outputs[name] = current

    # Every layer name must have produced exactly one entry.
    assert len(outputs) == len(layers)
    return outputs, mean_pixel, layers
print("Network for VGG ready")

# Model weights and the test image are looked up relative to the current
# working directory (the directory the script is launched from).
cwd = os.getcwd()
VGG_PATH = cwd + "/data/imagenet-vgg-verydeep-19.mat"
IMG_PATH = cwd + "/data/cat.jpg"

# Set to False to print the feature shapes without drawing any figure.
PLOT_RESPONSES = True

input_image = imread(IMG_PATH)
# Prepend a batch axis of size 1 to the image's three dimensions.
shape = (1, input_image.shape[0], input_image.shape[1], input_image.shape[2])

with tf.Session() as sess:
    image = tf.placeholder('float', shape=shape)
    nets, mean_pixel, all_layers = net(VGG_PATH, image)
    # Subtract the mean pixel and wrap the image in a batch of one.
    input_image_pre = np.array([preprocess(input_image, mean_pixel)])
    layers = all_layers  # For all layers
    # layers = ('relu2_1', 'relu3_1', 'relu4_1')
    for i, layer in enumerate(layers):
        print("[%d/%d] %s" % (i+1, len(layers), layer))
        # Run the graph up to this layer for the single input image.
        features = nets[layer].eval(feed_dict={image: input_image_pre})
        print(" Type of 'features' is ", type(features))
        print(" Shape of 'features' is %s" % (features.shape,))
        # Plot response
        if PLOT_RESPONSES:
            plt.figure(i+1, figsize=(10, 5))
            # Only the first channel of the first batch item is drawn.
            plt.matshow(features[0, :, :, 0], cmap=plt.cm.gray, fignum=i+1)
            plt.title(layer)
            # Without show() the figure is built but never displayed when
            # this runs as a plain script.
            plt.show()
二. VGG19网络结构
0 is conv1_1 (3, 3, 3, 64)
1 is relu activation function
2 is conv1_2 (3, 3, 64, 64)
3 is relu
4 is maxpool
5 is conv2_1 (3, 3, 64, 128)
6 is relu
7 is conv2_2 (3, 3, 128, 128)
8 is relu
9 is maxpool
10 is conv3_1 (3, 3, 128, 256)
11 is relu
12 is conv3_2 (3, 3, 256, 256)
13 is relu
14 is conv3_3 (3, 3, 256, 256)
15 is relu
16 is conv3_4 (3, 3, 256, 256)
17 is relu
18 is maxpool
19 is conv4_1 (3, 3, 256, 512)
20 is relu
21 is conv4_2 (3, 3, 512, 512)
22 is relu
23 is conv4_3 (3, 3, 512, 512)
24 is relu
25 is conv4_4 (3, 3, 512, 512)
26 is relu
27 is maxpool
28 is conv5_1 (3, 3, 512, 512)
29 is relu
30 is conv5_2 (3, 3, 512, 512)
31 is relu
32 is conv5_3 (3, 3, 512, 512)
33 is relu
34 is conv5_4 (3, 3, 512, 512)
35 is relu
36 is maxpool
37 is fullyconnected (7, 7, 512, 4096)
38 is relu
39 is fullyconnected (1, 1, 4096, 4096)
40 is relu
41 is fullyconnected (1, 1, 4096, 1000)
42 is softmax
三. 代码解析
VGG_PATH = cwd + "/data/imagenet-vgg-verydeep-19.mat"
IMG_PATH = cwd + "/data/cat.jpg"
这里的 cwd 由 os.getcwd() 得到,表示当前工作目录(即运行脚本时所在的目录,不一定是代码文件所在的目录);模型文件和图片都需要放在该目录下的 data 文件夹中。