VGG-Net的结构图来自论文《Very Deep Convolutional Networks for Large-Scale Image Recognition》,该论文发表于ICLR 2015。与AlexNet相比,VGG对图片的估值更精确,也更节省空间。其具体网络结构如下所示:
下载imagenet-vgg-verydeep-19.mat模型数据文件,加载模型文件,进行数据架构解析:
import scipy.io
import numpy as np
import os
import scipy.misc
import matplotlib.pyplot as plt
import tensorflow as tf
import cmd
from cmd import Cmd
# The model file is looked up relative to the current working directory.
cwd = os.getcwd()
print("cwd", cwd)
# Path of the pretrained model data file
VGG_PATH = cwd + "/model/imagenet-vgg-verydeep-19.mat"
# Load the MATLAB file and list its top-level keys.
data = scipy.io.loadmat(VGG_PATH)
print("data", data.keys())
输出为:可知data数据结构为字典类型,这里主要关注layers以及normalization类型。
data dict_keys(['__globals__', 'classes', 'layers', 'normalization', '__header__', '__version__'])
由以上网络结构可知,输入图片为224*224*3的彩色图片,观察一下data['normalization']数据结构
# Print the shape of the array stored under data['normalization'][0][0][0].
print(data['normalization'][0][0][0].shape)
其大小刚好对应图片的均值:
(224, 224, 3)
再观察一下data['layers']数据结构,debug模式有:
其中layers具体结构:可知有43个数组元素:
第一个数组元素为卷积层1
# Dump the first layer record (index 0) of data['layers'].
print("conv1", data['layers'][0][0])
array([[1., 1., 1., 1.]]), array(['conv'], dtype='<U4'), ...(其余输出省略)
第二个为relu层:
# Dump the second layer record (index 1) of data['layers'].
print("relu", data['layers'][0][1])
relu [[(array(['relu'], dtype='<U4'), ...(其余输出省略)
由以上分析可知,data['layers'][0][i]为某一层的具体信息数组,其中每个元素均为array数组,外面还套着两层多余的括号,需要先去掉。依次遍历每一层,即可获取VGG网络的网络结构,其中倒数第二个array为该层的名称信息:
# Walk every layer record in the loaded file and print its 1-based position
# together with the second-to-last field of the record. The number of layers
# is taken from the array itself rather than hard-coded.
for i, layer_record in enumerate(data['layers'][0]):
    # Each record is wrapped in two extra array levels, hence the [0][0].
    print(i+1, layer_record[0][0][-2])
最终输出为:
1 ['conv1_1']
2 ['relu']
3 ['conv1_2']
4 ['relu']
5 ['max']
6 ['conv2_1']
7 ['relu']
8 ['conv2_2']
9 ['relu']
10 ['max']
11 ['conv3_1']
12 ['relu']
13 ['conv3_2']
14 ['relu']
15 ['conv3_3']
16 ['relu']
17 ['conv3_4']
18 ['relu']
19 ['max']
20 ['conv4_1']
21 ['relu']
22 ['conv4_2']
23 ['relu']
24 ['conv4_3']
25 ['relu']
26 ['conv4_4']
27 ['relu']
28 ['max']
29 ['conv5_1']
30 ['relu']
31 ['conv5_2']
32 ['relu']
33 ['conv5_3']
34 ['relu']
35 ['conv5_4']
36 ['relu']
37 ['max']
38 ['fc6']
39 ['relu']
40 ['fc7']
41 ['relu']
42 ['fc8']
43 ['softmax']
可知网络结构主要由卷积层、relu层、max池化层、fc全连接层以及softmax分类层组成。
VGG网络参数结构:
获取卷积层1的参数W,b:
# Print fields 0 and 1 of the first layer record, labelled as the weights (W)
# and bias (b) of the first convolution layer.
print("conv1W", data['layers'][0][0][0][0][0])
print("conv1b", data['layers'][0][0][0][0][1])
利用VGG来观察卷积神经网络在处理图片时,各层对于图片的直观处理效果,代码如下:
利用VGG训练结果,输入一张待处理图片,观察图片在VGG网络中各层是如何变化的
import scipy.io
import numpy as np
import os
import scipy.misc
import matplotlib.pyplot as plt
import tensorflow as tf
import cmd
from cmd import Cmd
from PIL import Image
# Convolution layer
def _conv_layer(input, weights, bias):
    """Convolve ``input`` with fixed ``weights`` and add ``bias``.

    The kernel is wrapped in ``tf.constant``. The convolution uses a stride
    of 1 in every dimension and 'SAME' padding.

    Returns the tensor produced by ``tf.nn.bias_add``.
    """
    kernel = tf.constant(weights)
    convolved = tf.nn.conv2d(
        input,
        kernel,
        strides=(1, 1, 1, 1),
        padding='SAME',
    )
    return tf.nn.bias_add(convolved, bias)
# Pooling layer
def _pool_layer(input):
    """Apply max pooling with a 2x2 window, stride 2 and 'SAME' padding."""
    window = (1, 2, 2, 1)
    step = (1, 2, 2, 1)
    return tf.nn.max_pool(input, ksize=window, strides=step, padding='SAME')
# Image preprocessing: mean subtraction
def preprocess(image, mean_pixel):
    """Return ``image`` with ``mean_pixel`` subtracted from it."""
    centered = image - mean_pixel
    return centered
# #将图片加均值
# def unprocess(image, mean_pixel):
# return image + mean_pixel
# Read an image file into a floating-point array
def imread(path):
    """Load the image at ``path`` and return its pixels as a float64 array.

    ``scipy.misc.imread`` was removed in SciPy 1.2 and the ``np.float`` alias
    was removed in NumPy 1.24, so the file is decoded with Pillow (``Image``
    is imported at the top of this script) and converted with ``np.float64``,
    which is the type ``np.float`` used to alias.

    The image is not converted to another colour mode; the returned array has
    whatever layout Pillow produces for the file.
    """
    with Image.open(path) as img:
        # astype() copies the data, so the result stays valid after the
        # file handle is closed.
        return np.asarray(img).astype(np.float64)
# #保存图片数据
# def imsave(path, img):
# img = np.clip(img, 0, 255).astype(np.uint8)
# scipy.misc.imsave(path, img)
# Progress marker: the helper functions above have been defined.
print ("Functions for VGG ready")
def net(data_path, input_image):
    """Build the conv/relu/pool stack listed in ``layers`` on ``input_image``.

    The .mat file at ``data_path`` is loaded with ``scipy.io.loadmat``.
    Convolution kernels and biases are read from its ``'layers'`` entry, and
    a per-channel mean is computed from its ``'normalization'`` entry by
    averaging over the first two axes.

    Returns a tuple ``(activations, mean_pixel, layers)``: a dict mapping
    each layer name to its output tensor, the mean computed above, and the
    tuple of layer names in order.
    """
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'conv3_4', 'relu3_4', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'conv4_4', 'relu4_4', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'conv5_4', 'relu5_4', 'pool5',
    )

    data = scipy.io.loadmat(data_path)
    mean = data['normalization'][0][0][0]
    mean_pixel = np.mean(mean, axis=(0, 1))
    weights = data['layers'][0]

    activations = {}
    current = input_image
    for index, layer_name in enumerate(layers):
        # The first four characters of the name select the layer type.
        prefix = layer_name[:4]
        if prefix == 'relu':
            current = tf.nn.relu(current)
        elif prefix == 'pool':
            current = _pool_layer(current)
        elif prefix == 'conv':
            kernels, bias = weights[index][0][0][0][0]
            # matconvnet: weights are [width, height, in_channels, out_channels]
            # tensorflow: weights are [height, width, in_channels, out_channels]
            kernels = np.transpose(kernels, (1, 0, 2, 3))
            current = _conv_layer(current, kernels, bias.reshape(-1))
        activations[layer_name] = current

    assert len(activations) == len(layers)
    return activations, mean_pixel, layers
print ("Network for VGG ready")

# Both the model and the sample image are looked up under ./model relative to
# the current working directory.
cwd = os.getcwd()
print("cwd", cwd)
# Path of the model data file
VGG_PATH = cwd + "/model/imagenet-vgg-verydeep-19.mat"
# Path of the image to process
IMG_PATH = cwd + "/model/cat.jpg"

# Load the image as a float array
input_image = imread(IMG_PATH)
print("input_image", input_image.shape)

# Show the original picture
img = Image.open(IMG_PATH)
plt.figure("origin pic")
plt.imshow(img)
plt.colorbar()
plt.show()

# The placeholder takes a batch of one image with the same dimensions as the
# loaded picture.
shape = (
    1,
    input_image.shape[0],
    input_image.shape[1],
    input_image.shape[2],
)

with tf.Session() as sess:
    # Placeholder that fixes the shape of the image fed to the network
    image = tf.placeholder('float', shape=shape)
    # Build the VGG layers on top of the placeholder (nothing is trained here)
    nets, mean_pixel, all_layers = net(VGG_PATH, image)

    # Subtract the mean and wrap the result in a batch of size one
    input_image_pre = np.array([preprocess(input_image, mean_pixel)])
    print(input_image_pre[0].shape)

    # Show the mean-subtracted picture
    plt.figure("preprocess pic")
    plt.imshow(input_image_pre[0])
    plt.colorbar()
    plt.show()

    # Visualise every layer; assign a subset such as
    # ('relu2_1', 'relu3_1', 'relu4_1') here to look at fewer layers.
    layers = all_layers
    for i, layer in enumerate(layers):
        print ("[%d/%d] %s" % (i+1, len(layers), layer))
        features = sess.run(nets[layer], feed_dict={image: input_image_pre})
        print (" Type of 'features' is ", type(features))
        print (" Shape of 'features' is %s" % (features.shape,))

        # Plot the response of channel 0 for this layer in its own figure
        plt.figure(i+1, figsize=(10, 5))
        plt.matshow(features[0, :, :, 0], fignum=i+1)
        plt.title(layer)
        plt.colorbar()
        plt.show()
运行上述程序,依次观察:
原图片以及均值化处理图片
卷积层1以及relu激活函数2
卷积层3以及激活函数4
池化层5
卷积层6以及激活函数7
卷积层8以及激活函数9
池化层10
卷积11,激活函数12,卷积13,激活14
卷积15,激活16,卷积17,激活18
池化19
卷积20 激活21 卷积22 激活23
卷积24 激活25 卷积26 激活27
池化28
卷积29 激活30 卷积31 激活32
卷积33 激活34 卷积35 激活36
池化37
在各层变化过程中,数据规格是如何变化的
[1/37] conv1_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 500, 500, 64)
[2/37] relu1_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 500, 500, 64)
[3/37] conv1_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 500, 500, 64)
[4/37] relu1_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 500, 500, 64)
[5/37] pool1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 250, 250, 64)
[6/37] conv2_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 250, 250, 128)
[7/37] relu2_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 250, 250, 128)
[8/37] conv2_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 250, 250, 128)
[9/37] relu2_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 250, 250, 128)
[10/37] pool2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 128)
[11/37] conv3_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[12/37] relu3_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[13/37] conv3_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[14/37] relu3_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[15/37] conv3_3
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[16/37] relu3_3
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[17/37] conv3_4
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[18/37] relu3_4
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 125, 125, 256)
[19/37] pool3
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 256)
[20/37] conv4_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[21/37] relu4_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[22/37] conv4_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[23/37] relu4_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[24/37] conv4_3
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[25/37] relu4_3
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[26/37] conv4_4
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[27/37] relu4_4
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 63, 63, 512)
[28/37] pool4
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[29/37] conv5_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[30/37] relu5_1
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[31/37] conv5_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[32/37] relu5_2
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[33/37] conv5_3
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[34/37] relu5_3
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[35/37] conv5_4
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[36/37] relu5_4
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 32, 32, 512)
[37/37] pool5
Type of 'features' is  <class 'numpy.ndarray'>
Shape of 'features' is (1, 16, 16, 512)
参考链接:
https://blog.csdn.net/cskywit/article/details/79185792
唐宇迪深度学习