VGG-Nets是由牛津大学计算机视觉组(Visual Geometry Group)和Google Deepmind公司研究员一起研发的深度卷积神经网络,是2014年ImageNet竞赛定位任务的第一名和分类任务的第二名的中的基础网络。VGG可以看成是加深版本的AlexNet. 都是conv layer + FC layer,层数高达十多层,当然以现在的目光看来VGG真的称不上是一个very deep的网络。
闪光点:
与AlexNet相比,可以看出VGG-Nets的卷积核尺寸还是很小的,比如AlexNet第一层的卷积层用到的卷积核尺寸就是11*11,这是一个很大卷积核了。而反观VGG-Nets,用到的卷积核的尺寸无非都是1×1和3×3的小卷积核,可以替代大的filter尺寸。
3×3卷积核的优点:
1*1卷积核的优点:
上个表格是描述的是VGG-Net的网络结构以及诞生过程。VGGNet不像AlexNet那样容易训练,过深的层数在训练的过程中容易导致不收敛,从而模型爆炸,为了解决初始化(权重初始化)等问题,VGG采用的是一种Pre-training的方式,这种方式在经典的神经网络中经常见得到,就是先训练一部分小网络,然后再确保这部分网络稳定之后,再在这基础上逐渐加深。上表从左到右体现的就是这个过程,并且当网络处于D阶段的时候,效果是最优的,因此D阶段的网络也就是VGG-16了!E阶段得到的网络就是VGG-19了!VGG-16的16指的是conv+fc的总层数是16,不包括max pool的层数。
VGG-16的网络结构:
由上图看出,VGG-16的结构非常整洁,深度较AlexNet深得多,里面包含多个conv->conv->max_pool这类的结构,VGG的卷积层都是same的卷积,即卷积过后的输出图像的尺寸与输入是一致的,它的下采样完全是由max pooling来实现。
VGG网络后接3个全连接层,filter的个数(卷积后的输出通道数)从64开始,然后没接一个pooling后其成倍的增加,128、512,VGG的注意贡献是使用小尺寸的filter,及有规则的卷积-池化操作。
代码:
# coding:utf8
########################################################################################
# Davi Frossard, 2016 #
# VGG16 implementation in TensorFlow #
# Details: #
# http://www.cs.toronto.edu/~frossard/post/vgg16/ #
# #
# Model from https://gist.github.com/ksimonyan/211839e770f7b538e2d8#file-readme-md #
# Weights from Caffe converted using https://github.com/ethereon/caffe-tensorflow
# update: 2017-7-30 delphifan
########################################################################################
import tensorflow as tf
import numpy as np
from scipy.misc import imread, imresize
from imagenet_classes import class_names
class vgg16:
def __init__(self, imgs, weights=None, sess=None):
self.imgs = imgs
self.convlayers()
self.fc_layers()
self.probs = tf.nn.softmax(self.fc3l) #计算softmax层输出
if weights is not None and sess is not None: #载入pre-training的权重
self.load_weights(weights, sess)
def convlayers(self):
self.parameters = []
# zero-mean input
# 去RGB均值操作(这里RGB均值为原数据集的均值)
with tf.name_scope('preprocess') as scope:
mean = tf.constant([123.68, 116.779, 103.939],
dtype=tf.float32, shape=[1, 1, 1, 3], name='img_mean')
images = self.imgs-mean
# conv1_1
with tf.name_scope('conv1_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 3, 64], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv1_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv1_2
with tf.name_scope('conv1_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 64], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv1_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[64], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv1_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool1
self.pool1 = tf.nn.max_pool(self.conv1_2,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool1')
# conv2_1
with tf.name_scope('conv2_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 64, 128], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv2_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv2_2
with tf.name_scope('conv2_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 128], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv2_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[128], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv2_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool2
self.pool2 = tf.nn.max_pool(self.conv2_2,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool2')
# conv3_1
with tf.name_scope('conv3_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 128, 256], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv3_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv3_2
with tf.name_scope('conv3_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv3_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv3_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv3_3
with tf.name_scope('conv3_3') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv3_2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv3_3 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool3
self.pool3 = tf.nn.max_pool(self.conv3_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool3')
# conv4_1
with tf.name_scope('conv4_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool3, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv4_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv4_2
with tf.name_scope('conv4_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv4_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv4_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv4_3
with tf.name_scope('conv4_3') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv4_2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv4_3 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool4
self.pool4 = tf.nn.max_pool(self.conv4_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
# conv5_1
with tf.name_scope('conv5_1') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.pool4, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv5_1 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv5_2
with tf.name_scope('conv5_2') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv5_1, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv5_2 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# conv5_3
with tf.name_scope('conv5_3') as scope:
kernel = tf.Variable(tf.truncated_normal([3, 3, 512, 512], dtype=tf.float32,
stddev=1e-1), name='weights')
conv = tf.nn.conv2d(self.conv5_2, kernel, [1, 1, 1, 1], padding='SAME')
biases = tf.Variable(tf.constant(0.0, shape=[512], dtype=tf.float32),
trainable=True, name='biases')
out = tf.nn.bias_add(conv, biases)
self.conv5_3 = tf.nn.relu(out, name=scope)
self.parameters += [kernel, biases]
# pool5
self.pool5 = tf.nn.max_pool(self.conv5_3,
ksize=[1, 2, 2, 1],
strides=[1, 2, 2, 1],
padding='SAME',
name='pool4')
def fc_layers(self):
# fc1
with tf.name_scope('fc1') as scope:
# 取出shape中第一个元素后的元素 例如x=[1,2,3] -->x[1:]=[2,3]
# np.prod是计算数组的元素乘积 x=[2,3] np.prod(x) = 2*3 = 6
# 这里代码可以使用 shape = self.pool5.get_shape()
#shape = shape[1].value * shape[2].value * shape[3].value 代替
shape = int(np.prod(self.pool5.get_shape()[1:]))
fc1w = tf.Variable(tf.truncated_normal([shape, 4096],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc1b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
trainable=True, name='biases')
pool5_flat = tf.reshape(self.pool5, [-1, shape])
fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc1w), fc1b)
self.fc1 = tf.nn.relu(fc1l)
self.parameters += [fc1w, fc1b]
# fc2
with tf.name_scope('fc2') as scope:
fc2w = tf.Variable(tf.truncated_normal([4096, 4096],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc2b = tf.Variable(tf.constant(1.0, shape=[4096], dtype=tf.float32),
trainable=True, name='biases')
fc2l = tf.nn.bias_add(tf.matmul(self.fc1, fc2w), fc2b)
self.fc2 = tf.nn.relu(fc2l)
self.parameters += [fc2w, fc2b]
# fc3
with tf.name_scope('fc3') as scope:
fc3w = tf.Variable(tf.truncated_normal([4096, 1000],
dtype=tf.float32,
stddev=1e-1), name='weights')
fc3b = tf.Variable(tf.constant(1.0, shape=[1000], dtype=tf.float32),
trainable=True, name='biases')
self.fc3l = tf.nn.bias_add(tf.matmul(self.fc2, fc3w), fc3b)
self.parameters += [fc3w, fc3b]
def load_weights(self, weight_file, sess):
weights = np.load(weight_file)
keys = sorted(weights.keys())
for i, k in enumerate(keys):
print i, k, np.shape(weights[k])
sess.run(self.parameters[i].assign(weights[k]))
if __name__ == '__main__':
sess = tf.Session()
imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
vgg = vgg16(imgs, 'vgg16_weights.npz', sess) # 载入预训练好的模型权重
img1 = imread('images.jpg', mode='RGB') #载入需要判别的图片
img1 = imresize(img1, (224, 224))
img2 = imread('dog.jpg', mode='RGB')
img2 = imresize(img2, (224, 224))
img3 = imread('laska.png', mode='RGB')
img3 = imresize(img3, (224, 224))
#计算VGG16的softmax层输出(返回是列表,每个元素代表一个判别类型的数组)
prob = sess.run(vgg.probs, feed_dict={vgg.imgs: [img1, img2, img3]})
for pro in prob:
# 源代码使用(np.argsort(prob)[::-1])[0:5]
# np.argsort(x)返回的数组值从小到大的索引值
#argsort(-x)从大到小排序返回索引值 [::-1]是使用切片将数组从大到小排序
#preds = (np.argsort(prob)[::-1])[0:5]
preds = (np.argsort(-pro))[0:5] #取出top5的索引
for p in preds:
print class_names[p], pro[p]
print '\n'
参考资源:
https://blog.csdn.net/u011974639/article/details/76146822
https://www.cnblogs.com/skyfsm/p/8451834.html