VGGnet网络结构详解

 VGG网络结构:

VGGnet网络结构详解_第1张图片

下面计算每一层输出特征图的尺寸(像素)和参数量:
输入:224*224*3
1. conv3 - 64(卷积核的数量):kernel size:3 stride:1 pad:1
像素:(224-3+2*1)/1+1=224 224*224*64
参数: (3*3*3)*64 =1728
2. conv3 - 64:kernel size:3 stride:1 pad:1
像素: (224-3+1*2)/1+1=224 224*224*64
参数: (3*3*64)*64 =36864
3. pool2 kernel size:2 stride:2 pad:0
像素: (224-2)/2+1 = 112 112*112*64
参数: 0
4.conv3-128:kernel size:3 stride:1 pad:1
像素: (112-3+2*1)/1+1 = 112 112*112*128
参数: (3*3*64)*128 =73728
5.conv3-128:kernel size:3 stride:1 pad:1
像素: (112-3+2*1)/1+1 = 112 112*112*128
参数: (3*3*128)*128 =147456
6.pool2: kernel size:2 stride:2 pad:0
像素: (112-2)/2+1=56 56*56*128
参数:0
7.conv3-256: kernel size:3 stride:1 pad:1
像素: (56-3+2*1)/1+1=56 56*56*256
参数:(3*3*128)*256=294912
8.conv3-256: kernel size:3 stride:1 pad:1
像素: (56-3+2*1)/1+1=56 56*56*256
参数:(3*3*256)*256=589824
9.conv3-256: kernel size:3 stride:1 pad:1
像素: (56-3+2*1)/1+1=56 56*56*256
参数:(3*3*256)*256=589824
10.pool2: kernel size:2 stride:2 pad:0
像素:(56 - 2)/2+1=28 28*28*256
参数:0
11. conv3-512:kernel size:3 stride:1 pad:1
像素:(28-3+2*1)/1+1=28 28*28*512
参数:(3*3*256)*512 = 1179648
12. conv3-512:kernel size:3 stride:1 pad:1
像素:(28-3+2*1)/1+1=28 28*28*512
参数:(3*3*512)*512 = 2359296
13. conv3-512:kernel size:3 stride:1 pad:1
像素:(28-3+2*1)/1+1=28 28*28*512
参数:(3*3*512)*512 = 2359296
14.pool2: kernel size:2 stride:2 pad:0
像素:(28-2)/2+1=14 14*14*512
参数: 0
15. conv3-512:kernel size:3 stride:1 pad:1
像素:(14-3+2*1)/1+1=14 14*14*512
参数:(3*3*512)*512 = 2359296
16. conv3-512:kernel size:3 stride:1 pad:1
像素:(14-3+2*1)/1+1=14 14*14*512
参数:(3*3*512)*512 = 2359296
17. conv3-512:kernel size:3 stride:1 pad:1
像素:(14-3+2*1)/1+1=14 14*14*512
参数:(3*3*512)*512 = 2359296
18.pool2:kernel size:2 stride:2 pad:0
像素:(14-2)/2+1=7 7*7*512
参数:0
19.FC: 4096 neurons
像素:1*1*4096
参数:7*7*512*4096 = 102760448
20.FC: 4096 neurons
像素:1*1*4096
参数:4096*4096 = 16777216
21.FC:1000 neurons
像素:1*1*1000
参数:4096*1000=4096000
以上各层参数(未计偏置)合计138,344,128个,即总参数量约为138M。

本文的主要工作是计算VGG网络各层的输出像素以及所需参数,作为理解CNN的一个练习。VGG网络的特点是用小尺寸卷积核代替大卷积核,然后把网络做深。举个例子,一个7*7的大卷积核(例如ZFNet第一层所用;AlexNet第一层则是11*11)可以用3个堆叠的3*3卷积核代替,其感受野是一样的。关于感受野的计算可以参照另一篇博文。
一个7*7卷积核的感受野是:7*7
VGG第一个卷积核的感受野:3*3
第二个卷积核的感受野:(3-1)*1+3=5
第三个卷积核的感受野:(5-1)*1+3=7
可见三个3*3卷积核和一个7*7卷积核的感受野是一样的,但是3*3卷积核可以把网络做的更深。VGGNet不好的一点是它耗费更多计算资源,并且使用了更多的参数,导致更多的内存占用。
代码参考:《TensorFlow实战》——黄文坚

from  datetime import datetime
import tensorflow as tf
import math
import time

# Number of images in the randomly generated input batch (first dimension of
# the `images` tensor built in run_benchmark).
batch_size = 16
# Number of timed iterations per measurement in time_tensorflow_run
# (warm-up iterations are run in addition to these).
num_batches = 100
# Create a convolution layer and append its parameters to the parameter list.
def conv_op(input_op, name, kh, kw, n_out, dh, dw, p):
    """Build a conv2d + bias + ReLU layer and record its variables in ``p``.

    Args:
        input_op: Input tensor; the size of its last dimension is used as the
            number of input channels.
        name: Name of the layer, used as the name scope.
        kh: Kernel height.
        kw: Kernel width.
        n_out: Number of output channels.
        dh: Stride along the height axis.
        dw: Stride along the width axis.
        p: Parameter list; the kernel and bias variables are appended in place.

    Returns:
        The ReLU activation tensor of the layer.
    """
    in_channels = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        # Kernel weights use Xavier initialisation.
        weights = tf.get_variable(
            scope + "w",
            shape=[kh, kw, in_channels, n_out],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer_conv2d(),
        )
        convolved = tf.nn.conv2d(
            input_op, weights, (1, dh, dw, 1), padding='SAME')
        # Biases start at zero and are trainable.
        bias = tf.Variable(
            tf.constant(0.0, shape=[n_out], dtype=tf.float32),
            trainable=True,
            name='b',
        )
        output = tf.nn.relu(tf.nn.bias_add(convolved, bias), name=scope)
        p.extend([weights, bias])
        return output
# Define a fully connected layer.
def fc_op(input_op, name, n_out, p):
    """Build a fully connected + ReLU layer and record its variables in ``p``.

    Args:
        input_op: Input tensor; the size of its last dimension is used as the
            number of input features.
        name: Name of the layer, used as the name scope.
        n_out: Number of output units.
        p: Parameter list; the weight and bias variables are appended in place.

    Returns:
        The output tensor of ``tf.nn.relu_layer``.
    """
    in_features = input_op.get_shape()[-1].value
    with tf.name_scope(name) as scope:
        weights = tf.get_variable(
            scope + 'w',
            shape=[in_features, n_out],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer_conv2d(),
        )
        # Biases are initialised to 0.1.
        bias_init = tf.constant(0.1, shape=[n_out], dtype=tf.float32)
        bias = tf.Variable(bias_init, name='b')
        # tf.nn.relu_layer multiplies input_op by the weights, adds the bias
        # and applies ReLU.
        output = tf.nn.relu_layer(input_op, weights, bias, name=scope)
        p.extend([weights, bias])
        return output

# Define a max-pooling layer.
def mpool_op(input_op, name, kh, kw, dh, dw):
    """Apply max pooling with a ``kh`` x ``kw`` window and ``dh`` x ``dw`` stride.

    Args:
        input_op: Input tensor.
        name: Name given to the pooling op.
        kh: Pooling window height.
        kw: Pooling window width.
        dh: Stride along the height axis.
        dw: Stride along the width axis.

    Returns:
        The pooled tensor ('SAME' padding).
    """
    window = [1, kh, kw, 1]
    step = [1, dh, dw, 1]
    return tf.nn.max_pool(
        input_op, ksize=window, strides=step, padding='SAME', name=name)

# Define the network structure.
def inference_op(input_op, keep_prob):
    """Assemble the VGG-16 graph: 5 conv stages, then 3 fully connected layers.

    Args:
        input_op: Input image tensor fed to the first convolution.
        keep_prob: Keep probability passed to the two dropout layers.

    Returns:
        A tuple ``(predictions, softmax, fc8, p)``: the argmax over the
        softmax output, the softmax tensor, the output of the ``fc8`` layer
        (built with ``fc_op``, so it has already passed through ReLU), and the
        list of all kernel/bias variables in creation order.
    """
    p = []

    # (stage index, output channels, number of stacked 3x3 conv layers)
    conv_stages = (
        (1, 64, 2),
        (2, 128, 2),
        (3, 256, 3),
        (4, 512, 3),
        (5, 512, 3),
    )

    net = input_op
    for stage, channels, num_convs in conv_stages:
        for idx in range(1, num_convs + 1):
            net = conv_op(net, name='conv%d_%d' % (stage, idx),
                          kh=3, kw=3, n_out=channels, dh=1, dw=1, p=p)
        # Each stage ends with a 2x2 max pool of stride 2.
        net = mpool_op(net, name='pool%d' % stage, kh=2, kw=2, dw=2, dh=2)

    # Flatten everything except the batch dimension for the dense layers.
    dims = net.get_shape()
    flattened_shape = 1
    for axis in (1, 2, 3):
        flattened_shape *= dims[axis].value
    net = tf.reshape(net, [-1, flattened_shape], name="resh1")

    # Two 4096-unit dense layers, each followed by dropout.
    for fc_name in ("fc6", "fc7"):
        net = fc_op(net, name=fc_name, n_out=4096, p=p)
        net = tf.nn.dropout(net, keep_prob, name=fc_name + "_drop")

    fc8 = fc_op(net, name="fc8", n_out=1000, p=p)
    softmax = tf.nn.softmax(fc8)
    predictions = tf.argmax(softmax, 1)
    return predictions, softmax, fc8, p

def time_tensorflow_run(session, target, feed, info_string):
    """Time repeated ``session.run`` calls and print mean/stddev per batch.

    Runs ``num_batches`` timed iterations after 10 untimed warm-up iterations.
    A progress line is printed every 10th step, and a summary line with the
    mean and standard deviation of the per-step duration is printed at the end.

    Args:
        session: Object whose ``run(target, feed_dict=feed)`` is timed.
        target: Value passed as the first argument to ``session.run``.
        feed: Feed dict passed to ``session.run``.
        info_string: Label included in the summary line.
    """
    num_steps_burn_in = 10  # warm-up iterations excluded from the statistics
    total_duration = 0.0  # sum of timed durations
    total_duration_squared = 0.0  # sum of squared durations, for the variance
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target, feed_dict=feed)
        duration = time.time() - start_time
        if i < num_steps_burn_in:
            # Only iterations after the warm-up phase are counted.
            continue
        if not i % 10:
            print('%s:step %d,duration = %.3f' % (datetime.now(), i - num_steps_burn_in, duration))
        # Accumulate EVERY timed step, not only the ones that are printed;
        # otherwise dividing by num_batches below understates the mean.
        total_duration += duration
        total_duration_squared += duration * duration

    mn = total_duration / num_batches  # mean time per batch
    vr = total_duration_squared / num_batches - mn * mn  # variance
    # Rounding can push a near-zero variance slightly negative; clamp it so
    # math.sqrt does not raise ValueError.
    sd = math.sqrt(max(vr, 0.0))  # standard deviation
    print('%s: %s across %d steps, %.3f +/- %.3f sec/batch' % (datetime.now(), info_string, num_batches, mn, sd))

def run_benchmark():
    """Benchmark the VGG-16 forward and forward-backward passes on random input.

    Builds the network in a fresh graph on a randomly initialised image batch,
    then times the forward pass (prediction op, keep_prob 1.0) and the gradient
    computation of an L2 loss on ``fc8`` with respect to all parameters
    (keep_prob 0.5).
    """
    with tf.Graph().as_default():
        image_size = 224  # input image size
        images = tf.Variable(tf.random_normal([batch_size, image_size, image_size, 3], dtype=tf.float32, stddev=1e-1))
        keep_prob = tf.placeholder(tf.float32)
        prediction, softmax, fc8, p = inference_op(images, keep_prob)
        init = tf.global_variables_initializer()
        # Use the session as a context manager so its resources are released
        # when the benchmark finishes or fails.
        with tf.Session() as sess:
            sess.run(init)
            time_tensorflow_run(sess, prediction, {keep_prob: 1.0}, "Forward")
            # Simulate a training step.
            objective = tf.nn.l2_loss(fc8)  # stand-in loss
            grad = tf.gradients(objective, p)  # gradients of the loss w.r.t. all model parameters
            time_tensorflow_run(sess, grad, {keep_prob: 0.5}, "Forward-backward")
        
# Run the benchmark when the script is executed.
run_benchmark()

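这个代码只是用来模拟训练过程然后评估每轮的计算时间的,结果如下(注意:汇总行中的平均耗时0.032和0.108比逐步输出的duration(约0.32秒和1.08秒)小了约10倍,这是因为产生该输出时只累加了每10步中一步的耗时,却除以了总步数100;每个batch的实际耗时应以逐步输出的duration为准):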

2018-11-27 22:05:00.358924:step 0,duration = 0.320
2018-11-27 22:05:03.578625:step 10,duration = 0.320
2018-11-27 22:05:06.814303:step 20,duration = 0.340
2018-11-27 22:05:10.030007:step 30,duration = 0.324
2018-11-27 22:05:13.241719:step 40,duration = 0.320
2018-11-27 22:05:16.457424:step 50,duration = 0.320
2018-11-27 22:05:19.673130:step 60,duration = 0.324
2018-11-27 22:05:22.892830:step 70,duration = 0.320
2018-11-27 22:05:26.156472:step 80,duration = 0.328
2018-11-27 22:05:29.380166:step 90,duration = 0.324
2018-11-27 22:05:32.276305: Forward across 100 steps, 0.032 +/- 0.097 sec/batch
2018-11-27 22:05:51.071198:step 0,duration = 1.103
2018-11-27 22:06:01.852797:step 10,duration = 1.083
2018-11-27 22:06:12.606436:step 20,duration = 1.071
2018-11-27 22:06:23.388036:step 30,duration = 1.087
2018-11-27 22:06:34.165644:step 40,duration = 1.083
2018-11-27 22:06:44.923280:step 50,duration = 1.075
2018-11-27 22:06:55.764797:step 60,duration = 1.067
2018-11-27 22:07:06.526424:step 70,duration = 1.067
2018-11-27 22:07:17.471807:step 80,duration = 1.083
2018-11-27 22:07:28.273381:step 90,duration = 1.075
2018-11-27 22:07:37.960444: Forward-backward across 100 steps, 0.108 +/- 0.324 sec/batch

选用3*3卷积核的优点:

 

  1. 使网络结构更深,学习到的特征更多,结果更具有判别性(discriminative)
  2. 三层3*3的卷积核比一层7*7的卷积核所需要的参数更少(假设卷积前后的通道数均为C):三层3*3共需3*(3*3*C*C)=27C²个参数,而一层7*7需要7*7*C*C=49C²个参数

 参数和内存占用分析(来源[斯坦福大学CS231n课程]课件截图):

VGGnet网络结构详解_第2张图片

 

由分析可以看出:

  • 前面部分的卷积层占用大量内存
  • 后面的三层全连接层占用了大量的参数

 

你可能感兴趣的:(深度学习)