Introduction: AlexNet is a deep convolutional neural network proposed in 2012 by Alex Krizhevsky, a student of Hinton [1]. It was the first CNN to successfully apply techniques such as ReLU, Dropout, and LRN, and it won the highly competitive ILSVRC 2012 challenge by a large margin.
Related reading:
VGGNet and its TensorFlow implementation
GoogleInceptionNet_V3 and its TensorFlow implementation
ResNet_V2 and its TensorFlow implementation
Modular approach:
This article implements AlexNet in a modular, well-commented way, which should help beginners quickly understand both its structure and its implementation.
Imports:
from datetime import datetime
import math
import time
import tensorflow as tf
Printing the network structure:
# Print the name and output shape of each layer
def print_activations(t):
    print(t.op.name, ' ', t.get_shape().as_list())
AlexNet network structure:
# AlexNet network structure
def inference(images):
    parameters = []
    # variables created inside this scope are automatically named conv1/xxx
    with tf.name_scope('conv1') as scope:
        # convolution kernel initialized from a truncated normal distribution, kernel size 11x11
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 64], dtype = tf.float32,
                                                  stddev = 0.1), name = 'weights')
        # wx + b, stride 4
        conv = tf.nn.conv2d(images, kernel, [1, 4, 4, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [64], dtype = tf.float32),
                             trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        # ReLU activation
        conv1 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv1)
        # LRN layer (arguably of limited benefit)
        lrn1 = tf.nn.lrn(conv1, 4, bias = 1.0, alpha = 0.001 / 9, beta = 0.75, name = 'lrn1')
        # max-pooling layer
        pool1 = tf.nn.max_pool(lrn1, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1], padding = 'VALID', name = 'pool1')
        print_activations(pool1)
    with tf.name_scope('conv2') as scope:
        # kernel size 5x5
        kernel = tf.Variable(tf.truncated_normal([5, 5, 64, 192], dtype = tf.float32,
                                                  stddev = 0.1), name = 'weights')
        # wx + b, stride 1
        conv = tf.nn.conv2d(pool1, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [192], dtype = tf.float32),
                             trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        # ReLU activation
        conv2 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv2)
        # LRN layer
        lrn2 = tf.nn.lrn(conv2, 4, bias = 1.0, alpha = 0.001 / 9, beta = 0.75, name = 'lrn2')
        pool2 = tf.nn.max_pool(lrn2, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1],
                               padding = 'VALID', name = 'pool2')
        print_activations(pool2)
    with tf.name_scope('conv3') as scope:
        # kernel size 3x3
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384], dtype = tf.float32, stddev = 0.1), name = 'weights')
        # wx + b, stride 1
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [384], dtype = tf.float32),
                             trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        # ReLU activation
        conv3 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv3)
    with tf.name_scope('conv4') as scope:
        # kernel size 3x3
        kernel = tf.Variable(tf.truncated_normal([3, 3, 384, 256], dtype = tf.float32, stddev = 0.1), name = 'weights')
        # wx + b, stride 1
        conv = tf.nn.conv2d(conv3, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [256], dtype = tf.float32),
                             trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        # ReLU activation
        conv4 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv4)
    with tf.name_scope('conv5') as scope:
        # kernel size 3x3
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 256], dtype = tf.float32, stddev = 0.1), name = 'weights')
        # wx + b, stride 1
        conv = tf.nn.conv2d(conv4, kernel, [1, 1, 1, 1], padding = 'SAME')
        biases = tf.Variable(tf.constant(0.0, shape = [256], dtype = tf.float32),
                             trainable = True, name = 'biases')
        bias = tf.nn.bias_add(conv, biases)
        # ReLU activation
        conv5 = tf.nn.relu(bias, name = scope)
        parameters += [kernel, biases]
        print_activations(conv5)
        # max-pooling layer
        pool5 = tf.nn.max_pool(conv5, ksize = [1, 3, 3, 1], strides = [1, 2, 2, 1],
                               padding = 'VALID', name = 'pool5')
        print_activations(pool5)
    return pool5, parameters
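Note that inference only covers the convolutional part of AlexNet, which is all the benchmark below needs. The original paper also has three fully connected layers (FC6/FC7/FC8) with Dropout on the first two. As a rough, hypothetical sketch of how they could be appended after pool5 in the same style (layer sizes follow the paper; the function and variable names are my own, and the initialization is just illustrative):

# Hypothetical extension (not part of the benchmark): the fully connected part of AlexNet.
def fully_connected(pool5, keep_prob):
    flat = tf.reshape(pool5, [-1, 6 * 6 * 256])   # flatten [batch, 6, 6, 256] -> [batch, 9216]
    with tf.name_scope('fc6'):
        w6 = tf.Variable(tf.truncated_normal([6 * 6 * 256, 4096], stddev = 0.01), name = 'weights')
        b6 = tf.Variable(tf.constant(0.0, shape = [4096]), name = 'biases')
        fc6 = tf.nn.dropout(tf.nn.relu(tf.matmul(flat, w6) + b6), keep_prob)
    with tf.name_scope('fc7'):
        w7 = tf.Variable(tf.truncated_normal([4096, 4096], stddev = 0.01), name = 'weights')
        b7 = tf.Variable(tf.constant(0.0, shape = [4096]), name = 'biases')
        fc7 = tf.nn.dropout(tf.nn.relu(tf.matmul(fc6, w7) + b7), keep_prob)
    with tf.name_scope('fc8'):
        w8 = tf.Variable(tf.truncated_normal([4096, 1000], stddev = 0.01), name = 'weights')
        b8 = tf.Variable(tf.constant(0.0, shape = [1000]), name = 'biases')
        logits = tf.matmul(fc7, w8) + b8          # 1000-way class scores
    return logits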
Timing:
# Measure the per-batch running time of AlexNet
def time_tensorflow_run(session, target, info_string):
    '''
    session: the TensorFlow session
    target: the op / tensor to evaluate
    info_string: label identifying the forward or forward-backward pass
    '''
    num_steps_burn_in = 10        # number of warm-up iterations (not counted)
    total_duration = 0.0          # accumulated time
    total_duration_squared = 0.0  # accumulated squared time
    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target)
        duration = time.time() - start_time
        # only count iterations after the warm-up phase
        if i >= num_steps_burn_in:
            if not i % 10:
                print("%s : step %d, duration = %.3f" % (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / num_batches                      # mean
    vr = total_duration_squared / num_batches - mn * mn    # variance
    sd = math.sqrt(vr)                                     # standard deviation
    print("%s : %s across %d steps, %.3f +/- %.3f sec / batch" %
          (datetime.now(), info_string, num_batches, mn, sd))
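For reference, the helper can time any fetchable op, not just AlexNet; it reads the global num_batches. A hypothetical standalone usage (not part of the original script):

# Hypothetical standalone usage of time_tensorflow_run
num_batches = 100                          # the helper reads this global (also set in the Test section below)
with tf.Session() as s:
    a = tf.random_normal([1024, 1024])
    product = tf.matmul(a, a)              # a simple op to time
    time_tensorflow_run(s, product, "MatMul forward")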
Main benchmark function:
Note: training a CNN is time-consuming, so here we only benchmark the forward and backward passes on a batch of randomly generated images.
# Main benchmark function
def run_benchmark():
    with tf.Graph().as_default():  # use a fresh default graph
        image_size = 224
        # generate a batch of random images
        images = tf.Variable(tf.random_normal([batch_size,
                                               image_size,
                                               image_size, 3],
                                              dtype = tf.float32,
                                              stddev = 0.1))
        pool5, parameters = inference(images)
        init = tf.global_variables_initializer()
        sess = tf.Session()
        sess.run(init)
        # time the forward pass
        time_tensorflow_run(sess, pool5, "Forward")
        # use the L2 norm of pool5 as a dummy objective so gradients can be computed
        objective = tf.nn.l2_loss(pool5)
        grad = tf.gradients(objective, parameters)
        # time the forward + backward pass
        time_tensorflow_run(sess, grad, "Forward-backward")
Tip: if you are unsure what grad = tf.gradients(objective, parameters) does, see my earlier article on it.
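In short, tf.gradients returns the symbolic gradients of a scalar objective with respect to a list of tensors. A minimal sketch, independent of the benchmark (the names here are just illustrative):

# Minimal tf.gradients example: for y = sum(w * w), dy/dw = 2 * w
w = tf.Variable([1.0, 2.0, 3.0])
y = tf.reduce_sum(tf.square(w))
dy_dw = tf.gradients(y, [w])               # a list with one tensor, same shape as w
with tf.Session() as s:
    s.run(tf.global_variables_initializer())
    print(s.run(dy_dw))                    # [array([2., 4., 6.], dtype=float32)]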
Test:
batch_size = 32
num_batches = 100
run_benchmark()
Output:
conv1 [32, 56, 56, 64]
conv1/pool1 [32, 27, 27, 64]
conv2 [32, 27, 27, 192]
conv2/pool2 [32, 13, 13, 192]
conv3 [32, 13, 13, 384]
conv4 [32, 13, 13, 256]
conv5 [32, 13, 13, 256]
conv5/pool5 [32, 6, 6, 256]
2019-01-21 09:27:36.813713 : step 0, duration = 0.049
2019-01-21 09:27:37.285287 : step 10, duration = 0.047
2019-01-21 09:27:37.752207 : step 20, duration = 0.047
2019-01-21 09:27:38.220039 : step 30, duration = 0.047
2019-01-21 09:27:38.688349 : step 40, duration = 0.047
2019-01-21 09:27:39.156889 : step 50, duration = 0.047
2019-01-21 09:27:39.625929 : step 60, duration = 0.047
2019-01-21 09:27:40.093519 : step 70, duration = 0.047
2019-01-21 09:27:40.560311 : step 80, duration = 0.047
2019-01-21 09:27:41.027834 : step 90, duration = 0.047
2019-01-21 09:27:41.450068 : Forward across 100 steps, 0.047 +/- 0.000 sec / batch
2019-01-21 09:27:42.965738 : step 0, duration = 0.130
2019-01-21 09:27:44.266685 : step 10, duration = 0.130
2019-01-21 09:27:45.567230 : step 20, duration = 0.130
2019-01-21 09:27:46.867077 : step 30, duration = 0.130
2019-01-21 09:27:48.171594 : step 40, duration = 0.130
2019-01-21 09:27:49.475733 : step 50, duration = 0.130
2019-01-21 09:27:50.776351 : step 60, duration = 0.130
2019-01-21 09:27:52.075882 : step 70, duration = 0.130
2019-01-21 09:27:53.374543 : step 80, duration = 0.130
2019-01-21 09:27:54.671893 : step 90, duration = 0.130
2019-01-21 09:27:55.845514 : Forward-backward across 100 steps, 0.130 +/- 0.001 sec / batch
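For reference, the spatial sizes printed above follow from the standard TensorFlow output-size rules (SAME convolution: ceil(input / stride); VALID pooling: floor((input - ksize) / stride) + 1). A quick sanity check:

# Sanity check of the spatial sizes printed above
import math
conv1 = math.ceil(224 / 4)         # SAME conv, stride 4        -> 56
pool1 = (conv1 - 3) // 2 + 1       # VALID 3x3 pool, stride 2   -> 27
pool2 = (pool1 - 3) // 2 + 1       # -> 13 (conv3-conv5 keep 13 with SAME, stride 1)
pool5 = (pool2 - 3) // 2 + 1       # -> 6
print(conv1, pool1, pool2, pool5)  # 56 27 13 6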
[1] Krizhevsky A, Sutskever I, Hinton G E. ImageNet classification with deep convolutional neural networks[C]//Advances in Neural Information Processing Systems. 2012: 1097-1105.
[2] 黄文坚, 唐源. TensorFlow实战.
If you think anything here is poorly explained or find a mistake, please leave me a comment. Thanks for reading (a like would make my day)~