图片数据输入尺寸: $224 \times 224 \times 3$
第一层卷积层: 96个 $11 \times 11 \times 3$ 的卷积核
第二层卷积层: 256个 $5 \times 5 \times 48$ 的卷积核
第三层卷积层: 384个 $3 \times 3 \times 256$ 的卷积核
第四层卷积层: 384个 $3 \times 3 \times 192$ 的卷积核
第五层卷积层: 256个 $3 \times 3 \times 192$ 的卷积核
第一层和第二层全连接层: 4096个神经元
输出层: 1000个类别输出
AlexNet中卷积层和全连接层的神经元均采用ReLU函数作为激活函数,主要原因是ReLU函数在进行梯度下降的计算过程中能显著加快训练过程,也就是非饱和的分段线性激活函数 $\max(0, x)$ 的收敛速度要快于 $sigmoid$ 和 $tanh$ 等饱和的非线性激活函数。
神经网络因为结构复杂,很容易陷入过拟合的状态,因此抑制过拟合的措施很重要。AlexNet分别从数据预处理和创新型手段dropout两方面来抑制过拟合:
# -*- coding: utf-8 -*-
AlexNet 实现代码
import tensorflow as tf
import numpy as np
def maxPoolLayer(x, kHeight, kWidth, strideX, strideY, name, padding="SAME"):
    """Apply max pooling to ``x`` through ``tf.nn.max_pool``.

    Args:
        x: Input tensor forwarded to ``tf.nn.max_pool``.
        kHeight: Pooling window size placed at index 1 of ``ksize``.
        kWidth: Pooling window size placed at index 2 of ``ksize``.
        strideX: Step placed at index 1 of ``strides``.
        strideY: Step placed at index 2 of ``strides``.
        name: Name given to the pooling op.
        padding: Padding mode string passed through unchanged
            (defaults to "SAME").

    Returns:
        The tensor produced by ``tf.nn.max_pool``.
    """
    # The window and the step only cover the two middle axes; the first and
    # last axes are left untouched (size/step of 1).
    poolWindow = [1, kHeight, kWidth, 1]
    # NOTE(review): convLayer builds its strides as [1, strideY, strideX, 1],
    # while this function uses X before Y. Harmless for square strides, but
    # confirm the intended order before using strideX != strideY.
    poolStep = [1, strideX, strideY, 1]
    return tf.nn.max_pool(x, ksize=poolWindow, strides=poolStep,
                          padding=padding, name=name)
# Dropout wrapper.
def dropout(x, keepPro, name=None):
    """Apply dropout to ``x`` through ``tf.nn.dropout``.

    Args:
        x: Input tensor.
        keepPro: Keep probability, forwarded as the second positional
            argument of ``tf.nn.dropout``.
        name: Optional name for the dropout op.

    Returns:
        The tensor produced by ``tf.nn.dropout``.
    """
    # ``name`` must be passed by keyword: the third positional parameter of
    # the TF 1.x ``tf.nn.dropout`` is ``noise_shape``, so a positional name
    # would be misread as a noise shape.
    return tf.nn.dropout(x, keepPro, name=name)
# Local response normalization (LRN) wrapper.
def LRN(x, R, alpha, beta, name=None, bias=1.0):
    """Apply ``tf.nn.local_response_normalization`` to ``x``.

    Args:
        x: Input tensor.
        R: Forwarded as ``depth_radius``.
        alpha: Forwarded as ``alpha``.
        beta: Forwarded as ``beta``.
        name: Optional name for the op.
        bias: Forwarded as ``bias`` (defaults to 1.0).

    Returns:
        The normalized tensor.
    """
    lrnOptions = dict(depth_radius=R, alpha=alpha, beta=beta,
                      bias=bias, name=name)
    return tf.nn.local_response_normalization(x, **lrnOptions)
# Fully connected layer.
def fullConnectLayer(x, inputD, outputD, reluFlag, name):
    """Build a fully connected layer ``x @ w + b`` with optional ReLU.

    Variables "w" (shape ``[inputD, outputD]``) and "b" (shape
    ``[outputD]``) are obtained with ``tf.get_variable`` inside a variable
    scope called ``name``.

    Args:
        x: Input tensor passed to ``tf.nn.xw_plus_b``.
        inputD: Size of the input feature dimension.
        outputD: Number of output units.
        reluFlag: When truthy, ReLU is applied to the affine output.
        name: Variable scope name for this layer.

    Returns:
        The affine output, passed through ReLU when ``reluFlag`` is truthy.
    """
    with tf.variable_scope(name) as scope:
        weights = tf.get_variable("w", shape=[inputD, outputD], dtype="float")
        biases = tf.get_variable("b", [outputD], dtype="float")
        affine = tf.nn.xw_plus_b(x, weights, biases, name=scope.name)
        # The activation is optional so the same helper can emit raw outputs.
        return tf.nn.relu(affine) if reluFlag else affine
def convLayer(x, kHeight, kWidth, strideX, strideY,
              featureNum, name, padding="SAME", groups=1):
    """Build a (optionally grouped) 2-D convolution followed by bias and ReLU.

    The input and the kernel are both split into ``groups`` pieces along
    axis 3, each input piece is convolved with its kernel piece, and the
    results are concatenated again along axis 3.

    Args:
        x: Input tensor; its last axis is read as the channel count and it
            is split along axis 3, so a 4-D channels-last tensor is assumed.
        kHeight: Kernel height.
        kWidth: Kernel width.
        strideX: Stride placed at index 2 of the conv ``strides`` list.
        strideY: Stride placed at index 1 of the conv ``strides`` list.
        featureNum: Number of output feature maps.
        name: Variable scope name; also used as the name of the ReLU op.
        padding: Padding mode string passed to ``tf.nn.conv2d``.
        groups: Number of groups the channels are split into.

    Returns:
        The ReLU-activated output tensor.

    Raises:
        ValueError: If ``groups`` is smaller than 1 or does not evenly
            divide both the input channel count and ``featureNum``.
    """
    channel = int(x.get_shape()[-1])
    if groups < 1 or channel % groups != 0 or featureNum % groups != 0:
        raise ValueError(
            "groups (%r) must be >= 1 and divide both the input channels "
            "(%r) and featureNum (%r)" % (groups, channel, featureNum))

    convStrides = [1, strideY, strideX, 1]
    with tf.variable_scope(name) as scope:
        # Integer division: under Python 3 ``channel / groups`` is a float,
        # which is not a valid shape dimension.
        w = tf.get_variable(
            "w", shape=[kHeight, kWidth, channel // groups, featureNum])
        b = tf.get_variable("b", shape=[featureNum])

        # tf.split cuts a tensor into pieces: here axis 3 is divided into
        # ``groups`` equally sized tensors, for the input and the kernel.
        xNew = tf.split(value=x, num_or_size_splits=groups, axis=3)
        wNew = tf.split(value=w, num_or_size_splits=groups, axis=3)

        featureMap = [
            tf.nn.conv2d(xPart, wPart, strides=convStrides, padding=padding)
            for xPart, wPart in zip(xNew, wNew)
        ]
        mergeFeatureMap = tf.concat(axis=3, values=featureMap)
        out = tf.nn.bias_add(mergeFeatureMap, b)
        # bias_add keeps the shape of its input, so no reshape is needed.
        # Reshaping to the static shape list would also fail when a
        # dimension (e.g. the batch size) is unknown and reported as None.
        return tf.nn.relu(out, name=scope.name)
class AlexNet(object):
    """AlexNet graph builder with optional loading of pre-trained weights.

    Attributes are stored in upper case:
        X: Input tensor fed to the first convolution.
        KEEPPRO: Keep probability used by both dropout layers.
        CLASSNUM: Number of output units of the final layer.
        SKIP: Collection of layer names whose weights are not loaded.
        MODELPATH: Path handed to ``np.load`` in ``loadModel``.
    """

    def __init__(self, x, keepPro, classNum, skip, modelPath="/home/Ada/"):
        """Store the configuration; the graph is built by ``buildCNN``.

        Args:
            x: Input tensor.
            keepPro: Dropout keep probability.
            classNum: Number of output classes.
            skip: Layer names to skip when loading weights.
            modelPath: Path of the saved weights file.
        """
        self.X = x
        self.KEEPPRO = keepPro
        # buildCNN reads ``CLASSNUM``; the mixed-case ``CLASSnum`` spelling
        # is kept as an alias for code that still refers to it.
        self.CLASSNUM = classNum
        self.CLASSnum = classNum
        self.SKIP = skip
        self.MODELPATH = modelPath

    def buildCNN(self):
        """Build the network graph and store the final layer in ``self.fc3``.

        The layers are five convolutions (LRN and max pooling after the
        first two, max pooling after the fifth) followed by three fully
        connected layers with dropout after the first two.
        """
        conv1 = convLayer(self.X, 11, 11, 4, 4, 96, "conv1", "VALID")
        lrn1 = LRN(conv1, 2, 2e-05, 0.75, "norm1")
        pool1 = maxPoolLayer(lrn1, 3, 3, 2, 2, "pool1", "VALID")

        conv2 = convLayer(pool1, 5, 5, 1, 1, 256, "conv2", groups=2)
        lrn2 = LRN(conv2, 2, 2e-05, 0.75, "lrn2")
        pool2 = maxPoolLayer(lrn2, 3, 3, 2, 2, "pool2", "VALID")

        conv3 = convLayer(pool2, 3, 3, 1, 1, 384, "conv3")
        conv4 = convLayer(conv3, 3, 3, 1, 1, 384, "conv4", groups=2)
        conv5 = convLayer(conv4, 3, 3, 1, 1, 256, "conv5", groups=2)
        pool5 = maxPoolLayer(conv5, 3, 3, 2, 2, "pool5", "VALID")

        # The hard-coded 256 * 6 * 6 flatten size corresponds to a 227x227
        # spatial input with the strides/paddings above (55 -> 27 -> 13 -> 6);
        # other input sizes need a different value here.
        fcIn = tf.reshape(pool5, [-1, 256 * 6 * 6])
        fc1 = fullConnectLayer(fcIn, 256 * 6 * 6, 4096, True, "fc6")
        dropout1 = dropout(fc1, self.KEEPPRO)

        fc2 = fullConnectLayer(dropout1, 4096, 4096, True, "fc7")
        dropout2 = dropout(fc2, self.KEEPPRO)

        # NOTE(review): the output layer is built with ReLU enabled, so its
        # values are clipped at zero -- confirm this is intended if they are
        # used as logits.
        self.fc3 = fullConnectLayer(dropout2, 4096, self.CLASSNUM, True, "fc8")

    def loadModel(self, sess):
        """Assign saved weights to the graph variables.

        The file at ``self.MODELPATH`` is expected to hold a dict keyed by
        layer name whose values are iterables of arrays. For every layer
        not listed in ``self.SKIP``, 1-D arrays are assigned to the variable
        "b" and all other arrays to the variable "w" of that layer's scope.

        Args:
            sess: Session used to run the assign ops.
        """
        # SECURITY: allow_pickle=True is needed to load a pickled dict with
        # NumPy >= 1.16.3, but unpickling can execute arbitrary code -- only
        # load weight files from a trusted source.
        wDict = np.load(self.MODELPATH, encoding="bytes",
                        allow_pickle=True).item()  # dict format
        for name in wDict:
            if name in self.SKIP:
                continue
            with tf.variable_scope(name, reuse=True):
                for p in wDict[name]:
                    # Exactly one assignment per array: biases are 1-D,
                    # everything else is a weight tensor.
                    varName = 'b' if len(p.shape) == 1 else 'w'
                    sess.run(
                        tf.get_variable(varName, trainable=False).assign(p))