GoogLeNet stacks 9 Inception modules into a 22-layer network. Besides the final output layer, its intermediate layers also produce features that classify quite well, so the network attaches auxiliary classifiers: the output of an intermediate layer is fed into a small classification head, and that head's loss is added to the final loss with a small weight. This amounts to a form of model ensembling, injects extra gradient signal into backpropagation, and provides additional regularization, all of which benefit training of the whole network. The figure below shows the network architecture.
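As a minimal sketch of how such an auxiliary classifier can be wired up in Keras (the layer sizes and names here are illustrative assumptions; only the 0.3 loss weight follows the GoogLeNet paper):

import tensorflow as tf
from tensorflow.keras import layers, models

def build_with_aux(n_classes=10):
    inputs = layers.Input(shape=(28, 28, 1))
    x = layers.Conv2D(32, 3, padding='same', activation='relu')(inputs)
    mid = layers.Conv2D(64, 3, strides=2, padding='same', activation='relu')(x)

    # auxiliary branch: classify directly from an intermediate feature map
    aux = layers.GlobalAveragePooling2D()(mid)
    aux_out = layers.Dense(n_classes, activation='softmax', name='aux')(aux)

    # main branch continues deeper
    y = layers.Conv2D(128, 3, strides=2, padding='same', activation='relu')(mid)
    y = layers.GlobalAveragePooling2D()(y)
    main_out = layers.Dense(n_classes, activation='softmax', name='main')(y)

    model = models.Model(inputs, [main_out, aux_out])
    # the auxiliary loss joins the total loss with a small weight
    model.compile(optimizer='adam',
                  loss={'main': 'categorical_crossentropy',
                        'aux': 'categorical_crossentropy'},
                  loss_weights={'main': 1.0, 'aux': 0.3})
    return model

At inference time only the 'main' output is used; the auxiliary head exists purely to shape the training signal. Below is the full training script for a simplified Inception-style network on MNIST: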
import tensorflow as tf
from tensorflow.keras import models, optimizers, losses
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Conv2D, MaxPooling2D, BatchNormalization, Activation, GlobalAveragePooling2D
import matplotlib.pyplot as plt
# Conv -> BatchNorm -> ReLU block; defaults: 3x3 kernel, stride 1, 'same' padding
class ConvBNRelu(models.Model):
    def __init__(self, ch, kernel_size=3, strides=1, padding='same'):
        super(ConvBNRelu, self).__init__()
        self.model = Sequential([
            Conv2D(ch, kernel_size=kernel_size, strides=strides, padding=padding),
            BatchNormalization(),
            Activation('relu')
        ])

    def call(self, inputs, training=None, mask=None):
        x = self.model(inputs, training=training)
        return x
class Inceptionblk(models.Model):
    def __init__(self, ch, strides):
        super(Inceptionblk, self).__init__()
        self.ch = ch
        self.strides = strides
        # the 1x1 branch and the pooling branch carry the stride so that
        # every branch produces the same spatial size before concatenation
        self.conv1 = ConvBNRelu(ch, kernel_size=1, strides=strides)
        self.conv2 = ConvBNRelu(ch, kernel_size=3)
        self.conv3 = ConvBNRelu(ch, kernel_size=5)
        self.maxpool = MaxPooling2D(3, strides, padding='same')
        self.maxpool_conv = ConvBNRelu(1, kernel_size=1)

    def call(self, inputs, training=None, mask=None):
        x1 = self.conv1(inputs, training=training)     # 1x1 branch
        x2 = self.conv2(x1, training=training)         # 1x1 -> 3x3 branch
        x3 = self.conv3(x1, training=training)         # 1x1 -> 5x5 branch
        x4 = self.maxpool(inputs)                      # 3x3 max-pool branch
        x5 = self.maxpool_conv(x4, training=training)  # pool -> 1x1 conv
        # concatenate all branches along the channel axis
        x = tf.concat([x1, x2, x3, x5], axis=3)
        return x
class Inception(models.Model):
    def __init__(self, num_layers, n_classes=10, init_ch=16, **kwargs):
        super(Inception, self).__init__()
        self.in_channels = init_ch
        self.out_channels = init_ch
        self.init_ch = init_ch
        self.num_layers = num_layers
        self.conv1 = ConvBNRelu(init_ch)
        # dynamically stacked Inception blocks: each group first downsamples
        # (strides=2), then refines at the same resolution (strides=1)
        self.blocks = Sequential(name='dynamic-blocks')
        for block_id in range(num_layers):
            for layer_id in range(2):
                if layer_id == 0:
                    # a larger stride means a smaller feature map
                    block = Inceptionblk(self.out_channels, strides=2)
                else:
                    block = Inceptionblk(self.out_channels, strides=1)
                self.blocks.add(block)
            # double the channel width after each group
            self.out_channels *= 2
        # global average pooling followed by a fully connected classifier
        self.avg_pool = GlobalAveragePooling2D()
        self.fc = Dense(n_classes)  # outputs logits; loss uses from_logits=True

    def call(self, inputs, training=None, mask=None):
        x = self.conv1(inputs, training=training)
        out = self.blocks(x, training=training)
        avgpool = self.avg_pool(out)
        out = self.fc(avgpool)
        return out
if __name__ == '__main__':
    (train_x, train_y), (test_x, test_y) = mnist.load_data()
    train_x = train_x.astype('float32') / 255
    test_x = test_x.astype('float32') / 255
    # add a channel dimension: (N, 28, 28) -> (N, 28, 28, 1)
    train_x, test_x = tf.expand_dims(train_x, axis=3), tf.expand_dims(test_x, axis=3)
    print(train_x.shape, test_x.shape)
    # one-hot encode the labels
    train_y = to_categorical(train_y, 10)
    test_y = to_categorical(test_y, 10)
    print(train_y.shape, test_y.shape)
    # db_train = tf.data.Dataset.from_tensor_slices((train_x, train_y)).batch(256)
    # db_test = tf.data.Dataset.from_tensor_slices((test_x, test_y)).batch(256)
    model = Inception(3, 10)
    # build the model so summary() can derive the shape of every layer
    model.build(input_shape=(None, 28, 28, 1))
    model.summary()
    # the Dense head outputs logits, so the loss must use from_logits=True
    model.compile(optimizer=optimizers.Adam(learning_rate=0.01),
                  loss=losses.CategoricalCrossentropy(from_logits=True),
                  metrics=['accuracy'])
    history = model.fit(train_x, train_y, epochs=5,
                        validation_data=(test_x, test_y), batch_size=100)
    score = model.evaluate(test_x, test_y)
    print('loss', score[0])
    print('accuracy', score[1])
    # plot training vs. validation loss and accuracy
    train_loss = history.history['loss']
    test_loss = history.history['val_loss']
    train_acc = history.history['accuracy']
    test_acc = history.history['val_accuracy']
    plt.figure(figsize=(10, 5))
    plt.subplot(121)
    plt.title('Train_loss And Test_loss')
    plt.plot(train_loss, label='train_loss')
    plt.plot(test_loss, label='test_loss')
    plt.legend()
    plt.subplot(122)
    plt.title('Train_acc And Test_acc')
    plt.plot(train_acc, label='train_acc')
    plt.plot(test_acc, label='test_acc')
    plt.legend()
    plt.show()
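As a quick sanity check (not part of the original script), a single strided block can be run on random data to confirm that the branches concatenate; with ch=16 the output should have 3*16 + 1 = 49 channels at half the spatial resolution:

# assumes the classes defined above are available in scope
blk = Inceptionblk(16, strides=2)
fake = tf.random.normal([4, 28, 28, 1])
out = blk(fake, training=False)
print(out.shape)  # expected: (4, 14, 14, 49)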
The structure diagram is shown below. To summarize the classic CNN architectures:
LeNet was the first convolutional neural network successfully applied to handwritten digit recognition.
AlexNet demonstrated the power of convolutional neural networks and set off an unprecedented wave of CNN research.
ZFNet used visualization to reveal the function and role of each layer in a convolutional neural network.
VGG replaces large convolution kernels with stacks of small ones; a stack of small-kernel layers covers the same receptive field as a single large-kernel layer while making the decision function more discriminative and reducing the parameter count (see the sketch after this list).
GoogLeNet widens the network: it convolves with kernels of several sizes in parallel and concatenates the results, using 1x1 convolutions to reduce dimensionality and cut the parameter count.
ResNet addresses the degradation problem, allowing neural networks to go much deeper.
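To make the parameter-count claims for VGG and GoogLeNet concrete, here is a quick back-of-the-envelope check (channel counts are illustrative; bias terms are ignored):

C = 256                                   # input/output channels (illustrative)

# VGG: two stacked 3x3 convs cover a 5x5 receptive field
# with fewer parameters than a single 5x5 conv
one_5x5 = 5 * 5 * C * C                   # 1,638,400
two_3x3 = 2 * (3 * 3 * C * C)             # 1,179,648
print(one_5x5, two_3x3)

# GoogLeNet: a 1x1 conv first reduces the channel count, so the
# expensive 5x5 conv operates on far fewer input channels
R = 64                                    # reduced channels (illustrative)
direct = 5 * 5 * C * C                    # 5x5 applied directly: 1,638,400
reduced = 1 * 1 * C * R + 5 * 5 * R * C   # 1x1 reduce, then 5x5: 425,984
print(direct, reduced)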