系列连载目录
学习借鉴
github:BIGBALLON/cifar-10-cnn
知乎专栏:写给妹子的深度学习教程
Inception-resnet-v2 Caffe 代码:https://github.com/soeaver/caffe-model/blob/master/cls/inception/deploy_inception-resnet-v2.prototxt
Inception-resnet-v1 / v2 Keras 代码:https://github.com/titu1994/Inception-v4
GoogleNet网络详解与keras实现:https://blog.csdn.net/qq_25491201/article/details/78367696
参考
代码下载
硬件
参考【Inception-v4】《Inception-v4, Inception-ResNet and the Impact of Residual Connections on Learning》
[D] Why aren’t Inception-style networks successful on CIFAR-10/100?
图片来源 https://www.zhihu.com/question/50370954
1)导入库,设置好超参数
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="3"
import keras
import numpy as np
import math
from keras.datasets import cifar10
from keras.layers import Input, concatenate
from keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, GlobalAveragePooling2D
from keras.layers import Flatten, Dense, Dropout,BatchNormalization,Activation, Convolution2D, add
from keras.layers import Lambda  # required by the scale_residual branch of inception_A/B/C
from keras.models import Model
from keras import optimizers, regularizers
from keras.preprocessing.image import ImageDataGenerator
from keras.initializers import he_normal
from keras.callbacks import LearningRateScheduler, TensorBoard, ModelCheckpoint
num_classes = 10
batch_size = 64 # 64 or 32 or other
epochs = 300
iterations = 782
USE_BN=True
DROPOUT=0.2 # keep 80%
CONCAT_AXIS=3
weight_decay=1e-4
DATA_FORMAT='channels_last' # Theano:'channels_first' Tensorflow:'channels_last'
log_filepath = './inception_resnet_v2'
2)数据预处理并设置 learning schedule
def color_preprocessing(x_train,x_test):
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
mean = [125.307, 122.95, 113.865]
std = [62.9932, 62.0887, 66.7048]
for i in range(3):
x_train[:,:,:,i] = (x_train[:,:,:,i] - mean[i]) / std[i]
x_test[:,:,:,i] = (x_test[:,:,:,i] - mean[i]) / std[i]
return x_train, x_test
def scheduler(epoch):
if epoch < 100:
return 0.01
if epoch < 200:
return 0.001
return 0.0001
# load data
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
y_train = keras.utils.to_categorical(y_train, num_classes)
y_test = keras.utils.to_categorical(y_test, num_classes)
x_train, x_test = color_preprocessing(x_train, x_test)
3)定义网络结构
def conv_block(x, nb_filter, nb_row, nb_col, border_mode='same', subsample=(1,1), bias=False):
x = Convolution2D(nb_filter, nb_row, nb_col, subsample=subsample, border_mode=border_mode, bias=bias,
init="he_normal",dim_ordering='tf',W_regularizer=regularizers.l2(weight_decay))(x)
x = BatchNormalization(momentum=0.9, epsilon=1e-5)(x)
x = Activation('relu')(x)
return x
inception_v4(庞然大物)的结构图中,标注了 padding 的层为 valid,没有标注的层表示 same。因为数据集分辨率的不同,如下代码全部用 same
表示:def create_stem(img_input,concat_axis):
x = Conv2D(32,kernel_size=(3,3),strides=(2,2),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(img_input)
x = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
x = Conv2D(32,kernel_size=(3,3),strides=(1,1),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x)
x = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
x = Conv2D(64,kernel_size=(3,3),strides=(1,1),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x)
x = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
# stem1
x_1 = Conv2D(96,kernel_size=(3,3),strides=(2,2),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x)
x_1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x_1))
x_2 = MaxPooling2D(pool_size=(3,3),strides=2,padding='same',data_format=DATA_FORMAT)(x)
x = concatenate([x_1,x_2],axis=concat_axis)
# stem2
x_1 = Conv2D(64,kernel_size=(1,1),strides=(1,1),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x)
x_1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x_1))
x_1 = Conv2D(96,kernel_size=(3,3),strides=(1,1),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x_1)
x_1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x_1))
x_2 = Conv2D(96,kernel_size=(1,1),strides=(1,1),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x)
x_2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x_2))
x_2 = conv_block(x_2,64,1,7)
x_2 = conv_block(x_2,64,7,1)
x_2 = Conv2D(96,kernel_size=(3,3),strides=(1,1),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x_2)
x_2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x_2))
x = concatenate([x_1,x_2],axis=concat_axis)
# stem3
x_1 = Conv2D(192,kernel_size=(3,3),strides=(2,2),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(x)
x_1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x_1))
x_2 = MaxPooling2D(pool_size=(3,3),strides=2,padding='same',data_format=DATA_FORMAT)(x)
x = concatenate([x_1,x_2],axis=concat_axis)
return x
def inception_A(x,params,concat_axis,padding='same',data_format=DATA_FORMAT,scale_residual=False,use_bias=True,kernel_initializer="he_normal",bias_initializer='zeros',kernel_regularizer=None,bias_regularizer=None,activity_regularizer=None,kernel_constraint=None,bias_constraint=None,weight_decay=weight_decay):
(branch1,branch2,branch3,branch4)=params
if weight_decay:
kernel_regularizer=regularizers.l2(weight_decay)
bias_regularizer=regularizers.l2(weight_decay)
else:
kernel_regularizer=None
bias_regularizer=None
#1x1
pathway1=Conv2D(filters=branch1[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway1))
#1x1->3x3
pathway2=Conv2D(filters=branch2[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
pathway2=Conv2D(filters=branch2[1],kernel_size=(3,3),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway2)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
#1x1->3x3->3x3
pathway3=Conv2D(filters=branch3[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway3 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway3))
pathway3=Conv2D(filters=branch3[1],kernel_size=(3,3),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway3)
pathway3 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway3))
pathway3=Conv2D(filters=branch3[1],kernel_size=(3,3),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway3)
pathway3 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway3))
# concatenate
pathway_123 = concatenate([pathway1,pathway2,pathway3],axis=concat_axis)
pathway_123 = Conv2D(branch4[0],kernel_size=(1,1),strides=(1,1),padding='same',activation = 'linear',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(pathway_123)
if scale_residual:
x = Lambda(lambda p: p * 0.1)(x)
return add([x,pathway_123])
def reduce_A(x,params,concat_axis,padding='same',data_format=DATA_FORMAT,use_bias=True,kernel_initializer="he_normal",bias_initializer='zeros',kernel_regularizer=None,bias_regularizer=None,activity_regularizer=None,kernel_constraint=None,bias_constraint=None,weight_decay=weight_decay):
(branch1,branch2)=params
if weight_decay:
kernel_regularizer=regularizers.l2(weight_decay)
bias_regularizer=regularizers.l2(weight_decay)
else:
kernel_regularizer=None
bias_regularizer=None
#1x1
pathway1 = Conv2D(filters=branch1[0],kernel_size=(3,3),strides=2,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway1))
#1x1->3x3->3x3
pathway2 = Conv2D(filters=branch2[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
pathway2 = Conv2D(filters=branch2[1],kernel_size=(3,3),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway2)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
pathway2 = Conv2D(filters=branch2[2],kernel_size=(3,3),strides=2,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway2)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
# max pooling
pathway3 = MaxPooling2D(pool_size=(3,3),strides=2,padding=padding,data_format=DATA_FORMAT)(x)
return concatenate([pathway1,pathway2,pathway3],axis=concat_axis)
def inception_B(x,params,concat_axis,padding='same',data_format=DATA_FORMAT,scale_residual=False,use_bias=True,kernel_initializer="he_normal",bias_initializer='zeros',kernel_regularizer=None,bias_regularizer=None,activity_regularizer=None,kernel_constraint=None,bias_constraint=None,weight_decay=weight_decay):
(branch1,branch2,branch3)=params
if weight_decay:
kernel_regularizer=regularizers.l2(weight_decay)
bias_regularizer=regularizers.l2(weight_decay)
else:
kernel_regularizer=None
bias_regularizer=None
#1x1
pathway1=Conv2D(filters=branch1[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway1))
#1x1->1x7->7x1
pathway2=Conv2D(filters=branch2[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
pathway2 = conv_block(pathway2,branch2[1],1,7)
pathway2 = conv_block(pathway2,branch2[2],7,1)
# concatenate
pathway_12 = concatenate([pathway1,pathway2],axis=concat_axis)
pathway_12 = Conv2D(branch3[0],kernel_size=(1,1),strides=(1,1),padding='same',activation = 'linear',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(pathway_12)
if scale_residual:
x = Lambda(lambda p: p * 0.1)(x)
return add([x,pathway_12])
def reduce_B(x,params,concat_axis,padding='same',data_format=DATA_FORMAT,use_bias=True,kernel_initializer="he_normal",bias_initializer='zeros',kernel_regularizer=None,bias_regularizer=None,activity_regularizer=None,kernel_constraint=None,bias_constraint=None,weight_decay=weight_decay):
(branch1,branch2,branch3)=params
if weight_decay:
kernel_regularizer=regularizers.l2(weight_decay)
bias_regularizer=regularizers.l2(weight_decay)
else:
kernel_regularizer=None
bias_regularizer=None
#1x1->3x3
pathway1 = Conv2D(filters=branch1[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway1))
pathway1 = Conv2D(filters=branch1[1],kernel_size=(3,3),strides=2,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway1)
pathway1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway1))
#1x1->3x3
pathway2 = Conv2D(filters=branch2[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
pathway2 = Conv2D(filters=branch2[1],kernel_size=(3,3),strides=2,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway2)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
#1x1->3x3->3x3
pathway3 = Conv2D(filters=branch3[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway3 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway3))
pathway3 = Conv2D(filters=branch3[1],kernel_size=(3,3),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway3)
pathway3 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway3))
pathway3 = Conv2D(filters=branch3[2],kernel_size=(3,3),strides=2,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(pathway3)
pathway3 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway3))
# max pooling
pathway4 = MaxPooling2D(pool_size=(3,3),strides=2,padding=padding,data_format=DATA_FORMAT)(x)
return concatenate([pathway1,pathway2,pathway3,pathway4],axis=concat_axis)
def inception_C(x,params,concat_axis,padding='same',data_format=DATA_FORMAT,scale_residual=False,use_bias=True,kernel_initializer="he_normal",bias_initializer='zeros',kernel_regularizer=None,bias_regularizer=None,activity_regularizer=None,kernel_constraint=None,bias_constraint=None,weight_decay=weight_decay):
(branch1,branch2,branch3)=params
if weight_decay:
kernel_regularizer=regularizers.l2(weight_decay)
bias_regularizer=regularizers.l2(weight_decay)
else:
kernel_regularizer=None
bias_regularizer=None
#1x1
pathway1=Conv2D(filters=branch1[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway1 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway1))
#1x1->1x3->3x1
pathway2=Conv2D(filters=branch2[0],kernel_size=(1,1),strides=1,padding=padding,data_format=data_format,use_bias=use_bias,kernel_initializer=kernel_initializer,bias_initializer=bias_initializer,kernel_regularizer=kernel_regularizer,bias_regularizer=bias_regularizer,activity_regularizer=activity_regularizer,kernel_constraint=kernel_constraint,bias_constraint=bias_constraint)(x)
pathway2 = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(pathway2))
pathway2 = conv_block(pathway2,branch2[1],1,3)
pathway2 = conv_block(pathway2,branch2[2],3,1)
# concatenate
pathway_12 = concatenate([pathway1,pathway2],axis=concat_axis)
pathway_12 = Conv2D(branch3[0],kernel_size=(1,1),strides=(1,1),padding='same',activation = 'linear',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(pathway_12)
if scale_residual:
x = Lambda(lambda p: p * 0.1)(x)
return add([x,pathway_12])
4)搭建网络
用 3)中设计好的模块来搭建网络
整体结构(图6左边 in paper,同 inception_resnet_v1
,不过 inception module
往往是10A,20B,10C的组合,替换5A,10B,5C的组合)
def create_model(img_input):
# stem
x = create_stem(img_input,concat_axis=CONCAT_AXIS)
# 5 x inception_A
for _ in range(10):
x=inception_A(x,params=[(32,),(32,32),(32,48,64),(384,)],concat_axis=CONCAT_AXIS)
# reduce A
x=reduce_A(x,params=[(384,),(256,256,384)],concat_axis=CONCAT_AXIS) # 768
# 10 x inception_B
for _ in range(20):
x=inception_B(x,params=[(192,),(128,160,192),(1152,)],concat_axis=CONCAT_AXIS)
# reduce B
x=reduce_B(x,params=[(256,384),(256,288),(256,288,320)],concat_axis=CONCAT_AXIS) # 1280
# 5 x inception_C
for _ in range(10):
x=inception_C(x,params=[(192,),(192,224,256),(2144,)],concat_axis=CONCAT_AXIS)
x=GlobalAveragePooling2D()(x)
x=Dropout(DROPOUT)(x)
x = Dense(num_classes,activation='softmax',kernel_initializer="he_normal",
kernel_regularizer=regularizers.l2(weight_decay))(x)
return x
5)生成模型
img_input=Input(shape=(32,32,3))
output = create_model(img_input)
model=Model(img_input,output)
model.summary()
Total params: 53,123,242
Trainable params: 53,066,154
Non-trainable params: 57,088
6)开始训练
# set optimizer
sgd = optimizers.SGD(lr=.1, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
# set callback
tb_cb = TensorBoard(log_dir=log_filepath, histogram_freq=0)
change_lr = LearningRateScheduler(scheduler)
cbks = [change_lr,tb_cb]
# set data augmentation
datagen = ImageDataGenerator(horizontal_flip=True,
width_shift_range=0.125,
height_shift_range=0.125,
fill_mode='constant',cval=0.)
datagen.fit(x_train)
# start training
model.fit_generator(datagen.flow(x_train, y_train,batch_size=batch_size),
steps_per_epoch=iterations,
epochs=epochs,
callbacks=cbks,
validation_data=(x_test, y_test))
model.save('inception_resnet_v2.h5')
7)结果分析
training accuracy 和 training loss
把 Inception_resnet_v2
中 stem 结构直接替换成一个卷积,inception 结构不变,因为 stem 结构会把原图降到 1/8 的分辨率,对于 ImageNet 还行,CIFAR-10 的话有些吃不消了
代码修改部分如下:
def create_stem(img_input,concat_axis):
x = Conv2D(384,kernel_size=(3,3),strides=(1,1),padding='same',
kernel_initializer="he_normal",kernel_regularizer=regularizers.l2(weight_decay))(img_input)
x = Activation('relu')(BatchNormalization(momentum=0.9, epsilon=1e-5)(x))
return x
其他部分同 Inception_resnet_v2
参数量如下(少了一些):
Total params: 52,507,722
Trainable params: 52,451,658
Non-trainable params: 56,064
结果分析如下:
training accuracy 和 training loss
效果飙升,大网络(容量)就是猛,能在训练集上达到100%,就是很难理解一开始精度的停滞不前!大概在第 50 个 epoch 的时候,开始学东西了!先留下一个疑惑。
93%+,长舒一口气,调通了
整体结构(图6左边 in paper,同 inception_resnet_v1
,不过 inception module
往往是10A,20B,10C的组合,替换5A,10B,5C的组合),这里,我们采用 5A、10B、5C 的方式
代码修改如下
def create_model(img_input):
# stem
x = create_stem(img_input,concat_axis=CONCAT_AXIS)
# 5 x inception_A
for _ in range(5):
x=inception_A(x,params=[(32,),(32,32),(32,48,64),(384,)],concat_axis=CONCAT_AXIS)
# reduce A
x=reduce_A(x,params=[(384,),(256,256,384)],concat_axis=CONCAT_AXIS) # 768
# 10 x inception_B
for _ in range(10):
x=inception_B(x,params=[(192,),(128,160,192),(1152,)],concat_axis=CONCAT_AXIS)
# reduce B
x=reduce_B(x,params=[(256,384),(256,288),(256,288,320)],concat_axis=CONCAT_AXIS) # 1280
# 5 x inception_C
for _ in range(5):
x=inception_C(x,params=[(192,),(192,224,256),(2144,)],concat_axis=CONCAT_AXIS)
x=GlobalAveragePooling2D()(x)
x=Dropout(DROPOUT)(x)
x = Dense(num_classes,activation='softmax',kernel_initializer="he_normal",
kernel_regularizer=regularizers.l2(weight_decay))(x)
return x
其它代码同 inception_resnet_v2_slim
参数量如下所示:
Total params: 29,692,042
Trainable params: 29,660,298
Non-trainable params: 31,744
结果分析如下:
training accuracy 和 training loss
一开始精度的停滞不前!大概在第 64 个 epoch 的时候,开始学东西了!先留下一个疑惑。
大部分的时候, inception_resnet_v2_slim_5105
的效果要好一些,loss 也更小
论文中图7,之前 inception_resnet_v2
代码中也有这种结构
也即如下部分代码:
if scale_residual:
x = Lambda(lambda p: p * 0.1)(x)
现在把这个功能激活,修改代码如下 scale_residual=True
def create_model(img_input):
# stem
x = create_stem(img_input,concat_axis=CONCAT_AXIS)
# 5 x inception_A
for _ in range(5):
x=inception_A(x,params=[(32,),(32,32),(32,48,64),(384,)],concat_axis=CONCAT_AXIS,scale_residual=True)
# reduce A
x=reduce_A(x,params=[(384,),(256,256,384)],concat_axis=CONCAT_AXIS) # 768
# 10 x inception_B
for _ in range(10):
x=inception_B(x,params=[(192,),(128,160,192),(1152,)],concat_axis=CONCAT_AXIS,scale_residual=True)
# reduce B
x=reduce_B(x,params=[(256,384),(256,288),(256,288,320)],concat_axis=CONCAT_AXIS) # 1280
# 5 x inception_C
for _ in range(5):
x=inception_C(x,params=[(192,),(192,224,256),(2144,)],concat_axis=CONCAT_AXIS,scale_residual=True)
x=GlobalAveragePooling2D()(x)
x=Dropout(DROPOUT)(x)
x = Dense(num_classes,activation='softmax',kernel_initializer="he_normal",
kernel_regularizer=regularizers.l2(weight_decay))(x)
return x
其它代码同 inception_resnet_v2_slim_5105
参数量如下所示(同 inception_resnet_v2_slim_5105
):
Total params: 29,692,042
Trainable params: 29,660,298
Non-trainable params: 31,744
结果分析如下:
training accuracy 和 training loss
哇,终于消除了一开始精度的停滞不前的情况!