#coding=utf-8
from keras.models import Sequential
from keras.layers import Dense,Flatten
from keras.layers.convolutional import Conv2D,MaxPooling2D
def LeNet(input_shape):
#LeNet5特征能够总结为如下几点:
#1)卷积神经网络使用三个层作为一个系列: 卷积,池化,非线性
#2) 使用卷积提取空间特征
#3)使用映射到空间均值下采样(subsample)
#4)双曲线(tanh)或S型(sigmoid)形式的非线性
#5)多层神经网络(MLP)作为最后的分类器
#6)层与层之间的稀疏连接矩阵避免大的计算成本
model = Sequential()
model.add(Conv2D(32,(5,5),strides=(1,1),input_shape=input_shape,padding='valid',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(64,(5,5),strides=(1,1),padding='valid',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(100,activation='relu'))
model.add(Dense(10,activation='softmax'))
model.compile(optimizer='sgd',loss='categorical_crossentropy',metrics=['accuracy'])
model.summary()
return model
'''
AlexNet将LeNet的思想发扬光大,把CNN的基本原理应用到了很深很宽的网络中。AlexNet主要使用到的新技术点如下。
(1)成功使用ReLU作为CNN的激活函数,并验证其效果在较深的网络超过了Sigmoid,成功解决了Sigmoid在网络较深时的梯度弥散问题。虽然ReLU激活函数在很久之前就被提出了,但是直到AlexNet的出现才将其发扬光大。
(2)训练时使用Dropout随机忽略一部分神经元,以避免模型过拟合。Dropout虽有单独的论文论述,但是AlexNet将其实用化,通过实践证实了它的效果。在AlexNet中主要是最后几个全连接层使用了Dropout。
(3)在CNN中使用重叠的最大池化。此前CNN中普遍使用平均池化,AlexNet全部使用最大池化,避免平均池化的模糊化效果。并且AlexNet中提出让步长比池化核的尺寸小,这样池化层的输出之间会有重叠和覆盖,提升了特征的丰富性。
(4)提出了LRN层,对局部神经元的活动创建竞争机制,使得其中响应比较大的值变得相对更大,并抑制其他反馈较小的神经元,增强了模型的泛化能力。
(5)使用CUDA加速深度卷积网络的训练,利用GPU强大的并行计算能力,处理神经网络训练时大量的矩阵运算。AlexNet使用了两块GTX 580 GPU进行训练,单个GTX 580只有3GB显存,这限制了可训练的网络的最大规模。因此作者将AlexNet分布在两个GPU上,在每个GPU的显存中储存一半的神经元的参数。因为GPU之间通信方便,可以互相访问显存,而不需要通过主机内存,所以同时使用多块GPU也是非常高效的。同时,AlexNet的设计让GPU之间的通信只在网络的某些层进行,控制了通信的性能损耗。
(6)数据增强,随机地从256´256的原始图像中截取224´224大小的区域(以及水平翻转的镜像),相当于增加了(256-224)2´2=2048倍的数据量。如果没有数据增强,仅靠原始的数据量,参数众多的CNN会陷入过拟合中,使用了数据增强后可以大大减轻过拟合,提升泛化能力。进行预测时,则是取图片的四个角加中间共5个位置,并进行左右翻转,一共获得10张图片,对他们进行预测并对10次结果求均值。同时,AlexNet论文中提到了会对图像的RGB数据进行PCA处理,并对主成分做一个标准差为0.1的高斯扰动,增加一些噪声,这个Trick可以让错误率再下降1%。
'''
from keras.models import Sequential
from keras.layers import Dense,Flatten,Dropout
from keras.layers.convolutional import Conv2D,MaxPooling2D
from keras.utils.np_utils import to_categorical
def AlexNet(input_shape):
model = Sequential()
model.add(Conv2D(96,(11,11),strides=(4,4),input_shape=input_shape,padding='valid',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Conv2D(256,(5,5),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Conv2D(384,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(384,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Flatten())
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1000,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
return model
from keras.models import Sequential
from keras.layers import Dense,Flatten,Dropout
from keras.layers.convolutional import Conv2D,MaxPooling2D
from keras.utils.np_utils import to_categorical
def ZFNet(input_shape):
model = Sequential()
model.add(Conv2D(96,(7,7),strides=(2,2),input_shape=input_shape,padding='valid',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Conv2D(256,(5,5),strides=(2,2),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Conv2D(384,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(384,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(3,3),strides=(2,2)))
model.add(Flatten())
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1000,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
return model
'''
VGGNet在训练时有一个小技巧,先训练级别A的简单网络,再复用A网络的权重来初始化后面的几个复杂模型,这样训练收敛的速度更快。
在预测时,VGG采用Multi-Scale的方法,将图像scale到一个尺寸Q,并将图片输入卷积网络计算。然后在最后一个卷积层使用滑窗的方式
进行分类预测,将不同窗口的分类结果平均,再将不同尺寸Q的结果平均得到最后结果,这样可提高图片数据的利用率并提升预测准确率。
同时在训练中,VGGNet还使用了Multi-Scale的方法做数据增强,将原始图像缩放到不同尺寸S,然后再随机裁切224´224的图片,这样能
增加很多数据量,对于防止模型过拟合有很不错的效果。实践中,作者令S在[256,512]这个区间内取值,使用Multi-Scale获得多个版本的
数据,并将多个版本的数据合在一起进行训练。图9所示为VGGNet使用Multi-Scale训练时得到的结果,可以看到D和E都可以达到7.5%的错
误率。最终提交到ILSVRC 2014的版本是仅使用Single-Scale的6个不同等级的网络与Multi-Scale的D网络的融合,达到了7.3%的错误率。
不过比赛结束后作者发现只融合Multi-Scale的D和E可以达到更好的效果,错误率达到7.0%,再使用其他优化策略最终错误率可达到6.8%左右,
非常接近同年的冠军Google Inceptin Net。同时,作者在对比各级网络时总结出了以下几个观点。
'''
from keras.models import Sequential
from keras.layers import Dense,Flatten,Dropout
from keras.layers.convolutional import Conv2D,MaxPooling2D
def VGG_13(input_shape):
model = Sequential()
model.add(Conv2D(64,(3,3),strides=(1,1),input_shape=input_shape,padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(64,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(128,(3,2),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(128,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1000,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
return model
from keras.models import Sequential
from keras.layers import Dense,Flatten,Dropout
from keras.layers.convolutional import Conv2D,MaxPooling2D
def VGG_16(input_shape):
model = Sequential()
model.add(Conv2D(64,(3,3),strides=(1,1),input_shape=input_shape,padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(64,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(128,(3,2),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(128,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(256,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(Conv2D(512,(3,3),strides=(1,1),padding='same',activation='relu',kernel_initializer='uniform'))
model.add(MaxPooling2D(pool_size=(2,2)))
model.add(Flatten())
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4096,activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1000,activation='softmax'))
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
return model
'''
Inception V1降低参数量的目的有两点,第一,参数越多模型越庞大,需要供模型学习的数据量就越大,而目前高质量的数据非常昂贵;
第二,参数越多,耗费的计算资源也会更大。Inception V1参数少但效果好的原因除了模型层数更深、表达能力更强外,还有两点:
一是去除了最后的全连接层,用全局平均池化层(即将图片尺寸变为1´1)来取代它。全连接层几乎占据了AlexNet或VGGNet中90%的参数量,
而且会引起过拟合,去除全连接层后模型训练更快并且减轻了过拟合。用全局平均池化层取代全连接层的做法借鉴了Network In Network
(以下简称NIN)论文。二是Inception V1中精心设计的Inception Module提高了参数的利用效率,其结构如图10所示。这一部分也借鉴
了NIN的思想,形象的解释就是Inception Module本身如同大网络中的一个小网络,其结构可以反复堆叠在一起形成大网络。不过Inception
V1比NIN更进一步的是增加了分支网络,NIN则主要是级联的卷积层和MLPConv层。一般来说卷积层要提升表达能力,主要依靠增加输出通道数
但副作用是计算量增大和过拟合。每一个输出通道对应一个滤波器,同一个滤波器共享参数,只能提取一类特征,因此一个输出通道只能做一种
特征处理。而NIN中的MLPConv则拥有更强大的能力,允许在输出通道之间组合信息,因此效果明显。可以说,MLPConv基本等效于普通卷积层后
再连接1´1的卷积和ReLU激活函数。
'''
from keras.models import Model
from keras.layers import Input,Dense,Dropout,BatchNormalization,Conv2D,MaxPooling2D,AveragePooling2D,concatenate
from keras.layers.convolutional import Conv2D,MaxPooling2D,AveragePooling2D
def GoogleNet(input_shape):
def Conv2d_BN(x, nb_filter,kernel_size, padding='same',strides=(1,1),name=None):
if name is not None:
bn_name = name + '_bn'
conv_name = name + '_conv'
else:
bn_name = None
conv_name = None
x = Conv2D(nb_filter,kernel_size,padding=padding,strides=strides,activation='relu',name=conv_name)(x)
x = BatchNormalization(axis=3,name=bn_name)(x)
return x
def Inception(x,nb_filter):
branch1x1 = Conv2d_BN(x,nb_filter,(1,1), padding='same',strides=(1,1),name=None)
branch3x3 = Conv2d_BN(x,nb_filter,(1,1), padding='same',strides=(1,1),name=None)
branch3x3 = Conv2d_BN(branch3x3,nb_filter,(3,3), padding='same',strides=(1,1),name=None)
branch5x5 = Conv2d_BN(x,nb_filter,(1,1), padding='same',strides=(1,1),name=None)
branch5x5 = Conv2d_BN(branch5x5,nb_filter,(1,1), padding='same',strides=(1,1),name=None)
branchpool = MaxPooling2D(pool_size=(3,3),strides=(1,1),padding='same')(x)
branchpool = Conv2d_BN(branchpool,nb_filter,(1,1),padding='same',strides=(1,1),name=None)
x = concatenate([branch1x1,branch3x3,branch5x5,branchpool],axis=3)
return x
inpt = Input(shape=input_shape)
#padding = 'same',填充为(步长-1)/2,还可以用ZeroPadding2D((3,3))
x = Conv2d_BN(inpt,64,(7,7),strides=(2,2),padding='same')
x = MaxPooling2D(pool_size=(3,3),strides=(2,2),padding='same')(x)
x = Conv2d_BN(x,192,(3,3),strides=(1,1),padding='same')
x = MaxPooling2D(pool_size=(3,3),strides=(2,2),padding='same')(x)
x = Inception(x,64)#256
x = Inception(x,120)#480
x = MaxPooling2D(pool_size=(3,3),strides=(2,2),padding='same')(x)
x = Inception(x,128)#512
x = Inception(x,128)
x = Inception(x,128)
x = Inception(x,132)#528
x = Inception(x,208)#832
x = MaxPooling2D(pool_size=(3,3),strides=(2,2),padding='same')(x)
x = Inception(x,208)
x = Inception(x,256)#1024
x = AveragePooling2D(pool_size=(7,7),strides=(7,7),padding='same')(x)
x = Dropout(0.4)(x)
x = Dense(1000,activation='relu')(x)
x = Dense(1000,activation='softmax')(x)
model = Model(inpt,x,name='inception')
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
return model
from keras.models import Model
from keras.layers import Input,Dense,Dropout,BatchNormalization,Conv2D,MaxPooling2D,AveragePooling2D,concatenate,Activation,ZeroPadding2D
from keras.layers import add,Flatten
def Resnet_34(input_shape):
def Conv2d_BN(x, nb_filter,kernel_size, strides=(1,1), padding='same',name=None):
if name is not None:
bn_name = name + '_bn'
conv_name = name + '_conv'
else:
bn_name = None
conv_name = None
x = Conv2D(nb_filter,kernel_size,padding=padding,strides=strides,activation='relu',name=conv_name)(x)
x = BatchNormalization(axis=3,name=bn_name)(x)
return x
def Conv_Block(inpt,nb_filter,kernel_size,strides=(1,1), with_conv_shortcut=False):
x = Conv2d_BN(inpt,nb_filter=nb_filter,kernel_size=kernel_size,strides=strides,padding='same')
x = Conv2d_BN(x, nb_filter=nb_filter, kernel_size=kernel_size,padding='same')
if with_conv_shortcut:
shortcut = Conv2d_BN(inpt,nb_filter=nb_filter,strides=strides,kernel_size=kernel_size)
x = add([x,shortcut])
return x
else:
x = add([x,inpt])
return x
inpt = Input(shape=(224,224,3))
x = ZeroPadding2D((3,3))(inpt)
x = Conv2d_BN(x,nb_filter=64,kernel_size=(7,7),strides=(2,2),padding='valid')
x = MaxPooling2D(pool_size=(3,3),strides=(2,2),padding='same')(x)
#(56,56,64)
x = Conv_Block(x,nb_filter=64,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=64,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=64,kernel_size=(3,3))
#(28,28,128)
x = Conv_Block(x,nb_filter=128,kernel_size=(3,3),strides=(2,2),with_conv_shortcut=True)
x = Conv_Block(x,nb_filter=128,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=128,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=128,kernel_size=(3,3))
#(14,14,256)
x = Conv_Block(x,nb_filter=256,kernel_size=(3,3),strides=(2,2),with_conv_shortcut=True)
x = Conv_Block(x,nb_filter=256,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=256,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=256,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=256,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=256,kernel_size=(3,3))
#(7,7,512)
x = Conv_Block(x,nb_filter=512,kernel_size=(3,3),strides=(2,2),with_conv_shortcut=True)
x = Conv_Block(x,nb_filter=512,kernel_size=(3,3))
x = Conv_Block(x,nb_filter=512,kernel_size=(3,3))
x = AveragePooling2D(pool_size=(7,7))(x)
x = Flatten()(x)
x = Dense(1000,activation='softmax')(x)
model = Model(inputs=inpt,outputs=x)
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
return model
from keras.models import Model
from keras.layers import Input,Dense,BatchNormalization,Conv2D,MaxPooling2D,AveragePooling2D,ZeroPadding2D
from keras.layers import add,Flatten
#from keras.layers.convolutional import Conv2D,MaxPooling2D,AveragePooling2D
from keras.optimizers import SGD
def Resnet_50(input_shape):
def Conv2d_BN(x, nb_filter,kernel_size, strides=(1,1), padding='same',name=None):
if name is not None:
bn_name = name + '_bn'
conv_name = name + '_conv'
else:
bn_name = None
conv_name = None
x = Conv2D(nb_filter,kernel_size,padding=padding,strides=strides,activation='relu',name=conv_name)(x)
x = BatchNormalization(axis=3,name=bn_name)(x)
return x
def Conv_Block(inpt,nb_filter,kernel_size,strides=(1,1), with_conv_shortcut=False):
x = Conv2d_BN(inpt,nb_filter=nb_filter[0],kernel_size=(1,1),strides=strides,padding='same')
x = Conv2d_BN(x, nb_filter=nb_filter[1], kernel_size=(3,3), padding='same')
x = Conv2d_BN(x, nb_filter=nb_filter[2], kernel_size=(1,1), padding='same')
if with_conv_shortcut:
shortcut = Conv2d_BN(inpt,nb_filter=nb_filter[2],strides=strides,kernel_size=kernel_size)
x = add([x,shortcut])
return x
else:
x = add([x,inpt])
return x
inpt = Input(shape=input_shape)
x = ZeroPadding2D((3,3))(inpt)
x = Conv2d_BN(x,nb_filter=64,kernel_size=(7,7),strides=(2,2),padding='valid')
x = MaxPooling2D(pool_size=(3,3),strides=(2,2),padding='same')(x)
x = Conv_Block(x,nb_filter=[64,64,256],kernel_size=(3,3),strides=(1,1),with_conv_shortcut=True)
x = Conv_Block(x,nb_filter=[64,64,256],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[64,64,256],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[128,128,512],kernel_size=(3,3),strides=(2,2),with_conv_shortcut=True)
x = Conv_Block(x,nb_filter=[128,128,512],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[128,128,512],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[128,128,512],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[256,256,1024],kernel_size=(3,3),strides=(2,2),with_conv_shortcut=True)
x = Conv_Block(x,nb_filter=[256,256,1024],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[256,256,1024],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[256,256,1024],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[256,256,1024],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[256,256,1024],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[512,512,2048],kernel_size=(3,3),strides=(2,2),with_conv_shortcut=True)
x = Conv_Block(x,nb_filter=[512,512,2048],kernel_size=(3,3))
x = Conv_Block(x,nb_filter=[512,512,2048],kernel_size=(3,3))
x = AveragePooling2D(pool_size=(7,7))(x)
x = Flatten()(x)
x = Dense(1000,activation='softmax')(x)
model = Model(inputs=inpt,outputs=x)
sgd = SGD(decay=0.0001,momentum=0.9)
model.compile(loss='categorical_crossentropy',optimizer='sgd',metrics=['accuracy'])
model.summary()
from keras.models import Model
from keras.layers import Input, Conv2D, GlobalAveragePooling2D, Dropout
from keras.layers import Activation, BatchNormalization, add, Reshape
from keras.applications.mobilenet import relu6, DepthwiseConv2D
from keras.utils.vis_utils import plot_model
from keras import backend as K
'''
| - data/
| - train/
| - class 0/
| - image.jpg
....
| - class 1/
....
| - class n/
| - validation/
| - class 0/
| - class 1/
....
| - class n/
'''
def _conv_block(inputs, filters, kernel, strides):
"""Convolution Block
This function defines a 2D convolution operation with BN and relu6.
# Arguments
inputs: Tensor, input tensor of conv layer.
filters: Integer, the dimensionality of the output space.
kernel: An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
strides: An integer or tuple/list of 2 integers,
specifying the strides of the convolution along the width and height.
Can be a single integer to specify the same value for
all spatial dimensions.
# Returns
Output tensor.
"""
channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
x = Conv2D(filters, kernel, padding='same', strides=strides)(inputs)
x = BatchNormalization(axis=channel_axis)(x)
return Activation(relu6)(x)
def _bottleneck(inputs, filters, kernel, t, s, r=False):
"""Bottleneck
This function defines a basic bottleneck structure.
# Arguments
inputs: Tensor, input tensor of conv layer.
filters: Integer, the dimensionality of the output space.
kernel: An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
t: Integer, expansion factor.
t is always applied to the input size.
s: An integer or tuple/list of 2 integers,specifying the strides
of the convolution along the width and height.Can be a single
integer to specify the same value for all spatial dimensions.
r: Boolean, Whether to use the residuals.
# Returns
Output tensor.
"""
channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
tchannel = K.int_shape(inputs)[channel_axis] * t
x = _conv_block(inputs, tchannel, (1, 1), (1, 1))
x = DepthwiseConv2D(kernel, strides=(s, s), depth_multiplier=1, padding='same')(x)
x = BatchNormalization(axis=channel_axis)(x)
x = Activation(relu6)(x)
x = Conv2D(filters, (1, 1), strides=(1, 1), padding='same')(x)
x = BatchNormalization(axis=channel_axis)(x)
if r:
x = add([x, inputs])
return x
def _inverted_residual_block(inputs, filters, kernel, t, strides, n):
"""Inverted Residual Block
This function defines a sequence of 1 or more identical layers.
# Arguments
inputs: Tensor, input tensor of conv layer.
filters: Integer, the dimensionality of the output space.
kernel: An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
t: Integer, expansion factor.
t is always applied to the input size.
s: An integer or tuple/list of 2 integers,specifying the strides
of the convolution along the width and height.Can be a single
integer to specify the same value for all spatial dimensions.
n: Integer, layer repeat times.
# Returns
Output tensor.
"""
x = _bottleneck(inputs, filters, kernel, t, strides)
for i in range(1, n):
x = _bottleneck(x, filters, kernel, t, 1, True)
return x
def MobileNetv2(input_shape, k):
"""MobileNetv2
This function defines a MobileNetv2 architectures.
# Arguments
input_shape: An integer or tuple/list of 3 integers, shape
of input tensor.
k: Integer, layer repeat times.
# Returns
MobileNetv2 model.
"""
inputs = Input(shape=input_shape)
x = _conv_block(inputs, 32, (3, 3), strides=(2, 2))
x = _inverted_residual_block(x, 16, (3, 3), t=1, strides=1, n=1)
x = _inverted_residual_block(x, 24, (3, 3), t=6, strides=2, n=2)
x = _inverted_residual_block(x, 32, (3, 3), t=6, strides=2, n=3)
x = _inverted_residual_block(x, 64, (3, 3), t=6, strides=2, n=4)
x = _inverted_residual_block(x, 96, (3, 3), t=6, strides=1, n=3)
x = _inverted_residual_block(x, 160, (3, 3), t=6, strides=2, n=3)
x = _inverted_residual_block(x, 320, (3, 3), t=6, strides=1, n=1)
x = _conv_block(x, 1280, (1, 1), strides=(1, 1))
x = GlobalAveragePooling2D()(x)
x = Reshape((1, 1, 1280))(x)
x = Dropout(0.3, name='Dropout')(x)
x = Conv2D(k, (1, 1), padding='same')(x)
x = Activation('softmax', name='softmax')(x)
output = Reshape((k,))(x)
model = Model(inputs, output)
plot_model(model, to_file='images/MobileNetv2.png', show_shapes=True)
return model
if __name__ == '__main__':
MobileNetv2((224, 224, 3), 1000) #推荐输入图片大小
'''
这个网络的优点如下:
1.减轻了梯度弥散的问题,使模型不容易过拟合
2.增强了特征在各个层之间的流动,因为每一层都与初始输入层还有最后的由loss function得到的梯度直接相连
3.大大减少了参数个数,提高训练效率
'''
# -*- coding: utf-8 -*-
"""DenseNet models for Keras.
基于keras的DenseNet模型
# Reference paper
论文
- [Densely Connected Convolutional Networks]
密集连接卷积网络
(https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award)
CVPR是IEEE Conference on Computer Vision and Pattern Recognition的缩写,即IEEE国际计算机视觉与模式识别会议。
# Reference implementation
参考实现
- [Torch DenseNets]
(https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua)
- [TensorNets]
(https://github.com/taehoonlee/tensornets/blob/master/tensornets/densenets.py)
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
from .. import backend as K
from ..models import Model
from ..layers import Activation
from ..layers import AveragePooling2D
from ..layers import BatchNormalization
from ..layers import Concatenate
from ..layers import Conv2D
from ..layers import Dense
from ..layers import GlobalAveragePooling2D
from ..layers import GlobalMaxPooling2D
from ..layers import Input
from ..layers import MaxPooling2D
from ..layers import ZeroPadding2D
from ..utils.data_utils import get_file
from ..engine.topology import get_source_inputs
from . import imagenet_utils
from .imagenet_utils import decode_predictions
from .imagenet_utils import _obtain_input_shape
# 权重下载地址
DENSENET121_WEIGHT_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet121_weights_tf_dim_ordering_tf_kernels.h5'
DENSENET121_WEIGHT_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5'
DENSENET169_WEIGHT_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet169_weights_tf_dim_ordering_tf_kernels.h5'
DENSENET169_WEIGHT_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5'
DENSENET201_WEIGHT_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet201_weights_tf_dim_ordering_tf_kernels.h5'
DENSENET201_WEIGHT_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5'
def dense_block(x, blocks, name):
"""A dense block.
密集的模块
# Arguments
参数
x: input tensor.
x: 输入参数
blocks: integer, the number of building blocks.
blocks: 整型,生成块的个数。
name: string, block label.
name: 字符串,块的标签
# Returns
返回
output tensor for the block.
为块输出张量
"""
for i in range(blocks):
x = conv_block(x, 32, name=name + '_block' + str(i + 1))
return x
def transition_block(x, reduction, name):
"""A transition block.
转换块
# Arguments
参数
x: input tensor.
x: 输入参数
reduction: float, compression rate at transition layers.
reduction: 浮点数,转换层的压缩率
name: string, block label.
name: 字符串,块标签
# Returns
返回
output tensor for the block.
块输出张量
"""
bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_bn')(x)
x = Activation('relu', name=name + '_relu')(x)
x = Conv2D(int(K.int_shape(x)[bn_axis] * reduction), 1, use_bias=False,
name=name + '_conv')(x)
x = AveragePooling2D(2, strides=2, name=name + '_pool')(x)
return x
def conv_block(x, growth_rate, name):
"""A building block for a dense block.
密集块正在建立的块
# Arguments
参数
x: input tensor.
x: 输入张量
growth_rate: float, growth rate at dense layers.
growth_rate:浮点数,密集层的增长率。
name: string, block label.
name: 字符串,块标签
# Returns
返回
output tensor for the block.
块输出张量
"""
bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
x1 = BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_0_bn')(x)
x1 = Activation('relu', name=name + '_0_relu')(x1)
x1 = Conv2D(4 * growth_rate, 1, use_bias=False,
name=name + '_1_conv')(x1)
x1 = BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name=name + '_1_bn')(x1)
x1 = Activation('relu', name=name + '_1_relu')(x1)
x1 = Conv2D(growth_rate, 3, padding='same', use_bias=False,
name=name + '_2_conv')(x1)
x = Concatenate(axis=bn_axis, name=name + '_concat')([x, x1])
return x
def DenseNet(blocks,
include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000):
"""Instantiates the DenseNet architecture.
实例化DenseNet结构
Optionally loads weights pre-trained
on ImageNet. Note that when using TensorFlow,
for best performance you should set
`image_data_format='channels_last'` in your Keras config
at ~/.keras/keras.json.
可选择加载预训练的ImageNet权重。注意,如果是Tensorflow,最好在Keras配置中设置`image_data_format='channels_last'
The model and the weights are compatible with
TensorFlow, Theano, and CNTK. The data format
convention used by the model is the one
specified in your Keras config file.
模型和权重兼容TensorFlow, Theano, and CNTK.模型使用的数据格式约定是Keras配置文件中指定的一种格式。
# Arguments
参数
blocks: numbers of building blocks for the four dense layers.
blocks: (构建)4个密集层需要块数量
include_top: whether to include the fully-connected
layer at the top of the network.
include_top: 在网络的顶层(一般指最后一层)师傅包含全连接层
weights: one of `None` (random initialization),
'imagenet' (pre-training on ImageNet),
or the path to the weights file to be loaded.
以下的一个:`None` (随机初始化),'imagenet' (ImageNet预训练),或者下载权重文件的路径。
input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
to use as image input for the model.
input_tensor: 可选的Keras张量(即,`layers.Input()`的输出),用作模型的图像输入。
input_shape: optional shape tuple, only to be specified
if `include_top` is False (otherwise the input shape
has to be `(224, 224, 3)` (with `channels_last` data format)
or `(3, 224, 224)` (with `channels_first` data format).
It should have exactly 3 inputs channels.
input_shape: 可选的形状元组,只有`include_top`是False(否则,输入形状必须
是“(224, 224, 3)”(带有`channels_first` 数据格式。))时需要确认,它应该有3个输入通道。
pooling: optional pooling mode for feature extraction
when `include_top` is `False`.
可选,当 `include_top`是FALSE,特征提取的池化模式。
- `None` means that the output of the model will be
the 4D tensor output of the
last convolutional layer.
`None` 表示,模型输出层是4维张量,从上一个的卷积层输出。
- `avg` means that global average pooling
will be applied to the output of the
last convolutional layer, and thus
the output of the model will be a 2D tensor.
`avg`表示全局平均池化被应用到上一个的卷积层输出,所以模型输出是2维张量。
- `max` means that global max pooling will
be applied.
`max` 表示全局最大池化被应用
classes: optional number of classes to classify images
into, only to be specified if `include_top` is True, and
if no `weights` argument is specified.
classes: 可选的类数分类的图像,只有指定,如果'include_top'是真的,如果没有'weights'参数被指定。
# Returns
返回
A Keras model instance.
一个Keras模型实例
# Raises
补充
ValueError: in case of invalid argument for `weights`,
or invalid input shape.
ValueError: weights`无效的参数,或者无效的输入形状
"""
if not (weights in {'imagenet', None} or os.path.exists(weights)):
raise ValueError('The `weights` argument should be either '
'`None` (random initialization), `imagenet` '
'(pre-training on ImageNet), '
'or the path to the weights file to be loaded.')
if weights == 'imagenet' and include_top and classes != 1000:
raise ValueError('If using `weights` as imagenet with `include_top`'
' as true, `classes` should be 1000')
# Determine proper input shape
input_shape = _obtain_input_shape(input_shape,
default_size=224,
min_size=221,
data_format=K.image_data_format(),
require_flatten=include_top,
weights=weights)
if input_tensor is None:
img_input = Input(shape=input_shape)
else:
if not K.is_keras_tensor(input_tensor):
img_input = Input(tensor=input_tensor, shape=input_shape)
else:
img_input = input_tensor
bn_axis = 3 if K.image_data_format() == 'channels_last' else 1
x = ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input)
x = Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x)
x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name='conv1/bn')(x)
x = Activation('relu', name='conv1/relu')(x)
x = ZeroPadding2D(padding=((1, 1), (1, 1)))(x)
x = MaxPooling2D(3, strides=2, name='pool1')(x)
x = dense_block(x, blocks[0], name='conv2')
x = transition_block(x, 0.5, name='pool2')
x = dense_block(x, blocks[1], name='conv3')
x = transition_block(x, 0.5, name='pool3')
x = dense_block(x, blocks[2], name='conv4')
x = transition_block(x, 0.5, name='pool4')
x = dense_block(x, blocks[3], name='conv5')
x = BatchNormalization(axis=bn_axis, epsilon=1.001e-5,
name='bn')(x)
if include_top:
x = GlobalAveragePooling2D(name='avg_pool')(x)
x = Dense(classes, activation='softmax', name='fc1000')(x)
else:
if pooling == 'avg':
x = GlobalAveragePooling2D(name='avg_pool')(x)
elif pooling == 'max':
x = GlobalMaxPooling2D(name='max_pool')(x)
# Ensure that the model takes into account
# any potential predecessors of `input_tensor`.
# 确保模型考虑到任何潜在的前缀“input_tensor”。
if input_tensor is not None:
inputs = get_source_inputs(input_tensor)
else:
inputs = img_input
# Create model.
# 建立模型
if blocks == [6, 12, 24, 16]:
model = Model(inputs, x, name='densenet121')
elif blocks == [6, 12, 32, 32]:
model = Model(inputs, x, name='densenet169')
elif blocks == [6, 12, 48, 32]:
model = Model(inputs, x, name='densenet201')
else:
model = Model(inputs, x, name='densenet')
# Load weights.
# 加载权重
if weights == 'imagenet':
if include_top:
if blocks == [6, 12, 24, 16]:
weights_path = get_file(
'densenet121_weights_tf_dim_ordering_tf_kernels.h5',
DENSENET121_WEIGHT_PATH,
cache_subdir='models',
file_hash='0962ca643bae20f9b6771cb844dca3b0')
elif blocks == [6, 12, 32, 32]:
weights_path = get_file(
'densenet169_weights_tf_dim_ordering_tf_kernels.h5',
DENSENET169_WEIGHT_PATH,
cache_subdir='models',
file_hash='bcf9965cf5064a5f9eb6d7dc69386f43')
elif blocks == [6, 12, 48, 32]:
weights_path = get_file(
'densenet201_weights_tf_dim_ordering_tf_kernels.h5',
DENSENET201_WEIGHT_PATH,
cache_subdir='models',
file_hash='7bb75edd58cb43163be7e0005fbe95ef')
else:
if blocks == [6, 12, 24, 16]:
weights_path = get_file(
'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5',
DENSENET121_WEIGHT_PATH_NO_TOP,
cache_subdir='models',
file_hash='4912a53fbd2a69346e7f2c0b5ec8c6d3')
elif blocks == [6, 12, 32, 32]:
weights_path = get_file(
'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5',
DENSENET169_WEIGHT_PATH_NO_TOP,
cache_subdir='models',
file_hash='50662582284e4cf834ce40ab4dfa58c6')
elif blocks == [6, 12, 48, 32]:
weights_path = get_file(
'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5',
DENSENET201_WEIGHT_PATH_NO_TOP,
cache_subdir='models',
file_hash='1c2de60ee40562448dbac34a0737e798')
model.load_weights(weights_path)
elif weights is not None:
model.load_weights(weights)
return model
def DenseNet121(include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000):
return DenseNet([6, 12, 24, 16],
include_top, weights,
input_tensor, input_shape,
pooling, classes)
def DenseNet169(include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000):
return DenseNet([6, 12, 32, 32],
include_top, weights,
input_tensor, input_shape,
pooling, classes)
def DenseNet201(include_top=True,
weights='imagenet',
input_tensor=None,
input_shape=None,
pooling=None,
classes=1000):
return DenseNet([6, 12, 48, 32],
include_top, weights,
input_tensor, input_shape,
pooling, classes)
def preprocess_input(x, data_format=None):
"""Preprocesses a numpy array encoding a batch of images.
预处理:对一批图像进行编码numpy数组。
# Arguments
参数
x: a 3D or 4D numpy array consists of RGB values within [0, 255].
x: 3维或4维的numpy数组,组成了基于[0, 255]之间的RGB值(例如:(255,255,255))
data_format: data format of the image tensor.
data_format: 图像张量的数据格式。
# Returns
返回
Preprocessed array.
预处理后的数组。
"""
return imagenet_utils.preprocess_input(x, data_format, mode='torch')
setattr(DenseNet121, '__doc__', DenseNet.__doc__)
setattr(DenseNet169, '__doc__', DenseNet.__doc__)
setattr(DenseNet201, '__doc__', DenseNet.__doc__)
参考文献:https://blog.csdn.net/maxiao1204/article/details/65653781
https://blog.csdn.net/wmy199216/article/details/71171401#
https://www.jianshu.com/p/1cf3b543afff?utm_source=oschina-app
https://blog.csdn.net/wyx100/article/details/81510902