Inception-V3模型是谷歌在大型图像数据库ImageNet上训练好的一个图像分类模型,这个模型可以对1000种类别的图片进行图像分类。Inception V3优化了Inception Module的结构,现在Inception Module有35×35、17×17和8×8三种不同结构,如图所示。这些Inception Module只在网络的后部出现,前部还是普通的卷积层。并且Inception V3除了在Inception Module中使用分支,还在分支中使用了分支(8×8的结构中),可以说是Network In Network In Network。
(1) 继续改进Inception结构:nxn卷积核并非最小,可采用一组:1xn和nx1卷积核替换。3x3换成3x1和1x3,且参数量能再降低33%。
(2)RMSProp优化器
(3)使用了LabelSmoothing
LabelSmoothing:假设我们的分类只有两个,一个是猫一个不是猫,分别用1和0表示。Label Smoothing的工作原理是对原来的[0 1]这种标注做一个改动,假设我们给定Label Smoothing的值为0.1:
$$[0, 1] \times (1 - 0.1) + 0.1 / 2 = [0.05, 0.95]$$
可以看到,原来的[0,1]编码变成了[0.05,0.95]了。这个label_smoothing的值假设为ϵ,那么就是说,原来分类准确的时候,p=1,不准确为p=0,现在在二分类的情况下变成了p=1−ϵ/2和ϵ/2(ϵ=0.1时即0.95和0.05),也就是说对分类准确做了一点惩罚。Label Smoothing在很多问题上对模型都有一定的提升。
(4)辅助分类器使用了BatchNorm
class InceptionV3:
    """Inception-V3 style image classifier assembled from layer functions.

    Creating an instance declares the ``image`` and ``label`` inputs and
    builds the whole network right away; the ``[output, auxout]`` pair
    returned by :meth:`get_Net` is stored in ``self.predict``.

    NOTE(review): if auto-generated layer/parameter names depend on the order
    in which layers are created, reordering the calls in the builder methods
    would change them -- confirm before reordering anything.
    """

    def __init__(self, structShow=False):
        """Declare the inputs and build the network.

        Args:
            structShow: When true, :meth:`get_Net` prints the name and shape
                of each stage it builds.
        """
        self.structShow = structShow
        self.image = data(name='image', shape=[Img_chs, Img_size, Img_size], dtype='float32')
        self.label = data(name='label', shape=[Label_size], dtype='int64')
        self.predict = self.get_Net()

    @staticmethod
    def _conv_bn(x, channels, kernel, **conv_kwargs):
        """Apply a ReLU-activated ``conv2d`` followed by ``batch_norm``.

        ``conv_kwargs`` (``stride`` / ``padding``) are forwarded to ``conv2d``
        unchanged, so a call passes exactly the arguments it spells out.
        """
        activated = conv2d(x, channels, filter_size=kernel, act='relu', **conv_kwargs)
        return batch_norm(activated)

    def InceptionV3_ModelA(self, input, model_size, downsample=False):
        """Build a module with 1x1, 3x3, double-3x3 and max-pool branches.

        Args:
            input: Feature map fed to every branch.
            model_size: Seven channel counts, in order: input (not used),
                1x1 branch, 3x3 reduce, 3x3 output, double-3x3 reduce,
                double-3x3 output, pooling-branch projection.
            downsample: When true the 1x1 branch is omitted, and the last
                conv of the two conv branches as well as the pooling layer
                use stride 2 with 'VALID' padding.

        Returns:
            The branch outputs concatenated along axis 1.
        """
        _, con1_chs, con31_chs, con3_chs, con51_chs, con5_chs, pool1_chs = model_size
        stride, padding = (2, 'VALID') if downsample else (1, 'SAME')
        branches = []

        # The 1x1 branch only exists in the non-downsampling variant. A plain
        # truthiness test keeps this consistent with the stride/padding choice
        # above, so the branch list is always complete.
        if not downsample:
            branches.append(self._conv_bn(input, con1_chs, 1, padding='SAME'))

        # 1x1 reduce -> 3x3
        reduce3 = self._conv_bn(input, con31_chs, 1, padding='SAME')
        branches.append(self._conv_bn(reduce3, con3_chs, 3, stride=stride, padding=padding))

        # 1x1 reduce -> 3x3 -> 3x3
        reduce5 = self._conv_bn(input, con51_chs, 1, padding='SAME')
        mid5 = self._conv_bn(reduce5, con5_chs, 3, padding='SAME')
        branches.append(self._conv_bn(mid5, con5_chs, 3, stride=stride, padding=padding))

        # 3x3 max-pool -> 1x1 projection
        pooled = pool2d(input, pool_size=3, pool_stride=stride, pool_padding=padding, pool_type='max')
        branches.append(self._conv_bn(pooled, pool1_chs, 1, padding='SAME'))

        return concat(branches, axis=1)

    def InceptionV3_ModelB(self, input, model_size, downsample=False):
        """Build a module whose conv branches use 1x7 / 7x1 factorized kernels.

        Args:
            input: Feature map fed to every branch.
            model_size: Seven channel counts, in order: input (not used),
                1x1 branch, first-branch reduce, first-branch output,
                second-branch reduce, second-branch output, pooling-branch
                projection.
            downsample: When true the 1x1 branch is omitted, each conv branch
                ends in a stride-2 'VALID' 3x3 conv, and the pooling layer
                uses stride 2 with 'VALID' padding.

        Returns:
            The branch outputs concatenated along axis 1.
        """
        _, con1_chs, con31_chs, con3_chs, con51_chs, con5_chs, pool1_chs = model_size
        stride, padding = (2, 'VALID') if downsample else (1, 'SAME')

        # The pooling branch is created first; it is concatenated last.
        pooled = pool2d(input, pool_size=3, pool_stride=stride, pool_padding=padding, pool_type='max')
        pool_branch = self._conv_bn(pooled, pool1_chs, 1, padding='SAME')

        if downsample:
            # 1x1 reduce -> strided 3x3
            short = self._conv_bn(input, con31_chs, 1, padding='SAME')
            short = self._conv_bn(short, con3_chs, 3, stride=stride, padding=padding)

            # 1x1 reduce -> 1x7 -> 7x1 -> strided 3x3
            deep = self._conv_bn(input, con51_chs, 1, padding='SAME')
            deep = self._conv_bn(deep, con5_chs, (1, 7), padding='SAME')
            deep = self._conv_bn(deep, con5_chs, (7, 1), padding='SAME')
            deep = self._conv_bn(deep, con5_chs, 3, stride=stride, padding=padding)

            return concat([short, deep, pool_branch], axis=1)

        point = self._conv_bn(input, con1_chs, 1, padding='SAME')

        # 1x1 reduce -> 1x7 -> 7x1
        short = self._conv_bn(input, con31_chs, 1, padding='SAME')
        short = self._conv_bn(short, con3_chs, (1, 7), stride=stride, padding=padding)
        short = self._conv_bn(short, con3_chs, (7, 1), stride=stride, padding=padding)

        # 1x1 reduce -> (1x7 -> 7x1) twice
        deep = self._conv_bn(input, con51_chs, 1, padding='SAME')
        deep = self._conv_bn(deep, con5_chs, (1, 7), padding='SAME')
        deep = self._conv_bn(deep, con5_chs, (7, 1), padding='SAME')
        deep = self._conv_bn(deep, con5_chs, (1, 7), padding='SAME')
        deep = self._conv_bn(deep, con5_chs, (7, 1), padding='SAME')

        return concat([point, short, deep, pool_branch], axis=1)

    def InceptionV3_ModelC(self, input, model_size):
        """Build a module whose conv branches fan out into parallel 1x3 / 3x1 convs.

        Args:
            input: Feature map fed to every branch.
            model_size: Seven channel counts, in order: input (not used),
                1x1 branch, first-branch reduce, first-branch output (per
                fan-out conv), second-branch reduce (also used by its 3x3
                conv), second-branch output (per fan-out conv),
                pooling-branch projection.

        Returns:
            The branch outputs concatenated along axis 1; each fan-out branch
            contributes the concatenation of its 1x3 and 3x1 results.
        """
        _, con1_chs, con31_chs, con3_chs, con51_chs, con5_chs, pool1_chs = model_size

        # 3x3 max-pool -> 1x1 projection (created first, concatenated last)
        pooled = pool2d(input, pool_size=3, pool_stride=1, pool_padding='SAME', pool_type='max')
        pool_branch = self._conv_bn(pooled, pool1_chs, 1, padding='SAME')

        point = self._conv_bn(input, con1_chs, 1, padding='SAME')

        # 1x1 reduce -> {1x3, 3x1} in parallel
        stem3 = self._conv_bn(input, con31_chs, 1, padding='SAME')
        wide3 = self._conv_bn(stem3, con3_chs, (1, 3), stride=1, padding='SAME')
        tall3 = self._conv_bn(stem3, con3_chs, (3, 1), stride=1, padding='SAME')
        split3 = concat([wide3, tall3], axis=1)

        # 1x1 reduce -> 3x3 -> {1x3, 3x1} in parallel
        stem5 = self._conv_bn(input, con51_chs, 1, padding='SAME')
        stem5 = self._conv_bn(stem5, con51_chs, 3, padding='SAME')
        wide5 = self._conv_bn(stem5, con5_chs, (1, 3), padding='SAME')
        tall5 = self._conv_bn(stem5, con5_chs, (3, 1), padding='SAME')
        split5 = concat([wide5, tall5], axis=1)

        return concat([point, split3, split5, pool_branch], axis=1)

    def InceptionV1_Out(self, input, name=None):
        """Build the auxiliary classification head.

        Args:
            input: Feature map the head is attached to.
            name: Name passed to the final ``fc`` layer.

        Returns:
            Softmax output of an ``fc`` layer with ``Labels_nums`` units.
        """
        pooled = pool2d(input, pool_size=5, pool_stride=3, pool_type='max', pool_padding='VALID')
        feat = self._conv_bn(pooled, 128, 1, padding='SAME')
        feat = self._conv_bn(feat, 128, 1, padding='SAME')
        flat = flatten(feat, axis=1)
        dropped = dropout(flat, 0.3)
        return fc(dropped, Labels_nums, act='softmax', name=name)

    def get_Net(self):
        """Assemble the full network on ``self.image``.

        Returns:
            ``[output, auxout]``: the softmax output of the main head and of
            the auxiliary head attached after the fourth B module.
        """
        # region stem: plain convolutions and one max-pool
        conv1 = self._conv_bn(self.image, Conv1_chs, 3, stride=2, padding='VALID')
        conv2 = self._conv_bn(conv1, Conv2_chs, 3, padding='VALID')
        conv3 = self._conv_bn(conv2, Conv3_chs, 3, padding='SAME')
        pool1 = pool2d(conv3, pool_size=3, pool_stride=2, pool_type='max', pool_padding='SAME')
        conv4 = self._conv_bn(pool1, Conv4_chs, 3, padding='VALID')
        conv5 = self._conv_bn(conv4, Conv5_chs, 3, stride=2, padding='VALID')
        conv6 = self._conv_bn(conv5, Conv6_chs, 3, stride=1, padding='SAME')
        # endregion

        # region A modules (the last one downsamples)
        inception3a = self.InceptionV3_ModelA(conv6, Icp3a_size)
        inception3b = self.InceptionV3_ModelA(inception3a, Icp3b_size)
        inception3c = self.InceptionV3_ModelA(inception3b, Icp3c_size, downsample=True)
        # endregion

        # region B modules (the last one downsamples) and auxiliary head
        inception5a = self.InceptionV3_ModelB(inception3c, Icp5a_size)
        inception5b = self.InceptionV3_ModelB(inception5a, Icp5b_size)
        inception5c = self.InceptionV3_ModelB(inception5b, Icp5c_size)
        inception5d = self.InceptionV3_ModelB(inception5c, Icp5d_size)
        auxout = self.InceptionV1_Out(inception5d, 'auxout')
        inception5e = self.InceptionV3_ModelB(inception5d, Icp5e_size, downsample=True)
        # endregion

        # region C modules
        inception2a = self.InceptionV3_ModelC(inception5e, Icp2a_size)
        inception2b = self.InceptionV3_ModelC(inception2a, Icp2b_size)
        # endregion

        # region main classification head
        pool = pool2d(inception2b, pool_size=8, pool_stride=1, pool_type='max', pool_padding='VALID')
        flat = flatten(pool, axis=1)
        dropped = dropout(flat, 0.4)
        output = fc(dropped, Labels_nums, act='softmax', name='output')
        # endregion

        if self.structShow:
            # Dump every main-path stage in build order (the dropout output
            # and the auxiliary head are not listed).
            stages = (
                conv1, conv2, conv3, pool1, conv4, conv5, conv6,
                inception3a, inception3b, inception3c,
                inception5a, inception5b, inception5c, inception5d, inception5e,
                inception2a, inception2b,
                pool, flat, output,
            )
            for stage in stages:
                print(stage.name, stage.shape)

        # Both heads are always returned; the caller decides whether the
        # auxiliary output is used.
        return [output, auxout]
def ConvBNReLU(in_channels, out_channels, kernel_size, stride=1, padding=0):
    """Return a Conv2d -> BatchNorm2d -> ReLU6 stack wrapped in ``nn.Sequential``.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the convolution (and normalized).
        kernel_size: Convolution kernel size.
        stride: Convolution stride.
        padding: Convolution padding.

    Returns:
        An ``nn.Sequential`` whose items 0, 1, 2 are the convolution, the
        batch norm and the in-place ReLU6 respectively.
    """
    convolution = nn.Conv2d(in_channels, out_channels, kernel_size, stride=stride, padding=padding)
    normalization = nn.BatchNorm2d(out_channels)
    # Despite the function name, the activation is ReLU6 (clamped at 6).
    activation = nn.ReLU6(inplace=True)
    return nn.Sequential(convolution, normalization, activation)
def ConvBNReLUFactorization(in_channels, out_channels, kernel_sizes, paddings):
    """Return a stride-1 Conv2d -> BatchNorm2d -> ReLU6 stack for asymmetric kernels.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by the convolution (and normalized).
        kernel_sizes: Kernel size handed to ``nn.Conv2d`` (callers in this
            file pass pairs such as ``[1, 7]``).
        paddings: Padding handed to ``nn.Conv2d`` (e.g. ``[0, 3]``).

    Returns:
        An ``nn.Sequential`` whose items 0, 1, 2 are the convolution, the
        batch norm and the in-place ReLU6 respectively.
    """
    stages = [
        nn.Conv2d(in_channels, out_channels, kernel_sizes, stride=1, padding=paddings),
        nn.BatchNorm2d(out_channels),
        nn.ReLU6(inplace=True),
    ]
    return nn.Sequential(*stages)
class InceptionV3ModuleA(nn.Module):
    """Four-branch block: 1x1, 1x1 -> 5x5, 1x1 -> 3x3 -> 3x3 and max-pool -> 1x1.

    Every layer uses stride 1 with padding that matches its kernel, so all
    branches keep the spatial size. The output has
    ``out_channels1 + out_channels2 + out_channels3 + out_channels4`` channels.
    """

    def __init__(self, in_channels, out_channels1, out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super(InceptionV3ModuleA, self).__init__()

        # Branch 1: pointwise convolution.
        self.branch1 = ConvBNReLU(in_channels, out_channels1, 1)

        # Branch 2: pointwise reduction followed by a padded 5x5.
        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels2reduce, 1),
            ConvBNReLU(out_channels2reduce, out_channels2, 5, padding=2),
        )

        # Branch 3: pointwise reduction followed by two padded 3x3 convs.
        self.branch3 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels3reduce, 1),
            ConvBNReLU(out_channels3reduce, out_channels3, 3, padding=1),
            ConvBNReLU(out_channels3, out_channels3, 3, padding=1),
        )

        # Branch 4: size-preserving max-pool followed by a pointwise projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels, out_channels4, 1),
        )

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them along dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)
class InceptionV3ModuleB(nn.Module):
    """Four-branch block built around factorized 1x7 / 7x1 convolutions.

    Branches: 1x1; 1x1 -> 1x7 -> 7x1; 1x1 -> (1x7 -> 7x1) twice; and
    max-pool -> 1x1. All layers are stride 1 with padding matching the kernel,
    so the spatial size is kept. The output has
    ``out_channels1 + out_channels2 + out_channels3 + out_channels4`` channels.
    """

    def __init__(self, in_channels, out_channels1, out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super(InceptionV3ModuleB, self).__init__()
        mid2 = out_channels2reduce
        mid3 = out_channels3reduce

        # Branch 1: pointwise convolution.
        self.branch1 = ConvBNReLU(in_channels, out_channels1, 1)

        # Branch 2: one 1x7 / 7x1 pair; only the last conv widens to out_channels2.
        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels, mid2, 1),
            ConvBNReLUFactorization(mid2, mid2, kernel_sizes=[1, 7], paddings=[0, 3]),
            ConvBNReLUFactorization(mid2, out_channels2, kernel_sizes=[7, 1], paddings=[3, 0]),
        )

        # Branch 3: two 1x7 / 7x1 pairs; only the last conv widens to out_channels3.
        self.branch3 = nn.Sequential(
            ConvBNReLU(in_channels, mid3, 1),
            ConvBNReLUFactorization(mid3, mid3, kernel_sizes=[1, 7], paddings=[0, 3]),
            ConvBNReLUFactorization(mid3, mid3, kernel_sizes=[7, 1], paddings=[3, 0]),
            ConvBNReLUFactorization(mid3, mid3, kernel_sizes=[1, 7], paddings=[0, 3]),
            ConvBNReLUFactorization(mid3, out_channels3, kernel_sizes=[7, 1], paddings=[3, 0]),
        )

        # Branch 4: size-preserving max-pool followed by a pointwise projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels, out_channels4, 1),
        )

    def forward(self, x):
        """Run the four branches on ``x`` and concatenate them along dim 1."""
        branches = (self.branch1, self.branch2, self.branch3, self.branch4)
        return torch.cat([branch(x) for branch in branches], dim=1)
class InceptionV3ModuleC(nn.Module):
    """Four-branch block whose two middle branches fan out into 1x3 and 3x1 convs.

    Branches: 1x1; 1x1 -> {1x3, 3x1}; 1x1 -> 3x3 -> {3x1, 1x3}; and
    max-pool -> 1x1. Each fan-out pair is concatenated, so the output has
    ``out_channels1 + 2 * out_channels2 + 2 * out_channels3 + out_channels4``
    channels. All layers keep the spatial size.
    """

    def __init__(self, in_channels, out_channels1, out_channels2reduce, out_channels2, out_channels3reduce, out_channels3, out_channels4):
        super(InceptionV3ModuleC, self).__init__()

        # Branch 1: pointwise convolution.
        self.branch1 = ConvBNReLU(in_channels, out_channels1, 1)

        # Branch 2: shared pointwise reduction, then two parallel asymmetric convs.
        self.branch2_conv1 = ConvBNReLU(in_channels, out_channels2reduce, 1)
        self.branch2_conv2a = ConvBNReLUFactorization(
            out_channels2reduce, out_channels2, kernel_sizes=[1, 3], paddings=[0, 1])
        self.branch2_conv2b = ConvBNReLUFactorization(
            out_channels2reduce, out_channels2, kernel_sizes=[3, 1], paddings=[1, 0])

        # Branch 3: pointwise reduction and a 3x3, then two parallel asymmetric convs.
        self.branch3_conv1 = ConvBNReLU(in_channels, out_channels3reduce, 1)
        self.branch3_conv2 = ConvBNReLU(out_channels3reduce, out_channels3, 3, stride=1, padding=1)
        self.branch3_conv3a = ConvBNReLUFactorization(
            out_channels3, out_channels3, kernel_sizes=[3, 1], paddings=[1, 0])
        self.branch3_conv3b = ConvBNReLUFactorization(
            out_channels3, out_channels3, kernel_sizes=[1, 3], paddings=[0, 1])

        # Branch 4: size-preserving max-pool followed by a pointwise projection.
        self.branch4 = nn.Sequential(
            nn.MaxPool2d(kernel_size=3, stride=1, padding=1),
            ConvBNReLU(in_channels, out_channels4, 1),
        )

    def forward(self, x):
        """Run the branches on ``x`` and concatenate everything along dim 1."""
        pointwise = self.branch1(x)

        reduced2 = self.branch2_conv1(x)
        fan2 = torch.cat(
            [self.branch2_conv2a(reduced2), self.branch2_conv2b(reduced2)], dim=1)

        reduced3 = self.branch3_conv1(x)
        reduced3 = self.branch3_conv2(reduced3)
        fan3 = torch.cat(
            [self.branch3_conv3a(reduced3), self.branch3_conv3b(reduced3)], dim=1)

        pooled = self.branch4(x)
        return torch.cat([pointwise, fan2, fan3, pooled], dim=1)
class InceptionV3ModuleD(nn.Module):
    """Downsampling block: two strided conv branches plus a strided max-pool.

    Each branch ends in a 3x3 window with stride 2 and no padding. The pooling
    branch passes the input channels through, so the output has
    ``out_channels1 + out_channels2 + in_channels`` channels.
    """

    def __init__(self, in_channels, out_channels1reduce, out_channels1, out_channels2reduce, out_channels2):
        super(InceptionV3ModuleD, self).__init__()

        # Branch 1: pointwise reduction, then the strided 3x3.
        self.branch1 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels1reduce, 1),
            ConvBNReLU(out_channels1reduce, out_channels1, 3, stride=2),
        )

        # Branch 2: pointwise reduction, a padded 3x3, then the strided 3x3.
        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels2reduce, 1),
            ConvBNReLU(out_channels2reduce, out_channels2, 3, stride=1, padding=1),
            ConvBNReLU(out_channels2, out_channels2, 3, stride=2),
        )

        # Branch 3: strided max-pool applied directly to the input.
        self.branch3 = nn.MaxPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        """Run the three branches on ``x`` and concatenate them along dim 1."""
        branches = (self.branch1, self.branch2, self.branch3)
        return torch.cat([branch(x) for branch in branches], dim=1)
class InceptionV3ModuleE(nn.Module):
    """Downsampling block with a factorized 1x7 / 7x1 conv branch.

    Branches: 1x1 -> strided 3x3; 1x1 -> 1x7 -> 7x1 -> strided 3x3; and a
    strided max-pool. Each branch ends in a 3x3 window with stride 2 and no
    padding. The output has ``out_channels1 + out_channels2 + in_channels``
    channels.
    """

    def __init__(self, in_channels, out_channels1reduce, out_channels1, out_channels2reduce, out_channels2):
        super(InceptionV3ModuleE, self).__init__()
        mid2 = out_channels2reduce

        # Branch 1: pointwise reduction, then the strided 3x3.
        self.branch1 = nn.Sequential(
            ConvBNReLU(in_channels, out_channels1reduce, 1),
            ConvBNReLU(out_channels1reduce, out_channels1, 3, stride=2),
        )

        # Branch 2: pointwise reduction, a 1x7 / 7x1 pair, then the strided 3x3.
        self.branch2 = nn.Sequential(
            ConvBNReLU(in_channels, mid2, 1),
            ConvBNReLUFactorization(mid2, mid2, kernel_sizes=[1, 7], paddings=[0, 3]),
            ConvBNReLUFactorization(mid2, mid2, kernel_sizes=[7, 1], paddings=[3, 0]),
            ConvBNReLU(mid2, out_channels2, 3, stride=2),
        )

        # Branch 3: strided max-pool applied directly to the input.
        self.branch3 = nn.MaxPool2d(kernel_size=3, stride=2)

    def forward(self, x):
        """Run the three branches on ``x`` and concatenate them along dim 1."""
        branches = (self.branch1, self.branch2, self.branch3)
        return torch.cat([branch(x) for branch in branches], dim=1)
class InceptionAux(nn.Module):
    """Auxiliary classification head: avg-pool -> 1x1 conv -> 5x5 conv -> dropout -> linear.

    Args:
        in_channels: Channels of the feature map the head is attached to.
        out_channels: Number of values produced by the final linear layer.

    NOTE(review): the linear layer takes 768 features, so the 5x5 convolution
    must produce a 1x1 map -- presumably the head expects a 17x17 input; confirm.
    """

    def __init__(self, in_channels, out_channels):
        super(InceptionAux, self).__init__()
        self.auxiliary_avgpool = nn.AvgPool2d(kernel_size=5, stride=3)
        self.auxiliary_conv1 = ConvBNReLU(in_channels, 128, 1)
        # Plain convolution: no batch norm or activation follows it.
        self.auxiliary_conv2 = nn.Conv2d(128, 768, kernel_size=5, stride=1)
        self.auxiliary_dropout = nn.Dropout(p=0.7)
        self.auxiliary_linear1 = nn.Linear(768, out_channels)

    def forward(self, x):
        """Return the auxiliary head's linear output (no softmax applied) for ``x``."""
        pooled = self.auxiliary_avgpool(x)
        features = self.auxiliary_conv2(self.auxiliary_conv1(pooled))
        flat = features.view(features.size(0), -1)
        return self.auxiliary_linear1(self.auxiliary_dropout(flat))
class InceptionV3(nn.Module):
    """Inception-V3 style classifier built from the module classes in this file.

    Args:
        num_classes: Output size of the main linear layer (and of the
            auxiliary head when it exists).
        stage: When equal to ``'train'`` an auxiliary head is created and
            ``forward`` returns ``(aux, out)``; for any other value only
            ``out`` is returned.

    NOTE(review): the fixed 8x8 max-pool followed by ``Linear(2048, ...)``
    implies the last feature map must be 8x8 -- presumably 299x299 inputs;
    confirm.
    """

    def __init__(self, num_classes=1000, stage='train'):
        super(InceptionV3, self).__init__()
        self.stage = stage

        # Stem, part 1: three 3x3 convs and a strided max-pool.
        self.block1 = nn.Sequential(
            ConvBNReLU(3, 32, 3, stride=2),
            ConvBNReLU(32, 32, 3, stride=1),
            ConvBNReLU(32, 64, 3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Stem, part 2: two 3x3 convs and a strided max-pool.
        self.block2 = nn.Sequential(
            ConvBNReLU(64, 80, 3, stride=1),
            ConvBNReLU(80, 192, 3, stride=1, padding=1),
            nn.MaxPool2d(kernel_size=3, stride=2),
        )

        # Each tuple follows the module signature: (in_channels, out_channels1,
        # out_channels2reduce, out_channels2, out_channels3reduce,
        # out_channels3, out_channels4).
        module_a_cfgs = (
            (192, 64, 48, 64, 64, 96, 32),
            (256, 64, 48, 64, 64, 96, 64),
            (288, 64, 48, 64, 64, 96, 64),
        )
        self.block3 = nn.Sequential(
            *[InceptionV3ModuleA(*cfg) for cfg in module_a_cfgs]
        )

        module_b_cfgs = (
            (768, 192, 128, 192, 128, 192, 192),
            (768, 192, 160, 192, 160, 192, 192),
            (768, 192, 160, 192, 160, 192, 192),
            (768, 192, 192, 192, 192, 192, 192),
        )
        # One downsampling module, then the B modules.
        self.block4 = nn.Sequential(
            InceptionV3ModuleD(in_channels=288, out_channels1reduce=384, out_channels1=384,
                               out_channels2reduce=64, out_channels2=96),
            *[InceptionV3ModuleB(*cfg) for cfg in module_b_cfgs]
        )

        # The auxiliary head only exists in the 'train' stage.
        if self.stage == 'train':
            self.aux_logits = InceptionAux(in_channels=768, out_channels=num_classes)

        module_c_cfgs = (
            (1280, 320, 384, 384, 448, 384, 192),
            (2048, 320, 384, 384, 448, 384, 192),
        )
        # One downsampling module, then the C modules.
        self.block5 = nn.Sequential(
            InceptionV3ModuleE(in_channels=768, out_channels1reduce=192, out_channels1=320,
                               out_channels2reduce=192, out_channels2=192),
            *[InceptionV3ModuleC(*cfg) for cfg in module_c_cfgs]
        )

        self.max_pool = nn.MaxPool2d(kernel_size=8, stride=1)
        self.dropout = nn.Dropout(p=0.5)
        self.linear = nn.Linear(2048, num_classes)

    def forward(self, x):
        """Classify a batch of images.

        Returns:
            ``(aux, out)`` when ``self.stage == 'train'``, otherwise ``out``.
            ``aux`` is computed from the output of ``block4``; both values are
            raw linear-layer outputs (no softmax is applied here).
        """
        stem = self.block2(self.block1(x))
        mid = self.block4(self.block3(stem))

        pooled = self.max_pool(self.block5(mid))
        dropped = self.dropout(pooled)
        out = self.linear(dropped.view(dropped.size(0), -1))

        if self.stage != 'train':
            return out
        # The auxiliary head runs after the main head, on the block4 features.
        aux = self.aux_logits(mid)
        return aux, out