GoogLeNet中的基础卷积块叫作Inception块,得名于同名电影《盗梦空间》(Inception),其结构如下图所示。
由上图可以看出,Inception块里有4条并行的线路。前3条线路分别使用窗口大小为 $1\times 1$、$3\times 3$ 和 $5\times 5$ 的卷积层来抽取不同空间尺寸下的信息,其中中间2条线路会先对输入做 $1\times 1$ 卷积来减少输入通道数,以降低模型复杂度。第4条线路则使用 $3\times 3$ 最大池化层,后接 $1\times 1$ 卷积层来改变通道数。4条线路都使用了合适的填充来使输入与输出的高和宽一致。最后我们将每条线路的输出在通道维上连结,并输入接下来的层中去。
class Inception(tf.keras.Model):
    """Inception block: four parallel branches joined along the last axis.

    Args:
        c1: Filter count of the 1x1 convolution in branch 1.
        c2: Indexable pair; ``c2[0]`` is the filter count of the 1x1
            convolution and ``c2[1]`` that of the 3x3 convolution in branch 2.
        c3: Indexable pair; ``c3[0]`` is the filter count of the 1x1
            convolution and ``c3[1]`` that of the 5x5 convolution in branch 3.
        c4: Filter count of the 1x1 convolution that follows the 3x3
            max pooling in branch 4.
    """

    def __init__(self, c1, c2, c3, c4):
        super().__init__()

        def relu_conv(filters, kernel_size):
            # Every convolution of the block uses ReLU and 'same' padding.
            return tf.keras.layers.Conv2D(
                filters,
                kernel_size=kernel_size,
                activation='relu',
                padding='same',
            )

        # Branch 1: a single 1x1 convolution.
        self.conv1 = relu_conv(c1, 1)
        # Branch 2: 1x1 convolution followed by a 3x3 convolution.
        self.conv2_1 = relu_conv(c2[0], 1)
        self.conv2_2 = relu_conv(c2[1], 3)
        # Branch 3: 1x1 convolution followed by a 5x5 convolution.
        self.conv3_1 = relu_conv(c3[0], 1)
        self.conv3_2 = relu_conv(c3[1], 5)
        # Branch 4: 3x3 max pooling (stride 1) followed by a 1x1 convolution.
        self.pool4_1 = tf.keras.layers.MaxPool2D(
            pool_size=3, padding='same', strides=1)
        self.conv4_2 = relu_conv(c4, 1)

    def call(self, inputs):
        """Run the four branches on ``inputs`` and concatenate on axis -1."""
        branch_1x1 = self.conv1(inputs)

        reduced_for_3x3 = self.conv2_1(inputs)
        branch_3x3 = self.conv2_2(reduced_for_3x3)

        reduced_for_5x5 = self.conv3_1(inputs)
        branch_5x5 = self.conv3_2(reduced_for_5x5)

        pooled = self.pool4_1(inputs)
        branch_pool = self.conv4_2(pooled)

        return tf.concat(
            (branch_1x1, branch_3x3, branch_5x5, branch_pool), axis=-1)
class GoogLeNet(tf.keras.Model):
    """GoogLeNet built from a convolutional stem and nine Inception blocks.

    Args:
        num_classes: Number of units of the final ``Dense`` layer. Defaults
            to 10, the value that used to be hard-coded, so existing
            ``GoogLeNet()`` calls behave as before.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        # Sub-layers are created in forward order. The shape-printing loop
        # that feeds a tensor through ``net.layers`` one by one depends on
        # this order, so keep it when editing.

        # Stem: 7x7/2 conv, pool, 1x1 conv, 3x3 conv, pool.
        self.conv1 = tf.keras.layers.Conv2D(
            filters=64, kernel_size=7, strides=2,
            activation='relu', padding='same')
        self.pool1 = tf.keras.layers.MaxPool2D(
            pool_size=3, strides=2, padding='same')
        self.conv2 = tf.keras.layers.Conv2D(
            filters=64, kernel_size=1, activation='relu', padding='same')
        self.conv3 = tf.keras.layers.Conv2D(
            filters=192, kernel_size=3, activation='relu', padding='same')
        self.pool2 = tf.keras.layers.MaxPool2D(
            pool_size=3, strides=2, padding='same')

        # First Inception stage (two blocks) followed by a stride-2 pool.
        self.inception1 = Inception(64, (96, 128), (16, 32), 32)
        self.inception2 = Inception(128, (128, 192), (32, 96), 64)
        self.pool3 = tf.keras.layers.MaxPool2D(
            pool_size=3, strides=2, padding='same')

        # Second Inception stage (five blocks) followed by a stride-2 pool.
        self.inception3 = Inception(192, (96, 208), (16, 48), 64)
        self.inception4 = Inception(160, (112, 224), (24, 64), 64)
        self.inception5 = Inception(128, (128, 256), (24, 64), 64)
        self.inception6 = Inception(112, (144, 288), (32, 64), 64)
        self.inception7 = Inception(256, (160, 320), (32, 128), 128)
        self.pool4 = tf.keras.layers.MaxPool2D(
            pool_size=3, strides=2, padding='same')

        # Third Inception stage (two blocks), global pooling and classifier.
        self.inception8 = Inception(256, (160, 320), (32, 128), 128)
        self.inception9 = Inception(384, (192, 384), (48, 128), 128)
        self.gap = tf.keras.layers.GlobalAvgPool2D()
        self.dense = tf.keras.layers.Dense(num_classes)

    def call(self, inputs):
        """Apply every sub-layer in sequence and return the Dense output."""
        forward_order = (
            self.conv1, self.pool1,
            self.conv2, self.conv3, self.pool2,
            self.inception1, self.inception2, self.pool3,
            self.inception3, self.inception4, self.inception5,
            self.inception6, self.inception7, self.pool4,
            self.inception8, self.inception9,
            self.gap, self.dense,
        )
        x = inputs
        for layer in forward_order:
            x = layer(x)
        return x
# Instantiate the network and push one random 96x96 single-channel sample
# through its sub-layers one at a time, printing each output shape.
net = GoogLeNet()
X = tf.random.uniform(shape=(1, 96, 96, 1))
for stage in net.layers:
    X = stage(X)
    print(stage.name, 'output shape:\t', X.shape)
conv2d output shape: (1, 48, 48, 64)
max_pooling2d output shape: (1, 24, 24, 64)
conv2d_1 output shape: (1, 24, 24, 64)
conv2d_2 output shape: (1, 24, 24, 192)
max_pooling2d_1 output shape: (1, 12, 12, 192)
inception output shape: (1, 12, 12, 256)
inception_1 output shape: (1, 12, 12, 480)
max_pooling2d_4 output shape: (1, 6, 6, 480)
inception_2 output shape: (1, 6, 6, 512)
inception_3 output shape: (1, 6, 6, 512)
inception_4 output shape: (1, 6, 6, 512)
inception_5 output shape: (1, 6, 6, 528)
inception_6 output shape: (1, 6, 6, 832)
max_pooling2d_10 output shape: (1, 3, 3, 832)
inception_7 output shape: (1, 3, 3, 832)
inception_8 output shape: (1, 3, 3, 1024)
global_average_pooling2d output shape: (1, 1024)
dense output shape: (1, 10)
def Inception(c1, c2, c3, c4, input_):
    """Build an Inception block as a functional ``tf.keras.Model``.

    Args:
        c1: Filter count of the 1x1 convolution in branch 1.
        c2: Indexable pair; ``c2[0]`` filters for the 1x1 convolution and
            ``c2[1]`` filters for the 3x3 convolution in branch 2.
        c3: Indexable pair; ``c3[0]`` filters for the 1x1 convolution and
            ``c3[1]`` filters for the 5x5 convolution in branch 3.
        c4: Filter count of the 1x1 convolution after the max pooling in
            branch 4.
        input_: Object with a ``.shape`` attribute. Only ``shape[1:]`` is
            read (the leading dimension is dropped) to declare the input of
            the returned model.

    Returns:
        A ``tf.keras.Model`` mapping an input of shape ``input_.shape[1:]``
        to the concatenation of the four branch outputs on the last axis.
    """
    inputs = tf.keras.layers.Input(shape=input_.shape[1:])

    # Branch 1: a single 1x1 convolution.
    x1 = tf.keras.layers.Conv2D(
        c1, kernel_size=1, activation='relu', padding='same')(inputs)

    # Branch 2: 1x1 convolution followed by a 3x3 convolution.
    x2_1 = tf.keras.layers.Conv2D(
        c2[0], kernel_size=1, activation='relu', padding='same')(inputs)
    x2_2 = tf.keras.layers.Conv2D(
        c2[1], kernel_size=3, activation='relu', padding='same')(x2_1)

    # Branch 3: 1x1 convolution followed by a 5x5 convolution.
    x3_1 = tf.keras.layers.Conv2D(
        c3[0], kernel_size=1, activation='relu', padding='same')(inputs)
    x3_2 = tf.keras.layers.Conv2D(
        c3[1], kernel_size=5, activation='relu', padding='same')(x3_1)

    # Branch 4: 3x3 max pooling (stride 1) followed by a 1x1 convolution.
    x4_1 = tf.keras.layers.MaxPool2D(
        pool_size=3, padding='same', strides=1)(inputs)
    x4_2 = tf.keras.layers.Conv2D(
        c4, kernel_size=1, activation='relu', padding='same')(x4_1)

    # Use a Keras layer rather than the raw ``tf.concat`` op: the tensors
    # here are symbolic functional-API tensors, and a proper layer keeps the
    # graph made only of Keras layers (plottable, serializable).
    outputs = tf.keras.layers.Concatenate(axis=-1)([x1, x2_2, x3_2, x4_2])
    return tf.keras.Model(inputs=inputs, outputs=outputs)
Input 层的 shape 参数不包含第一个维度(即 batch_size),因此上面用 input_.shape[1:] 去掉了它。
构建Inception函数后,可以绘制Inception结构。
# Build one functional Inception block for a 12x12x192 input and draw its
# layer graph with per-layer shapes.
X = tf.random.uniform(shape=(1, 12, 12, 192))
inception = Inception(c1=64, c2=(96, 128), c3=(16, 32), c4=32, input_=X)
tf.keras.utils.plot_model(inception, show_shapes=True, dpi=64)
用类绘制Inception结构的时候画不出这样的效果,如果有大佬知道怎么搞请告诉我一下……
def GoogLeNet(input_):
    """Build GoogLeNet as a functional ``tf.keras.Model``.

    Args:
        input_: Object with a ``.shape`` attribute; ``shape[1:]`` (everything
            but the leading dimension) becomes the model's input shape.

    Returns:
        A ``tf.keras.Model`` ending in a 10-unit ``Dense`` layer.
    """
    layers = tf.keras.layers

    def downsample(tensor):
        # 3x3 max pooling with stride 2, used between all stages.
        return layers.MaxPool2D(pool_size=3, strides=2, padding='same')(tensor)

    inputs = layers.Input(shape=input_.shape[1:])

    # Stem: 7x7/2 conv, pool, 1x1 conv, 3x3 conv.
    x = layers.Conv2D(filters=64, kernel_size=7, strides=2,
                      activation='relu', padding='same')(inputs)
    x = downsample(x)
    x = layers.Conv2D(filters=64, kernel_size=1,
                      activation='relu', padding='same')(x)
    x = layers.Conv2D(filters=192, kernel_size=3,
                      activation='relu', padding='same')(x)

    # (c1, c2, c3, c4) per Inception block, grouped by stage; every stage is
    # preceded by one stride-2 pooling.
    inception_stages = (
        (
            (64, (96, 128), (16, 32), 32),
            (128, (128, 192), (32, 96), 64),
        ),
        (
            (192, (96, 208), (16, 48), 64),
            (160, (112, 224), (24, 64), 64),
            (128, (128, 256), (24, 64), 64),
            (112, (144, 288), (32, 64), 64),
            (256, (160, 320), (32, 128), 128),
        ),
        (
            (256, (160, 320), (32, 128), 128),
            (384, (192, 384), (48, 128), 128),
        ),
    )
    for stage in inception_stages:
        x = downsample(x)
        for c1, c2, c3, c4 in stage:
            # The block is sized from the current tensor, then applied to it.
            x = Inception(c1, c2, c3, c4, x)(x)

    x = layers.GlobalAvgPool2D()(x)
    outputs = layers.Dense(10)(x)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
当然,由于大多数时候我们使用 GoogLeNet 来分类图片,输入数据的通道数通常为3(灰度图为1),因此 GoogLeNet 模型也可以定义为(注意:下面的 Inception 只接收4个参数,指的是用类定义的版本):
def GoogLeNet():
    """Build GoogLeNet as a functional model for 3-channel images.

    The input is declared as ``(None, None, 3)``, i.e. height and width are
    left unspecified. ``Inception`` is called with four arguments here.

    Returns:
        A ``tf.keras.Model`` ending in a 10-unit ``Dense`` layer.
    """
    layers = tf.keras.layers

    def downsample():
        # 3x3 max pooling with stride 2, used between all stages.
        return layers.MaxPool2D(pool_size=3, strides=2, padding='same')

    inputs = layers.Input(shape=(None, None, 3))

    # All layers are instantiated first, in forward order, then applied.
    pipeline = [
        layers.Conv2D(filters=64, kernel_size=7, strides=2,
                      activation='relu', padding='same'),
        downsample(),
        layers.Conv2D(filters=64, kernel_size=1,
                      activation='relu', padding='same'),
        layers.Conv2D(filters=192, kernel_size=3,
                      activation='relu', padding='same'),
    ]

    # (c1, c2, c3, c4) per Inception block, grouped by stage; every stage is
    # preceded by one stride-2 pooling.
    inception_stages = (
        (
            (64, (96, 128), (16, 32), 32),
            (128, (128, 192), (32, 96), 64),
        ),
        (
            (192, (96, 208), (16, 48), 64),
            (160, (112, 224), (24, 64), 64),
            (128, (128, 256), (24, 64), 64),
            (112, (144, 288), (32, 64), 64),
            (256, (160, 320), (32, 128), 128),
        ),
        (
            (256, (160, 320), (32, 128), 128),
            (384, (192, 384), (48, 128), 128),
        ),
    )
    for stage in inception_stages:
        pipeline.append(downsample())
        for block_args in stage:
            pipeline.append(Inception(*block_args))
    pipeline.append(layers.GlobalAvgPool2D())

    features = inputs
    for step in pipeline:
        features = step(features)
    outputs = layers.Dense(10)(features)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
# The GoogLeNet() variant defined just above takes no arguments and declares
# a 3-channel input, so build it without arguments and feed it a random
# 96x96 sample with 3 channels.
net = GoogLeNet()
X = tf.random.uniform(shape=(1, 96, 96, 3))
net(X)
<tf.Tensor: id=7428, shape=(1, 10), dtype=float32, numpy=
array([[ 0.00416765, -0.00549915, 0.00206655, -0.00362523, -0.00011301,
0.00076362, 0.00382673, 0.00137803, -0.00135476, 0.00257596]],
dtype=float32)>
class Inception(tf.keras.Model):
    """Inception block with four parallel branches concatenated on axis -1.

    Args:
        c1: Filter count of the 1x1 convolution in branch 1.
        c2: Indexable pair; ``c2[0]`` filters for the 1x1 convolution and
            ``c2[1]`` filters for the 3x3 convolution in branch 2.
        c3: Indexable pair; ``c3[0]`` filters for the 1x1 convolution and
            ``c3[1]`` filters for the 5x5 convolution in branch 3.
        c4: Filter count of the 1x1 convolution after the 3x3 max pooling
            in branch 4.
    """

    def __init__(self, c1, c2, c3, c4):
        super().__init__()
        Conv2D = tf.keras.layers.Conv2D
        # Options shared by every convolution of the block.
        shared = dict(activation='relu', padding='same')

        # Branch 1.
        self.conv1 = Conv2D(c1, kernel_size=1, **shared)
        # Branch 2: channel reduction, then 3x3.
        self.conv2_1 = Conv2D(c2[0], kernel_size=1, **shared)
        self.conv2_2 = Conv2D(c2[1], kernel_size=3, **shared)
        # Branch 3: channel reduction, then 5x5.
        self.conv3_1 = Conv2D(c3[0], kernel_size=1, **shared)
        self.conv3_2 = Conv2D(c3[1], kernel_size=5, **shared)
        # Branch 4: stride-1 max pooling, then 1x1.
        self.pool4_1 = tf.keras.layers.MaxPool2D(
            pool_size=3, padding='same', strides=1)
        self.conv4_2 = Conv2D(c4, kernel_size=1, **shared)

    def call(self, inputs):
        """Evaluate the branches in order and join them on the last axis."""
        branches = (
            self.conv1(inputs),
            self.conv2_2(self.conv2_1(inputs)),
            self.conv3_2(self.conv3_1(inputs)),
            self.conv4_2(self.pool4_1(inputs)),
        )
        return tf.concat(branches, axis=-1)
def GoogLeNet(input_):
    """Build GoogLeNet as a functional model sized from ``input_``.

    Args:
        input_: Object with a ``.shape`` attribute; ``shape[1:]`` (everything
            but the leading dimension) becomes the model's input shape.

    Returns:
        A ``tf.keras.Model`` ending in a 10-unit ``Dense`` layer.
    """
    layers = tf.keras.layers

    def downsample():
        # 3x3 max pooling with stride 2, used between all stages.
        return layers.MaxPool2D(pool_size=3, strides=2, padding='same')

    inputs = layers.Input(shape=input_.shape[1:])

    # Stem; all layers are instantiated in forward order before being applied.
    pipeline = [
        layers.Conv2D(filters=64, kernel_size=7, strides=2,
                      activation='relu', padding='same'),
        downsample(),
        layers.Conv2D(filters=64, kernel_size=1,
                      activation='relu', padding='same'),
        layers.Conv2D(filters=192, kernel_size=3,
                      activation='relu', padding='same'),
        downsample(),
    ]

    # First Inception stage.
    pipeline += [
        Inception(64, (96, 128), (16, 32), 32),
        Inception(128, (128, 192), (32, 96), 64),
        downsample(),
    ]

    # Second Inception stage.
    pipeline += [
        Inception(192, (96, 208), (16, 48), 64),
        Inception(160, (112, 224), (24, 64), 64),
        Inception(128, (128, 256), (24, 64), 64),
        Inception(112, (144, 288), (32, 64), 64),
        Inception(256, (160, 320), (32, 128), 128),
        downsample(),
    ]

    # Third Inception stage and global average pooling.
    pipeline += [
        Inception(256, (160, 320), (32, 128), 128),
        Inception(384, (192, 384), (48, 128), 128),
        layers.GlobalAvgPool2D(),
    ]

    features = inputs
    for step in pipeline:
        features = step(features)
    outputs = layers.Dense(10)(features)
    return tf.keras.Model(inputs=inputs, outputs=outputs)
# Create the sample first: GoogLeNet(X) reads X.shape to size the model, so
# X must exist (with the intended shape) before the model is built.
X = tf.random.uniform(shape=(1, 96, 96, 1))
net = GoogLeNet(X)
net(X)
<tf.Tensor: id=12274, shape=(1, 10), dtype=float32, numpy=
array([[-0.00310602, -0.00069746, -0.00513642, 0.00356312, -0.00862163,
-0.00839284, -0.00085803, -0.00360573, -0.0004233 , 0.00441669]],
dtype=float32)>