efficientdet = EfficientDet()
1.1 EfficientNet()  # backbone (EfficientNetB0..B7, chosen by phi): initial feature extraction
1.2 fpn_features = build_wBiFPN(fpn_features, fpn_num_filters[phi], i, freeze_bn=freeze_bn)  # BiFPN: further feature fusion
1.3.1 classification = [class_net.call([feature, i]) for i, feature in enumerate(fpn_features)]  # classification head
1.3.2 regression = [box_net.call([feature, i]) for i, feature in enumerate(fpn_features)]  # regression head
(1) Efficientdet()
input
↓
backbones()
↓
build_wBiFPN() or build_BiFPN()
↓
ClassNet() and BoxNet()
↓
regression, classification
# Assumed imports (defined elsewhere in the repo): layers and models from keras,
# the EfficientNetB0..B7 backbones, build_wBiFPN/build_BiFPN, BoxNet and ClassNet.
def Efficientdet(phi, num_classes=20, num_anchors=9, freeze_bn=False):
    assert phi in range(8)
    fpn_num_filters = [64, 88, 112, 160, 224, 288, 384, 384]
    fpn_cell_repeats = [3, 4, 5, 6, 7, 7, 8, 8]
    box_class_repeats = [3, 3, 3, 4, 4, 4, 5, 5]
    image_sizes = [512, 640, 768, 896, 1024, 1280, 1408, 1536]
    backbones = [EfficientNetB0, EfficientNetB1, EfficientNetB2,
                 EfficientNetB3, EfficientNetB4, EfficientNetB5, EfficientNetB6, EfficientNetB7]
    input_size = image_sizes[phi]
    input_shape = (input_size, input_size, 3)
    image_input = layers.Input(input_shape)
    features = backbones[phi](input_tensor=image_input, freeze_bn=freeze_bn)
    fpn_features = features
    if phi < 6:
        for i in range(fpn_cell_repeats[phi]):
            fpn_features = build_wBiFPN(fpn_features, fpn_num_filters[phi], i, freeze_bn=freeze_bn)
    else:
        for i in range(fpn_cell_repeats[phi]):
            fpn_features = build_BiFPN(fpn_features, fpn_num_filters[phi], i, freeze_bn=freeze_bn)
    box_net = BoxNet(fpn_num_filters[phi], box_class_repeats[phi], num_anchors=num_anchors, freeze_bn=freeze_bn,
                     name='box_net')
    class_net = ClassNet(fpn_num_filters[phi], box_class_repeats[phi], num_classes=num_classes,
                         num_anchors=num_anchors, freeze_bn=freeze_bn, name='class_net')
    classification = [class_net.call([feature, i]) for i, feature in enumerate(fpn_features)]
    classification = layers.Concatenate(axis=1, name='classification')(classification)
    regression = [box_net.call([feature, i]) for i, feature in enumerate(fpn_features)]
    regression = layers.Concatenate(axis=1, name='regression')(regression)
    model = models.Model(inputs=[image_input], outputs=[regression, classification], name='efficientdet')
    return model
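A quick smoke test of the assembled model (a sketch, not from the repo): for phi=0 the input is 512x512, the pyramid levels P3..P7 have strides 8..128, and each cell predicts num_anchors boxes, so both heads should emit (64^2 + 32^2 + 16^2 + 8^2 + 4^2) * 9 = 49104 rows.

import numpy as np

model = Efficientdet(phi=0, num_classes=20)
dummy = np.zeros((1, 512, 512, 3), dtype='float32')
regression, classification = model.predict(dummy)
print(regression.shape)      # (1, 49104, 4)
print(classification.shape)  # (1, 49104, 20)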
(2) EfficientNet()
input[-1,h,w,3]
↓
Conv2D
BatchNormalization
Activation
↓
mb_conv_block
↓
features (…, C3, C4, C5)
# C3, C4, C5 are the results of downsampling the input 3, 4 and 5 times
# (strides 8, 16, 32); build_wBiFPN unpacks them as `_, _, C3, C4, C5 = features`.
# example: EfficientNetB0
def EfficientNetB0(include_top=True,
                   weights='imagenet',
                   input_tensor=None,
                   input_shape=None,
                   pooling=None,
                   classes=1000,
                   **kwargs):
    return EfficientNet(1.0, 1.0, 224, 0.2,
                        model_name='efficientnet-b0',
                        include_top=include_top, weights=weights,
                        input_tensor=input_tensor, input_shape=input_shape,
                        pooling=pooling, classes=classes,
                        **kwargs)
# Assumed imports: layers from keras, plus the standard `string` module;
# round_filters, round_repeats, mb_conv_block, get_swish, CONV_KERNEL_INITIALIZER
# and DEFAULT_BLOCKS_ARGS are defined elsewhere in the repo.
def EfficientNet(width_coefficient,
                 depth_coefficient,
                 default_resolution,
                 dropout_rate=0.2,
                 drop_connect_rate=0.2,
                 depth_divisor=8,
                 blocks_args=DEFAULT_BLOCKS_ARGS,
                 model_name='efficientnet',
                 include_top=True,
                 weights='imagenet',
                 input_tensor=None,
                 input_shape=None,
                 pooling=None,
                 classes=1000,
                 freeze_bn=False,
                 **kwargs):
    features = []
    if input_tensor is None:
        img_input = layers.Input(shape=input_shape)
    else:
        img_input = input_tensor
    bn_axis = 3
    activation = get_swish(**kwargs)  # swish: x * sigmoid(x)
    # Build stem
    x = img_input
    x = layers.Conv2D(round_filters(32, width_coefficient, depth_divisor), 3,
                      strides=(2, 2),
                      padding='same',
                      use_bias=False,
                      kernel_initializer=CONV_KERNEL_INITIALIZER,
                      name='stem_conv')(x)
    # x = BatchNormalization(freeze=freeze_bn, axis=bn_axis, name='stem_bn')(x)
    x = layers.BatchNormalization(axis=bn_axis, name='stem_bn')(x)
    x = layers.Activation(activation, name='stem_activation')(x)
    # Build blocks
    num_blocks_total = sum(block_args.num_repeat for block_args in blocks_args)
    block_num = 0
    for idx, block_args in enumerate(blocks_args):
        assert block_args.num_repeat > 0
        # Update block input and output filters based on depth multiplier.
        block_args = block_args._replace(
            input_filters=round_filters(block_args.input_filters,
                                        width_coefficient, depth_divisor),
            output_filters=round_filters(block_args.output_filters,
                                         width_coefficient, depth_divisor),
            num_repeat=round_repeats(block_args.num_repeat, depth_coefficient))
        # The first block needs to take care of stride and filter size increase.
        drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
        x = mb_conv_block(x, block_args,
                          activation=activation,
                          drop_rate=drop_rate,
                          prefix='block{}a_'.format(idx + 1),
                          freeze_bn=freeze_bn
                          )
        block_num += 1
        if block_args.num_repeat > 1:
            # pylint: disable=protected-access
            block_args = block_args._replace(
                input_filters=block_args.output_filters, strides=[1, 1])
            # pylint: enable=protected-access
            for bidx in range(block_args.num_repeat - 1):
                drop_rate = drop_connect_rate * float(block_num) / num_blocks_total
                block_prefix = 'block{}{}_'.format(
                    idx + 1,
                    string.ascii_lowercase[bidx + 1]
                )
                x = mb_conv_block(x, block_args,
                                  activation=activation,
                                  drop_rate=drop_rate,
                                  prefix=block_prefix,
                                  freeze_bn=freeze_bn
                                  )
                block_num += 1
        # Collect a feature map whenever the next stage downsamples, plus the last one.
        if idx < len(blocks_args) - 1 and blocks_args[idx + 1].strides[0] == 2:
            features.append(x)
        elif idx == len(blocks_args) - 1:
            features.append(x)
    return features
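round_filters and round_repeats are used above but not shown; here is a sketch of the standard EfficientNet scaling helpers (assumed to match the repo, since its versions are not reproduced here): the width is scaled and rounded to a multiple of depth_divisor without dropping below 90% of the scaled value, and the depth is scaled and rounded up.

import math

def round_filters(filters, width_coefficient, depth_divisor):
    # Scale the channel count, then round to the nearest multiple of depth_divisor.
    filters *= width_coefficient
    new_filters = int(filters + depth_divisor / 2) // depth_divisor * depth_divisor
    new_filters = max(depth_divisor, new_filters)
    if new_filters < 0.9 * filters:  # never round down by more than 10%
        new_filters += depth_divisor
    return int(new_filters)

def round_repeats(repeats, depth_coefficient):
    # Scale the repeat count, rounding up.
    return int(math.ceil(depth_coefficient * repeats))

print(round_filters(32, 1.4, 8))  # 48 (B4-style width scaling)
print(round_repeats(2, 1.8))      # 4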
(3) build_wBiFPN()
a. Architecture
if id == 0:
↓
C3, C4, C5 = features
C3-->Conv2D+BatchNormalization-->P3_in
C4-->Conv2D+BatchNormalization-->P4_in_1
C4-->Conv2D+BatchNormalization-->P4_in_2
C5-->Conv2D+BatchNormalization-->P5_in_1
C5-->Conv2D+BatchNormalization-->P5_in_2
C5-->Conv2D+BatchNormalization+MaxPooling2D-->P6_in
P6_in-->MaxPooling2D-->P7_in
Build the wBiFPN top-down (upsampling) path:
P7_in-->UpSampling2D-->P7_U
[P6_in, P7_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P6_td
P6_td-->UpSampling2D-->P6_U
[P5_in_1, P6_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P5_td
P5_td-->UpSampling2D-->P5_U
[P4_in_1, P5_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P4_td
P4_td-->UpSampling2D-->P4_U
[P3_in, P4_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P3_out
Build the wBiFPN bottom-up (downsampling) path:
P3_out-->MaxPooling2D-->P3_D
[P4_in_2, P4_td, P3_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P4_out
P4_out-->MaxPooling2D-->P4_D
[P5_in_2, P5_td, P4_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P5_out
P5_out-->MaxPooling2D-->P5_D
[P6_in, P6_td, P5_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P6_out
P6_out-->MaxPooling2D-->P6_D
[P7_in, P6_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P7_out
↓
else:
P3_in, P4_in, P5_in, P6_in, P7_in = features
↓
P7_in-->UpSampling2D-->P7_U
[P6_in, P7_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P6_td
P6_td-->UpSampling2D-->P6_U
[P5_in, P6_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P5_td
P5_td-->UpSampling2D-->P5_U
[P4_in, P5_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P4_td
P4_td-->UpSampling2D-->P4_U
[P3_in, P4_U]-->wBiFPNAdd+Activation+SeparableConvBlock-->P3_out
P3_out-->MaxPooling2D-->P3_D
[P4_in, P4_td, P3_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P4_out
P4_out-->MaxPooling2D-->P4_D
[P5_in, P5_td, P4_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P5_out
P5_out-->MaxPooling2D-->P5_D
[P6_in, P6_td, P5_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P6_out
P6_out-->MaxPooling2D-->P6_D
[P7_in, P6_D]-->wBiFPNAdd+Activation+SeparableConvBlock-->P7_out
↓
P3_out, P4_out, P5_out, P6_out, P7_out
b. Code
def build_wBiFPN(features, num_channels, id, freeze_bn=False):
    if id == 0:
        _, _, C3, C4, C5 = features
        # The first BiFPN cell needs downsampling and channel reduction to get
        # P3_in, P4_in, P5_in, P6_in, P7_in.
        #---------------------- downsample & reduce channels ---------------------#
        P3_in = C3
        P3_in = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                              name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/conv2d')(P3_in)
        P3_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                          name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/bn')(P3_in)
        P4_in = C4
        P4_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/conv2d')(P4_in)
        P4_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/bn')(P4_in_1)
        P4_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/conv2d')(P4_in)
        P4_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/bn')(P4_in_2)
        P5_in = C5
        P5_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/conv2d')(P5_in)
        P5_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/bn')(P5_in_1)
        P5_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/conv2d')(P5_in)
        P5_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/bn')(P5_in_2)
        P6_in = layers.Conv2D(num_channels, kernel_size=1, padding='same', name='resample_p6/conv2d')(C5)
        P6_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name='resample_p6/bn')(P6_in)
        P6_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p6/maxpool')(P6_in)
        P7_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p7/maxpool')(P6_in)
        #-------------------------------------------------------------------------#
        #-------------- build the wBiFPN top-down and bottom-up paths ------------#
        P7_U = layers.UpSampling2D()(P7_in)
        P6_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U])
        P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td)
        P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td)
        P6_U = layers.UpSampling2D()(P6_td)
        P5_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in_1, P6_U])
        P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td)
        P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td)
        P5_U = layers.UpSampling2D()(P5_td)
        P4_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in_1, P5_U])
        P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td)
        P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td)
        P4_U = layers.UpSampling2D()(P4_td)
        P3_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U])
        P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out)
        P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out)
        P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out)
        P4_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in_2, P4_td, P3_D])
        P4_out = layers.Activation(lambda x: tf.nn.swish(x))(P4_out)
        P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out)
        P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out)
        P5_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in_2, P5_td, P4_D])
        P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out)
        P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out)
        P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out)
        P6_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D])
        P6_out = layers.Activation(lambda x: tf.nn.swish(x))(P6_out)
        P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out)
        P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out)
        P7_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D])
        P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out)
        P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out)
    else:
        P3_in, P4_in, P5_in, P6_in, P7_in = features
        P7_U = layers.UpSampling2D()(P7_in)
        P6_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U])
        P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td)
        P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td)
        P6_U = layers.UpSampling2D()(P6_td)
        P5_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in, P6_U])
        P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td)
        P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td)
        P5_U = layers.UpSampling2D()(P5_td)
        P4_td = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in, P5_U])
        P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td)
        P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td)
        P4_U = layers.UpSampling2D()(P4_td)
        P3_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U])
        P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out)
        P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out)
        P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out)
        P4_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in, P4_td, P3_D])
        P4_out = layers.Activation(lambda x: tf.nn.swish(x))(P4_out)
        P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out)
        P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out)
        P5_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in, P5_td, P4_D])
        P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out)
        P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out)
        P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out)
        P6_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D])
        P6_out = layers.Activation(lambda x: tf.nn.swish(x))(P6_out)
        P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out)
        P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out)
        P7_out = wBiFPNAdd(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D])
        P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out)
        P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out)
    return [P3_out, P4_out, P5_out, P6_out, P7_out]
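wBiFPNAdd is the "fast normalized fusion" from the EfficientDet paper: each input feature map gets a learnable weight w_i, kept non-negative with ReLU, and the output is sum(w_i * x_i) / (sum(w_i) + eps). A minimal sketch of such a layer (the repo defines its own; details like the initializer are assumptions):

import tensorflow as tf
from tensorflow.keras import layers, initializers

class wBiFPNAdd(layers.Layer):
    def __init__(self, epsilon=1e-4, **kwargs):
        super().__init__(**kwargs)
        self.epsilon = epsilon

    def build(self, input_shape):
        num_in = len(input_shape)
        # One scalar weight per input branch.
        self.w = self.add_weight(name='fusion_weights', shape=(num_in,),
                                 initializer=initializers.Constant(1 / num_in),
                                 trainable=True)

    def call(self, inputs):
        w = tf.nn.relu(self.w)  # keep the fusion weights non-negative
        x = tf.reduce_sum([w[i] * inputs[i] for i in range(len(inputs))], axis=0)
        return x / (tf.reduce_sum(w) + self.epsilon)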
(4) build_BiFPN()
a. Architecture
if id == 0:
↓
C3, C4, C5 = features
C3-->Conv2D+BatchNormalization-->P3_in
C4-->Conv2D+BatchNormalization-->P4_in_1
C4-->Conv2D+BatchNormalization-->P4_in_2
C5-->Conv2D+BatchNormalization-->P5_in_1
C5-->Conv2D+BatchNormalization-->P5_in_2
C5-->Conv2D+BatchNormalization+MaxPooling2D-->P6_in
P6_in-->MaxPooling2D-->P7_in
Build the BiFPN top-down (upsampling) path:
P7_in-->UpSampling2D-->P7_U
[P6_in, P7_U]-->Add+Activation+SeparableConvBlock-->P6_td
P6_td-->UpSampling2D-->P6_U
[P5_in_1, P6_U]-->Add+Activation+SeparableConvBlock-->P5_td
P5_td-->UpSampling2D-->P5_U
[P4_in_1, P5_U]-->Add+Activation+SeparableConvBlock-->P4_td
P4_td-->UpSampling2D-->P4_U
[P3_in, P4_U]-->Add+Activation+SeparableConvBlock-->P3_out
Build the BiFPN bottom-up (downsampling) path:
P3_out-->MaxPooling2D-->P3_D
[P4_in_2, P4_td, P3_D]-->Add+Activation+SeparableConvBlock-->P4_out
P4_out-->MaxPooling2D-->P4_D
[P5_in_2, P5_td, P4_D]-->Add+Activation+SeparableConvBlock-->P5_out
P5_out-->MaxPooling2D-->P5_D
[P6_in, P6_td, P5_D]-->Add+Activation+SeparableConvBlock-->P6_out
P6_out-->MaxPooling2D-->P6_D
[P7_in, P6_D]-->Add+Activation+SeparableConvBlock-->P7_out
↓
else:
P3_in, P4_in, P5_in, P6_in, P7_in = features
↓
P7_in-->UpSampling2D-->P7_U
[P6_in, P7_U]-->Add+Activation+SeparableConvBlock-->P6_td
P6_td-->UpSampling2D-->P6_U
[P5_in, P6_U]-->Add+Activation+SeparableConvBlock-->P5_td
P5_td-->UpSampling2D-->P5_U
[P4_in, P5_U]-->Add+Activation+SeparableConvBlock-->P4_td
P4_td-->UpSampling2D-->P4_U
[P3_in, P4_U]-->Add+Activation+SeparableConvBlock-->P3_out
P3_out-->MaxPooling2D-->P3_D
[P4_in, P4_td, P3_D]-->Add+Activation+SeparableConvBlock-->P4_out
P4_out-->MaxPooling2D-->P4_D
[P5_in, P5_td, P4_D]-->Add+Activation+SeparableConvBlock-->P5_out
P5_out-->MaxPooling2D-->P5_D
[P6_in, P6_td, P5_D]-->Add+Activation+SeparableConvBlock-->P6_out
P6_out-->MaxPooling2D-->P6_D
[P7_in, P6_D]-->Add+Activation+SeparableConvBlock-->P7_out
↓
P3_out, P4_out, P5_out, P6_out, P7_out
b. Code
def build_BiFPN(features, num_channels, id, freeze_bn=False):
    if id == 0:
        # The first BiFPN cell needs downsampling and channel reduction to get
        # P3_in, P4_in, P5_in, P6_in, P7_in.
        #---------------------- downsample & reduce channels ---------------------#
        _, _, C3, C4, C5 = features
        P3_in = C3
        P3_in = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                              name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/conv2d')(P3_in)
        P3_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                          name=f'fpn_cells/cell_{id}/fnode3/resample_0_0_8/bn')(P3_in)
        P4_in = C4
        P4_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/conv2d')(P4_in)
        P4_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode2/resample_0_1_7/bn')(P4_in_1)
        P4_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/conv2d')(P4_in)
        P4_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode4/resample_0_1_9/bn')(P4_in_2)
        P5_in = C5
        P5_in_1 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/conv2d')(P5_in)
        P5_in_1 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode1/resample_0_2_6/bn')(P5_in_1)
        P5_in_2 = layers.Conv2D(num_channels, kernel_size=1, padding='same',
                                name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/conv2d')(P5_in)
        P5_in_2 = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                            name=f'fpn_cells/cell_{id}/fnode5/resample_0_2_10/bn')(P5_in_2)
        P6_in = layers.Conv2D(num_channels, kernel_size=1, padding='same', name='resample_p6/conv2d')(C5)
        P6_in = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name='resample_p6/bn')(P6_in)
        P6_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p6/maxpool')(P6_in)
        P7_in = layers.MaxPooling2D(pool_size=3, strides=2, padding='same', name='resample_p7/maxpool')(P6_in)
        #-------------------------------------------------------------------------#
        #--------------- build the BiFPN top-down and bottom-up paths ------------#
        P7_U = layers.UpSampling2D()(P7_in)
        P6_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U])
        P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td)
        P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td)
        P6_U = layers.UpSampling2D()(P6_td)
        P5_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in_1, P6_U])
        P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td)
        P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td)
        P5_U = layers.UpSampling2D()(P5_td)
        P4_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in_1, P5_U])
        P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td)
        P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td)
        P4_U = layers.UpSampling2D()(P4_td)
        P3_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U])
        P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out)
        P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out)
        P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out)
        P4_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in_2, P4_td, P3_D])
        P4_out = layers.Activation(lambda x: tf.nn.swish(x))(P4_out)
        P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out)
        P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out)
        P5_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in_2, P5_td, P4_D])
        P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out)
        P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out)
        P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out)
        P6_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D])
        P6_out = layers.Activation(lambda x: tf.nn.swish(x))(P6_out)
        P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out)
        P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out)
        P7_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D])
        P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out)
        P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out)
    else:
        P3_in, P4_in, P5_in, P6_in, P7_in = features
        P7_U = layers.UpSampling2D()(P7_in)
        P6_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode0/add')([P6_in, P7_U])
        P6_td = layers.Activation(lambda x: tf.nn.swish(x))(P6_td)
        P6_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode0/op_after_combine5')(P6_td)
        P6_U = layers.UpSampling2D()(P6_td)
        P5_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode1/add')([P5_in, P6_U])
        P5_td = layers.Activation(lambda x: tf.nn.swish(x))(P5_td)
        P5_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode1/op_after_combine6')(P5_td)
        P5_U = layers.UpSampling2D()(P5_td)
        P4_td = layers.Add(name=f'fpn_cells/cell_{id}/fnode2/add')([P4_in, P5_U])
        P4_td = layers.Activation(lambda x: tf.nn.swish(x))(P4_td)
        P4_td = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                   name=f'fpn_cells/cell_{id}/fnode2/op_after_combine7')(P4_td)
        P4_U = layers.UpSampling2D()(P4_td)
        P3_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode3/add')([P3_in, P4_U])
        P3_out = layers.Activation(lambda x: tf.nn.swish(x))(P3_out)
        P3_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode3/op_after_combine8')(P3_out)
        P3_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P3_out)
        P4_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode4/add')([P4_in, P4_td, P3_D])
        P4_out = layers.Activation(lambda x: tf.nn.swish(x))(P4_out)
        P4_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode4/op_after_combine9')(P4_out)
        P4_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P4_out)
        P5_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode5/add')([P5_in, P5_td, P4_D])
        P5_out = layers.Activation(lambda x: tf.nn.swish(x))(P5_out)
        P5_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode5/op_after_combine10')(P5_out)
        P5_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P5_out)
        P6_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode6/add')([P6_in, P6_td, P5_D])
        P6_out = layers.Activation(lambda x: tf.nn.swish(x))(P6_out)
        P6_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode6/op_after_combine11')(P6_out)
        P6_D = layers.MaxPooling2D(pool_size=3, strides=2, padding='same')(P6_out)
        P7_out = layers.Add(name=f'fpn_cells/cell_{id}/fnode7/add')([P7_in, P6_D])
        P7_out = layers.Activation(lambda x: tf.nn.swish(x))(P7_out)
        P7_out = SeparableConvBlock(num_channels=num_channels, kernel_size=3, strides=1,
                                    name=f'fpn_cells/cell_{id}/fnode7/op_after_combine12')(P7_out)
    return [P3_out, P4_out, P5_out, P6_out, P7_out]
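SeparableConvBlock is referenced throughout but not defined above. In this architecture it is a depthwise-separable convolution followed by BatchNormalization, with no activation inside (swish is applied before the block by the caller). A sketch under that assumption; the MOMENTUM/EPSILON values mirror the constants used in these snippets and are assumed:

from tensorflow.keras import layers

MOMENTUM, EPSILON = 0.997, 1e-4  # assumed values

def SeparableConvBlock(num_channels, kernel_size, strides, name, freeze_bn=False):
    conv = layers.SeparableConv2D(num_channels, kernel_size=kernel_size,
                                  strides=strides, padding='same',
                                  use_bias=True, name=f'{name}/conv')
    bn = layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON,
                                   name=f'{name}/bn')
    return lambda x: bn(conv(x))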
(5) BoxNet() (regression)
a. Structure
features
↓
(SeparableConv2D + BatchNormalization + swish) * depth
↓
SeparableConv2D (head: num_anchors * 4 filters)
↓
Reshape((-1, 4))
b. Code
class BoxNet:
    def __init__(self, width, depth, num_anchors=9, freeze_bn=False, name='box_net', **kwargs):
        self.name = name
        self.width = width
        self.depth = depth
        self.num_anchors = num_anchors
        options = {
            'kernel_size': 3,
            'strides': 1,
            'padding': 'same',
            'bias_initializer': 'zeros',
            'depthwise_initializer': initializers.VarianceScaling(),
            'pointwise_initializer': initializers.VarianceScaling(),
        }
        # Conv weights are shared across pyramid levels ...
        self.convs = [layers.SeparableConv2D(filters=width, name=f'{self.name}/box-{i}', **options) for i in range(depth)]
        self.head = layers.SeparableConv2D(filters=num_anchors * 4, name=f'{self.name}/box-predict', **options)
        # ... but each level P3..P7 gets its own BatchNormalization (j in 3..7).
        self.bns = [
            [layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name=f'{self.name}/box-{i}-bn-{j}') for j in
             range(3, 8)]
            for i in range(depth)]
        self.relu = layers.Lambda(lambda x: tf.nn.swish(x))  # despite the name, this is swish
        self.reshape = layers.Reshape((-1, 4))

    def call(self, inputs):
        feature, level = inputs
        for i in range(self.depth):
            feature = self.convs[i](feature)
            feature = self.bns[i][level](feature)
            feature = self.relu(feature)
        outputs = self.head(feature)
        outputs = self.reshape(outputs)
        return outputs
(6) ClassNet() (classification)
a. Structure
features
↓
(SeparableConv2D + BatchNormalization + swish) * depth
↓
SeparableConv2D (head: num_classes * num_anchors filters)
↓
Reshape((-1, num_classes))
↓
Activation('sigmoid')
b. Code
class ClassNet:
    def __init__(self, width, depth, num_classes=20, num_anchors=9, freeze_bn=False, name='class_net', **kwargs):
        self.name = name
        self.width = width
        self.depth = depth
        self.num_classes = num_classes
        self.num_anchors = num_anchors
        options = {
            'kernel_size': 3,
            'strides': 1,
            'padding': 'same',
            'depthwise_initializer': initializers.VarianceScaling(),
            'pointwise_initializer': initializers.VarianceScaling(),
        }
        self.convs = [layers.SeparableConv2D(filters=width, bias_initializer='zeros', name=f'{self.name}/class-{i}',
                                             **options)
                      for i in range(depth)]
        # The head bias is initialized so every class starts near probability 0.01,
        # which stabilizes early focal-loss training.
        self.head = layers.SeparableConv2D(filters=num_classes * num_anchors,
                                           bias_initializer=PriorProbability(probability=0.01),
                                           name=f'{self.name}/class-predict', **options)
        # One BatchNormalization per pyramid level (j in 3..7); convs are shared.
        self.bns = [
            [layers.BatchNormalization(momentum=MOMENTUM, epsilon=EPSILON, name=f'{self.name}/class-{i}-bn-{j}') for j
             in range(3, 8)]
            for i in range(depth)]
        self.relu = layers.Lambda(lambda x: tf.nn.swish(x))  # despite the name, this is swish
        self.reshape = layers.Reshape((-1, num_classes))
        self.activation = layers.Activation('sigmoid')

    def call(self, inputs):
        feature, level = inputs
        for i in range(self.depth):
            feature = self.convs[i](feature)
            feature = self.bns[i][level](feature)
            feature = self.relu(feature)
        outputs = self.head(feature)
        outputs = self.reshape(outputs)
        outputs = self.activation(outputs)
        return outputs
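A detail worth noticing in both heads: the SeparableConv2D weights are shared across pyramid levels, but self.bns[i][level] gives every level its own BatchNormalization, since activation statistics differ between P3 (large, shallow) and P7 (small, deep). A hypothetical stand-alone check of that pattern (not repo code):

import tensorflow as tf
from tensorflow.keras import layers

conv = layers.SeparableConv2D(8, 3, padding='same')    # shared across levels
bns = [layers.BatchNormalization() for _ in range(5)]  # one BN per level P3..P7

p3 = tf.random.normal((1, 64, 64, 8))
p7 = tf.random.normal((1, 4, 4, 8))
print(bns[0](conv(p3)).shape)  # (1, 64, 64, 8) -- level 0 = P3
print(bns[4](conv(p7)).shape)  # (1, 4, 4, 8)   -- level 4 = P7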
The two outputs are trained with dedicated losses:
'regression': smooth_l1()
'classification': focal()
(1) smooth_l1()
f(x) = 0.5 * (sigma * x)^2    if |x| < 1 / sigma^2
f(x) = |x| - 0.5 / sigma^2    otherwise
# `backend` here is the repo's own module wrapping tf ops (where, gather_nd);
# keras.backend is the standard Keras backend.
def smooth_l1(sigma=3.0):
    sigma_squared = sigma ** 2

    def _smooth_l1(y_true, y_pred):
        # y_true: [batch, num_anchors, 4 + 1]; the last channel is the anchor state.
        regression = y_pred
        regression_target = y_true[:, :, :-1]
        anchor_state = y_true[:, :, -1]
        # Only positive anchors (state == 1) contribute to the regression loss.
        indices = backend.where(keras.backend.equal(anchor_state, 1))
        regression = backend.gather_nd(regression, indices)
        regression_target = backend.gather_nd(regression_target, indices)
        regression_diff = regression - regression_target
        regression_diff = keras.backend.abs(regression_diff)
        regression_loss = backend.where(
            keras.backend.less(regression_diff, 1.0 / sigma_squared),
            0.5 * sigma_squared * keras.backend.pow(regression_diff, 2),
            regression_diff - 0.5 / sigma_squared
        )
        # compute the normalizer: the number of positive anchors
        normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0])
        normalizer = keras.backend.cast(normalizer, dtype=keras.backend.floatx())
        return keras.backend.sum(regression_loss) / normalizer / 4

    return _smooth_l1
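A quick numeric check of the piecewise definition with the default sigma=3 (so the quadratic zone is |x| < 1/9):

import numpy as np

def smooth_l1_ref(x, sigma=3.0):
    s2 = sigma ** 2
    x = np.abs(x)
    return np.where(x < 1.0 / s2, 0.5 * s2 * x ** 2, x - 0.5 / s2)

print(smooth_l1_ref(np.array([0.05, 0.5])))
# [0.01125 0.44444...]: 0.5*9*0.05^2 in the quadratic zone, 0.5 - 0.5/9 outside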
(2) focal()
a. Structure
if anchor_state == 1:    # positive anchor
    cls_loss_for_object = alpha * (1 - p)^gamma * BCE(label, p)
elif anchor_state == 0:  # background anchor
    cls_loss_for_back = (1 - alpha) * p^gamma * BCE(label, p)
total_loss = (sum(cls_loss_for_object) + sum(cls_loss_for_back)) / num_positive_anchors
b. Code
def focal(alpha=0.25, gamma=2.0):
    def _focal(y_true, y_pred):
        # y_true: [batch_size, num_anchor, num_classes + 1]
        # y_pred: [batch_size, num_anchor, num_classes]
        labels = y_true[:, :, :-1]
        anchor_state = y_true[:, :, -1]  # -1: ignore, 0: background, 1: object
        classification = y_pred
        # Select the anchors that contain objects.
        indices_for_object = backend.where(keras.backend.equal(anchor_state, 1))
        labels_for_object = backend.gather_nd(labels, indices_for_object)
        classification_for_object = backend.gather_nd(classification, indices_for_object)
        # Compute the weight each positive anchor should get.
        alpha_factor_for_object = keras.backend.ones_like(labels_for_object) * alpha
        alpha_factor_for_object = backend.where(keras.backend.equal(labels_for_object, 1), alpha_factor_for_object, 1 - alpha_factor_for_object)
        focal_weight_for_object = backend.where(keras.backend.equal(labels_for_object, 1), 1 - classification_for_object, classification_for_object)
        focal_weight_for_object = alpha_factor_for_object * focal_weight_for_object ** gamma
        # Multiply the weights by the cross-entropy.
        cls_loss_for_object = focal_weight_for_object * keras.backend.binary_crossentropy(labels_for_object, classification_for_object)
        # Select the anchors that are actually background.
        indices_for_back = backend.where(keras.backend.equal(anchor_state, 0))
        labels_for_back = backend.gather_nd(labels, indices_for_back)
        classification_for_back = backend.gather_nd(classification, indices_for_back)
        # Compute the weight each background anchor should get.
        alpha_factor_for_back = keras.backend.ones_like(labels_for_back) * (1 - alpha)
        focal_weight_for_back = classification_for_back
        focal_weight_for_back = alpha_factor_for_back * focal_weight_for_back ** gamma
        # Multiply the weights by the cross-entropy.
        cls_loss_for_back = focal_weight_for_back * keras.backend.binary_crossentropy(labels_for_back, classification_for_back)
        # Normalizer: the number of positive anchors.
        normalizer = tf.where(keras.backend.equal(anchor_state, 1))
        normalizer = keras.backend.cast(keras.backend.shape(normalizer)[0], keras.backend.floatx())
        normalizer = keras.backend.maximum(keras.backend.cast_to_floatx(1.0), normalizer)
        # Sum both parts and divide by the number of positives.
        cls_loss_for_object = keras.backend.sum(cls_loss_for_object)
        cls_loss_for_back = keras.backend.sum(cls_loss_for_back)
        # Total loss.
        loss = (cls_loss_for_object + cls_loss_for_back) / normalizer
        return loss

    return _focal
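The effect of the focal weight is easy to see numerically: for a background anchor the weight is (1 - alpha) * p^gamma, so an easy negative (p near 0) contributes almost nothing while a hard one keeps most of its loss:

import numpy as np

alpha, gamma = 0.25, 2.0
p = np.array([0.01, 0.5, 0.9])     # predicted object scores on background anchors
weight = (1 - alpha) * p ** gamma  # focal weight for negatives
bce = -np.log(1 - p)               # binary cross-entropy with label 0
print(weight * bce)                # ~[7.5e-07 1.3e-01 1.4e+00]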
3.1 image = Image.open(img)  # open the image
3.2 r_image = efficientdet.detect_image(image)  # detect road signs
3.2.1 crop_img = letterbox_image(image, [self.model_image_size[0], self.model_image_size[1]])  # add gray bars (see the letterbox sketch after this outline)
3.2.2 photo = np.reshape(preprocess_input(photo), [1, self.model_image_size[0], self.model_image_size[1], self.model_image_size[2]])  # preprocess and normalize the image
3.2.3 preds = self.Efficientdet.predict(photo)  # predict
3.2.4 results = self.bbox_util.detection_out(preds, self.prior, confidence_threshold=self.confidence)  # decode the predictions and apply NMS
3.2.5 keep only the boxes whose score is above confidence
3.2.6 boxes = efficientdet_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax, np.array([self.model_image_size[0], self.model_image_size[1]]), image_shape)  # remove the gray bars
3.2.7 adjust the boxes
3.2.8 draw the boxes
3.3 r_image.show()  # display the result
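letterbox_image (step 3.2.1) resizes the image while keeping its aspect ratio and fills the borders with gray. A minimal sketch, assuming PIL, matching the behavior described in the outline above:

from PIL import Image

def letterbox_image(image, size):
    # Scale so the image fits inside `size`, then pad the rest with gray (128).
    iw, ih = image.size
    w, h = size
    scale = min(w / iw, h / ih)
    nw, nh = int(iw * scale), int(ih * scale)
    image = image.resize((nw, nh), Image.BICUBIC)
    new_image = Image.new('RGB', (w, h), (128, 128, 128))
    new_image.paste(image, ((w - nw) // 2, (h - nh) // 2))
    return new_image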
4.1 model = Efficientdet(phi, num_classes=NUM_CLASSES)  # build the model
4.2 priors = get_anchors(image_sizes[phi])
4.3 bbox_util = BBoxUtility(NUM_CLASSES, priors)
4.4 set up the training callbacks: logging, reduce_lr, checkpoint, early_stopping
4.5 gen.generate(True)  # generate training batches
4.5.1 for each annotation line: shuffle, then read out the image path plus its boxes and class labels
4.5.2 assignment = self.bbox_util.assign_boxes(y)  # match ground-truth boxes to anchors and compute the targets each matched anchor should predict
(1) ingored_boxes = np.apply_along_axis(self.ignore_box, 1, boxes[:, :4])  # find the anchors to ignore
(2) encoded_boxes = np.apply_along_axis(self.encode_box, 1, boxes[:, :4])  # encode the ground-truth boxes against the anchors
4.5.3 return the processed boxes, classes, and image data
4.6 'regression': smooth_l1()
4.7 'classification': focal()