在普通卷积的基础上加上偏移量(offsets):
偏移量是通过卷积学习得到的:用一个额外的 conv 层来学习 offset,该层与后续的卷积共享同一个 input feature map。随后,input feature map 和 offset 共同作为 deformable conv 层的输入;deformable conv 层先按 offset 对采样点进行偏移,再进行卷积。
class ConvOffset2D(Conv2D):
    """Learn 2D sampling offsets and return the deformed feature map.

    Convolutional layer responsible for learning the 2D offsets and
    outputting the deformed feature map (using bilinear interpolation, as
    performed by ``tf_batch_map_offsets``).

    Note that this layer does not perform convolution on the deformed feature
    map; a regular convolution is expected to follow it. See
    ``get_deform_cnn`` in cnn.py for usage.
    """

    def __init__(self, filters, init_normal_stddev=0.01, **kwargs):
        """Configure the offset-predicting convolution.

        Parameters
        ----------
        filters : int
            Number of channels of the input feature map.
        init_normal_stddev : float
            Standard deviation of the zero-mean normal kernel initializer.
        **kwargs
            Passed to the superclass. See the Conv2D layer in Keras.
        """
        # NOTE(review): Conv2D.__init__ presumably reassigns ``self.filters``
        # to the doubled value passed below -- confirm before relying on
        # ``self.filters`` after construction.
        self.filters = filters
        # Two offset values are predicted per input channel, hence
        # ``filters * 2`` output channels for the underlying convolution.
        super(ConvOffset2D, self).__init__(
            self.filters * 2, (3, 3), padding='same', use_bias=False,
            kernel_initializer=RandomNormal(0, init_normal_stddev),
            **kwargs
        )

    def call(self, x):
        """Return the deformed feature map for input ``x``.

        The parent convolution predicts the offsets; the input is then
        resampled channel by channel at the offset positions.
        """
        x_shape = x.get_shape()
        offsets = super(ConvOffset2D, self).call(x)

        # offsets: (b*c, h, w, 2)
        offsets = self._to_bc_h_w_2(offsets, x_shape)

        # x: (b*c, h, w)
        x = self._to_bc_h_w(x, x_shape)

        # x_offset: (b*c, h, w)
        x_offset = tf_batch_map_offsets(x, offsets)

        # x_offset: (b, h, w, c)
        x_offset = self._to_b_h_w_c(x_offset, x_shape)
        return x_offset

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` unchanged.

        The output shape is the same as the input shape because this layer
        does only the deformation part.
        """
        return input_shape

    # The helpers below take no ``self`` yet are invoked as
    # ``self._to_...(x, x_shape)``; they must be static methods for those
    # calls to receive exactly two arguments.
    @staticmethod
    def _to_bc_h_w_2(x, x_shape):
        """(b, h, w, 2c) -> (b*c, h, w, 2)"""
        x = tf.transpose(x, [0, 3, 1, 2])
        # Height and width must be statically known: ``int()`` is applied to
        # the shape entries.
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2]), 2))
        return x

    @staticmethod
    def _to_bc_h_w(x, x_shape):
        """(b, h, w, c) -> (b*c, h, w)"""
        x = tf.transpose(x, [0, 3, 1, 2])
        x = tf.reshape(x, (-1, int(x_shape[1]), int(x_shape[2])))
        return x

    @staticmethod
    def _to_b_h_w_c(x, x_shape):
        """(b*c, h, w) -> (b, h, w, c)"""
        x = tf.reshape(
            x, (-1, int(x_shape[3]), int(x_shape[1]), int(x_shape[2]))
        )
        x = tf.transpose(x, [0, 2, 3, 1])
        return x
def get_deform_cnn(trainable):
    """Build the deformable-convolution CNN graph.

    The network takes a (28, 28, 1) input and stacks four 3x3 convolutions
    (32, 64, 128 and 128 filters; the second and fourth use stride 2), each
    followed by ReLU and batch normalization. Every convolution except the
    first is fed by a ``ConvOffset2D`` layer applied to the previous stage.
    Global average pooling, a 10-unit dense layer and a softmax produce the
    output.

    Parameters
    ----------
    trainable : bool
        Forwarded as ``trainable`` to the ``Conv2D`` and ``Dense`` layers
        only; the ``ConvOffset2D`` and ``BatchNormalization`` layers do not
        receive it.

    Returns
    -------
    tuple
        ``(inputs, outputs)``: the input tensor and the softmax output
        tensor.
    """
    inputs = Input((28, 28, 1), name='input')

    # conv11: plain convolution, no offset layer in front of it
    conv11 = Conv2D(
        32, (3, 3), padding='same', name='conv11', trainable=trainable)
    net = conv11(inputs)
    net = Activation('relu', name='conv11_relu')(net)
    net = BatchNormalization(name='conv11_bn')(net)

    # conv12: deform the 32-channel map, then a strided convolution
    deformed = ConvOffset2D(32, name='conv12_offset')(net)
    conv12 = Conv2D(
        64, (3, 3), padding='same', strides=(2, 2), name='conv12',
        trainable=trainable)
    net = conv12(deformed)
    net = Activation('relu', name='conv12_relu')(net)
    net = BatchNormalization(name='conv12_bn')(net)

    # conv21: deform the 64-channel map, then convolve
    deformed = ConvOffset2D(64, name='conv21_offset')(net)
    conv21 = Conv2D(
        128, (3, 3), padding='same', name='conv21', trainable=trainable)
    net = conv21(deformed)
    net = Activation('relu', name='conv21_relu')(net)
    net = BatchNormalization(name='conv21_bn')(net)

    # conv22: deform the 128-channel map, then a strided convolution
    deformed = ConvOffset2D(128, name='conv22_offset')(net)
    conv22 = Conv2D(
        128, (3, 3), padding='same', strides=(2, 2), name='conv22',
        trainable=trainable)
    net = conv22(deformed)
    net = Activation('relu', name='conv22_relu')(net)
    net = BatchNormalization(name='conv22_bn')(net)

    # out: pooled features -> 10-way softmax
    pooled = GlobalAvgPool2D(name='avg_pool')(net)
    logits = Dense(10, name='fc1', trainable=trainable)(pooled)
    outputs = Activation('softmax', name='out')(logits)

    return inputs, outputs