Deformable convolutional networks(可变形卷积)使用keras/tensorflow主要代码及注释

以下对 Deformable Convolutional Networks(可变形卷积)的 keras/tensorflow 实现中的主要代码进行解读和注释。

代码原地址:https://github.com/kastnerkyle/deform-conv

layers.py

from __future__ import absolute_import, division


import tensorflow as tf
from keras.layers import Conv2D
from keras.initializers import RandomNormal
from deform_conv.deform_conv import tf_batch_map_offsets


class ConvOffset2D(Conv2D):
    """Keras layer that resamples its input at learned 2D offsets.

    The layer subclasses ``Conv2D``. The inherited 3x3 convolution produces
    ``2 * filters`` channels that are interpreted as per-channel (x, y)
    offsets; the input is then resampled at the shifted positions through
    ``tf_batch_map_offsets``. The output shape equals the input shape.
    """

    def __init__(self, filters, init_normal_stddev=0.01, **kwargs):
        """Configure the offset-predicting convolution.

        Parameters
        ----------
        filters : int
            Channel count used to size the convolution; the convolution is
            created with ``2 * filters`` output channels because an x and a
            y offset are needed per channel. The reshapes in ``call`` only
            line up when this equals the channel count of the input.
        init_normal_stddev : float
            Currently unused; only the commented-out ``RandomNormal``
            initializer below refers to it.
        **kwargs
            Forwarded unchanged to ``Conv2D.__init__``.
        """
        self.filters = filters
        offset_channels = self.filters * 2
        super(ConvOffset2D, self).__init__(
            offset_channels,
            (3, 3),
            padding='same',
            use_bias=False,
            # TODO gradients are near zero if init is zeros
            kernel_initializer='zeros',
            # kernel_initializer=RandomNormal(0, init_normal_stddev),
            **kwargs
        )

    def call(self, x):
        """Return ``x`` resampled at the offsets predicted from ``x``.

        ``x`` is treated as a (b, h, w, c) tensor, following the docstrings
        of the reshape helpers below.
        """
        # TODO offsets probably have no nonlinearity?
        in_shape = x.get_shape()

        # Plain convolution of the input: (b, h, w, 2c).
        raw_offsets = super(ConvOffset2D, self).call(x)

        # Fold channels into the batch axis so that every channel is
        # handled as an independent 2D map.
        folded_offsets = self._to_bc_h_w_2(raw_offsets, in_shape)
        folded_input = self._to_bc_h_w(x, in_shape)

        # Sample the input at (grid + offset).
        sampled = tf_batch_map_offsets(folded_input, folded_offsets)

        # Unfold back to (b, h, w, c).
        return self._to_b_h_w_c(sampled, in_shape)

    def compute_output_shape(self, input_shape):
        """Return ``input_shape`` unchanged."""
        return input_shape

    @staticmethod
    def _to_bc_h_w_2(x, x_shape):
        """(b, h, w, 2c) -> (b*c, h, w, 2)"""
        channels_first = tf.transpose(x, [0, 3, 1, 2])  # (b, 2c, h, w)
        # NOTE(review): the reshape regroups the transposed buffer as-is, so
        # the trailing axis of size 2 pairs neighbouring elements of the
        # flattened (2c, h, w) layout rather than two whole channels --
        # confirm this is the intended offset layout.
        target = (-1, int(x_shape[1]), int(x_shape[2]), 2)
        return tf.reshape(channels_first, target)

    @staticmethod
    def _to_bc_h_w(x, x_shape):
        """(b, h, w, c) -> (b*c, h, w)"""
        channels_first = tf.transpose(x, [0, 3, 1, 2])  # (b, c, h, w)
        target = (-1, int(x_shape[1]), int(x_shape[2]))
        return tf.reshape(channels_first, target)

    @staticmethod
    def _to_b_h_w_c(x, x_shape):
        """(b*c, h, w) -> (b, h, w, c)"""
        target = (-1, int(x_shape[3]), int(x_shape[1]), int(x_shape[2]))
        channels_first = tf.reshape(x, target)  # (b, c, h, w)
        return tf.transpose(channels_first, [0, 2, 3, 1])

deform_conv.py 

from __future__ import absolute_import, division

import numpy as np
from scipy.ndimage.interpolation import map_coordinates as sp_map_coordinates
import tensorflow as tf


def tf_flatten(a):
    """Return ``a`` reshaped into a single dimension."""
    flat_shape = [-1]
    return tf.reshape(a, flat_shape)


def tf_repeat(a, repeats, axis=0):
    """TensorFlow version of np.repeat for 1D tensors.

    Every element of ``a`` is repeated ``repeats`` times in a row, e.g.
    ``[0, 1]`` with ``repeats=2`` becomes ``[0, 0, 1, 1]``.

    ``axis`` is accepted but not used by this implementation.
    """
    # https://github.com/tensorflow/tensorflow/issues/8521
    rank = len(a.get_shape())
    assert rank == 1

    as_column = tf.expand_dims(a, -1)           # (n, 1)
    tiled = tf.tile(as_column, [1, repeats])    # (n, repeats)
    return tf_flatten(tiled)                    # (n * repeats,)


def tf_repeat_2d(a, repeats):
    """Stack ``repeats`` copies of a 2D tensor along a new leading axis.

    A tensor of shape (m, n) becomes a tensor of shape (repeats, m, n).
    """
    rank = len(a.get_shape())
    assert rank == 2  # only 2D tensors are accepted

    with_leading_axis = tf.expand_dims(a, 0)             # (1, m, n)
    return tf.tile(with_leading_axis, [repeats, 1, 1])   # (repeats, m, n)


def tf_map_coordinates(input, coords, order=1):
    """Tensorflow version of scipy.ndimage.map_coordinates (bilinear only).

    Note that coords is transposed relative to scipy and only 2D is
    supported.

    Parameters
    ----------
    input : tf.Tensor. shape = (s, s)
    coords : tf.Tensor. shape = (n_points, 2)
    order : int
        Must be 1 (linear interpolation); anything else fails the assert.

    Returns
    -------
    tf.Tensor with one interpolated value per row of ``coords``.
    """
    assert order == 1

    # Integer corners surrounding every point. "f"/"c" stand for floor/ceil
    # of the first and second coordinate respectively.
    # NOTE: coords are not clipped here, so out-of-range points are handed
    # to tf.gather_nd unchanged.
    corner_ff = tf.cast(tf.floor(coords), 'int32')
    corner_cc = tf.cast(tf.ceil(coords), 'int32')
    corner_fc = tf.stack([corner_ff[:, 0], corner_cc[:, 1]], axis=1)
    corner_cf = tf.stack([corner_cc[:, 0], corner_ff[:, 1]], axis=1)

    value_ff = tf.gather_nd(input, corner_ff)
    value_cc = tf.gather_nd(input, corner_cc)
    value_fc = tf.gather_nd(input, corner_fc)
    value_cf = tf.gather_nd(input, corner_cf)

    # Fractional part of every coordinate, used as interpolation weight.
    frac = coords - tf.cast(corner_ff, 'float32')
    frac_0 = frac[:, 0]
    frac_1 = frac[:, 1]

    # Interpolate along the first coordinate, then along the second.
    along_0_low = value_ff + (value_cf - value_ff) * frac_0
    along_0_high = value_fc + (value_cc - value_fc) * frac_0
    return along_0_low + (along_0_high - along_0_low) * frac_1


def sp_batch_map_coordinates(inputs, coords):
    """Reference (SciPy) implementation for batch_map_coordinates.

    ``coords`` is clipped to ``[0, inputs.shape[1] - 1]`` and every map in
    ``inputs`` is then sampled with linear interpolation at its own set of
    points. ``coords[i]`` holds one (row, col) pair per row and is
    transposed before being handed to SciPy.

    Returns a NumPy array with one row of sampled values per input map.
    """
    highest_index = inputs.shape[1] - 1
    clipped = coords.clip(0, highest_index)

    sampled = []
    for feature_map, points in zip(inputs, clipped):
        sampled.append(
            sp_map_coordinates(feature_map, points.T, mode='nearest', order=1)
        )
    return np.array(sampled)


def tf_batch_map_coordinates(input, coords, order=1):
    """Bilinearly sample a batch of 2D feature maps at fractional points.

    Batch version of tf_map_coordinates. Only supports 2D feature maps.
    Coordinates are clipped to ``[0, s - 1]`` before sampling, where ``s``
    is read from ``tf.shape(input)[1]`` and used for both coordinates.

    Parameters
    ----------
    input : tf.Tensor. shape = (b, s, s)
    coords : tf.Tensor. shape = (b, n_points, 2)
    order : int
        Not used by this function.

    Returns
    -------
    tf.Tensor. shape = (b, n_points)
    """
    dims = tf.shape(input)
    n_maps = dims[0]
    side = dims[1]
    n_points = tf.shape(coords)[1]

    # Keep every sampling position inside the feature map so that the
    # integer corner indices built below stay within bounds.
    coords = tf.clip_by_value(coords, 0, tf.cast(side, 'float32') - 1)

    # Integer corners around each point: floor/ceil of both coordinates,
    # plus the two mixed combinations.
    corner_ff = tf.cast(tf.floor(coords), 'int32')
    corner_cc = tf.cast(tf.ceil(coords), 'int32')
    corner_fc = tf.stack([corner_ff[..., 0], corner_cc[..., 1]], axis=-1)
    corner_cf = tf.stack([corner_cc[..., 0], corner_ff[..., 1]], axis=-1)

    # Batch index of every point: 0 repeated n_points times, then 1, ...
    map_index = tf_repeat(tf.range(n_maps), n_points)

    def _values_at(corner):
        # Build (map, coord0, coord1) index triples and gather the values.
        triples = tf.stack([
            map_index,
            tf_flatten(corner[..., 0]),
            tf_flatten(corner[..., 1]),
        ], axis=-1)
        gathered = tf.gather_nd(input, triples)  # 1D
        return tf.reshape(gathered, (n_maps, n_points))

    value_ff = _values_at(corner_ff)
    value_cc = _values_at(corner_cc)
    value_fc = _values_at(corner_fc)
    value_cf = _values_at(corner_cf)

    # Fractional parts act as the bilinear interpolation weights.
    frac = coords - tf.cast(corner_ff, 'float32')
    frac_0 = frac[..., 0]
    frac_1 = frac[..., 1]

    along_0_low = value_ff + (value_cf - value_ff) * frac_0
    along_0_high = value_fc + (value_cc - value_fc) * frac_0
    return along_0_low + (along_0_high - along_0_low) * frac_1


def sp_batch_map_offsets(input, offsets):
    """Reference (NumPy/SciPy) implementation for tf_batch_map_offsets.

    Adds ``offsets`` to a regular integer grid, clips the result to
    ``[0, input.shape[1] - 1]`` and samples ``input`` at those positions via
    ``sp_batch_map_coordinates``. ``input.shape[1]`` is used for both grid
    axes, i.e. the maps are assumed to be square.
    """
    n_maps, side = input.shape[0], input.shape[1]

    offsets = offsets.reshape(n_maps, -1, 2)

    # One (row, col) pair per pixel, in row-major order: (side * side, 2).
    rows, cols = np.mgrid[:side, :side]
    pixel_grid = np.stack([rows, cols], -1).reshape(-1, 2)

    # Same grid for every map in the batch: (n_maps, side * side, 2).
    batched_grid = np.tile(pixel_grid[np.newaxis], (n_maps, 1, 1))

    coords = (offsets + batched_grid).clip(0, side - 1)
    return sp_batch_map_coordinates(input, coords)


def tf_batch_map_offsets(input, offsets, order=1):
    """Sample a batch of 2D maps at (pixel grid + offsets).

    Parameters
    ---------
    input : tf.Tensor. shape = (b, s, s)
    offsets: tf.Tensor. shape = (b, s, s, 2)
    order : int
        Not used by this function.

    Returns
    -------
    The result of ``tf_batch_map_coordinates`` for the shifted grid.
    """
    dims = tf.shape(input)
    n_maps = dims[0]
    side = dims[1]  # used for both grid axes, i.e. square maps

    # One offset pair per pixel: (b, s*s, 2).
    offsets = tf.reshape(offsets, (n_maps, -1, 2))

    # Matrix-style ('ij') index grids. For side == 3:
    #   rows = [[0 0 0], [1 1 1], [2 2 2]]
    #   cols = [[0 1 2], [0 1 2], [0 1 2]]
    rows, cols = tf.meshgrid(
        tf.range(side), tf.range(side), indexing='ij'
    )

    # Pair them up so every position holds its own (i, j): (s, s, 2).
    pixel_grid = tf.stack([rows, cols], axis=-1)
    pixel_grid = tf.cast(pixel_grid, 'float32')

    # Flatten to a list of coordinates, (s*s, 2). For side == 3:
    #   [[0 0], [0 1], [0 2], [1 0], [1 1], [1 2], [2 0], [2 1], [2 2]]
    pixel_grid = tf.reshape(pixel_grid, (-1, 2))

    # Repeat the grid for every map in the batch: (b, s*s, 2).
    pixel_grid = tf_repeat_2d(pixel_grid, n_maps)

    # Shifted sampling positions. They are fractional in general, so the
    # values are obtained by bilinear interpolation in the call below.
    shifted = offsets + pixel_grid
    return tf_batch_map_coordinates(input, shifted)

cnn.py

from __future__ import absolute_import, division


from keras.layers import Input, Conv2D, Activation, GlobalAvgPool2D, Dense, BatchNormalization
from deform_conv.layers import ConvOffset2D


def get_cnn():
    """Build the plain CNN graph for 28x28x1 inputs.

    Four 3x3 'same' convolutions (conv11, conv12, conv21, conv22), each
    followed by ReLU and batch normalization, then global average pooling
    and a 10-way softmax classifier.

    Returns
    -------
    (inputs, outputs) : the Keras input tensor and the softmax output tensor.
    """
    inputs = net = Input((28, 28, 1), name='input')

    # (layer name, number of filters, extra Conv2D keyword arguments)
    conv_stack = (
        ('conv11', 32, {}),
        ('conv12', 64, {'strides': (2, 2)}),
        ('conv21', 128, {}),
        ('conv22', 128, {'strides': (2, 2)}),
    )
    for layer_name, n_filters, extra in conv_stack:
        net = Conv2D(
            n_filters, (3, 3), padding='same', name=layer_name, **extra
        )(net)
        net = Activation('relu', name=layer_name + '_relu')(net)
        net = BatchNormalization(name=layer_name + '_bn')(net)

    # Classifier head.
    net = GlobalAvgPool2D(name='avg_pool')(net)
    net = Dense(10, name='fc1')(net)
    outputs = Activation('softmax', name='out')(net)

    return inputs, outputs


def get_deform_cnn(trainable):
    """Build the CNN graph with ConvOffset2D layers before conv12/21/22.

    Same layer names and sizes as the plain CNN, except that the input of
    conv12, conv21 and conv22 first passes through a ``ConvOffset2D`` layer
    named ``<conv name>_offset``.

    Parameters
    ----------
    trainable : bool
        Passed as ``trainable`` to every ``Conv2D`` layer and to the final
        ``Dense`` layer. The offset and batch-normalization layers are
        created without it.

    Returns
    -------
    (inputs, outputs) : the Keras input tensor and the softmax output tensor.
    """
    inputs = net = Input((28, 28, 1), name='input')

    # (layer name, conv filters, extra Conv2D kwargs,
    #  filters for the preceding ConvOffset2D, or None for no offset layer)
    conv_stack = (
        ('conv11', 32, {}, None),
        ('conv12', 64, {'strides': (2, 2)}, 32),
        ('conv21', 128, {}, 64),
        ('conv22', 128, {'strides': (2, 2)}, 128),
    )
    for layer_name, n_filters, extra, offset_filters in conv_stack:
        if offset_filters is not None:
            net = ConvOffset2D(
                offset_filters, name=layer_name + '_offset'
            )(net)
        net = Conv2D(
            n_filters, (3, 3), padding='same', name=layer_name,
            trainable=trainable, **extra
        )(net)
        net = Activation('relu', name=layer_name + '_relu')(net)
        net = BatchNormalization(name=layer_name + '_bn')(net)

    # Classifier head.
    net = GlobalAvgPool2D(name='avg_pool')(net)
    net = Dense(10, name='fc1', trainable=trainable)(net)
    outputs = Activation('softmax', name='out')(net)

    return inputs, outputs

以下使用 tensorflow 2.0 对本代码中用到的部分函数进行测试

import tensorflow as tf
import numpy as np

# x = [1, 2, 3]
# y = [4, 5, 6]

# X, Y = tf.meshgrid(x, y)
# print(X.numpy())
# print(Y.numpy())
# print(X.shape)
# print(Y.shape)

# x = tf.range(-2,2,1)
# y = tf.range(-2,2,1)
# z = tf.range(-2,2,1)

# X,Y,Z = tf.meshgrid(x,y,z)
# print(x.numpy())
# print(y.numpy())
# print(z.numpy())
# print(X.numpy())
# print(Y.numpy())
# print(Z.numpy())
# print(X.shape)
# print(Y.shape)
# print(Z.shape)

def tf_flatten(a):
    """Return ``a`` reshaped into a single dimension."""
    one_dim = [-1]
    return tf.reshape(a, one_dim)
def tf_repeat(a, repeats, axis=0):
    """TensorFlow version of np.repeat for 1D tensors.

    Each element of ``a`` appears ``repeats`` times in a row in the result.
    ``axis`` is accepted but not used.
    """
    # https://github.com/tensorflow/tensorflow/issues/8521
    n_dims = len(a.get_shape())
    assert n_dims == 1

    column = tf.expand_dims(a, -1)            # (n, 1)
    widened = tf.tile(column, [1, repeats])   # (n, repeats)
    return tf_flatten(widened)                # (n * repeats,)
def tf_repeat_2d(a, repeats):
    """Stack ``repeats`` copies of a 2D tensor along a new leading axis."""
    n_dims = len(a.get_shape())
    assert n_dims == 2

    expanded = tf.expand_dims(a, 0)              # (1, m, n)
    return tf.tile(expanded, [repeats, 1, 1])    # (repeats, m, n)
# Scratch experiments reproducing, on tiny tensors, the indexing steps used
# by the batch sampling code: repeat an index vector, pair it with a second
# index vector, and gather values with tf.gather_nd.
x = tf.range(3)
# --- disabled experiment: building the (i, j) coordinate grid ---
# y = tf.range(3)
# # z = tf.range(3)
# c,d=tf.meshgrid(x,y,indexing='ij')
# a = tf.meshgrid(x,y,indexing='ij')
# print(c.numpy())
# print(d.numpy())
# print(a[0].shape)
# a = tf.stack(a,-1)
# print(y.shape)
# print(a.numpy())
# print(a.shape)
# # print(b.numpy())
# a = tf.cast(a, 'float32')
# print(a.numpy())
# a = tf.reshape(a, (-1, 2))
# print(a.shape)
# print(a.numpy())
# a = tf_repeat_2d(a, 3)
# print(a.numpy())
# print(a.shape)

# --- disabled experiment: adding offsets to the grid ---
# offsets = np.array([[0.1 ,0.2],
# [-0.1 ,-0.2],
#  [0., 2.],
#  [1., 0.],
#  [1., 1.],
#  [1., 2.],
#  [2. ,0.],
#  [2., 1.],
#  [2. ,2.]])

# coo = offsets+a
# print(coo.numpy())
# Each element of x repeated three times in a row; used as row indices.
p = tf_repeat(x,3)
print(p.numpy())
# Column indices, cycling 0, 1, 2 for every row index above.
q = np.array([0 ,1 ,2,0, 1, 2,0, 1 ,2])
# t = tf.range(9)
# Pair the two vectors element-wise into (row, col) index pairs: shape (9, 2).
indices = tf.stack([
            p, q
        ], axis=-1)
print(indices.numpy())

# 3x5 table to index into.
k=tf.Variable([[1, 2, 3, 4, 5],
                 [6, 7, 8, 9, 10],
                 [11, 12, 13, 14, 15]])
# Pick k[row, col] for every index pair; the result is 1D with 9 values.
val = tf.gather_nd(k,indices)
print(val.numpy())
# Reshape the gathered values to a single row of shape (1, 9).
val = tf.reshape(val, (1, 9))
print(val.numpy())

 

你可能感兴趣的:(深度学习)