KPConv核心代码阅读

论文:https://arxiv.org/abs/1904.08889
代码:https://github.com/HuguesTHOMAS/KPConv

本文主要研究 KPConv 的结构,我们从training_ModelNet40.py这个文件开始找。
KPConv/training_ModelNet40.py

	# Network architecture: ordered list of block names, one entry per block of the model.
    # 'resnetb' corresponds to resnetb_block() in models/network_blocks.py.
    # NOTE(review): the '*_strided' and '*_deformable' names presumably select the strided and
    # deformable-kernel variants of that block - confirm against network_blocks.py.
    architecture = ['simple',
                    'resnetb',
                    'resnetb_strided',
                    'resnetb',
                    'resnetb_strided',
                    'resnetb',
                    'resnetb_strided',
                    'resnetb_deformable',
                    'resnetb_deformable_strided',
                    'resnetb_deformable',
                    'global_average']

使用Rigid Kernel

architecture 中的 resnetb 就是 network_blocks 中的 resnetb_block()
KPConv/models/network_blocks.py

def resnetb_block(layer_ind, inputs, features, radius, fdim, config, training):
    """
    Resnet bottleneck block: unary conv > KPConv > unary conv, added to a shortcut branch.

    The first unary convolution and the KPConv both output ``fdim // 2`` channels, the last unary
    convolution outputs ``2 * fdim`` channels. The shortcut is the input itself when it already has
    ``2 * fdim`` channels, otherwise a batch-normalized unary convolution of the input.

    :param layer_ind: index selecting inputs['points'] / inputs['neighbors'] for this layer
    :param inputs: dict holding (at least) the 'points' and 'neighbors' entries
    :param features: input features, second dimension is the input feature size
    :param radius: radius forwarded to KPConv
    :param fdim: base feature dimension of the block
    :param config: configuration object (batch norm settings, num_kernel_points, ...)
    :param training: training flag forwarded to batch_norm
    :return: leaky_relu(main branch + shortcut)
    """

    def _normalize(tensor):
        # Batch normalization with the settings shared by every stage of this block
        return batch_norm(tensor,
                          config.use_batch_norm,
                          config.batch_norm_momentum,
                          training)

    in_fdim = int(features.shape[1])
    mid_fdim = fdim // 2
    out_fdim = 2 * fdim

    # Stage 1: reduce the feature dimension (unary convolution, i.e. a matmul)
    with tf.variable_scope('conv1'):
        reduce_weights = weight_variable([in_fdim, mid_fdim])
        hidden = conv_ops.unary_convolution(features, reduce_weights)
        hidden = leaky_relu(_normalize(hidden))

    # Stage 2: kernel point convolution; query and support points are the same point set
    with tf.variable_scope('conv2'):
        kernel_weights = weight_variable([config.num_kernel_points, int(hidden.shape[1]), mid_fdim])
        hidden = KPConv(inputs['points'][layer_ind],
                        inputs['points'][layer_ind],
                        inputs['neighbors'][layer_ind],
                        hidden,
                        kernel_weights,
                        radius,
                        config)
        hidden = leaky_relu(_normalize(hidden))

    # Stage 3: expand the feature dimension (no activation before the residual sum)
    with tf.variable_scope('conv3'):
        expand_weights = weight_variable([int(hidden.shape[1]), out_fdim])
        hidden = _normalize(conv_ops.unary_convolution(hidden, expand_weights))

    # Shortcut: identity when dimensions already match, otherwise a projection
    with tf.variable_scope('shortcut'):
        if in_fdim == out_fdim:
            shortcut = features
        else:
            proj_weights = weight_variable([in_fdim, out_fdim])
            shortcut = _normalize(conv_ops.unary_convolution(features, proj_weights))

    return leaky_relu(hidden + shortcut)
    
...

def weight_variable(shape):
    """
    Create a variable named 'weights' initialized from a truncated normal distribution.

    The standard deviation is sqrt(2 / shape[-1]) and the initial values are rounded to three
    decimals. No random seed is set here.

    :param shape: list of ints, shape of the variable
    :return: tf.Variable holding the initial weights
    """
    stddev = np.sqrt(2 / shape[-1])
    draw = tf.truncated_normal(shape, stddev=stddev)

    # Round to 3 decimals: scale up, round, scale back down
    scaled = draw * tf.constant(1000, dtype=tf.float32)
    quantized = tf.round(scaled) / tf.constant(1000, dtype=tf.float32)

    return tf.Variable(quantized, name='weights')

在 resnetb_block 的 conv2 部分(上面代码片段的第 15 行),我们可以看到先用 weight_variable() 随机初始化了一个权重张量 w,然后调用了 network_blocks.py 中的 KPConv()。

KPConv()

KPConv/models/network_blocks.py

def KPConv(query_points, support_points, neighbors_indices, features, K_values, radius, config):
    """
    Return the output features of a KPConv, with the kernel settings taken from `config`.

    The kernel point extent is derived from the layer radius:
    ``config.KP_extent * radius / config.density_parameter``.
    """

    # Kernel settings for this layer; the extent is computed from the current radius
    kernel_options = dict(
        fixed=config.fixed_kernel_points,
        KP_extent=config.KP_extent * radius / config.density_parameter,
        KP_influence=config.KP_influence,
        aggregation_mode=config.convolution_mode,
    )

    # Convolution
    return conv_ops.KPConv(query_points,
                           support_points,
                           neighbors_indices,
                           features,
                           K_values,
                           **kernel_options)

KPConv/kernels/convolution_ops.py

# Set up the kernel points, then build the convolution ops
def KPConv(query_points,
           support_points,
           neighbors_indices,
           features,
           K_values,
           fixed='center',
           KP_extent=1.0,
           KP_influence='linear',
           aggregation_mode='sum'):
    """
    Initiate the kernel point disposition, then build the KPConv graph ops.

    :param query_points: float32[n_points, dim] - query points (centers of the neighborhoods)
    :param support_points: float32[n0_points, dim] - support points (from which neighbors are taken)
    :param neighbors_indices: int32[n_points, n_neighbors] - neighbor indices of each query point
    :param features: float32[n_points, in_fdim] - input features
    :param K_values: float32[n_kpoints, in_fdim, out_fdim] - weights of the kernel
    :param fixed: string in ('none', 'center' or 'verticals') - which kernel points keep a fixed position
    :param KP_extent: float32 - influence radius of each kernel point
    :param KP_influence: string in ('constant', 'linear', 'gaussian') - influence function of the kernel points
    :param aggregation_mode: string in ('closest', 'sum') - sum all influences, or keep only the closest
    :return: output_features float32[n_points, out_fdim]
    """

    # The kernel points are initially placed within 1.5 times their influence extent
    init_radius = 1.5 * KP_extent

    # Kernel size and point dimension are read from the static shapes
    n_kernel_points = int(K_values.shape[0])
    dim = int(query_points.shape[1])

    # One kernel disposition as a numpy array, flattened to [n_kpoints, dim]
    from kernels.kernel_points import load_kernels as create_kernel_points
    disposition = create_kernel_points(init_radius,
                                       n_kernel_points,
                                       num_kernels=1,
                                       dimension=dim,
                                       fixed=fixed)
    disposition = disposition.reshape((n_kernel_points, dim))

    # Kernel point positions live in a non-trainable variable
    kernel_points_var = tf.Variable(disposition.astype(np.float32),
                                    name='kernel_points',
                                    trainable=False,
                                    dtype=tf.float32)

    return KPConv_ops(query_points,
                      support_points,
                      neighbors_indices,
                      features,
                      kernel_points_var,
                      K_values,
                      KP_extent,
                      KP_influence,
                      aggregation_mode)

KPConv/kernels/kernel_points.py

# Create or load a kernel point disposition; every call applies new random rotations to it.
def load_kernels(radius, num_kpoints, num_kernels, dimension, fixed):
    """
    Load (or create and cache) a kernel point disposition and return randomly rotated copies.

    The disposition is cached as a .ply file under 'kernels/dispositions'. When the file does not
    exist, several candidates are optimized and the best one is saved.

    :param radius: scale factor applied to the stored disposition
    :param num_kpoints: number of kernel points (part of the cache file name)
    :param num_kernels: number of rotated copies to return
    :param dimension: 2 or 3
    :param fixed: fixed-point mode (part of the cache file name); 'verticals' restricts the random
                  rotations to rotations around the z axis
    :return: for dimension 3, array [num_kernels, num_kpoints, 3]; for dimension 2 the stored
             disposition is returned as is (not scaled, not rotated)
    :raises ValueError: if dimension is neither 2 nor 3
    """

    # Number of candidate dispositions optimized when no cached file exists
    num_tries = 100

    # Kernel directory. exist_ok avoids the race between a separate exists() check and makedirs()
    kernel_dir = 'kernels/dispositions'
    makedirs(kernel_dir, exist_ok=True)

    # Kernel file
    if dimension == 3:
        kernel_file = join(kernel_dir, 'k_{:03d}_{:s}.ply'.format(num_kpoints, fixed))
    elif dimension == 2:
        kernel_file = join(kernel_dir, 'k_{:03d}_{:s}_2D.ply'.format(num_kpoints, fixed))
    else:
        raise ValueError('Unsupported dimension of kernel : ' + str(dimension))

    # Reuse the cached disposition if there is one, otherwise build and cache it
    if exists(kernel_file):
        data = read_ply(kernel_file)
        original_kernel = np.vstack((data['x'], data['y'], data['z'])).T

    else:
        # Create candidate kernels
        # NOTE(review): the first argument (1.0) is presumably the radius used during
        # optimization - confirm against kernel_point_optimization_debug
        kernel_points, grad_norms = kernel_point_optimization_debug(1.0,
                                                                    num_kpoints,
                                                                    num_kernels=num_tries,
                                                                    dimension=dimension,
                                                                    fixed=fixed,
                                                                    verbose=0)

        # Keep the candidate with the smallest value in the last row of grad_norms
        best_k = np.argmin(grad_norms[-1, :])

        # Save points
        original_kernel = kernel_points[best_k, :, :]
        write_ply(kernel_file, original_kernel, ['x', 'y', 'z'])

    # N.B. 2D kernels are not supported yet
    if dimension == 2:
        return original_kernel

    # Random rotations depending on the fixed points
    vertical_only = fixed == 'verticals'
    if vertical_only:
        rotations = _random_vertical_rotations(num_kernels)
    else:
        rotations = _random_rotations(num_kernels)

    # Scale, then rotate: [1, num_kpoints, 3] x [num_kernels, 3, 3] -> [num_kernels, num_kpoints, 3]
    kernels = np.matmul(radius * np.expand_dims(original_kernel, 0), rotations)

    # Free rotations additionally get a small noise (1% of the radius)
    if not vertical_only:
        kernels = kernels + np.random.normal(scale=radius*0.01, size=kernels.shape)

    return kernels


def _random_vertical_rotations(num_kernels):
    """Return [num_kernels, 3, 3] float32 rotations of random angle around the z axis."""
    thetas = np.random.rand(num_kernels) * 2 * np.pi
    c, s = np.cos(thetas), np.sin(thetas)
    R = np.zeros((num_kernels, 3, 3), dtype=np.float32)
    R[:, 0, 0] = c
    R[:, 1, 1] = c
    R[:, 2, 2] = 1
    R[:, 0, 1] = s
    R[:, 1, 0] = -s
    return R


def _random_rotations(num_kernels):
    """Return [num_kernels, 3, 3] matrices whose columns are random orthonormal frames (u, v, u x v)."""

    # Draw unit vectors u and v, redrawing the pairs that are (almost) parallel.
    # Both start as ones, so every pair is drawn at least once.
    u = np.ones((num_kernels, 3))
    v = np.ones((num_kernels, 3))
    wrongs = np.abs(np.sum(u * v, axis=1)) > 0.99
    while np.any(wrongs):
        new_u = np.random.rand(num_kernels, 3) * 2 - 1
        new_u = new_u / np.expand_dims(np.linalg.norm(new_u, axis=1) + 1e-9, -1)
        u[wrongs, :] = new_u[wrongs, :]
        new_v = np.random.rand(num_kernels, 3) * 2 - 1
        new_v = new_v / np.expand_dims(np.linalg.norm(new_v, axis=1) + 1e-9, -1)
        v[wrongs, :] = new_v[wrongs, :]
        wrongs = np.abs(np.sum(u * v, axis=1)) > 0.99

    # Make v perpendicular to u
    v -= np.expand_dims(np.sum(u * v, axis=1), -1) * u
    v = v / np.expand_dims(np.linalg.norm(v, axis=1) + 1e-9, -1)

    # Last rotation vector
    w = np.cross(u, v)
    return np.stack((u, v, w), axis=-1)

KPConv_ops()

KPConv/kernels/convolution_ops.py

# Build the graph of operations of the (rigid) kernel point convolution
def KPConv_ops(query_points,
               support_points,
               neighbors_indices,
               features,
               K_points,
               K_values,
               KP_extent,
               KP_influence,
               aggregation_mode):
    """
    Create the graph of operations defining Kernel Point Convolution in tensorflow.

    :param query_points:        [n_points, dim] - centers of the neighborhoods
    :param support_points:      [n0_points, dim] - points from which neighbors are taken
    :param neighbors_indices:   [n_points, n_neighbors] - neighbor indices
    :param features:            [n_points, in_fdim] - input features
    :param K_points:            [n_kpoints, dim] - kernel point positions
    :param K_values:            [n_kpoints, in_fdim, out_fdim] - kernel weights
    :param KP_extent:           float32 - influence radius of each kernel point
    :param KP_influence:        string - influence function ('constant', 'linear' or 'gaussian')
    :param aggregation_mode:    string - 'closest' or 'sum'
    :return:                    [n_points, out_fdim]
    :raises ValueError: on an unknown KP_influence or aggregation_mode
    """

    num_kernel_points = int(K_points.shape[0])

    # Append a fake, far away point as last row: it is the position of the shadow neighbors
    far_point = tf.ones_like(support_points[:1, :]) * 1e6
    padded_points = tf.concat([support_points, far_point], axis=0)

    # Neighbor coordinates [n_points, n_neighbors, dim], expressed relative to their query point
    neighb_xyz = tf.gather(padded_points, neighbors_indices, axis=0)
    centered = neighb_xyz - tf.expand_dims(query_points, 1)

    # Differences between every neighbor and every kernel point [n_points, n_neighbors, n_kpoints, dim]
    tiled = tf.tile(tf.expand_dims(centered, 2), [1, 1, num_kernel_points, 1])
    kp_differences = tiled - K_points

    # Square distances [n_points, n_neighbors, n_kpoints]
    sq_distances = tf.reduce_sum(tf.square(kp_differences), axis=3)

    # Influence of each kernel point on each neighbor
    if KP_influence == 'constant':
        # Every neighbor gets an influence of 1
        influence = tf.ones_like(sq_distances)
    elif KP_influence == 'linear':
        # Decreases linearly with the distance and reaches zero at d = KP_extent
        influence = tf.maximum(1 - tf.sqrt(sq_distances) / KP_extent, 0.0)
    elif KP_influence == 'gaussian':
        # Gaussian of the distance, with sigma = 0.3 * KP_extent
        influence = radius_gaussian(sq_distances, KP_extent * 0.3)
    else:
        raise ValueError('Unknown influence function type (config.KP_influence)')

    # Reorder to [n_points, n_kpoints, n_neighbors]
    influence = tf.transpose(influence, [0, 2, 1])

    # In closest mode, a neighbor is only influenced by its closest kernel point
    if aggregation_mode == 'closest':
        nearest_kp = tf.argmin(sq_distances, axis=2, output_type=tf.int32)
        influence *= tf.one_hot(nearest_kp, num_kernel_points, axis=1, dtype=tf.float32)
    elif aggregation_mode != 'sum':
        raise ValueError("Unknown convolution mode. Should be 'closest' or 'sum'")

    # Shadow neighbors gather an all-zero feature row appended at the end
    padded_features = tf.concat([features, tf.zeros_like(features[:1, :])], axis=0)

    # Features of each neighborhood [n_points, n_neighbors, in_fdim]
    neighb_features = tf.gather(padded_features, neighbors_indices, axis=0)

    # Apply the distance weights [n_points, n_kpoints, in_fdim]
    per_kp_features = tf.matmul(influence, neighb_features)

    # Apply the network weights [n_kpoints, n_points, out_fdim]
    per_kp_features = tf.transpose(per_kp_features, [1, 0, 2])
    per_kp_outputs = tf.matmul(per_kp_features, K_values)

    # Sum over the kernel points [n_points, out_fdim]
    return tf.reduce_sum(per_kp_outputs, axis=0)

使用Deformable Kernel

KPConv/kernels/convolution_ops.py

def KPConv_deformable(query_points,
                      support_points,
                      neighbors_indices,
                      features,
                      K_values,
                      fixed='center',
                      KP_extent=1.0,
                      KP_influence='linear',
                      aggregation_mode='sum',
                      modulated=False):
    """
    Initiate the kernel point disposition, then build the deformable KPConv graph ops.

    A first, rigid KPConv predicts one offset per kernel point (and optionally one modulation per
    kernel point) for every query point; the deformable convolution is then applied with the
    shifted kernel points.

    :param query_points: float32[n_points, dim] - input query points (center of neighborhoods)
    :param support_points: float32[n0_points, dim] - input support points (from which neighbors are taken)
    :param neighbors_indices: int32[n_points, n_neighbors] - indices of neighbors of each point
    :param features: float32[n_points, in_fdim] - input features
    :param K_values: float32[n_kpoints, in_fdim, out_fdim] - weights of the kernel
    :param fixed: string in ('none', 'center' or 'verticals') - fix position of certain kernel points
    :param KP_extent: float32 - influence radius of each kernel point
    :param KP_influence: string in ('constant', 'linear', 'gaussian') - influence function of the kernel points
    :param aggregation_mode: string in ('closest', 'sum') - behavior of the convolution
    :param modulated: bool - whether the deformable convolution should also be modulated

    :return: output_features float32[n_points, out_fdim]
    """

    # Same local import as in KPConv(): the name is not otherwise bound in the visible part of
    # this module. Harmless if a module-level import also exists.
    from kernels.kernel_points import load_kernels as create_kernel_points

    ############
    # Parameters
    ############

    # Radius of the initial positions of the kernel points
    K_radius = 1.5 * KP_extent

    # Number of kernel points
    num_kpoints = int(K_values.shape[0])

    # Check point dimension (currently only 3D is supported)
    points_dim = int(query_points.shape[1])

    #################################
    # Initiate kernel point positions
    #################################

    # Create one kernel disposition (as numpy array). Choose the KP distance to center thanks to the KP extent
    K_points_numpy = create_kernel_points(K_radius,
                                          num_kpoints,
                                          num_kernels=1,
                                          dimension=points_dim,
                                          fixed=fixed)
    K_points_numpy = K_points_numpy.reshape((num_kpoints, points_dim))

    # Create the tensorflow variable
    K_points = tf.Variable(K_points_numpy.astype(np.float32),
                           name='kernel_points',
                           trainable=False,
                           dtype=tf.float32)

    #############################
    # Standard KPConv for offsets (this is the part that differs from the rigid KPConv)
    #############################

    # One offset vector per kernel point, plus one modulation scalar per kernel point if modulated
    if modulated:
        offset_dim = (points_dim + 1) * num_kpoints
    else:
        offset_dim = points_dim * num_kpoints

    # Independent weights for this first convolution, and a bias term as no batch normalization
    # happens. Both start at zero, so the initial offsets are zero (undeformed kernel) and the
    # initial modulations are 2 * sigmoid(0) = 1.
    shape0 = K_values.shape.as_list()
    shape0[-1] = offset_dim
    K_values0 = tf.Variable(tf.zeros(shape0, dtype=tf.float32), name='offset_conv_weights')
    b0 = tf.Variable(tf.zeros(offset_dim, dtype=tf.float32), name='offset_conv_bias')

    # Get features from standard convolution [n_points, offset_dim]
    features0 = KPConv_ops(query_points,
                           support_points,
                           neighbors_indices,
                           features,
                           K_points,
                           K_values0,
                           KP_extent,
                           KP_influence,
                           aggregation_mode) + b0

    if modulated:

        # Get offset (in normalized scale) from the first points_dim * num_kpoints features
        offsets = features0[:, :points_dim * num_kpoints]
        offsets = tf.reshape(offsets, [-1, num_kpoints, points_dim])

        # Get modulations from the remaining features, in the range (0, 2)
        modulations = 2 * tf.sigmoid(features0[:, points_dim * num_kpoints:])

    else:

        # Get offset (in normalized scale) from features
        offsets = tf.reshape(features0, [-1, num_kpoints, points_dim])

        # No modulations
        modulations = None

    # Rescale offset for this layer
    offsets *= KP_extent

    ###############################
    # Build deformable KPConv graph
    ###############################

    # Apply deformed convolution
    return KPConv_deform_ops(query_points,
                             support_points,
                             neighbors_indices,
                             features,
                             K_points,
                             offsets,
                             modulations,
                             K_values,
                             KP_extent,
                             KP_influence,
                             aggregation_mode)

KPConv/kernels/convolution_ops.py

def KPConv_deform_ops(query_points,
                      support_points,
                      neighbors_indices,
                      features,
                      K_points,
                      offsets,
                      modulations,
                      K_values,
                      KP_extent,
                      KP_influence,
                      mode):
    """
    This function creates a graph of operations to define Deformable Kernel Point Convolution in tensorflow. See
    KPConv_deformable function above for a description of each parameter

    Compared to the rigid KPConv_ops, the kernel points are shifted by per-point offsets, and the
    neighborhoods are reduced to the neighbors that are in range of at least one shifted kernel point.

    :param query_points:        [n_points, dim]
    :param support_points:      [n0_points, dim]
    :param neighbors_indices:   [n_points, n_neighbors]
    :param features:            [n_points, in_fdim]
    :param K_points:            [n_kpoints, dim]
    :param offsets:             [n_points, n_kpoints, dim]
    :param modulations:         [n_points, n_kpoints] or None
    :param K_values:            [n_kpoints, in_fdim, out_fdim]
    :param KP_extent:           float32
    :param KP_influence:        string
    :param mode:                string

    :return:                    [n_points, out_fdim]
    :raises ValueError:         on an unknown KP_influence or mode
    """

    # Get variables: number of kernel points, and the row index that the shadow point appended
    # below will occupy (number of support points before the concat)
    n_kp = int(K_points.shape[0])
    shadow_ind = tf.shape(support_points)[0]

    # Add a fake point in the last row for shadow neighbors
    # NOTE(review): KPConv_ops places its shadow point at 1e6, this function at 1000 - confirm
    # the difference is intentional
    shadow_point = tf.ones_like(support_points[:1, :]) * 1000
    support_points = tf.concat([support_points, shadow_point], axis=0)

    # Get neighbor points [n_points, n_neighbors, dim]
    neighbors = tf.gather(support_points, neighbors_indices, axis=0)

    # Center every neighborhood
    neighbors = neighbors - tf.expand_dims(query_points, 1)

    # Apply offsets to kernel points [n_points, n_kpoints, dim]
    deformed_K_points = tf.add(offsets, K_points, name='deformed_KP')

    # Get all difference matrices [n_points, n_neighbors, n_kpoints, dim]
    neighbors = tf.expand_dims(neighbors, 2)
    neighbors = tf.tile(neighbors, [1, 1, n_kp, 1])
    differences = neighbors - tf.expand_dims(deformed_K_points, 1)

    # Get the square distances [n_points, n_neighbors, n_kpoints]
    sq_distances = tf.reduce_sum(tf.square(differences), axis=3, name='deformed_d2')

    # 0/1 flag of the neighbors in range of at least one kernel point [n_points, n_neighbors]
    in_range = tf.cast(tf.reduce_any(tf.less(sq_distances, KP_extent**2), axis=2), tf.int32)

    # New value of max neighbors: largest number of in-range neighbors over all points
    new_max_neighb = tf.reduce_max(tf.reduce_sum(in_range, axis=1))

    # For each row of neighbors, indices of the ones that are in range [n_points, new_max_neighb].
    # top_k on the 0/1 flags selects the in-range neighbors first; new_neighb_bool holds the flag
    # of each selected slot (0 for the slots of rows with fewer in-range neighbors)
    new_neighb_bool, new_neighb_inds = tf.math.top_k(in_range, k=new_max_neighb)

    # Gather new neighbor indices [n_points, new_max_neighb]
    new_neighbors_indices = tf.batch_gather(neighbors_indices, new_neighb_inds)

    # Gather new distances to KP [n_points, new_max_neighb, n_kpoints]
    new_sq_distances = tf.batch_gather(sq_distances, new_neighb_inds)

    # New shadow neighbors have to point to the last shadow point: keep the index where the flag
    # is 1, replace it by shadow_ind where the flag is 0
    new_neighbors_indices *= new_neighb_bool
    new_neighbors_indices += (1 - new_neighb_bool) * shadow_ind

    # Get Kernel point influences [n_points, n_kpoints, n_neighbors]
    if KP_influence == 'constant':
        # Every neighbor within KP_extent of a kernel point gets an influence of 1, the others 0
        all_weights = tf.cast(new_sq_distances < KP_extent ** 2, tf.float32)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    elif KP_influence == 'linear':
        # Influence decrease linearly with the distance, and get to zero when d = KP_extent.
        all_weights = tf.maximum(1 - tf.sqrt(new_sq_distances) / KP_extent, 0.0)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    elif KP_influence == 'gaussian':
        # Influence in gaussian of the distance, with sigma = 0.3 * KP_extent
        sigma = KP_extent * 0.3
        all_weights = radius_gaussian(new_sq_distances, sigma)
        all_weights = tf.transpose(all_weights, [0, 2, 1])
    else:
        raise ValueError('Unknown influence function type (config.KP_influence)')

    # In case of closest mode, only the closest KP can influence each point
    if mode == 'closest':
        neighbors_1nn = tf.argmin(new_sq_distances, axis=2, output_type=tf.int32)
        all_weights *= tf.one_hot(neighbors_1nn, n_kp, axis=1, dtype=tf.float32)

    elif mode != 'sum':
        raise ValueError("Unknown convolution mode. Should be 'closest' or 'sum'")

    # Append an all-zero feature row, gathered by the shadow neighbors
    features = tf.concat([features, tf.zeros_like(features[:1, :])], axis=0)

    # Get the features of each neighborhood [n_points, new_max_neighb, in_fdim]
    neighborhood_features = tf.gather(features, new_neighbors_indices, axis=0)

    # Apply distance weights [n_points, n_kpoints, in_fdim]
    weighted_features = tf.matmul(all_weights, neighborhood_features)

    # Apply modulations (one scalar per point and kernel point, broadcast over the features)
    if modulations is not None:
        weighted_features *= tf.expand_dims(modulations, 2)

    # Apply network weights [n_kpoints, n_points, out_fdim]
    weighted_features = tf.transpose(weighted_features, [1, 0, 2])
    kernel_outputs = tf.matmul(weighted_features, K_values)

    # Convolution sum [n_points, out_fdim]
    output_features = tf.reduce_sum(kernel_outputs, axis=0)

    return output_features

你可能感兴趣的:(点云,KPConv,python,深度学习)