Paper: https://arxiv.org/abs/1904.08889
Code: https://github.com/HuguesTHOMAS/KPConv
This post walks through the structure of KPConv, starting from the file training_ModelNet40.py.
KPConv/training_ModelNet40.py
# Network architecture
architecture = ['simple',
                'resnetb',
                'resnetb_strided',
                'resnetb',
                'resnetb_strided',
                'resnetb',
                'resnetb_strided',
                'resnetb_deformable',
                'resnetb_deformable_strided',
                'resnetb_deformable',
                'global_average']
Each resnetb entry in architecture corresponds to resnetb_block() in network_blocks.
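For orientation, the strings in architecture are consumed one by one when the network graph is assembled. The following is a hypothetical sketch of that dispatching (assemble_blocks_sketch is an assumed name, not the repo's actual assembly code); only resnetb_block is taken from the listing above:

# Hypothetical sketch (not the repo's actual assembly code): each string in
# `architecture` selects a block-building function by name.
def assemble_blocks_sketch(architecture, layer_ind, inputs, features, radius, fdim, config, training):
    for block_name in architecture:
        if block_name == 'resnetb':
            features = resnetb_block(layer_ind, inputs, features, radius, fdim, config, training)
        # 'simple', 'resnetb_strided', 'resnetb_deformable', 'global_average', ...
        # are dispatched to their own block functions in the same way, with
        # strided blocks additionally moving to the next layer (layer_ind += 1).
    return features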
KPConv/models/network_blocks.py
def resnetb_block(layer_ind, inputs, features, radius, fdim, config, training):
    """
    Block performing a resnet bottleneck convolution (1conv > KPconv > 1conv + shortcut)
    """
    with tf.variable_scope('conv1'):
        w = weight_variable([int(features.shape[1]), fdim // 2])
        x = conv_ops.unary_convolution(features, w)  # equivalent to a matmul
        x = leaky_relu(batch_norm(x,
                                  config.use_batch_norm,
                                  config.batch_norm_momentum,
                                  training))

    with tf.variable_scope('conv2'):
        w = weight_variable([config.num_kernel_points, int(x.shape[1]), fdim // 2])  # create the kernel weight tensor
        x = KPConv(inputs['points'][layer_ind],  # call the KPConv convolution
                   inputs['points'][layer_ind],
                   inputs['neighbors'][layer_ind],
                   x,
                   w,
                   radius,
                   config)
        x = leaky_relu(batch_norm(x,
                                  config.use_batch_norm,
                                  config.batch_norm_momentum,
                                  training))

    with tf.variable_scope('conv3'):
        w = weight_variable([int(x.shape[1]), 2 * fdim])
        x = conv_ops.unary_convolution(x, w)  # equivalent to a matmul
        x = batch_norm(x,
                       config.use_batch_norm,
                       config.batch_norm_momentum,
                       training)

    with tf.variable_scope('shortcut'):  # shortcut; if the dimensions do not match, apply a matmul first
        if int(features.shape[1]) != 2 * fdim:
            w = weight_variable([int(features.shape[1]), 2 * fdim])
            shortcut = conv_ops.unary_convolution(features, w)  # equivalent to a matmul
            shortcut = batch_norm(shortcut,
                                  config.use_batch_norm,
                                  config.batch_norm_momentum,
                                  training)
        else:
            shortcut = features

    return leaky_relu(x + shortcut)
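To make the bottleneck shape concrete, here is a small worked example of the channel dimensions, using purely illustrative values (not taken from any particular config):

# Illustrative values only: suppose fdim = 64 and the incoming features have 96 channels.
fdim = 64
in_fdim = 96
conv1_out = fdim // 2      # 32 channels after the first unary convolution
conv2_out = fdim // 2      # the KPConv in conv2 keeps 32 channels
conv3_out = 2 * fdim       # 128 channels after the last unary convolution
# shortcut: since in_fdim (96) != 2 * fdim (128), the shortcut branch applies
# a unary convolution mapping 96 -> 128 before the final addition.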
...
def weight_variable(shape):
    # tf.set_random_seed(42)
    initial = tf.truncated_normal(shape, stddev=np.sqrt(2 / shape[-1]))
    initial = tf.round(initial * tf.constant(1000, dtype=tf.float32)) / tf.constant(1000, dtype=tf.float32)
    return tf.Variable(initial, name='weights')
Here, in the conv2 scope, a weight tensor w is first created randomly with weight_variable, and then KPConv() from network_blocks is called.
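Note the shape used for w in conv2: [num_kernel_points, in_fdim, out_fdim], i.e. each kernel point carries its own in_fdim x out_fdim matrix. A quick NumPy sketch of a tensor with that shape and the same stddev rule as weight_variable (toy numbers, not actual config values):

import numpy as np

num_kernel_points, in_fdim, out_fdim = 15, 32, 32     # toy values
# Same initialization idea as weight_variable: values with stddev sqrt(2 / shape[-1]),
# here simply drawn from a normal distribution instead of a truncated one.
w = np.random.randn(num_kernel_points, in_fdim, out_fdim) * np.sqrt(2 / out_fdim)
print(w.shape)   # (15, 32, 32): one weight matrix per kernel point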
KPConv/models/network_blocks.py
def KPConv(query_points, support_points, neighbors_indices, features, K_values, radius, config):
    """
    Returns the output features of a KPConv
    """
    # Get KP extent from current radius and config density
    extent = config.KP_extent * radius / config.density_parameter

    # Convolution
    return conv_ops.KPConv(query_points,
                           support_points,
                           neighbors_indices,
                           features,
                           K_values,
                           fixed=config.fixed_kernel_points,
                           KP_extent=extent,
                           KP_influence=config.KP_influence,
                           aggregation_mode=config.convolution_mode)
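The only computation this wrapper adds is rescaling the kernel-point influence distance to the current layer's convolution radius. A worked example with purely illustrative numbers (not necessarily the repo's defaults):

# Illustrative values only
KP_extent_cfg     = 1.0    # config.KP_extent
density_parameter = 5.0    # config.density_parameter
radius            = 0.1    # convolution radius of the current layer

extent = KP_extent_cfg * radius / density_parameter   # = 0.02, passed down to conv_ops.KPConv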
KPConv/kernels/convolution_ops.py
# Builds the convolution op (kernel disposition + graph ops)
def KPConv(query_points,
           support_points,
           neighbors_indices,
           features,
           K_values,
           fixed='center',
           KP_extent=1.0,
           KP_influence='linear',
           aggregation_mode='sum'):
    """
    This function initiates the kernel point disposition before building KPConv graph ops
    :param query_points: float32[n_points, dim] - input query points (center of neighborhoods)
    :param support_points: float32[n0_points, dim] - input support points (from which neighbors are taken, i.e. all points)
    :param neighbors_indices: int32[n_points, n_neighbors] - indices of neighbors of each center point
    :param features: float32[n_points, in_fdim] - input features (the center-point features?)
    :param fixed: string in ('none', 'center' or 'verticals') - fix position of certain kernel points under random rotation
    :param K_values: float32[n_kpoints, in_fdim, out_fdim] - weights of the kernel
    :param KP_extent: float32 - influence radius of each kernel point
    :param KP_influence: string in ('constant', 'linear', 'gaussian') - influence function of the kernel points
    :param aggregation_mode: string in ('closest', 'sum') - whether to sum influences, or only keep the closest
    :return: output_features float32[n_points, out_fdim]
    """

    # Initial kernel extent for this layer
    K_radius = 1.5 * KP_extent

    # Number of kernel points
    num_kpoints = int(K_values.shape[0])

    # Check point dimension (currently only 3D is supported)
    points_dim = int(query_points.shape[1])

    # Create one kernel disposition (as numpy array). Choose the KP distance to center thanks to the KP extent
    from kernels.kernel_points import load_kernels as create_kernel_points
    K_points_numpy = create_kernel_points(K_radius,
                                          num_kpoints,
                                          num_kernels=1,
                                          dimension=points_dim,
                                          fixed=fixed)
    K_points_numpy = K_points_numpy.reshape((num_kpoints, points_dim))

    # Create the tensorflow variable
    K_points = tf.Variable(K_points_numpy.astype(np.float32),
                           name='kernel_points',
                           trainable=False,
                           dtype=tf.float32)

    return KPConv_ops(query_points,
                      support_points,
                      neighbors_indices,
                      features,
                      K_points,
                      K_values,
                      KP_extent,
                      KP_influence,
                      aggregation_mode)
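For reference, the ops this function sets up correspond to the rigid kernel point convolution defined in the paper (written here for the 'linear' influence and 'sum' aggregation settings):

(\mathcal{F} * g)(x) = \sum_{x_i \in \mathcal{N}_x} g(x_i - x)\, f_i,
\qquad
g(y) = \sum_{k < K} h(y, \tilde{x}_k)\, W_k,
\qquad
h(y, \tilde{x}_k) = \max\!\left(0,\; 1 - \frac{\lVert y - \tilde{x}_k \rVert}{\sigma}\right)

where the \tilde{x}_k are the kernel points (K_points), the W_k are the per-kernel-point weight matrices (K_values), and \sigma is the influence distance (KP_extent).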
KPConv/kernels/kernel_points.py
# Creates / loads the kernel disposition. A random rotation is applied every time the kernels are loaded.
def load_kernels(radius, num_kpoints, num_kernels, dimension, fixed):

    # Number of optimization attempts, to keep the most stable disposition
    num_tries = 100

    # Kernel directory
    kernel_dir = 'kernels/dispositions'
    if not exists(kernel_dir):
        makedirs(kernel_dir)

    # Kernel file path
    if dimension == 3:
        kernel_file = join(kernel_dir, 'k_{:03d}_{:s}.ply'.format(num_kpoints, fixed))
    elif dimension == 2:
        kernel_file = join(kernel_dir, 'k_{:03d}_{:s}_2D.ply'.format(num_kpoints, fixed))
    else:
        raise ValueError('Unsupported dimension of kernel : ' + str(dimension))

    # Check if a kernel disposition has already been computed; load it if so, otherwise create it
    if not exists(kernel_file):

        # Create kernels
        kernel_points, grad_norms = kernel_point_optimization_debug(1.0,
                                                                    num_kpoints,
                                                                    num_kernels=num_tries,
                                                                    dimension=dimension,
                                                                    fixed=fixed,
                                                                    verbose=0)

        # Find best candidate
        best_k = np.argmin(grad_norms[-1, :])

        # Save points
        original_kernel = kernel_points[best_k, :, :]
        write_ply(kernel_file, original_kernel, ['x', 'y', 'z'])

    else:
        data = read_ply(kernel_file)
        original_kernel = np.vstack((data['x'], data['y'], data['z'])).T

    # N.B. 2D kernels are not supported yet
    if dimension == 2:
        return original_kernel

    # Random rotations depending on the fixed points
    if fixed == 'verticals':

        # Create random rotations (around the vertical axis)
        thetas = np.random.rand(num_kernels) * 2 * np.pi
        c, s = np.cos(thetas), np.sin(thetas)
        R = np.zeros((num_kernels, 3, 3), dtype=np.float32)
        R[:, 0, 0] = c
        R[:, 1, 1] = c
        R[:, 2, 2] = 1
        R[:, 0, 1] = s
        R[:, 1, 0] = -s

        # Scale kernels
        original_kernel = radius * np.expand_dims(original_kernel, 0)

        # Rotate kernels
        kernels = np.matmul(original_kernel, R)

    else:

        # Create random rotations
        u = np.ones((num_kernels, 3))
        v = np.ones((num_kernels, 3))
        wrongs = np.abs(np.sum(u * v, axis=1)) > 0.99
        while np.any(wrongs):
            new_u = np.random.rand(num_kernels, 3) * 2 - 1
            new_u = new_u / np.expand_dims(np.linalg.norm(new_u, axis=1) + 1e-9, -1)
            u[wrongs, :] = new_u[wrongs, :]
            new_v = np.random.rand(num_kernels, 3) * 2 - 1
            new_v = new_v / np.expand_dims(np.linalg.norm(new_v, axis=1) + 1e-9, -1)
            v[wrongs, :] = new_v[wrongs, :]
            wrongs = np.abs(np.sum(u * v, axis=1)) > 0.99

        # Make v perpendicular to u
        v -= np.expand_dims(np.sum(u * v, axis=1), -1) * u
        v = v / np.expand_dims(np.linalg.norm(v, axis=1) + 1e-9, -1)

        # Last rotation vector
        w = np.cross(u, v)
        R = np.stack((u, v, w), axis=-1)

        # Scale kernels
        original_kernel = radius * np.expand_dims(original_kernel, 0)

        # Rotate kernels
        kernels = np.matmul(original_kernel, R)

    # Add a small noise
    kernels = kernels + np.random.normal(scale=radius*0.01, size=kernels.shape)

    return kernels
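Both branches build proper rotation matrices: the 'verticals' branch rotates only around the z axis (so vertically fixed kernel points stay on that axis), while the general branch assembles an orthonormal basis (u, v, w). A quick NumPy sanity check of the 'verticals' construction (standalone sketch, not repo code):

import numpy as np

# Reproduce the 'verticals' branch for a single random rotation
theta = np.random.rand() * 2 * np.pi
c, s = np.cos(theta), np.sin(theta)
R = np.array([[c,  s, 0],
              [-s, c, 0],
              [0,  0, 1]], dtype=np.float32)

print(np.allclose(R @ R.T, np.eye(3), atol=1e-6))   # True: R is a rotation about the z axis
print(R @ np.array([0.0, 0.0, 1.0]))                # [0, 0, 1]: vertical points are left unchanged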
KPConv/kernels/convolution_ops.py
# Builds the graph ops of the rigid convolution
def KPConv_ops(query_points,
               support_points,
               neighbors_indices,
               features,
               K_points,
               K_values,
               KP_extent,
               KP_influence,
               aggregation_mode):
    """
    This function creates a graph of operations to define Kernel Point Convolution in tensorflow. See KPConv function
    above for a description of each parameter
    :param query_points: [n_points, dim] coordinates of the query (center) points
    :param support_points: [n0_points, dim] coordinates of all support points
    :param neighbors_indices: [n_points, n_neighbors] neighbor indices
    :param features: [n_points, in_fdim] input features
    :param K_points: [n_kpoints, dim] kernel point positions
    :param K_values: [n_kpoints, in_fdim, out_fdim] kernel weights
    :param KP_extent: float32 influence radius of each kernel point
    :param KP_influence: string influence function of the kernel points
    :param aggregation_mode: string feature aggregation mode
    :return: [n_points, out_fdim]
    """

    # Get variables (number of kernel points)
    n_kp = int(K_points.shape[0])

    # Add a fake point in the last row for shadow neighbors
    shadow_point = tf.ones_like(support_points[:1, :]) * 1e6
    support_points = tf.concat([support_points, shadow_point], axis=0)

    # Get neighbor points [n_points, n_neighbors, dim]
    neighbors = tf.gather(support_points, neighbors_indices, axis=0)

    # Center every neighborhood on its query point
    neighbors = neighbors - tf.expand_dims(query_points, 1)

    # Get all difference matrices between neighbors and kernel points [n_points, n_neighbors, n_kpoints, dim]
    neighbors = tf.expand_dims(neighbors, 2)
    neighbors = tf.tile(neighbors, [1, 1, n_kp, 1])
    differences = neighbors - K_points

    # Get the square distances [n_points, n_neighbors, n_kpoints]
    sq_distances = tf.reduce_sum(tf.square(differences), axis=3)

    # Get kernel point influences [n_points, n_kpoints, n_neighbors]
    if KP_influence == 'constant':
        # Every point gets an influence of 1.
        all_weights = tf.ones_like(sq_distances)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    elif KP_influence == 'linear':
        # Influence decreases linearly with the distance, reaching zero when d = KP_extent.
        all_weights = tf.maximum(1 - tf.sqrt(sq_distances) / KP_extent, 0.0)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    elif KP_influence == 'gaussian':
        # Influence is a gaussian of the distance.
        sigma = KP_extent * 0.3
        all_weights = radius_gaussian(sq_distances, sigma)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    else:
        raise ValueError('Unknown influence function type (config.KP_influence)')

    # In case of closest mode, only the closest KP can influence each point
    if aggregation_mode == 'closest':
        neighbors_1nn = tf.argmin(sq_distances, axis=2, output_type=tf.int32)
        all_weights *= tf.one_hot(neighbors_1nn, n_kp, axis=1, dtype=tf.float32)

    elif aggregation_mode != 'sum':
        raise ValueError("Unknown convolution mode. Should be 'closest' or 'sum'")

    # Add a zero feature row for the shadow point
    features = tf.concat([features, tf.zeros_like(features[:1, :])], axis=0)

    # Get the features of each neighborhood [n_points, n_neighbors, in_fdim]
    neighborhood_features = tf.gather(features, neighbors_indices, axis=0)

    # Apply distance weights [n_points, n_kpoints, in_fdim]
    weighted_features = tf.matmul(all_weights, neighborhood_features)

    # Apply network weights [n_kpoints, n_points, out_fdim]
    weighted_features = tf.transpose(weighted_features, [1, 0, 2])
    kernel_outputs = tf.matmul(weighted_features, K_values)

    # Convolution sum to get [n_points, out_fdim]
    output_features = tf.reduce_sum(kernel_outputs, axis=0)

    return output_features
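Putting the ops together: for each query point, every kernel point gathers a distance-weighted sum of the neighbor features, applies its own weight matrix, and the results are summed over kernel points. A minimal NumPy sketch of the same computation for a single query point (linear influence, 'sum' aggregation, toy shapes only; this is an illustration, not the repo's code):

import numpy as np

n_neighbors, n_kp, in_fdim, out_fdim = 8, 15, 4, 5
extent = 0.04

neighbors = np.random.randn(n_neighbors, 3) * 0.03      # neighbor coords, already centered on the query point
K_points  = np.random.randn(n_kp, 3) * 0.03             # kernel point positions
feats     = np.random.randn(n_neighbors, in_fdim)        # neighbor features
K_values  = np.random.randn(n_kp, in_fdim, out_fdim)     # one weight matrix per kernel point

# [n_neighbors, n_kp] distances between neighbors and kernel points
d = np.linalg.norm(neighbors[:, None, :] - K_points[None, :, :], axis=-1)

# Linear influence, zero beyond the extent -> [n_kp, n_neighbors]
h = np.maximum(1 - d / extent, 0.0).T

# Distance-weighted sum of neighbor features per kernel point: [n_kp, in_fdim]
weighted = h @ feats

# Apply each kernel point's weight matrix and sum over kernel points: [out_fdim]
out = np.einsum('ki,kio->o', weighted, K_values)
print(out.shape)   # (5,)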
KPConv/kernels/convolution_ops.py
def KPConv_deformable(query_points,
                      support_points,
                      neighbors_indices,
                      features,
                      K_values,
                      fixed='center',
                      KP_extent=1.0,
                      KP_influence='linear',
                      aggregation_mode='sum',
                      modulated=False):
    """
    This function initiates the kernel point disposition before building deformable KPConv graph ops
    :param query_points: float32[n_points, dim] - input query points (center of neighborhoods)
    :param support_points: float32[n0_points, dim] - input support points (from which neighbors are taken)
    :param neighbors_indices: int32[n_points, n_neighbors] - indices of neighbors of each point
    :param features: float32[n_points, in_fdim] - input features
    :param K_values: float32[n_kpoints, in_fdim, out_fdim] - weights of the kernel
    :param fixed: string in ('none', 'center' or 'verticals') - fix position of certain kernel points
    :param KP_extent: float32 - influence radius of each kernel point
    :param KP_influence: string in ('constant', 'linear', 'gaussian') - influence function of the kernel points
    :param aggregation_mode: string in ('closest', 'sum') - behavior of the convolution
    :param modulated: bool - If deformable conv should be modulated
    :return: output_features float32[n_points, out_fdim]
    """

    ############
    # Parameters
    ############

    # Radius of the initial positions of the kernel points
    K_radius = 1.5 * KP_extent

    # Number of kernel points
    num_kpoints = int(K_values.shape[0])

    # Check point dimension (currently only 3D is supported)
    points_dim = int(query_points.shape[1])

    #################################
    # Initiate kernel point positions
    #################################

    # Create one kernel disposition (as numpy array). Choose the KP distance to center thanks to the KP extent
    K_points_numpy = create_kernel_points(K_radius,
                                          num_kpoints,
                                          num_kernels=1,
                                          dimension=points_dim,
                                          fixed=fixed)
    K_points_numpy = K_points_numpy.reshape((num_kpoints, points_dim))

    # Create the tensorflow variable
    K_points = tf.Variable(K_points_numpy.astype(np.float32),
                           name='kernel_points',
                           trainable=False,
                           dtype=tf.float32)

    ###############################################################
    # Standard KPConv for offsets (the main changes are in this part)
    ###############################################################

    # Create independent weights for the first convolution and a bias term, as no batch normalization happens here
    if modulated:
        offset_dim = (points_dim + 1) * num_kpoints
    else:
        offset_dim = points_dim * num_kpoints
    shape0 = K_values.shape.as_list()
    shape0[-1] = offset_dim
    K_values0 = tf.Variable(tf.zeros(shape0, dtype=tf.float32), name='offset_conv_weights')
    b0 = tf.Variable(tf.zeros(offset_dim, dtype=tf.float32), name='offset_conv_bias')

    # Get features from a standard (rigid) convolution
    features0 = KPConv_ops(query_points,
                           support_points,
                           neighbors_indices,
                           features,
                           K_points,
                           K_values0,
                           KP_extent,
                           KP_influence,
                           aggregation_mode) + b0

    if modulated:
        # Get offsets (in normalized scale) from features
        offsets = features0[:, :points_dim * num_kpoints]
        offsets = tf.reshape(offsets, [-1, num_kpoints, points_dim])

        # Get modulations
        modulations = 2 * tf.sigmoid(features0[:, points_dim * num_kpoints:])

    else:
        # Get offsets (in normalized scale) from features
        offsets = tf.reshape(features0, [-1, num_kpoints, points_dim])

        # No modulations
        modulations = None

    # Rescale offsets for this layer
    offsets *= KP_extent

    ###############################
    # Build deformable KPConv graph
    ###############################

    # Apply deformed convolution
    return KPConv_deform_ops(query_points,
                             support_points,
                             neighbors_indices,
                             features,
                             K_points,
                             offsets,
                             modulations,
                             K_values,
                             KP_extent,
                             KP_influence,
                             aggregation_mode)
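The shape bookkeeping of the offset branch: with points_dim = 3 and modulated=True, the offset KPConv outputs (3 + 1) * num_kpoints channels per point; the first 3 * num_kpoints are reshaped into per-kernel-point 3D offsets and the last num_kpoints become modulations in (0, 2) via 2 * sigmoid. A toy NumPy sketch of that split (illustrative shapes only):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

n_points, num_kpoints, points_dim, KP_extent = 10, 15, 3, 0.04
features0 = np.random.randn(n_points, (points_dim + 1) * num_kpoints)   # output of the offset KPConv (+ bias)

offsets = features0[:, :points_dim * num_kpoints].reshape(-1, num_kpoints, points_dim)
offsets *= KP_extent                                                     # rescale to the layer's scale
modulations = 2 * sigmoid(features0[:, points_dim * num_kpoints:])       # in (0, 2), one per kernel point

print(offsets.shape, modulations.shape)   # (10, 15, 3) (10, 15)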
KPConv/kernels/convolution_ops.py
def KPConv_deform_ops(query_points,
                      support_points,
                      neighbors_indices,
                      features,
                      K_points,
                      offsets,
                      modulations,
                      K_values,
                      KP_extent,
                      KP_influence,
                      mode):
    """
    This function creates a graph of operations to define Deformable Kernel Point Convolution in tensorflow. See
    KPConv_deformable function above for a description of each parameter
    :param query_points: [n_points, dim]
    :param support_points: [n0_points, dim]
    :param neighbors_indices: [n_points, n_neighbors]
    :param features: [n_points, in_fdim]
    :param K_points: [n_kpoints, dim]
    :param offsets: [n_points, n_kpoints, dim]
    :param modulations: [n_points, n_kpoints] or None
    :param K_values: [n_kpoints, in_fdim, out_fdim]
    :param KP_extent: float32
    :param KP_influence: string
    :param mode: string
    :return: [n_points, out_fdim]
    """

    # Get variables (number of kernel points)
    n_kp = int(K_points.shape[0])
    shadow_ind = tf.shape(support_points)[0]

    # Add a fake point in the last row for shadow neighbors
    shadow_point = tf.ones_like(support_points[:1, :]) * 1000
    support_points = tf.concat([support_points, shadow_point], axis=0)

    # Get neighbor points [n_points, n_neighbors, dim]
    neighbors = tf.gather(support_points, neighbors_indices, axis=0)

    # Center every neighborhood on its query point
    neighbors = neighbors - tf.expand_dims(query_points, 1)

    # Apply offsets to kernel points [n_points, n_kpoints, dim]
    deformed_K_points = tf.add(offsets, K_points, name='deformed_KP')

    # Get all difference matrices between neighbors and deformed kernel points [n_points, n_neighbors, n_kpoints, dim]
    neighbors = tf.expand_dims(neighbors, 2)
    neighbors = tf.tile(neighbors, [1, 1, n_kp, 1])
    differences = neighbors - tf.expand_dims(deformed_K_points, 1)

    # Get the square distances [n_points, n_neighbors, n_kpoints]
    sq_distances = tf.reduce_sum(tf.square(differences), axis=3, name='deformed_d2')

    # Boolean of the neighbors in range of a kernel point [n_points, n_neighbors]
    in_range = tf.cast(tf.reduce_any(tf.less(sq_distances, KP_extent**2), axis=2), tf.int32)

    # New value of max neighbors
    new_max_neighb = tf.reduce_max(tf.reduce_sum(in_range, axis=1))

    # For each row of neighbors, indices of the ones that are in range [n_points, new_max_neighb]
    new_neighb_bool, new_neighb_inds = tf.math.top_k(in_range, k=new_max_neighb)

    # Gather new neighbor indices [n_points, new_max_neighb]
    new_neighbors_indices = tf.batch_gather(neighbors_indices, new_neighb_inds)

    # Gather new distances to KP [n_points, new_max_neighb, n_kpoints]
    new_sq_distances = tf.batch_gather(sq_distances, new_neighb_inds)

    # New shadow neighbors have to point to the last shadow point
    # (in-range neighbors keep their original index, the others are redirected to the shadow point)
    new_neighbors_indices *= new_neighb_bool
    new_neighbors_indices += (1 - new_neighb_bool) * shadow_ind

    # Get kernel point influences [n_points, n_kpoints, n_neighbors]
    if KP_influence == 'constant':
        # Every point gets an influence of 1.
        all_weights = tf.cast(new_sq_distances < KP_extent ** 2, tf.float32)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    elif KP_influence == 'linear':
        # Influence decreases linearly with the distance, reaching zero when d = KP_extent.
        all_weights = tf.maximum(1 - tf.sqrt(new_sq_distances) / KP_extent, 0.0)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    elif KP_influence == 'gaussian':
        # Influence is a gaussian of the distance.
        sigma = KP_extent * 0.3
        all_weights = radius_gaussian(new_sq_distances, sigma)
        all_weights = tf.transpose(all_weights, [0, 2, 1])

    else:
        raise ValueError('Unknown influence function type (config.KP_influence)')

    # In case of closest mode, only the closest KP can influence each point
    if mode == 'closest':
        neighbors_1nn = tf.argmin(new_sq_distances, axis=2, output_type=tf.int32)
        all_weights *= tf.one_hot(neighbors_1nn, n_kp, axis=1, dtype=tf.float32)

    elif mode != 'sum':
        raise ValueError("Unknown convolution mode. Should be 'closest' or 'sum'")

    # Add a zero feature row for the shadow point
    features = tf.concat([features, tf.zeros_like(features[:1, :])], axis=0)

    # Get the features of each neighborhood [n_points, new_max_neighb, in_fdim]
    neighborhood_features = tf.gather(features, new_neighbors_indices, axis=0)

    # Apply distance weights [n_points, n_kpoints, in_fdim]
    weighted_features = tf.matmul(all_weights, neighborhood_features)

    # Apply modulations
    if modulations is not None:
        weighted_features *= tf.expand_dims(modulations, 2)

    # Apply network weights [n_kpoints, n_points, out_fdim]
    weighted_features = tf.transpose(weighted_features, [1, 0, 2])
    kernel_outputs = tf.matmul(weighted_features, K_values)

    # Convolution sum [n_points, out_fdim]
    output_features = tf.reduce_sum(kernel_outputs, axis=0)

    return output_features
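One subtlety of the deformable version is the neighbor pruning in the middle of the function: neighbors that are out of range of every deformed kernel point are dropped, and the freed slots are redirected to the shadow index. A NumPy sketch of what tf.math.top_k on the 0/1 in_range mask achieves for one toy case (illustration only, not repo code):

import numpy as np

# in_range[i, j] = 1 if neighbor j of point i is within KP_extent of at least one deformed kernel point
in_range = np.array([[1, 0, 1, 0],
                     [0, 0, 1, 1]])
neighbors_indices = np.array([[7, 2, 5, 3],
                              [4, 9, 1, 8]])
shadow_ind = 10                                     # index of the appended shadow point

new_max_neighb = in_range.sum(axis=1).max()         # 2

# Emulate tf.math.top_k(in_range, k=new_max_neighb): mask values and their column indices
order = np.argsort(-in_range, axis=1, kind='stable')[:, :new_max_neighb]
new_neighb_bool = np.take_along_axis(in_range, order, axis=1)
new_neighbors_indices = np.take_along_axis(neighbors_indices, order, axis=1)

# Out-of-range slots point at the shadow index
new_neighbors_indices = new_neighbors_indices * new_neighb_bool + (1 - new_neighb_bool) * shadow_ind
print(new_neighbors_indices)   # [[7 5] [1 8]]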