接下来,我们就看一下这份代码都做了哪些操作,以SemanticKITTI为例。
由于作者给出的代码是基于TensorFlow的,我将其移植到了PyTorch,代码详见我的GitHub,其中实现了SemanticKITTI数据集上的训练。
# utils/data_prepare-semantickitti.py
# line 42-50
# Load one scan and its label file; remap_lut is handed to the label loader
# (presumably a label remapping table — confirm in DP.load_label_kitti).
points = DP.load_pc_kitti(join(pc_path, scan_id))
labels = DP.load_label_kitti(join(label_path, str(scan_id[:-4]) + '.label'), remap_lut)
# Grid-subsample the points together with their labels.
sub_points, sub_labels = DP.grid_sub_sampling(points, labels=labels, grid_size=grid_size)
# Build a KD-tree over the subsampled points; it is pickled at the end.
search_tree = KDTree(sub_points)
KDTree_save = join(KDTree_path_out, str(scan_id[:-4]) + '.pkl')
# [:-4] drops the last four characters of the path (presumably the scan's
# file extension — confirm); np.save appends '.npy' when it is missing.
np.save(join(pc_path_out, scan_id)[:-4], sub_points)
np.save(join(label_path_out, scan_id)[:-4], sub_labels)
with open(KDTree_save, 'wb') as f:
    pickle.dump(search_tree, f)
可以看到,上述预处理,是把point和label做了grid sampling,并且生成了一个kdtree保存下来。
训练数据的生成是由main_SemanticKITTI.py中的SemanticKITTI这个类实现的,我们看看这部分做了什么:
class SemanticKITTI:
    """Dataset class that feeds the preprocessed point clouds to the network.

    NOTE(review): this is an excerpt. `...` marks code that was elided, and
    the indentation has been reconstructed from the syntax — confirm against
    the original file.
    """

    def __init__(self, test_id):
        ...

    # Generate the input data flow
    def get_batch_gen(self, split):
        ...

        def spatially_regular_gen():
            # Generator loop
            # line 72-79
            for i in range(num_per_epoch):
                # NOTE(review): everything down to crop_pc is assumed to belong
                # to this branch; the branch for split == 'test' is elided.
                if split != 'test':
                    cloud_ind = i
                    pc_path = path_list[cloud_ind]
                    pc, tree, labels = self.get_data(pc_path)
                    # crop a small point cloud
                    # Pick one random point index to act as the crop centre.
                    pick_idx = np.random.choice(len(pc), 1)
                    selected_pc, selected_labels, selected_idx = self.crop_pc(pc, labels, tree, pick_idx)
                ...
        ...
        return gen_func, gen_types, gen_shapes

    def get_data(self, file_path):  # read the points, KD-tree and labels from the file that file_path points to
        ...
        return points, search_tree, labels

    @staticmethod
    def crop_pc(points, labels, search_tree, pick_idx):
        # crop a fixed size point cloud for training
        # The picked point, reshaped to a single row, is the query centre.
        center_point = points[pick_idx, :].reshape(1, -1)
        # Take cfg.num_points neighbours of the centre. [1] presumably selects
        # the index array of a (distances, indices) result and [0] the row of
        # the single query — confirm against the KDTree implementation in use.
        select_idx = search_tree.query(center_point, k=cfg.num_points)[1][0]
        # Shuffle the selected indices before gathering points and labels.
        select_idx = DP.shuffle_idx(select_idx)
        select_points = points[select_idx]
        select_labels = labels[select_idx]
        return select_points, select_labels, select_idx

    @staticmethod
    def get_tf_mapping2():
        # Returns tf_map, which derives the per-layer network inputs from a batch.
        def tf_map(batch_pc, batch_label, batch_pc_idx, batch_cloud_idx):
            # The un-subsampled batch is kept as the input features.
            features = batch_pc
            input_points = []
            input_neighbors = []
            input_pools = []
            input_up_samples = []
            for i in range(cfg.num_layers):
                # Neighbour indices of every point within the current cloud
                # (cfg.k_n is presumably the neighbour count — confirm in DP.knn_search).
                neighbour_idx = tf.py_func(DP.knn_search, [batch_pc, batch_pc, cfg.k_n], tf.int32)
                # Subsample by keeping the leading N // ratio entries along axis 1.
                sub_points = batch_pc[:, :tf.shape(batch_pc)[1] // cfg.sub_sampling_ratio[i], :]
                # Neighbour indices of the kept points only.
                pool_i = neighbour_idx[:, :tf.shape(batch_pc)[1] // cfg.sub_sampling_ratio[i], :]
                # Search with 1 as the last argument between the subsampled and
                # the full cloud; presumably the nearest kept point for every
                # original point — confirm the argument order of DP.knn_search.
                up_i = tf.py_func(DP.knn_search, [sub_points, batch_pc, 1], tf.int32)
                input_points.append(batch_pc)
                input_neighbors.append(neighbour_idx)
                input_pools.append(pool_i)
                input_up_samples.append(up_i)
                # The next layer works on the subsampled cloud.
                batch_pc = sub_points
            # Flat list: all points, all neighbours, all pools, all up-samples,
            # followed by the features, labels and the two index tensors.
            input_list = input_points + input_neighbors + input_pools + input_up_samples
            input_list += [features, batch_label, batch_pc_idx, batch_cloud_idx]
            return input_list
        return tf_map

    def init_input_pipeline(self):
        ...
从上述代码可以看出从预处理的数据得到训练数据的过程:
由于RandLA-Net的结构比较简单,就不画图表示了,直接看代码吧。
# RandLANet.py
# line 103-125
def inference(self, inputs, is_training):
    """Build the forward pass and return the output of the last layer.

    Reads inputs['features'] plus the per-layer entries inputs['xyz'][i],
    inputs['neigh_idx'][i], inputs['sub_idx'][i] and inputs['interp_idx'][i],
    and takes the layer widths from self.config.d_out.

    :param inputs: dict holding the tensors listed above
    :param is_training: forwarded to the batch-norm, conv and dropout helpers
    :return: output of the final 'fc' layer (self.config.num_classes outputs,
        no activation) with axis 2 squeezed out
    """
    d_out = self.config.d_out
    feature = inputs['features']
    # Dense layer 'fc0' with 8 outputs, followed by batch norm and leaky ReLU.
    feature = tf.layers.dense(feature, 8, activation=None, name='fc0')
    # The positional arguments -1, 0.99, 1e-6 are presumably axis, momentum and
    # epsilon — confirm against the tf.layers.batch_normalization signature.
    feature = tf.nn.leaky_relu(tf.layers.batch_normalization(feature, -1, 0.99, 1e-6, training=is_training))
    # Insert a singleton axis at position 2 (presumably so the 2-D conv helpers
    # can be applied to per-point features — TODO confirm).
    feature = tf.expand_dims(feature, axis=2)
    # ###########################Encoder############################
    # f_encoder_list ends up with num_layers + 1 entries: the un-sampled output
    # of layer 0, followed by the sampled output of every layer.
    f_encoder_list = []
    for i in range(self.config.num_layers):
        f_encoder_i = self.dilated_res_block(feature, inputs['xyz'][i], inputs['neigh_idx'][i], d_out[i],
                                             'Encoder_layer_' + str(i), is_training)
        f_sampled_i = self.random_sample(f_encoder_i, inputs['sub_idx'][i])
        feature = f_sampled_i
        if i == 0:
            f_encoder_list.append(f_encoder_i)
        f_encoder_list.append(f_sampled_i)
    # ###########################Encoder############################
    # 1x1 conv on the last encoder output that keeps its channel count.
    feature = helper_tf_util.conv2d(f_encoder_list[-1], f_encoder_list[-1].get_shape()[3].value, [1, 1],
                                    'decoder_0',
                                    [1, 1], 'VALID', True, is_training)
    # ###########################Decoder############################
    f_decoder_list = []
    for j in range(self.config.num_layers):
        # Walk interp_idx from the last layer to the first.
        f_interp_i = self.nearest_interpolation(feature, inputs['interp_idx'][-j - 1])
        # Concatenate the skip feature f_encoder_list[-j - 2] with the
        # interpolated feature on axis 3; the output width equals the last
        # dimension of that skip feature.
        f_decoder_i = helper_tf_util.conv2d_transpose(tf.concat([f_encoder_list[-j - 2], f_interp_i], axis=3),
                                                      f_encoder_list[-j - 2].get_shape()[-1].value, [1, 1],
                                                      'Decoder_layer_' + str(j), [1, 1], 'VALID', bn=True,
                                                      is_training=is_training)
        feature = f_decoder_i
        f_decoder_list.append(f_decoder_i)
    # ###########################Decoder############################
    # Head: 1x1 convs with 64 and 32 outputs, dropout with keep_prob=0.5, then
    # a final 1x1 conv to num_classes without activation.
    f_layer_fc1 = helper_tf_util.conv2d(f_decoder_list[-1], 64, [1, 1], 'fc1', [1, 1], 'VALID', True, is_training)
    f_layer_fc2 = helper_tf_util.conv2d(f_layer_fc1, 32, [1, 1], 'fc2', [1, 1], 'VALID', True, is_training)
    f_layer_drop = helper_tf_util.dropout(f_layer_fc2, keep_prob=0.5, is_training=is_training, scope='dp1')
    f_layer_fc3 = helper_tf_util.conv2d(f_layer_drop, self.config.num_classes, [1, 1], 'fc', [1, 1], 'VALID', False,
                                        is_training, activation_fn=None)
    # Drop the singleton axis that was inserted before the encoder.
    f_out = tf.squeeze(f_layer_fc3, [2])
    return f_out
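inference这个函数就是前向计算的函数,从中可以看到,RandLA-Net有以下结构:先用全连接层fc0把输入特征变为8维;编码器重复num_layers次"dilated_res_block + random_sample";中间经过一个1×1卷积(decoder_0);解码器重复num_layers次"nearest_interpolation + 与编码器对应层特征拼接 + 1×1的conv2d_transpose";最后经过fc1(64维)、fc2(32维)、dropout和输出num_classes维的fc层。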
# RandLANet.py
# line 270-277
def dilated_res_block(self, feature, xyz, neigh_idx, d_out, name, is_training):
    """Residual block: 1x1 conv, building_block, 1x1 conv, plus a shortcut.

    :param feature: input features, fed to both the main path and the shortcut
    :param xyz: point coordinates, forwarded to building_block
    :param neigh_idx: neighbour indices, forwarded to building_block
    :param d_out: base width; the block outputs d_out * 2 channels
    :param name: prefix for the scope names of the layers created here
    :param is_training: forwarded to the conv helpers
    :return: leaky ReLU of the sum of the main path and the shortcut
    """
    # Main path: 1x1 conv with d_out // 2 outputs.
    f_pc = helper_tf_util.conv2d(feature, d_out // 2, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    f_pc = self.building_block(xyz, f_pc, neigh_idx, d_out, name + 'LFA', is_training)
    # 1x1 conv with d_out * 2 outputs; no activation, it is applied after the sum.
    f_pc = helper_tf_util.conv2d(f_pc, d_out * 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training,
                                 activation_fn=None)
    # Shortcut: 1x1 conv on the block input to the same d_out * 2 width.
    shortcut = helper_tf_util.conv2d(feature, d_out * 2, [1, 1], name + 'shortcut', [1, 1], 'VALID',
                                     activation_fn=None, bn=True, is_training=is_training)
    return tf.nn.leaky_relu(f_pc + shortcut)
dilated_res_block有以下结构:
那么building_block的结构:
#line 279-291
def building_block(self, xyz, feature, neigh_idx, d_out, name, is_training):
    """Run two rounds of position encoding, neighbour gathering and att_pooling.

    :param xyz: point coordinates, used for the relative position encoding
    :param feature: input features; axis 2 is squeezed before gathering
    :param neigh_idx: neighbour indices used for both gathers
    :param d_out: width of the second att_pooling (the first uses d_out // 2)
    :param name: prefix for the scope names of the layers created here
    :param is_training: forwarded to the conv and pooling helpers
    :return: output of the second att_pooling
    """
    # Channel count of the incoming features.
    d_in = feature.get_shape()[-1].value
    # Round 1: encode relative positions and map them to d_in channels.
    f_xyz = self.relative_pos_encoding(xyz, neigh_idx)
    f_xyz = helper_tf_util.conv2d(f_xyz, d_in, [1, 1], name + 'mlp1', [1, 1], 'VALID', True, is_training)
    # Gather the neighbours' features and join them with the position encoding.
    f_neighbours = self.gather_neighbour(tf.squeeze(feature, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out // 2, name + 'att_pooling_1', is_training)
    # Round 2: reuse the round-1 position encoding (mapped to d_out // 2) and
    # gather the aggregated features again with the same neighbour indices.
    f_xyz = helper_tf_util.conv2d(f_xyz, d_out // 2, [1, 1], name + 'mlp2', [1, 1], 'VALID', True, is_training)
    f_neighbours = self.gather_neighbour(tf.squeeze(f_pc_agg, axis=2), neigh_idx)
    f_concat = tf.concat([f_neighbours, f_xyz], axis=-1)
    f_pc_agg = self.att_pooling(f_concat, d_out, name + 'att_pooling_2', is_training)
    return f_pc_agg
building_block有以下结构:
通过两次更新参考点本身的特征,将参考点的k个近邻点的信息聚集到参考点上。
random_sample所采样的点在构造数据集时就已经确定了:先将点的顺序打乱,然后取前面一定数量的点。
主要就是看一下如何做的nearest_interpolation
# line 319-331
def nearest_interpolation(feature, interp_idx):
    """
    Upsample features by copying, for every output point, the feature stored at its index.

    :param feature: input features; axis 2 is squeezed out first, so a singleton axis is
        expected there (presumably [B, N, 1, d] — confirm against the callers)
    :param interp_idx: [B, up_num_points, 1] nearest neighbour index
    :return: gathered features with a singleton axis re-inserted at position 2
        (presumably [B, up_num_points, 1, d])
    """
    squeezed = tf.squeeze(feature, axis=2)
    # Flatten the trailing singleton axis of the index tensor using its dynamic shape.
    idx_shape = tf.shape(interp_idx)
    n_batch, n_up = idx_shape[0], idx_shape[1]
    flat_idx = tf.reshape(interp_idx, [n_batch, n_up])
    # Per batch element, pick the feature rows addressed by the indices.
    gathered = tf.batch_gather(squeezed, flat_idx)
    return tf.expand_dims(gathered, axis=2)
可以看到是根据interp_idx来上插值feature
# main_SemanticKITTI.py
# line 145
# Search with 1 as the last argument, i.e. presumably a single nearest
# neighbour in sub_points for every point of batch_pc — confirm the argument
# order of DP.knn_search.
up_i = tf.py_func(DP.knn_search, [sub_points, batch_pc, 1], tf.int32)
可以看到up_i(其实就是interp_idx)是通过查找最近邻点(k=1)来确定的。
也就是说,nearest_interpolation直接将最近邻点的特征赋值给新点。这种做法与PointNet++的interpolation有一定区别:PointNet++是对k个近邻点的特征按距离倒数加权求和。
# RandLANet.py
# line 57-76
with tf.variable_scope('loss'):
    # Flatten to one row of num_classes logits and one label per entry.
    self.logits = tf.reshape(self.logits, [-1, config.num_classes])
    self.labels = tf.reshape(self.labels, [-1])
    # Boolean mask of points that should be ignored
    ignored_bool = tf.zeros_like(self.labels, dtype=tf.bool)
    for ign_label in self.config.ignored_label_inds:
        ignored_bool = tf.logical_or(ignored_bool, tf.equal(self.labels, ign_label))
    # Collect logits and labels that are not ignored
    valid_idx = tf.squeeze(tf.where(tf.logical_not(ignored_bool)))
    valid_logits = tf.gather(self.logits, valid_idx, axis=0)
    valid_labels_init = tf.gather(self.labels, valid_idx, axis=0)
    # Reduce label values in the range of logit shape
    # Lookup table from original label to reduced label: start from
    # 0..num_classes-1 and insert a 0 at the position of every ignored label,
    # which shifts all later entries one slot to the right.
    reducing_list = tf.range(self.config.num_classes, dtype=tf.int32)
    inserted_value = tf.zeros((1,), dtype=tf.int32)
    for ign_label in self.config.ignored_label_inds:
        reducing_list = tf.concat([reducing_list[:ign_label], inserted_value, reducing_list[ign_label:]], 0)
    valid_labels = tf.gather(reducing_list, valid_labels_init)
Loss的计算包含如下结构: