MODEL
Step 1 定义网络输入形状
def placeholder_inputs(batch_size, num_point):
    """Create TF placeholders for network inputs and ground-truth labels.

    Input:
        batch_size: scalar int
        num_point: scalar int
    Output:
        Tuple of TF placeholders: (point clouds, one-hot class vector,
        per-point segmentation labels, box centers, heading class labels,
        heading residual labels, size class labels, size residual labels)
    """
    # Frustum point clouds: XYZ (frustum coordinate) + intensity per point.
    pc_pl = tf.placeholder(tf.float32,
                           shape=(batch_size, num_point, 4))
    # One-hot object-type vector (3 categories).
    onehot_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))
    # Per-point segmentation labels.
    seg_labels_pl = tf.placeholder(tf.int32, shape=(batch_size, num_point))
    # Ground-truth box centers.
    center_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))
    # Heading: bin class index and within-bin residual.
    heading_cls_pl = tf.placeholder(tf.int32, shape=(batch_size,))
    heading_res_pl = tf.placeholder(tf.float32, shape=(batch_size,))
    # Size: cluster class index and per-axis residual.
    size_cls_pl = tf.placeholder(tf.int32, shape=(batch_size,))
    size_res_pl = tf.placeholder(tf.float32, shape=(batch_size, 3))
    return (pc_pl, onehot_pl, seg_labels_pl, center_pl,
            heading_cls_pl, heading_res_pl,
            size_cls_pl, size_res_pl)
Step 2 得到网络模型
def get_model(point_cloud, one_hot_vec, is_training, bn_decay=None):
    """Frustum PointNets v1 model.

    Predicts a 3D instance-segmentation mask and an amodal 3D bounding
    box for the object in each frustum point cloud.

    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels;
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
    Output:
        end_points: dict (map from name strings to TF tensors)
    """
    end_points = {}

    # Stage 1: per-point object vs. background segmentation.
    seg_logits, end_points = get_instance_seg_v1_net(
        point_cloud, one_hot_vec, is_training, bn_decay, end_points)
    end_points['mask_logits'] = seg_logits

    # Keep predicted object points and translate them to their centroid.
    obj_pc_xyz, mask_xyz_mean, end_points = point_cloud_masking(
        point_cloud, seg_logits, end_points)

    # T-Net: regress a residual translation toward the true object center.
    center_delta, end_points = get_center_regression_net(
        obj_pc_xyz, one_hot_vec, is_training, bn_decay, end_points)
    stage1_center = center_delta + mask_xyz_mean  # Bx3
    end_points['stage1_center'] = stage1_center

    # Re-center points into (approximate) object coordinates.
    obj_pc_xyz_recentered = obj_pc_xyz - tf.expand_dims(center_delta, 1)

    # Stage 2: amodal box estimation from the re-centered points.
    box_output, end_points = get_3d_box_estimation_v1_net(
        obj_pc_xyz_recentered, one_hot_vec, is_training, bn_decay, end_points)

    # Decode the raw regression vector into box parameter tensors.
    end_points = parse_output_to_tensors(box_output, end_points)
    # Final center = box-net residual + stage-1 center estimate.
    end_points['center'] = end_points['center_boxnet'] + stage1_center  # Bx3
    return end_points
- 3D Instance Segmentation Pointnet
def get_instance_seg_v1_net(point_cloud, one_hot_vec,
                            is_training, bn_decay, end_points):
    ''' 3D instance segmentation PointNet v1 network.

    Scores every point in the frustum as background/clutter vs. object.
    Input:
        point_cloud: TF tensor in shape (B,N,4)
            frustum point clouds with XYZ and intensity in point channels;
            XYZs are in frustum coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
        end_points: dict
    Output:
        logits: TF tensor in shape (B,N,2), scores for bkg/clutter and object
        end_points: dict (returned unchanged)
    '''
    # Note: only num_point is needed here; an unused batch_size local
    # was removed.
    num_point = point_cloud.get_shape()[1].value

    # Add a singleton width dim so 1x1 conv2d acts as a shared per-point MLP.
    net = tf.expand_dims(point_cloud, 2)  # BxNx1x4
    net = tf_util.conv2d(net, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv1', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 64, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv2', bn_decay=bn_decay)
    # Keep the 64-dim per-point feature for later concatenation.
    point_feat = tf_util.conv2d(net, 64, [1, 1],
                                padding='VALID', stride=[1, 1],
                                bn=True, is_training=is_training,
                                scope='conv3', bn_decay=bn_decay)
    net = tf_util.conv2d(point_feat, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv4', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 1024, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv5', bn_decay=bn_decay)
    # Symmetric max-pool over all points -> per-frustum global feature.
    global_feat = tf_util.max_pool2d(net, [num_point, 1],
                                     padding='VALID', scope='maxpool')
    # Append the object-type one-hot to the global feature.
    global_feat = tf.concat([global_feat, tf.expand_dims(tf.expand_dims(one_hot_vec, 1), 1)], axis=3)
    # Broadcast the global feature back to every point.
    global_feat_expand = tf.tile(global_feat, [1, num_point, 1, 1])
    # Per-point feature = local (64) + global (1024+3) feature.
    concat_feat = tf.concat(axis=3, values=[point_feat, global_feat_expand])
    net = tf_util.conv2d(concat_feat, 512, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv6', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 256, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv7', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv8', bn_decay=bn_decay)
    net = tf_util.conv2d(net, 128, [1, 1],
                         padding='VALID', stride=[1, 1],
                         bn=True, is_training=is_training,
                         scope='conv9', bn_decay=bn_decay)
    net = tf_util.dropout(net, is_training, 'dp1', keep_prob=0.5)
    # Final 2-way classifier head (no activation; raw logits).
    logits = tf_util.conv2d(net, 2, [1, 1],
                            padding='VALID', stride=[1, 1], activation_fn=None,
                            scope='conv10')
    logits = tf.squeeze(logits, [2])  # BxNxC
    return logits, end_points
- Masking
三次坐标系变换:frustum coordinate ——> 3D mask coordinate ——> 3D object coordinate
- select masked points and translate to masked points' centroid
a. 计算mask,N个点,如果这个点是目标点就为1,不是目标点就为0;
b. 计算所有目标点坐标的均值,找到目标点的中心 mask_xyz_mean
c. 得到点云在目标点中心作为坐标原点的坐标系的坐标 point_cloud_xyz_stage1
d. 对于目标点来说需要统一成固定的数NUM_OBJECT_POINT个,小于需要增添,大于需要删减
e. 返回目标点和中心
- select masked points and translate to masked points' centroid
def point_cloud_masking(point_cloud, logits, end_points, xyz_only=True):
    ''' Select point cloud with predicted 3D mask,
    translate coordinates to the masked points centroid.
    Input:
        point_cloud: TF tensor in shape (B,N,C)
        logits: TF tensor in shape (B,N,2)
        end_points: dict
        xyz_only: boolean, if True only return XYZ channels
    Output:
        object_point_cloud: TF tensor in shape (B,M,3)
            for simplicity we only keep XYZ here
            M = NUM_OBJECT_POINT as a hyper-parameter
        mask_xyz_mean: TF tensor in shape (B,3)
        end_points: dict (with 'mask' added, shape (B,N))
    '''
    batch_size = point_cloud.get_shape()[0].value
    num_point = point_cloud.get_shape()[1].value
    # A point is "object" when its object score (channel 1) exceeds its
    # background/clutter score (channel 0).
    mask = tf.slice(logits, [0, 0, 0], [-1, -1, 1]) < \
        tf.slice(logits, [0, 0, 1], [-1, -1, 1])
    mask = tf.to_float(mask)  # BxNx1
    # Number of selected points per batch element, broadcast to 3 channels.
    mask_count = tf.tile(tf.reduce_sum(mask, axis=1, keep_dims=True),
                         [1, 1, 3])  # Bx1x3
    point_cloud_xyz = tf.slice(point_cloud, [0, 0, 0], [-1, -1, 3])  # BxNx3
    # Sum of masked XYZ coordinates; the mean is formed by the division below.
    mask_xyz_mean = tf.reduce_sum(tf.tile(mask, [1, 1, 3]) * point_cloud_xyz,
                                  axis=1, keep_dims=True)  # Bx1x3
    # NOTE: the (B,N,1) mask was already consumed above; squeeze to (B,N)
    # only for storage in end_points and for tf_gather_object_pc below.
    mask = tf.squeeze(mask, axis=[2])  # BxN
    end_points['mask'] = mask
    # Centroid of masked points; max(count, 1) avoids division by zero when
    # no point was classified as object.
    mask_xyz_mean = mask_xyz_mean / tf.maximum(mask_count, 1)  # Bx1x3
    # Translate to masked points' centroid (the "3D mask coordinate" frame).
    point_cloud_xyz_stage1 = point_cloud_xyz - \
        tf.tile(mask_xyz_mean, [1, num_point, 1])
    if xyz_only:
        point_cloud_stage1 = point_cloud_xyz_stage1
    else:
        # Keep the extra channels (e.g. intensity) alongside translated XYZ.
        point_cloud_features = tf.slice(point_cloud, [0, 0, 3], [-1, -1, -1])
        point_cloud_stage1 = tf.concat( \
            [point_cloud_xyz_stage1, point_cloud_features], axis=-1)
    num_channels = point_cloud_stage1.get_shape()[2].value
    # Resample masked points to a fixed NUM_OBJECT_POINT count per object
    # (pad by repetition or subsample, as implemented by tf_gather_object_pc).
    object_point_cloud, _ = tf_gather_object_pc(point_cloud_stage1,
                                                mask, NUM_OBJECT_POINT)
    object_point_cloud.set_shape([batch_size, NUM_OBJECT_POINT, num_channels])
    return object_point_cloud, tf.squeeze(mask_xyz_mean, axis=1), end_points
- T-Net and coordinate translation
def get_center_regression_net(object_point_cloud, one_hot_vec,
                              is_training, bn_decay, end_points):
    """Regression network for center delta, a.k.a. T-Net.

    Input:
        object_point_cloud: TF tensor in shape (B,M,C)
            point clouds in 3D mask coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
        end_points: dict
    Output:
        predicted_center: TF tensor in shape (B,3)
        end_points: dict (returned unchanged)
    """
    num_point = object_point_cloud.get_shape()[1].value
    # Singleton width dim makes 1x1 conv2d act as a shared per-point MLP.
    x = tf.expand_dims(object_point_cloud, 2)  # BxMx1xC
    # Per-point MLP with widths 128, 128, 256.
    for width, scope in ((128, 'conv-reg1-stage1'),
                         (128, 'conv-reg2-stage1'),
                         (256, 'conv-reg3-stage1')):
        x = tf_util.conv2d(x, width, [1, 1],
                           padding='VALID', stride=[1, 1],
                           bn=True, is_training=is_training,
                           scope=scope, bn_decay=bn_decay)
    # Symmetric max-pool over points -> global feature.
    x = tf_util.max_pool2d(x, [num_point, 1],
                           padding='VALID', scope='maxpool-stage1')
    x = tf.squeeze(x, axis=[1, 2])
    # Condition the regressor on the object-type one-hot.
    x = tf.concat([x, one_hot_vec], axis=1)
    x = tf_util.fully_connected(x, 256, scope='fc1-stage1', bn=True,
                                is_training=is_training, bn_decay=bn_decay)
    x = tf_util.fully_connected(x, 128, scope='fc2-stage1', bn=True,
                                is_training=is_training, bn_decay=bn_decay)
    # Linear head: residual translation toward the true object center.
    predicted_center = tf_util.fully_connected(x, 3, activation_fn=None,
                                               scope='fc3-stage1')
    return predicted_center, end_points
- Get object point cloud in object coordinate
object_point_cloud_xyz_new = \
object_point_cloud_xyz - tf.expand_dims(center_delta, 1)
- Amodal Box Estimation PointNet
def get_3d_box_estimation_v1_net(object_point_cloud, one_hot_vec,
                                 is_training, bn_decay, end_points):
    """3D Box Estimation PointNet v1 network.

    Input:
        object_point_cloud: TF tensor in shape (B,M,C)
            point clouds in object coordinate
        one_hot_vec: TF tensor in shape (B,3)
            length-3 vectors indicating predicted object type
        is_training: TF boolean scalar
        bn_decay: TF float scalar
        end_points: dict
    Output:
        output: TF tensor in shape (B,3+NUM_HEADING_BIN*2+NUM_SIZE_CLUSTER*4)
            including box centers, heading bin class scores and residuals,
            and size cluster scores and residuals
        end_points: dict (returned unchanged)
    """
    num_point = object_point_cloud.get_shape()[1].value
    # Singleton width dim makes 1x1 conv2d act as a shared per-point MLP.
    x = tf.expand_dims(object_point_cloud, 2)  # BxMx1xC
    # Per-point MLP with widths 128, 128, 256, 512.
    for width, scope in ((128, 'conv-reg1'), (128, 'conv-reg2'),
                         (256, 'conv-reg3'), (512, 'conv-reg4')):
        x = tf_util.conv2d(x, width, [1, 1],
                           padding='VALID', stride=[1, 1],
                           bn=True, is_training=is_training,
                           scope=scope, bn_decay=bn_decay)
    # Symmetric max-pool over points -> global feature.
    x = tf_util.max_pool2d(x, [num_point, 1],
                           padding='VALID', scope='maxpool2')
    x = tf.squeeze(x, axis=[1, 2])
    # Condition the box regressor on the object-type one-hot.
    x = tf.concat([x, one_hot_vec], axis=1)
    x = tf_util.fully_connected(x, 512, scope='fc1', bn=True,
                                is_training=is_training, bn_decay=bn_decay)
    x = tf_util.fully_connected(x, 256, scope='fc2', bn=True,
                                is_training=is_training, bn_decay=bn_decay)
    # Output layout: first 3 numbers are the box center (cx,cy,cz); the next
    # NUM_HEADING_BIN*2 are heading bin scores and bin residuals; the last
    # NUM_SIZE_CLUSTER*4 are size cluster scores and per-axis residuals.
    output = tf_util.fully_connected(
        x, 3 + NUM_HEADING_BIN * 2 + NUM_SIZE_CLUSTER * 4,
        activation_fn=None, scope='fc3')
    return output, end_points
LOSS
def get_loss(mask_label, center_label, \
             heading_class_label, heading_residual_label, \
             size_class_label, size_residual_label, \
             end_points, \
             corner_loss_weight=10.0, \
             box_loss_weight=1.0):
    ''' Loss functions for 3D object detection.
    Input:
        mask_label: TF int32 tensor in shape (B,N)
        center_label: TF tensor in shape (B,3)
        heading_class_label: TF int32 tensor in shape (B,)
        heading_residual_label: TF tensor in shape (B,)
        size_class_label: TF int32 tensor in shape (B,)
        size_residual_label: TF tensor in shape (B,3)
        end_points: dict, outputs from our model
        corner_loss_weight: float scalar
        box_loss_weight: float scalar
    Output:
        total_loss: TF scalar tensor
            the total_loss is also added to the losses collection
    '''
    # 3D Segmentation loss: per-point 2-way softmax cross-entropy.
    mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( \
        logits=end_points['mask_logits'], labels=mask_label))
    tf.summary.scalar('3d mask loss', mask_loss)
    # Center regression losses: Huber loss on the Euclidean distance, for
    # both the final predicted center and the stage-1 (T-Net) center.
    center_dist = tf.norm(center_label - end_points['center'], axis=-1)
    center_loss = huber_loss(center_dist, delta=2.0)
    tf.summary.scalar('center loss', center_loss)
    stage1_center_dist = tf.norm(center_label - \
        end_points['stage1_center'], axis=-1)
    stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)
    tf.summary.scalar('stage1 center loss', stage1_center_loss)
    # Heading loss: cross-entropy over heading bins, plus Huber loss on the
    # normalized in-bin residual of the ground-truth bin only.
    heading_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
            logits=end_points['heading_scores'], labels=heading_class_label))
    tf.summary.scalar('heading class loss', heading_class_loss)
    hcls_onehot = tf.one_hot(heading_class_label,
                             depth=NUM_HEADING_BIN,
                             on_value=1, off_value=0, axis=-1)  # BxNUM_HEADING_BIN
    # Normalize the residual by half the bin width (pi/NUM_HEADING_BIN).
    heading_residual_normalized_label = \
        heading_residual_label / (np.pi / NUM_HEADING_BIN)
    # The one-hot mask keeps only the residual predicted for the GT bin.
    heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \
        end_points['heading_residuals_normalized'] * tf.to_float(hcls_onehot), axis=1) - \
        heading_residual_normalized_label, delta=1.0)
    tf.summary.scalar('heading residual normalized loss',
                      heading_residual_normalized_loss)
    # Size loss: cross-entropy over size clusters, plus Huber loss on the
    # normalized per-axis residual of the ground-truth cluster only.
    size_class_loss = tf.reduce_mean( \
        tf.nn.sparse_softmax_cross_entropy_with_logits( \
            logits=end_points['size_scores'], labels=size_class_label))
    tf.summary.scalar('size class loss', size_class_loss)
    scls_onehot = tf.one_hot(size_class_label,
                             depth=NUM_SIZE_CLUSTER,
                             on_value=1, off_value=0, axis=-1)  # BxNUM_SIZE_CLUSTER
    scls_onehot_tiled = tf.tile(tf.expand_dims( \
        tf.to_float(scls_onehot), -1), [1, 1, 3])  # BxNUM_SIZE_CLUSTERx3
    # Select the predicted residual of the GT size cluster.
    predicted_size_residual_normalized = tf.reduce_sum( \
        end_points['size_residuals_normalized'] * scls_onehot_tiled, axis=[1])  # Bx3
    mean_size_arr_expand = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0)  # 1xNUM_SIZE_CLUSTERx3
    # Mean (anchor) size of the GT cluster, used to normalize the residual.
    mean_size_label = tf.reduce_sum( \
        scls_onehot_tiled * mean_size_arr_expand, axis=[1])  # Bx3
    size_residual_label_normalized = size_residual_label / mean_size_label
    size_normalized_dist = tf.norm( \
        size_residual_label_normalized - predicted_size_residual_normalized,
        axis=-1)
    size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)
    tf.summary.scalar('size residual normalized loss',
                      size_residual_normalized_loss)
    # Corner loss
    # We select the predicted corners corresponding to the
    # GT heading bin and size cluster.
    corners_3d = get_box3d_corners(end_points['center'],
                                   end_points['heading_residuals'],
                                   end_points['size_residuals'])  # (B,NH,NS,8,3)
    # Joint one-hot over (heading bin, size cluster) pairs.
    gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1, 1, NUM_SIZE_CLUSTER]) * \
        tf.tile(tf.expand_dims(scls_onehot, 1), [1, NUM_HEADING_BIN, 1])  # (B,NH,NS)
    corners_3d_pred = tf.reduce_sum( \
        tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask, -1), -1)) * corners_3d,
        axis=[1, 2])  # (B,8,3)
    heading_bin_centers = tf.constant( \
        np.arange(0, 2 * np.pi, 2 * np.pi / NUM_HEADING_BIN), dtype=tf.float32)  # (NH,)
    # GT heading angle = bin center + residual, selected via one-hot.
    heading_label = tf.expand_dims(heading_residual_label, 1) + \
        tf.expand_dims(heading_bin_centers, 0)  # (B,NH)
    heading_label = tf.reduce_sum(tf.to_float(hcls_onehot) * heading_label, 1)
    mean_sizes = tf.expand_dims( \
        tf.constant(g_mean_size_arr, dtype=tf.float32), 0)  # (1,NS,3)
    # GT size = cluster mean size + residual, selected via one-hot.
    size_label = mean_sizes + \
        tf.expand_dims(size_residual_label, 1)  # (1,NS,3) + (B,1,3) = (B,NS,3)
    size_label = tf.reduce_sum( \
        tf.expand_dims(tf.to_float(scls_onehot), -1) * size_label, axis=[1])  # (B,3)
    corners_3d_gt = get_box3d_corners_helper( \
        center_label, heading_label, size_label)  # (B,8,3)
    # Flipped GT box (heading + 180deg): a box is symmetric under this flip,
    # so the loss takes the smaller of the two corner distances.
    corners_3d_gt_flip = get_box3d_corners_helper( \
        center_label, heading_label + np.pi, size_label)  # (B,8,3)
    corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
                              tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
    corners_loss = huber_loss(corners_dist, delta=1.0)
    tf.summary.scalar('corners loss', corners_loss)
    # Weighted sum of all losses; residual losses get an extra factor of 20
    # to balance their magnitude against the classification terms.
    total_loss = mask_loss + box_loss_weight * (center_loss + \
        heading_class_loss + size_class_loss + \
        heading_residual_normalized_loss * 20 + \
        size_residual_normalized_loss * 20 + \
        stage1_center_loss + \
        corner_loss_weight * corners_loss)
    tf.add_to_collection('losses', total_loss)
    return total_loss
- 3D Segmentation Loss
a. 分类用softmax_cross_entropy求loss
b. logits: (B,N,2) labels: (B,N)
# 3D Segmentation loss
mask_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits( \
logits=end_points['mask_logits'], labels=mask_label))
tf.summary.scalar('3d mask loss', mask_loss)
- Center Regression Losses
a. center loss包含两部分:Amodel box estimation后的center loss + T-net后的center loss
b. center loss 用的是回归的huber loss
x = y_true-y_pred
loss = 0.5 * x^2 if |x| <= d
loss = 0.5 * d^2 + d * (|x| - d) if |x| > d
# Center regression losses
center_dist = tf.norm(center_label - end_points['center'], axis=-1)
center_loss = huber_loss(center_dist, delta=2.0)
tf.summary.scalar('center loss', center_loss)
stage1_center_dist = tf.norm(center_label - \
end_points['stage1_center'], axis=-1)
stage1_center_loss = huber_loss(stage1_center_dist, delta=1.0)
tf.summary.scalar('stage1 center loss', stage1_center_loss)
- Heading Loss
a. NUM_HEADING_BIN = 12
b. 分类的loss用 softmax_cross_entropy
c. logits: (B, 12) labels: (B,)
d. 回归的loss用 huber_loss
# Heading loss
heading_class_loss = tf.reduce_mean( \
tf.nn.sparse_softmax_cross_entropy_with_logits( \
logits=end_points['heading_scores'], labels=heading_class_label))
tf.summary.scalar('heading class loss', heading_class_loss)
hcls_onehot = tf.one_hot(heading_class_label,
depth=NUM_HEADING_BIN,
on_value=1, off_value=0, axis=-1) # BxNUM_HEADING_BIN
heading_residual_normalized_label = \
heading_residual_label / (np.pi / NUM_HEADING_BIN)
heading_residual_normalized_loss = huber_loss(tf.reduce_sum( \
end_points['heading_residuals_normalized'] * tf.to_float(hcls_onehot), axis=1) - \
heading_residual_normalized_label, delta=1.0)
tf.summary.scalar('heading residual normalized loss',
heading_residual_normalized_loss)
- Size Loss
a. NUM_SIZE_CLUSTER = 8
b. 分类的loss用 softmax_cross_entropy
c. logits: (B, 8) labels: (B,)
d. 回归的loss用 huber_loss (求与groundtruth的二范数)
# Size loss
size_class_loss = tf.reduce_mean( \
tf.nn.sparse_softmax_cross_entropy_with_logits( \
logits=end_points['size_scores'], labels=size_class_label))
tf.summary.scalar('size class loss', size_class_loss)
scls_onehot = tf.one_hot(size_class_label,
depth=NUM_SIZE_CLUSTER,
on_value=1, off_value=0, axis=-1) # BxNUM_SIZE_CLUSTER
scls_onehot_tiled = tf.tile(tf.expand_dims( \
tf.to_float(scls_onehot), -1), [1, 1, 3]) # BxNUM_SIZE_CLUSTERx3
predicted_size_residual_normalized = tf.reduce_sum( \
end_points['size_residuals_normalized'] * scls_onehot_tiled, axis=[1]) # Bx3
mean_size_arr_expand = tf.expand_dims( \
tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # 1xNUM_SIZE_CLUSTERx3
mean_size_label = tf.reduce_sum( \
scls_onehot_tiled * mean_size_arr_expand, axis=[1]) # Bx3
size_residual_label_normalized = size_residual_label / mean_size_label
size_normalized_dist = tf.norm( \
size_residual_label_normalized - predicted_size_residual_normalized,
axis=-1)
size_residual_normalized_loss = huber_loss(size_normalized_dist, delta=1.0)
tf.summary.scalar('size residual normalized loss',
size_residual_normalized_loss)
- Corner Loss
a. 通过预测的中心,大小,角度计算 corner_3d (B, NH, NS, 8, 3)
b. 计算gt_mask onehot NH*NS (B,NH,NS)
c. corner_3d_pred 只计算预测与groundtruth的NS和NH相同的损失,再对所有每个点类别求和,共8个点
d. 同理计算groundtruth corners_3d_gt,corners_3d_gt_flip
e. corners_3d_gt_flip的朝向角度比corners_3d_gt多180度,其余相同
f. corner的回归也用huber loss
# Corner loss
# We select the predicted corners corresponding to the
# GT heading bin and size cluster.
corners_3d = get_box3d_corners(end_points['center'],
end_points['heading_residuals'],
end_points['size_residuals']) # (B,NH,NS,8,3)
gt_mask = tf.tile(tf.expand_dims(hcls_onehot, 2), [1, 1, NUM_SIZE_CLUSTER]) * \
tf.tile(tf.expand_dims(scls_onehot, 1), [1, NUM_HEADING_BIN, 1]) # (B,NH,NS)
corners_3d_pred = tf.reduce_sum( \
tf.to_float(tf.expand_dims(tf.expand_dims(gt_mask, -1), -1)) * corners_3d,
axis=[1, 2]) # (B,8,3)
heading_bin_centers = tf.constant( \
np.arange(0, 2 * np.pi, 2 * np.pi / NUM_HEADING_BIN), dtype=tf.float32) # (NH,)
heading_label = tf.expand_dims(heading_residual_label, 1) + \
tf.expand_dims(heading_bin_centers, 0) # (B,NH)
heading_label = tf.reduce_sum(tf.to_float(hcls_onehot) * heading_label, 1)
mean_sizes = tf.expand_dims( \
tf.constant(g_mean_size_arr, dtype=tf.float32), 0) # (1,NS,3)
size_label = mean_sizes + \
tf.expand_dims(size_residual_label, 1) # (1,NS,3) + (B,1,3) = (B,NS,3)
size_label = tf.reduce_sum( \
tf.expand_dims(tf.to_float(scls_onehot), -1) * size_label, axis=[1]) # (B,3)
corners_3d_gt = get_box3d_corners_helper( \
center_label, heading_label, size_label) # (B,8,3)
corners_3d_gt_flip = get_box3d_corners_helper( \
center_label, heading_label + np.pi, size_label) # (B,8,3)
corners_dist = tf.minimum(tf.norm(corners_3d_pred - corners_3d_gt, axis=-1),
tf.norm(corners_3d_pred - corners_3d_gt_flip, axis=-1))
corners_loss = huber_loss(corners_dist, delta=1.0)
tf.summary.scalar('corners loss', corners_loss)
- Weighted sum of all losses
total_loss = mask_loss + box_loss_weight * (center_loss + \
heading_class_loss + size_class_loss + \
heading_residual_normalized_loss * 20 + \
size_residual_normalized_loss * 20 + \
stage1_center_loss + \
corner_loss_weight * corners_loss)