本节主要介绍 VoxelNet 的数据处理部分,整体训练和论文解读请移步这里。
其他三维目标检测算法的数据处理,特别是基于KITTI的三维目标检测,都可以参考这些处理方式,具有一定的通用性。
先上整体代码,大致思路:
class KittiDataset(data.Dataset):
    """Map-style dataset yielding one KITTI frame per index for VoxelNet.

    Frame ids are read from ``<root>/training/<set>.txt`` (one id per line).
    For each id the dataset loads the lidar points, calibration, labels and
    camera image, filters the points, and voxelizes them.

    NOTE(review): ``__getitem__`` calls ``self.preprocess`` (the voxelization
    step), which is not defined inside this class body as shown here; it must
    be attached to the class as a method.
    """

    def __init__(self, cfg, root='./KITTI', set='train', type='velodyne_train'):
        """Record data locations, the frame list and voxel/anchor settings.

        Args:
            cfg: Configuration object. The attributes read here are ``T``,
                ``vd``, ``vh``, ``vw``, ``xrange``, ``yrange``, ``zrange``,
                ``anchors``, ``device``, ``H``, ``W``,
                ``anchors_per_position``, ``pos_threshold`` and
                ``neg_threshold``.
            root: Dataset root; everything is read from ``<root>/training``.
            set: Name (without ``.txt``) of the file listing the frame ids.
            type: Stored unchanged as ``self.type``; not used in this class.
        """
        self.type = type
        self.root = root
        self.data_path = os.path.join(root, 'training')
        self.lidar_path = os.path.join(self.data_path, "crop")
        self.image_path = os.path.join(self.data_path, "image_3/")
        self.calib_path = os.path.join(self.data_path, "calib")
        self.label_path = os.path.join(self.data_path, "label_2")

        with open(os.path.join(self.data_path, '%s.txt' % set)) as f:
            self.file_list = f.read().splitlines()

        # Voxelization parameters: T caps the points kept per voxel,
        # (vw, vh, vd) is the voxel size along (x, y, z), and the ranges give
        # the lower corner used as the voxel-grid origin.
        self.T = cfg.T
        self.vd = cfg.vd
        self.vh = cfg.vh
        self.vw = cfg.vw
        self.xrange = cfg.xrange
        self.yrange = cfg.yrange
        self.zrange = cfg.zrange

        # Anchors are flattened to one 7-value row per anchor.
        self.anchors = torch.tensor(cfg.anchors.reshape(-1, 7)).float().to(cfg.device)
        # Columns 0, 1, 5, 4, 6 of each anchor row -- presumably
        # (x, y, l, w, r) as the attribute name suggests; confirm against the
        # anchor layout in cfg.
        self.anchors_xylwr = self.anchors[..., [0, 1, 5, 4, 6]].contiguous()
        self.feature_map_shape = (int(cfg.H / 2), int(cfg.W / 2))
        self.anchors_per_position = cfg.anchors_per_position
        self.pos_threshold = cfg.pos_threshold
        self.neg_threshold = cfg.neg_threshold

    def __len__(self):
        """Return the number of frames listed in the split file."""
        return len(self.file_list)

    def __getitem__(self, i):
        """Load, filter and voxelize the ``i``-th frame.

        Returns:
            Tuple ``(voxel_features, voxel_coords, gt_box3d_corner, gt_box3d,
            image, calib, frame_id)``.
        """
        name = self.file_list[i]
        # os.path.join avoids the doubled separator that plain concatenation
        # produced for image_path (which already ends with '/').
        lidar_file = os.path.join(self.lidar_path, name + '.bin')
        calib_file = os.path.join(self.calib_path, name + '.txt')
        label_file = os.path.join(self.label_path, name + '.txt')
        image_file = os.path.join(self.image_path, name + '.png')

        calib = utils.load_kitti_calib(calib_file)
        Tr = calib['Tr_velo2cam']
        gt_box3d_corner, gt_box3d = utils.load_kitti_label(label_file, Tr)

        # The .bin file is a flat float32 buffer with 4 values per point.
        lidar = np.fromfile(lidar_file, dtype=np.float32).reshape(-1, 4)
        # NOTE: cv2.imread returns None (no exception) when the file cannot
        # be read; the value is passed through to the caller unchanged.
        image = cv2.imread(image_file)

        lidar, gt_box3d_corner, gt_box3d = utils.get_filtered_lidar(
            lidar, gt_box3d_corner, gt_box3d)
        voxel_features, voxel_coords = self.preprocess(lidar)

        return voxel_features, voxel_coords, gt_box3d_corner, gt_box3d, image, calib, name
几个基础数据是直接加载进来的,比较难懂的是 voxel_features、voxel_coords 这部分。
先上代码:
def preprocess(self, lidar):
    """Group lidar points into voxels and build per-voxel point features.

    Args:
        lidar: Point array with 4 columns per point; the first three columns
            are used as (x, y, z). The array is shuffled IN PLACE so that the
            points kept in an over-full voxel are a random subset.

    Returns:
        Tuple ``(voxel_features, voxel_coords)``:

        * ``voxel_features`` -- float32 array of shape ``(V, self.T, 7)``.
          Each used row holds the point's 4 original values followed by the
          offset of its first three values from the mean of the points kept
          in that voxel; unused rows stay zero.
        * ``voxel_coords`` -- int32 array of shape ``(V, 3)`` with the unique
          voxel indices, columns ordered (z, y, x).
    """
    np.random.shuffle(lidar)

    if lidar.shape[0] == 0:
        # Nothing to voxelize: return correctly-shaped empty arrays.
        return (np.zeros((0, self.T, 7), dtype=np.float32),
                np.zeros((0, 3), dtype=np.int32))

    origin = np.array([self.xrange[0], self.yrange[0], self.zrange[0]])
    voxel_size = np.array([self.vw, self.vh, self.vd])
    voxel_coords = ((lidar[:, :3] - origin) / voxel_size).astype(np.int32)
    # Reorder the index columns from (x, y, z) to (z, y, x).
    voxel_coords = voxel_coords[:, [2, 1, 0]]

    voxel_coords, inv_ind, voxel_counts = np.unique(
        voxel_coords, axis=0, return_inverse=True, return_counts=True)
    # The shape of the inverse index when `axis` is given has differed
    # between NumPy releases; flatten it so it is always one entry per point.
    inv_ind = inv_ind.reshape(-1)

    # A stable sort by voxel id lays each voxel's points out contiguously
    # while preserving their (shuffled) relative order, so slicing the first
    # T entries selects the same points as a per-voxel boolean mask would,
    # without rescanning every point once per voxel.
    order = np.argsort(inv_ind, kind="stable")
    starts = np.cumsum(voxel_counts) - voxel_counts

    voxel_features = np.zeros((len(voxel_coords), self.T, 7), dtype=np.float32)
    for i, (start, count) in enumerate(zip(starts, voxel_counts)):
        pts = lidar[order[start:start + min(count, self.T)]]
        voxel_features[i, :pts.shape[0], :] = np.concatenate(
            (pts, pts[:, :3] - np.mean(pts[:, :3], 0)), axis=1)

    return voxel_features, voxel_coords
大致思路:
voxel_coords:每个点所在体素的整数坐标。先按 (x, y, z) 的顺序计算,再通过 voxel_coords[:,[2,1,0]] 把列顺序调整为 (z, y, x),最后用 np.unique 去重。注意:
c,s=np.unique(b,return_index=True)
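return_index=True表示返回新列表元素在旧列表中的位置,并以列表形式储存在s中。而上面 preprocess 中用到的是另外两个参数:return_inverse=True 返回旧数组中每个元素在新(去重后)数组中的下标,return_counts=True 返回每个唯一元素出现的次数。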