Deep Learning: Implementing the Model from "3D Fully Convolutional Network for Vehicle Detection in Point Cloud"

1. Reference

3D Fully Convolutional Network for Vehicle Detection in Point Cloud

2. Model Implementation

'''
Baidu Inc. 

Ref: 
3D Fully Convolutional Network for Vehicle Detection in Point Cloud

Author: HSW 
Date: 2018-05-02 
'''


import sys
import numpy as np 
import tensorflow as tf 
from prepare_data2 import * 
from baidu_cnn_3d import * 

KITTI_TRAIN_DATA_CNT = 7481
KITTI_TEST_DATA_CNT  = 7518


# create 3D-CNN Model
def create_graph(sess, modelType = 0, voxel_shape = (400, 400, 20),  activation=tf.nn.relu, is_train = True): 
	'''
	Inputs: 
		sess: TensorFlow Session object 
		modelType: selects the network variant (0: conv-only head, 1: the paper's architecture) 
		voxel_shape: voxel grid shape for the network input layer 
		activation: activation function used by the conv layers 
		is_train: True builds the training graph and initializes its variables 
	Outputs: 
		voxel placeholder, model, phase_train placeholder 
	'''
	voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])

	phase_train = tf.placeholder(tf.bool, name="phase_train") if is_train else None 
	
	with tf.variable_scope("3D_CNN_Model") as scope: 
		model = Full_CNN_3D_Model()
		
		# note: phase_train here is the Python bool is_train, consumed by batch_norm 
		# at graph-construction time (the phase_train placeholder is only fed, not used) 
		model.cnn3d_graph(voxel, modelType = modelType, activation=activation, phase_train = is_train)
		
	if is_train: 
		initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_Model")
		sess.run(tf.variables_initializer(initialized_var))
	
	return voxel, model, phase_train
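
# Usage sketch (defaults assumed): create_graph builds the network inside a session, e.g.
#   with tf.Session() as sess:
#       voxel, model, phase_train = create_graph(sess, modelType = 1,
#                                                voxel_shape = (400, 400, 20), is_train = False)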


# read batch data 
def read_batch_data(batch_size, data_set_dir,objectType = "Car", split = "training", resolution=(0.2, 0.2, 0.2), scale=0.25, limitX = (0,80), limitY=(-40,40), limitZ=(-2.5,1.5)): 
	'''
	Inputs: 
		batch_size: 
		data_set_dir: 
		objectType: default is "Car"
		split: default is "training"
		resolution: 
		scale: outputSize / inputSize 
		limitX: 
		limitY: 
		limitZ: 
	Outputs: 
		split == "training": yields (batch_voxel, batch_g_obj, batch_g_cord) 
		split == "testing" : yields batch_voxel only 
	'''
	kitti_3DVoxel = kitti_3DVoxel_interface(data_set_dir, objectType = objectType, split=split, scale = scale, resolution = resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
	
	TRAIN_PROCESSED_IDX  = 0
	TEST_PROCESSED_IDX   = 0
	
	if split == "training": 
		while TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT: 
			batch_voxel = []
			batch_g_obj = []
			batch_g_cord = []
			
			idx = 0 
			while idx < batch_size and TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT: 	
				
				# print(TRAIN_PROCESSED_IDX)
				voxel, g_obj, g_cord = kitti_3DVoxel.read_kitti_data(TRAIN_PROCESSED_IDX)
				TRAIN_PROCESSED_IDX += 1
				
				if voxel is None:
					continue
				
				idx += 1 
				
				# print(voxel.shape)
				batch_voxel.append(voxel)
				batch_g_obj.append(g_obj)
				batch_g_cord.append(g_cord)
			
			yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_obj, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32)
	
	elif split == "testing": 	
		while TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT: 
			batch_voxel = []
			
			idx = 0
			while idx < batch_size and TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT: 
			
				voxel = kitti_3DVoxel.read_kitti_data(TEST_PROCESSED_IDX)
				TEST_PROCESSED_IDX += 1
				
				if voxel is None: 
					continue
				
				idx += 1	
				batch_voxel.append(voxel)
			
			yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis]
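
# Usage sketch (hypothetical dataset path): one pass over the training set in batches.
#   for batch_voxel, batch_g_obj, batch_g_cord in read_batch_data(3, "/path/to/dataset"):
#       print(batch_voxel.shape)   # (3, 400, 400, 20, 1) with the default grid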
		
	

# train 3D-CNN Model 
def train(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2,0.2,0.2), scale = 0.25, lr=0.01, limitX=(0,80), limitY=(-40,40), limitZ=(-2.5,1.5), epoch=101): 
	'''
	Inputs: 
		batch_num: 
		data_set_dir: 
		modelType: 
		objectType: 
		resolution: 
		scale: 
		lr: 
		limitX, limitY, limitZ: 
	Outputs: 
		None
	'''
	batch_size      = batch_num
	training_epochs = epoch
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
	voxel_shape = (sizeX, sizeY, sizeZ)
	with tf.Session() as sess: 
		voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = True)
		saver = tf.train.Saver()
		
		total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y_pred = model.loss_Fun(lossType = 0, cord_loss_weight = 0.02)
		
		optimizer = model.create_optimizer(total_loss, optType = "Adam", learnRate = lr)
		
		init = tf.global_variables_initializer()
		
		sess.run(init)
		
		for epoch in range(training_epochs): 
			batchCnt = 0
			for (batch_voxel, batch_g_obj, batch_g_cord) in read_batch_data(batch_size, data_set_dir, objectType = objectType, split = "training", resolution = resolution, scale = scale, limitX = limitX, limitY = limitY, limitZ = limitZ): 
				# print("batch_g_obj")
				# print(batch_g_obj.shape)
				# run the update and fetch the loss terms in a single forward/backward pass 
				_, cord_cost, obj_cost, non_obj_cost = sess.run(
					[optimizer, cord_loss, is_obj_loss, non_obj_loss],
					feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})
				
				print("Epoch: ",  (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "cord_cost = ", "{:.9f}".format(cord_cost))
				print("Epoch: ",  (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "obj_cost = ", "{:.9f}".format(obj_cost))
				print("Epoch: ",  (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "non_obj_cost = ", "{:.9f}".format(non_obj_cost))
				
				batchCnt += 1
				
			if (epoch > 0) and (epoch % 10 == 0): 
				saver.save(sess, "velodyne_kitti_train_" + str(epoch) + ".ckpt")
				
		print("Training Finishied !")


# test 3D-CNN Model
def test(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2, 0.2, 0.2), scale = 0.25, limitX = (0, 80), limitY = (-40, 40), limitZ=(-2.5, 1.5)): 
	'''
	Inputs: 
		batch_num: 
		data_set_dir: 
		resolution: 
		scale:
		limitX, limitY, limitZ:  
	Outputs: 
		None 
	'''
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[1]))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2]))
	voxel_shape = (sizeX, sizeY, sizeZ)
	batch_size = batch_num
	
	# read_batch_data is a generator; take the first test batch, which is already 
	# shaped [batch, X, Y, Z, 1] 
	batch_voxel_x = next(read_batch_data(batch_size, data_set_dir, objectType = objectType, split="testing", resolution=resolution, scale=scale, limitX=limitX, limitY=limitY, limitZ=limitZ))
	
	with tf.Session() as sess: 
		is_train = False
		voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = False)
		# restore weights into the graph built above (assumes the epoch-40 checkpoint exists) 
		saver = tf.train.Saver()
		last_model = "./velodyne_kitti_train_40.ckpt"
		saver.restore(sess, last_model)
		
		objectness = model.objectness
		cordinate  = model.cordinate
		y_pred     = model.y 
		
		objectness = sess.run(objectness, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
		cordinate  = sess.run(cordinate, feed_dict={voxel:batch_voxel_x})[0]
		y_pred     = sess.run(y_pred, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
		
		idx        = np.where(y_pred >= 0.995)
		
		spheres    = np.vstack((idx[0], np.vstack((idx[1], idx[2])))).transpose()
		
		centers    = sphere_to_center(spheres, scale = scale, resolution=resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
		
		corners    = cordinate[idx].reshape(-1, 8, 3) + centers[:, np.newaxis]
		
		print(centers)
		print(corners)
		

if __name__ == "__main__":
	
	batch_num       = 3
	data_set_dir    = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
	modelType       = 1
	objectType      = "Car"
	resolution      = (0.2, 0.2, 0.2)
	scale           = 0.25 
	lr              = 0.001
	limitX          = (0, 80)
	limitY          = (-40, 40)
	limitZ          = (-2.5, 1.5)       
	epoch           = 101 
	train(batch_num, data_set_dir = data_set_dir, modelType = modelType, objectType = objectType, resolution=resolution, scale=scale, lr=lr, limitX = limitX, limitY = limitY, limitZ = limitZ, epoch = epoch)
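	
	# Sanity check on the grid sizes implied by the defaults above: 
	#   input grid : (80 - 0)/0.2 = 400, (40 - (-40))/0.2 = 400, (1.5 - (-2.5))/0.2 = 20 -> (400, 400, 20) 
	#   output grid: scale = 0.25 of the input grid                                      -> (100, 100, 5) 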
	

				

2.1 Network Model
'''
Baidu Inc. 

Ref: 
3D Fully Convolutional Network for Vehicle Detection in Point Cloud

Author: HSW 
Date: 2018-05-02 
'''

import numpy as np 
import tensorflow as tf 

class Full_CNN_3D_Model(object): 
	'''
		Define Full CNN Model
	'''
	
	def __init__(self): 
		pass
		
	def cnn3d_graph(self, voxel, modelType = 0, activation = tf.nn.relu, phase_train = True): 
		if modelType == 0: 
			# Modified 3D-CNN. Not recommended: three stride-2 convolutions downsample the 
			# grid by 1/8 per axis (e.g. (400, 400, 20) -> (50, 50, 3)), which is too coarse 
			# and causes large localization errors at prediction time. 
			self.layer1     = self.conv3d_layer(voxel      ,  1, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
						
			self.layer2     = self.conv3d_layer(self.layer1, 16, 32, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
			
			self.layer3     = self.conv3d_layer(self.layer2, 32, 64, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
			
			self.layer4     = self.conv3d_layer(self.layer3, 64, 64, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, phase_train=phase_train)
			
			self.objectness = self.conv3D_to_output(self.layer4, 64, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None)
			
			self.cordinate  = self.conv3D_to_output(self.layer4, 64, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None)
			
			self.y          = tf.nn.softmax(self.objectness, dim=-1)
			
		elif modelType == 1: 
			# 3D-CNN (the paper's architecture): 1/4 downsampling overall, i.e. 
			# outputSize / inputSize = 0.25 (three stride-2 convolutions, then one 
			# stride-2 deconvolution: e.g. (400, 400, 20) -> (100, 100, 5)). 
			self.layer1       = self.conv3d_layer(voxel      ,  1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
			
			self.layer2       = self.conv3d_layer(self.layer1, 10, 20, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
			
			self.layer3       = self.conv3d_layer(self.layer2, 20, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
			
			base_shape        = self.layer2.get_shape().as_list()
			
			obj_output_shape  = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2]
			
			cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24]
			
			self.objectness   = self.deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)
			
			self.cordinate    = self.deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None)
			
			self.y            = tf.nn.softmax(self.objectness, dim=-1)
		
	# batch Normalize 
	def batch_norm(self, inputs, phase_train = True, decay = 0.9, eps = 1e-5): 
		'''
		Inputs: 
			inputs: input data for last layer 
			phase_train: Python bool evaluated at graph-construction time; True uses batch statistics and updates the running averages, False uses the stored running averages 
		Outputs: 
			norm data for next layer 
		'''
		gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
		beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
		pop_mean = tf.get_variable("pop_mean", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
		pop_var = tf.get_variable("pop_var", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
		if phase_train: 
			# normalize over the batch and the three spatial axes 
			batch_mean, batch_var = tf.nn.moments(inputs, axes = [0, 1, 2, 3])
			train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean*(1 - decay))
			train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
			with tf.control_dependencies([train_mean, train_var]):
				return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps)
		else: 
			return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps)
			
		
	# 3D Conv Layer 
	def conv3d_layer(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): 
		'''
		Inputs: 
			inputs: pre-Layer output 
			inputs_dims: pre-Layer output channels 
			outputs_dims: cur-Layer output channels 
			[length, height, width]: cur-Layer conv3d kernel size 
			stride: conv3d kernel move step in length/height/width axis
			activation: default use relu activation function 
			padding: conv3d 'padding' parameter 
		Outputs: 
			3D Conv. Layer outputs 	
		'''
		with tf.variable_scope("conv3D" + name): 
			# conv3d layer kernel 
			kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims], dtype = tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01))
			
			# conv3d layer bias 
			bias = tf.get_variable("bias", shape=[outputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
		
			# conv3d 
			conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)
			
			out = tf.nn.bias_add(conv, bias)
			
			if activation:
				out = activation(out, name="activation")
				
			# note: batch norm is applied after the activation here 
			out = self.batch_norm(out, phase_train)
			
		return out
	
		
		
	# 3D Conv to Classification Layer 
	def conv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): 
		'''
		Inputs: 
			inputs: pre-Layer outputs 
			inputs_dims: pre-Layer output channels 
			outputs_dims: cur-Layer output channels 
			stride: conv3d kernel move step in length/height/width axis
			activation: default use relu activation function 
			padding: conv3d 'padding' parameter 
		Outputs: 
			conv outputs 
		'''
		with tf.variable_scope("conv3D" + name):
			kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.01))
			conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)
		return conv 
		
		
	# 3D Deconv. to Classification Layer 
	def deconv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, output_shape, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): 
		'''
		Inputs: 
			inputs: pre-Layer outputs 
			inputs_dims: pre-Layer output channels 
			outputs_dims: cur-Layer output channels 
			stride: conv3d kernel move step in length/height/width axis
			activation: default use relu activation function 
			padding: conv3d 'padding' parameter 
			outputs_shape: de-conv outputs shape  
		Outputs: 
			de-conv outputs 
		'''
		with tf.variable_scope("deconv3D"+name):
			kernel = tf.get_variable("weights", shape=[length, height, width, outputs_dims, inputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.01))
			deconv = tf.nn.conv3d_transpose(inputs, kernel, output_shape, stride, padding="SAME")
		
		return deconv 
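	
	# shape note (illustrative, modelType 1 defaults): with stride [1, 2, 2, 2, 1] and 
	# SAME padding, conv3d_transpose maps layer3's (50, 50, 3) grid up to output_shape, 
	# i.e. (100, 100, 5), matching layer2's spatial size. 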
		
	# define loss 
	def loss_Fun(self, lossType = 0, cord_loss_weight = 0.02): 
		'''
		Inputs: 
			lossType: selects the loss formulation (only type 0 is implemented) 
			cord_loss_weight: weight of the corner-regression term (default 0.02) 
		Outputs: 
			total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y 
		'''
		if lossType == 0: 
			# print("g_obj")
			# print(self.cordinate.get_shape())
			g_obj = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list()[:4])
			g_cord = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list())
			non_g_obj = tf.subtract(tf.ones_like(g_obj, dtype=tf.float32), g_obj )
				
			epsilon = 0.00001
			y = self.y 
				
			is_obj_loss = -tf.reduce_sum(tf.multiply(g_obj, tf.log(y[:, :, :, :, 0] + epsilon)))         # object voxels: push objectness up 
			non_obj_loss = tf.reduce_sum(tf.multiply(non_g_obj, tf.log(y[:, :, :, :, 0] + epsilon)))     # non-object voxels: push objectness down 
			
			cross_entropy = tf.add(is_obj_loss, non_obj_loss)
			obj_loss = cross_entropy
				
			cord_diff = tf.multiply(g_obj , tf.reduce_sum(tf.square(tf.subtract(self.cordinate, g_cord)), 4)) # cord loss 
			cord_loss = tf.multiply(tf.reduce_sum(cord_diff), cord_loss_weight)
				
			return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y 
	
	# Create Optimizer 
	def create_optimizer(self, all_loss, optType = "Adam", learnRate = 0.001): 
		'''
		Inputs: 
			all_loss: graph all_loss 
			lr: learn rate 
		Outputs: 
			optimizer 
		'''
		if optType == "Adam": 
			opt = tf.train.AdamOptimizer(learnRate)
			optimizer = opt.minimize(all_loss)
		
		return optimizer
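
In formula form, the type-0 loss implemented in loss_Fun above combines an objectness term and a weighted corner-regression term. With $y_0$ the softmaxed objectness channel, $g_v \in \{0,1\}$ the ground-truth object map, $\hat{c}_v$ and $c_v$ the predicted and ground-truth 24-dimensional corner offsets at voxel $v$, $\epsilon = 10^{-5}$, and $\lambda$ = cord_loss_weight:

$$L = \underbrace{-\sum_{v} g_v \log\big(y_{0,v} + \epsilon\big) + \sum_{v} (1 - g_v)\log\big(y_{0,v} + \epsilon\big)}_{\text{objectness}} + \lambda \sum_{v} g_v \,\big\lVert \hat{c}_v - c_v \big\rVert^2$$

Note that the second term drives $y_0$ toward 0 on empty voxels by minimizing $\log(y_0 + \epsilon)$ directly (bounded below by $\log \epsilon$) rather than the usual $-\log(1 - y_0)$.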

2.2 Data Preprocessing

'''Prepare KITTI data for 3D object detection
Ref: 3D Fully Convolutional Network for Vehicle Detection in Point Cloud

Author: Shiwen He 
Date: 28 April 2018 

'''

import numpy as np 
from kitti_object import kitti_object as kittiReader
import kitti_util   

# lidar data => 3D Grid Voxel 

# filter lidar data by camera FoV 
def filter_camera_fov(pc): 
	'''
	Inputs: 
		pc: n x 3 
	Outputs: 
		filter_pc: m x 3, m <= n 
	Notices: 
		FoV: roughly -45 to +45 degrees around the +x axis; the 0.27 m term shifts the 
		cone apex forward along x, presumably to account for the camera's offset from the lidar 
	'''
	logic_fov = np.logical_and((pc[:, 1] < pc[:, 0] - 0.27), (-pc[:, 1] < pc[:, 0] - 0.27))

	filter_pc = pc[logic_fov]
	
	return filter_pc 
	

# filter lidar data by detection range 
def filter_lidar_range(pc, limitX, limitY, limitZ):
	''' 
	Inputs: 
		pc: n x 3, 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		filter_pc: m x 3, m <= n  
	'''
	logic_x = np.logical_and(pc[:, 0] >= limitX[0], pc[:, 0] < limitX[1])
	logic_y = np.logical_and(pc[:, 1] >= limitY[0], pc[:, 1] < limitY[1])
	logic_z = np.logical_and(pc[:, 2] >= limitZ[0], pc[:, 2] < limitZ[1])
	logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))
	
	filter_pc = pc[:, :3][logic_xyz]
	
	return  filter_pc
	
	
# filter center + corners 
def filter_center_corners(centers, corners, boxsizes, limitX, limitY, limitZ): 
	'''
	Inputs: 
		centers: n x 3 
		corners: n x 8 x 3 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		filter_centers: m x 3, m <= n 
		filter_corners: m x 8 x 3, m <= n 
		filter_boxsizes: m x 3, m <= n 
	'''
	
	logic_x = np.logical_and(centers[:, 0] >= limitX[0], centers[:, 0] < limitX[1])
	logic_y = np.logical_and(centers[:, 1] >= limitY[0], centers[:, 1] < limitY[1])
	logic_z = np.logical_and(centers[:, 2] >= limitZ[0], centers[:, 2] < limitZ[1])
	logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))
	
	filter_centers_1 = centers[logic_xyz, :]
	filter_corners_1 = corners[logic_xyz, :, :]
	filter_boxsizes_1 = boxsizes[logic_xyz, :]
	
	shape_centers = filter_centers_1.shape 
	
	filter_centers = np.zeros([shape_centers[0], 3])
	filter_corners = np.zeros([shape_centers[0], 8, 3]) 
	filter_boxsizes = np.zeros([shape_centers[0], 3]) 
	
	idx = 0
	for idx2 in range(shape_centers[0]): 
		logic_x = np.logical_and(filter_corners_1[idx2, :, 0] >= limitX[0], filter_corners_1[idx2, :, 0] < limitX[1])
		logic_y = np.logical_and(filter_corners_1[idx2, :, 1] >= limitY[0], filter_corners_1[idx2, :, 1] < limitY[1])
		logic_z = np.logical_and(filter_corners_1[idx2, :, 2] >= limitZ[0], filter_corners_1[idx2, :, 2] < limitZ[1])
		logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))

		if logic_xyz.all(): 
			filter_centers[idx, :3] = filter_centers_1[idx2, :]
			filter_corners[idx, :8, :3] = filter_corners_1[idx2, :, :] 
			filter_boxsizes[idx, :3] = filter_boxsizes_1[idx2, :]
			idx += 1		
	if idx > 0:
		return filter_centers[:idx, :], filter_corners[:idx, :, :], filter_boxsizes[:idx, :]
	else:
		return None, None, None

def filter_label(object3Ds, objectType = 'Car'): 
	'''
	Inputs: 
		object3Ds:
		objectType:  
	Outputs: 
		centers, corners, rotatey 
		
	'''
	idx = 0
	data = np.zeros([50, 7]).astype(np.float32)
	for obj3d in object3Ds: 
		if obj3d.type == "DontCare": 
			continue
					
		if obj3d.type == objectType: 
			# position (box bottom center, camera coordinates) 
			data[idx, 0] = obj3d.t[0]
			data[idx, 1] = obj3d.t[1]
			data[idx, 2] = obj3d.t[2]
			# size (h, w, l) 
			data[idx, 3] = obj3d.h
			data[idx, 4] = obj3d.w
			data[idx, 5] = obj3d.l 
			# rotation around the camera Y axis 
			data[idx, 6] = obj3d.ry
			idx += 1         
	if idx > 0:
		return data[:idx, :3], data[:idx, 3:6], data[:idx, 6]
	else:
		return None, None, None
		
		
def proj_to_velo(calib_data):
	"""
	Inputs: 
		calib_data: 
	Outputs: 
		project matrix: from camera cordination to velodyne cordination
	"""
 
	rect            = calib_data.R0   # calib_data["R0_rect"].reshape(3, 3)
	velo_to_cam     = calib_data.V2C  # calib_data["Tr_velo_to_cam"].reshape(3, 4)
	inv_rect        = np.linalg.inv(rect)
	inv_velo_to_cam = np.linalg.pinv(velo_to_cam[:, :3])
	
	return np.dot(inv_velo_to_cam, inv_rect)
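
# A note on proj_to_velo (sketch of the approximation above): with 
# x_cam = R0 * [R | t] * x_velo, dropping the translation column t gives 
# x_velo ~= pinv(R) * inv(R0) * x_cam, which is the matrix this function returns. 
# It is applied in read_kitti_data below as, e.g.: 
#   proj_velo = proj_to_velo(calib)[:, :3] 
#   centers_velo = np.dot(centers_cam, proj_velo.transpose())   # centers_cam: hypothetical name 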


# corners_3d
def compute_3d_corners(centers, sizes, rotates): 
	''' 
	Inputs: 
			centers: 
			rotates: 
			sizes: 
	Outputs: 
			corners_3d:  n x 8 x 3 array in Lidar coord.
	'''
	
	# print(centers) 
	
	corners = []
	
	for place, rotate, sz in zip(centers, rotates, sizes):
		x, y, z = place
		h, w, l = sz
		if l > 10:
			# skip implausibly long boxes (note: if this triggers, corners ends up with 
			# fewer rows than centers, which the caller does not re-align) 
			continue

		corner = np.array([
			[x - l / 2., y - w / 2., z],
			[x + l / 2., y - w / 2., z],
			[x - l / 2., y + w / 2., z],
			[x - l / 2., y - w / 2., z + h],
			[x - l / 2., y + w / 2., z + h],
			[x + l / 2., y + w / 2., z],
			[x + l / 2., y - w / 2., z + h],
			[x + l / 2., y + w / 2., z + h],
		])

		corner -= np.array([x, y, z])

		rotate_matrix = np.array([
			[np.cos(rotate), -np.sin(rotate), 0],
			[np.sin(rotate), np.cos(rotate), 0],
			[0, 0, 1]
		])

		a = np.dot(corner, rotate_matrix.transpose())
		a += np.array([x, y, z])
		corners.append(a)
		
	corners_3d  = np.array(corners)   
	
	return corners_3d
	
# lidar data to 3D Grid Voxel 
def lidar_to_binary_voxel(pc, resolution, limitX, limitY, limitZ):
	''' 
	Inputs: 
		pc: n x 3, 
		resolution: 1 x 3, 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		voxel: shape is inputSize  
	'''
	
	voxel_pc = np.zeros_like(pc).astype(np.int32)
	 
	# Compute PointCloud Position in 3D Grid 
	voxel_pc[:, 0] = ((pc[:, 0] - limitX[0]) / resolution[0]).astype(np.int32)
	voxel_pc[:, 1] = ((pc[:, 1] - limitY[0]) / resolution[1]).astype(np.int32)
	voxel_pc[:, 2] = ((pc[:, 2] - limitZ[0]) / resolution[2]).astype(np.int32)
	
	# 3D Grid 
	voxel = np.zeros((int(round((limitX[1] - limitX[0]) / resolution[0])), \
		int(round((limitY[1] - limitY[0]) / resolution[1])), \
		int(round((limitZ[1] - limitZ[0]) / resolution[2]))))
	
	# 3D Grid Value 
	voxel[voxel_pc[:, 0], voxel_pc[:, 1], voxel_pc[:, 2]] = 1
	
	return voxel 
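
# Quick sanity check (illustrative; default range/resolution assumed): 
#   pts = np.array([[10.0, 0.0, 0.0], [20.0, -5.0, 1.0]]) 
#   v = lidar_to_binary_voxel(pts, (0.2, 0.2, 0.2), (0, 80), (-40, 40), (-2.5, 1.5)) 
#   v.shape -> (400, 400, 20); v.sum() -> 2.0 (two occupied cells) 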


# label center to 3D Grid Voxel Center(sphere) 
def center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ):
	''' 
	Inputs: 
		center:  n x 3 
		boxsize: n x 3
		scale: 1 x 1, = outputSize / inputSize
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		spheres: m x 3, m <= n  
	'''
	
	# from the 3D box's bottom center to its 3D center (boxsize columns are h, w, l) 
	move_center = centers.copy() 	
	
	move_center[:, 2] = centers[:, 2] + boxsize[:, 0] / 2  
	
	# compute the label center's cell in the (downsampled) output 3D grid 
	spheres = np.zeros_like(move_center).astype(np.int32)
	
	spheres[:, 0] = ((move_center[:, 0] - limitX[0]) / resolution[0] * scale).astype(np.int32)
	spheres[:, 1] = ((move_center[:, 1] - limitY[0]) / resolution[1] * scale).astype(np.int32)
	spheres[:, 2] = ((move_center[:, 2] - limitZ[0]) / resolution[2] * scale).astype(np.int32)
	
	return spheres


# 3D Grid Voxel Center(sphere) to label center 
def sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ): 
	'''
	Inputs: 
		spheres: n x 3 
		scale: 1 x 1, = outputSize /  inputSize 
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2 
	Outputs: 
		centers: n x 3 (one metric center per input cell) 
	'''
	centers = np.zeros_like(spheres).astype(np.float32) 
	
	centers[:, 0] = spheres[:, 0] * resolution[0] / scale + limitX[0]
	centers[:, 1] = spheres[:, 1] * resolution[1] / scale + limitY[0]
	centers[:, 2] = spheres[:, 2] * resolution[2] / scale + limitZ[0]
	
	return centers
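
# Worked example (defaults assumed): a box whose bottom center is (10, 0, 0) with 
# height h = 1.5 has 3D center (10, 0, 0.75). center_to_sphere maps it to output 
# cell (12, 50, 4); sphere_to_center maps that cell back to (9.6, 0.0, 0.7), i.e. 
# the roundtrip is exact only up to one output voxel (0.2 / 0.25 = 0.8 m). 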

# label corners to 3D Grid Voxel: corners - centers   
def corners_to_train(spheres, corners, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		spheres: n x 3
		corners: n x 8 x 3 
		scale: 1 x 1, = outputSize / inputSize
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2 
	Outputs: 
		train_corners: n x 8 x 3 (corner offsets relative to each quantized center) 
	'''
	
	# 3D Grid Voxel Center => label center 
	centers = sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ)
	train_corners = np.zeros_like(corners).astype(np.float32)
	
	# train corners for regression loss 
	for index, (corner, center) in enumerate(zip(corners, centers)):
		train_corners[index] = corner - center  
	
	return train_corners
	
		
	
	
# create center and cordination for train 
def create_train_label(centers, corners, boxsize, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		centers: n x 3 
		corners: n x 8 x 3 
		boxsize: n x 3 
		scale: 1 x 1, outputSize / inputSize
		resolution: 1 x 3 
		limitX. limitY, limitZ: 1 x 2 
	Outputs: 
		train_centers: m x 3, m <= n 
		train_corners: m x 8 x 3, m <= n 
	'''
	
	train_centers = center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ)
	train_corners = corners_to_train(train_centers, corners, scale, resolution, limitX, limitY, limitZ)
	
	return train_centers, train_corners 
	

def create_obj_map(train_centers, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		centers: n x 3 
		scale: 1 x 1, outputSize / inputSize
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		obj_map: shape is scale * inputSize 
	'''
	
	# 3D Grid 
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))
	obj_map = np.zeros([sizeX, sizeY, sizeZ]) 
	
	# print("sizeX, sizeY, sizeZ")
	# print(sizeX, sizeY, sizeZ)
	
	# objectness map: label center in objectness map where value is 1  
	obj_map[train_centers[:, 0], train_centers[:, 1], train_centers[:, 2]] = 1 
	
	return obj_map  


def create_cord_map(train_centers, train_corners, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		train_centers: n x 3 
		train_corners: n x 8 x 3 
		scale: 1 x 1, outputSize / inputSize
		resolution: 1 x 3 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		cord_map: shape is inputSize * scale   
	''' 
	# reshape train_corners:  n x 8 x 3 => n x 24 
	corners = train_corners.reshape(train_corners.shape[0], -1) 
	# 3D Grid 
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))
	sizeD = 24
	
	cord_map = np.zeros([sizeX, sizeY, sizeZ, sizeD]) 
	
	# print(train_centers)
	
	cord_map[train_centers[:,0], train_centers[:, 1], train_centers[:, 2]] = corners
	
	return cord_map 
	

# kitti data interface:  
class kitti_3DVoxel_interface(object): 
	
	def __init__(self, root_dir, objectType = 'Car', split='training', scale = 0.25, resolution = (0.2, 0.2, 0.2), limitX = (0, 80), limitY = (-40, 40), limitZ = (-2.5, 1.5)):
		'''
		Inputs: 
			case1 root_dir: train or val. data dir, train or val.'s file struct like: 
				root_dir->training->velodyne
				root_dir->training->calib
				root_dir->training->label_2 
			case2 root_dir: test data dir, test's file struct like: 
				root_dir->testing->velodyne
				root_dir->testing->calib 
		Outputs: 
				-None 
		'''
		self.root_dir   = root_dir
		self.split      = split
		self.object     = kittiReader(self.root_dir, self.split)
		self.objectType = objectType
		self.scale      = scale
		self.resolution = resolution    
		self.limitX     = limitX
		self.limitY     = limitY
		self.limitZ     = limitZ

	def read_kitti_data(self, idx = 0): 
		'''
		Inputs:
			idx: training or testing sample index
		Outputs:
			voxel    : inputSize
			obj_map  : scale * inputSize
			cord_map : scale * inputSize
		'''
	
		kitti_Object3Ds = None
		kitti_Lidar     = None 
		kitti_Calib     = None
	
		
		if self.split == 'training':
			# read Lidar data + Lidar Label + Calib data 
			kitti_Object3Ds = self.object.get_label_objects(idx)  
			kitti_Lidar     = self.object.get_lidar(idx)  
			kitti_Calib     = self.object.get_calibration(idx) 
			
			# lidar data filter 
			filter_fov = filter_camera_fov(kitti_Lidar) 
			filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)
		
			# label filter 
			centers, boxsizes, rotates = filter_label(kitti_Object3Ds, self.objectType)
			
		
			if centers is None:
				return None, None, None 
		
			# label centers: convert from camera coordinates to velodyne coordinates 
			if kitti_Calib is not None: 
				proj_velo = proj_to_velo(kitti_Calib)[:, :3]
				centers = np.dot(centers, proj_velo.transpose())[:, :3] 
				
			
			# label corners: 
			corners = compute_3d_corners(centers, boxsizes, rotates)
	
			
			# print(corners)
			# print(corners.shape)
		
			# filter centers + corners 
			filter_centers, filter_corners, boxsizes = filter_center_corners(centers, corners, boxsizes, self.limitX, self.limitY, self.limitZ)
		
			# print(filter_centers)
			# print(filter_corners)
		
			if not(filter_centers is None): 
				# training center
				train_centers, train_corners = create_train_label(filter_centers, filter_corners, boxsizes, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)
				# print("filter_centers")
				# print(filter_centers)
				
				# print("train_centers")
				# print(train_centers)
				
				# obj_map / cord_map / voxel 
				obj_map = create_obj_map(train_centers, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)
				cord_map = create_cord_map(train_centers, train_corners, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)
				voxel = lidar_to_binary_voxel(filter_range, self.resolution, self.limitX, self.limitY, self.limitZ)
		
				return voxel, obj_map, cord_map
			else: 
				return None, None, None 
		elif self.split == 'testing':
			# read Lidar Data + Calib + Data 
			kitti_Lidar = self.object.get_lidar(idx)  
			kitti_Calib = self.object.get_calibration(idx) 
			
			# lidar data filter 
			filter_fov = filter_camera_fov(kitti_Lidar) 
			filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)
			
			voxel = lidar_to_binary_voxel(filter_range, self.resolution, self.limitX, self.limitY, self.limitZ)
			
			return voxel

				

if __name__ == '__main__':
	
	data_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
	
	kitti_3DVoxel = kitti_3DVoxel_interface(data_dir, objectType = 'Car', split='training', scale = 0.25, resolution = (0.2, 0.2, 0.2), limitX = (0, 80), limitY = (-40, 40), limitZ = (-2.5, 1.5))
	
	sampleIdx = 195 
	voxel, obj_map, cord_map = kitti_3DVoxel.read_kitti_data(sampleIdx)
	
	if voxel is not None: 
		print(voxel.shape)
		print(obj_map.shape) 
		print(cord_map.shape)
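		
		# with the defaults above, the expected shapes are: 
		#   voxel    (400, 400, 20) 
		#   obj_map  (100, 100, 5) 
		#   cord_map (100, 100, 5, 24) 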
	
	

2.3 KITTI Data Loading Helpers

''' Helper class and functions for loading KITTI objects

Author: Charles R. Qi
Date: September 2017
'''
from __future__ import print_function

import os
import sys
import numpy as np
import cv2
from PIL import Image
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(os.path.join(ROOT_DIR, 'mayavi'))
import kitti_util as utils

try:
    raw_input          # Python 2
except NameError:
    raw_input = input  # Python 3

# 3D static data 
class kitti_object(object):
    '''Load and parse object data into a usable format.'''
    
    def __init__(self, root_dir, split='training'):
        '''root_dir contains training and testing folders'''
        self.root_dir = root_dir
        self.split = split
        self.split_dir = os.path.join(root_dir, split)

        if split == 'training':
            self.num_samples = 7481
        elif split == 'testing':
            self.num_samples = 7518
        else:
            print('Unknown split: %s' % (split))
            exit(-1)
		
        # data directories 
        self.image_dir = os.path.join(self.split_dir, 'image_2')
        self.calib_dir = os.path.join(self.split_dir, 'calib')
        self.lidar_dir = os.path.join(self.split_dir, 'velodyne')
        self.label_dir = os.path.join(self.split_dir, 'label_2')

    def __len__(self):
        return self.num_samples
    # per-sample readers: image / lidar / calibration / labels 
    def get_image(self, idx):
        assert(idx<self.num_samples)
        img_filename = os.path.join(self.image_dir, '%06d.png'%(idx))
        return utils.load_image(img_filename)

    def get_lidar(self, idx):
        assert(idx<self.num_samples)
        lidar_filename = os.path.join(self.lidar_dir, '%06d.bin'%(idx))
        return utils.load_velo_scan(lidar_filename)

    def get_calibration(self, idx):
        assert(idx<self.num_samples)
        calib_filename = os.path.join(self.calib_dir, '%06d.txt'%(idx))
        return utils.Calibration(calib_filename)

    def get_label_objects(self, idx):
        assert(idx<self.num_samples and self.split=='training')
        label_filename = os.path.join(self.label_dir, '%06d.txt'%(idx))
        return utils.read_label(label_filename)


def show_image_with_boxes(img, objects, calib, show3d=True):
    ''' Show image with 2D (and optionally 3D) bounding boxes '''
    img1 = np.copy(img) # for 2d bbox
    img2 = np.copy(img) # for 3d bbox
    for obj in objects:
        if obj.type=='DontCare': continue
        cv2.rectangle(img1, (int(obj.xmin),int(obj.ymin)),
            (int(obj.xmax),int(obj.ymax)), (0,255,0), 2)
        box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P)
        img2 = utils.draw_projected_box3d(img2, box3d_pts_2d)
    Image.fromarray(img1).show()
    if show3d:
        Image.fromarray(img2).show()

def get_lidar_in_image_fov(pc_velo, calib, xmin, ymin, xmax, ymax,
                           return_more=False, clip_distance=2.0):
    ''' Filter lidar points, keep those in image FOV '''
    pts_2d = calib.project_velo_to_image(pc_velo)
    fov_inds = (pts_2d[:,0]<xmax) & (pts_2d[:,0]>=xmin) & \
        (pts_2d[:,1]<ymax) & (pts_2d[:,1]>=ymin)
    fov_inds = fov_inds & (pc_velo[:,0]>clip_distance)
    imgfov_pc_velo = pc_velo[fov_inds,:]
    if return_more:
        return imgfov_pc_velo, pts_2d, fov_inds
    else:
        return imgfov_pc_velo

def show_lidar_with_boxes(pc_velo, objects, calib,
                          img_fov=False, img_width=None, img_height=None): 
    ''' Show all LiDAR points.
        Draw 3d box in LiDAR point cloud (in velo coord system) '''
    if 'mlab' not in sys.modules: import mayavi.mlab as mlab
    from viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3d

    print(('All point num: ', pc_velo.shape[0]))
    fig = mlab.figure(figure=None, bgcolor=(0,0,0),
        fgcolor=None, engine=None, size=(1000, 500))
    if img_fov:
        pc_velo = get_lidar_in_image_fov(pc_velo, calib, 0, 0,
            img_width, img_height)
        print(('FOV point num: ', pc_velo.shape[0]))
    draw_lidar(pc_velo, fig=fig)

    for obj in objects:
        if obj.type=='DontCare':continue
        # Draw 3d bounding box
        box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P) 
        box3d_pts_3d_velo = calib.project_rect_to_velo(box3d_pts_3d)
        # Draw heading arrow
        ori3d_pts_2d, ori3d_pts_3d = utils.compute_orientation_3d(obj, calib.P)
        ori3d_pts_3d_velo = calib.project_rect_to_velo(ori3d_pts_3d)
        x1,y1,z1 = ori3d_pts_3d_velo[0,:]
        x2,y2,z2 = ori3d_pts_3d_velo[1,:]
        draw_gt_boxes3d([box3d_pts_3d_velo], fig=fig)
        mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.5,0.5,0.5),
            tube_radius=None, line_width=1, figure=fig)
    mlab.show(1)

def show_lidar_on_image(pc_velo, img, calib, img_width, img_height):
    ''' Project LiDAR points to image '''
    imgfov_pc_velo, pts_2d, fov_inds = get_lidar_in_image_fov(pc_velo,
        calib, 0, 0, img_width, img_height, True)
    imgfov_pts_2d = pts_2d[fov_inds,:]
    imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo)

    import matplotlib.pyplot as plt
    cmap = plt.cm.get_cmap('hsv', 256)
    cmap = np.array([cmap(i) for i in range(256)])[:,:3]*255

    for i in range(imgfov_pts_2d.shape[0]):
        depth = imgfov_pc_rect[i,2]
        color = cmap[int(640.0/depth),:]
        cv2.circle(img, (int(np.round(imgfov_pts_2d[i,0])),
            int(np.round(imgfov_pts_2d[i,1]))),
            2, color=tuple(color), thickness=-1)
    Image.fromarray(img).show() 
    return img

def dataset_viz():
    dataset = kitti_object(os.path.join(ROOT_DIR, 'dataset/KITTI/object'))

    for data_idx in range(len(dataset)):
        # Load data from dataset
        objects = dataset.get_label_objects(data_idx)
        objects[0].print_object()
        img = dataset.get_image(data_idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
        img_height, img_width, img_channel = img.shape
        print(('Image shape: ', img.shape))
        pc_velo = dataset.get_lidar(data_idx)[:,0:3]
        calib = dataset.get_calibration(data_idx)

        # Draw 2d and 3d boxes on image
        show_image_with_boxes(img, objects, calib, False)
        raw_input()
        # Show all LiDAR points. Draw 3d box in LiDAR point cloud
        show_lidar_with_boxes(pc_velo, objects, calib, True, img_width, img_height)
        raw_input()

if __name__=='__main__':
    import mayavi.mlab as mlab
    from viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3d
    dataset_viz()


""" Helper methods for loading and parsing KITTI data.

Author: Charles R. Qi
Date: September 2017
"""
from __future__ import print_function

import numpy as np
import cv2
import os

class Object3d(object):
    ''' 3d object label '''
    def __init__(self, label_file_line):
        data = label_file_line.split(' ')
        data[1:] = [float(x) for x in data[1:]]

        # extract label, truncation, occlusion
        self.type = data[0] # 'Car', 'Pedestrian', ...
        self.truncation = data[1] # truncated pixel ratio [0..1]
        self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
        self.alpha = data[3] # object observation angle [-pi..pi]

        # extract 2d bounding box in 0-based coordinates
        self.xmin = data[4] # left
        self.ymin = data[5] # top
        self.xmax = data[6] # right
        self.ymax = data[7] # bottom
        self.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax])
        
        # extract 3d bounding box information
        self.h = data[8] # box height
        self.w = data[9] # box width
        self.l = data[10] # box length (in meters)
        self.t = (data[11],data[12],data[13]) # location (x,y,z) in camera coord.
        self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi]

    def print_object(self):
        print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \
            (self.type, self.truncation, self.occlusion, self.alpha))
        print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \
            (self.xmin, self.ymin, self.xmax, self.ymax))
        print('3d bbox h,w,l: %f, %f, %f' % \
            (self.h, self.w, self.l))
        print('3d bbox location, ry: (%f, %f, %f), %f' % \
            (self.t[0],self.t[1],self.t[2],self.ry))
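
# For reference, an illustrative KITTI label line as parsed by Object3d: 
#   "Car 0.00 0 -1.58 587.01 173.33 614.12 200.12 1.65 1.67 3.64 -0.65 1.71 46.70 -1.59" 
# -> type='Car', truncation=0.00, occlusion=0, alpha=-1.58, 
#    2d box (587.01, 173.33, 614.12, 200.12), h=1.65, w=1.67, l=3.64, 
#    t=(-0.65, 1.71, 46.70), ry=-1.59 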


class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in 

3. The code passes its tests and training is still in progress; since my hardware is rather weak, training is slow.

