Hit_HSW

深度学习——3D Fully Convolutional Network for Vehicle Detection in Point Cloud模型实现

1. 参考文献

3D Fully Convolutional Network for Vehicle Detection in Point Cloud

2. 模型实现

'''
Baidu Inc. 

Ref: 
3D Fully Convolutional Network for Vehicle Detection in Point Cloud

Author: HSW 
Date: 2018-05-02 
'''


import sys
import numpy as np 
import tensorflow as tf 
from prepare_data2 import * 
from baidu_cnn_3d import * 

KITTI_TRAIN_DATA_CNT = 7481
KITTI_TEST_DATA_CNT  = 7518


# create 3D-CNN Model
def create_graph(sess, modelType = 0, voxel_shape = (400, 400, 20),  activation=tf.nn.relu, is_train = True): 
	'''
	Inputs: 
		sess: tensorflow Session Object 
		voxel_shape: voxel shape for network first layer 
		activation: 
		phrase_train: 
	Outputs: 
		voxel, graph, sess 
	'''
	voxel = tf.placeholder(tf.float32, [None, voxel_shape[0], voxel_shape[1], voxel_shape[2], 1])

	phase_train = tf.placeholder(tf.bool, name="phase_train") if is_train else None 
	
	with tf.variable_scope("3D_CNN_Model") as scope: 
		model = Full_CNN_3D_Model()
		
		model.cnn3d_graph(voxel, modelType = modelType, activation=activation, phase_train = is_train)
		
	if is_train: 
		initialized_var = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="3D_CNN_model")
		sess.run(tf.variables_initializer(initialized_var))
	
	return voxel, model, phase_train


# read batch data 
def read_batch_data(batch_size, data_set_dir,objectType = "Car", split = "training", resolution=(0.2, 0.2, 0.2), scale=0.25, limitX = (0,80), limitY=(-40,40), limitZ=(-2.5,1.5)): 
	'''
	Inputs: 
		batch_size: 
		data_set_dir: 
		objectType: default is "Car"
		split: default is "training"
		resolution: 
		scale: outputSize / inputSize 
		limitX: 
		limitY: 
		limitZ: 
	Outputs: 
		
	'''
	kitti_3DVoxel = kitti_3DVoxel_interface(data_set_dir, objectType = objectType, split=split, scale = scale, resolution = resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
	
	TRAIN_PROCESSED_IDX  = 0
	TEST_PROCESSED_IDX   = 0
	
	if split == "training": 
		while TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT: 
			batch_voxel = []
			batch_g_obj = []
			batch_g_cord = []
			
			idx = 0 
			while idx < batch_size and TRAIN_PROCESSED_IDX < KITTI_TRAIN_DATA_CNT: 	
				
				print(TRAIN_PROCESSED_IDX)
				voxel, g_obj, g_cord = kitti_3DVoxel.read_kitti_data(TRAIN_PROCESSED_IDX)
				TRAIN_PROCESSED_IDX += 1
				
				if voxel is None:
					continue
				
				idx += 1 
				
				# print(voxel.shape)
				batch_voxel.append(voxel)
				batch_g_obj.append(g_obj)
				batch_g_cord.append(g_cord)
			
			yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis], np.array(batch_g_obj, dtype=np.float32), np.array(batch_g_cord, dtype=np.float32)
	
	elif split == "testing": 	
		while TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT: 
			batch_voxel = []
			
			idx = 0
			while idx < batch_size and TEST_PROCESSED_IDX < KITTI_TEST_DATA_CNT: 
			
				voxel = kitti_3DVoxel.read_kitti_data(iter * batch_size + idx)
				TEST_PROCESSED_IDX += 1
				
				if voxel is None: 
					continue
				
				idx += 1	
				batch_voxel.append(voxel)
			
			yield np.array(batch_voxel, dtype=np.float32)[:, :, :, :, np.newaxis]
		
	

# train 3D-CNN Model 
def train(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2,0.2,0.2), scale = 0.25, lr=0.01, limitX=(0,80), limitY=(-40,40), limitZ=(-2.5,1.5), epoch=101): 
	'''
	Inputs: 
		batch_num: 
		data_set_dir: 
		modelType: 
		objectType: 
		resolution: 
		scale: 
		lr: 
		limitX, limitY, limitZ: 
	Outputs: 
		None
	'''
	batch_size      = batch_num
	training_epochs = epoch
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[0]))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[0]))
	voxel_shape = (sizeX, sizeY, sizeZ)
	with tf.Session() as sess: 
		voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = True)
		saver = tf.train.Saver()
		
		total_loss, obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y_pred = model.loss_Fun(lossType = 0, cord_loss_weight = 0.02)
		
		optimizer = model.create_optimizer(total_loss, optType = "Adam", learnRate = 0.001)
		
		init = tf.global_variables_initializer()
		
		sess.run(init)
		
		for epoch in range(training_epochs): 
			batchCnt = 0; 
			for (batch_voxel, batch_g_obj, batch_g_cord) in read_batch_data(batch_size, data_set_dir, objectType = objectType, split = "training", resolution = resolution, scale = scale, limitX = limitX, limitY = limitY, limitZ = limitZ): 
				# print("batch_g_obj")
				# print(batch_g_obj.shape)
				sess.run(optimizer, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})
				cord_cost = sess.run(cord_loss, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})
				obj_cost = sess.run(is_obj_loss, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})
				non_obj_cost = sess.run(non_obj_loss, feed_dict={voxel: batch_voxel, g_obj: batch_g_obj, g_cord: batch_g_cord, phase_train: True})
				
				print("Epoch: ",  (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "cord_cost = ", "{:.9f}".format(cord_cost))
				print("Epoch: ",  (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "obj_cost = ", "{:.9f}".format(obj_cost))
				print("Epoch: ",  (epoch + 1), ",", "BatchNum: ", (batchCnt + 1), "," , "non_obj_cost = ", "{:.9f}".format(non_obj_cost))
				
				batchCnt += 1
				
			if (epoch > 0) and (epoch % 10 == 0): 
				saver.save(sess, "velodyne_kitti_train_" + str(epoch) + ".ckpt")
				
		print("Training Finishied !")


# test 3D-CNN Model
def test(batch_num, data_set_dir, modelType = 0, objectType = "Car", resolution=(0.2, 0.2, 0.2), scale = 0.25, limitX = (0, 80), limitY = (-40, 40), limitZ=(-2.5, 1.5)): 
	'''
	Inputs: 
		batch_num: 
		data_set_dir: 
		resolution: 
		scale:
		limitX, limitY, limitZ:  
	Outputs: 
		None 
	'''
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0]))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[0]))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[0]))
	voxel_shape = (sizeX, sizeY, sizeZ)
	batch_size = batch_num; 
	
	batch_voxel = read_batch_data(batch_num, data_set_dir, objectType = objectType, split="Testing", resolution=resolution, scale=scale, limitX=limitX, limitY=limitY, limitZ=limitZ)
	
	batch_voxel_x = batch_voxel.reshape(1, batch_voxel.shape[0], batch_voxel.shape[1], batch_voxel.shape[2], 1)
	
	with tf.Session() as sess: 
		is_train = False
		voxel, model, phase_train = create_graph(sess, modelType = modelType, voxel_shape = voxel_shape, activation=tf.nn.relu, is_train = False)
		new_saver = tf.train.import_meta_graph("velodyne_kitti_train_40.ckpt.meta")
		last_model = "./velodyne_kitti_train_40.ckpt"
		saver.restore(sess, last_model)
		
		objectness = model.objectness
		cordinate  = model.cordinate
		y_pred     = model.y 
		
		objectness = sess.run(objectness, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
		cordinate  = sess.run(cordinate, feed_dict={voxel:batch_voxel_x})[0]
		y_pred     = sess.run(y_pred, feed_dict={voxel: batch_voxel_x})[0, :, :, :, 0]
		
		idx        = np.where(y_pred >= 0.995)
		
		spheres    = np.vstack((index[0], np.vstack((index[1], index[2])))).transpose()
		
		centers    = spheres_to_centers(spheres, scale = scale, resolution=resolution, limitX = limitX, limitY = limitY, limitZ = limitZ)
		
		corners    = cordinate[idx].reshape[-1, 8, 3] + centers[:, np.newaxis]
		
		print(centers)
		print(corners)
		

if __name__ == "__main__":
	
	batch_num       = 3
	data_set_dir    = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
	modelType       = 1
	objectType      = "Car"
	resolution      = (0.2, 0.2, 0.2)
	scale           = 0.25 
	lr              = 0.001
	limitX          = (0, 80)
	limitY          = (-40, 40)
	limitZ          = (-2.5, 1.5)       
	epoch           = 101 
	train(batch_num, data_set_dir = data_set_dir, modelType = modelType, objectType = objectType, resolution=resolution, scale=scale, lr =lr, limitX = limitX, limitY = limitY, limitZ = limitZ)
	saver = tf.train.Saver()

2.1 网络模型

'''
Baidu Inc. 

Ref: 
3D Fully Convolutional Network for Vehicle Detection in Point Cloud

Author: HSW 
Date: 2018-05-02 
'''

import numpy as np 
import tensorflow as tf 

class Full_CNN_3D_Model(object): 
	'''
		Define Full CNN Model
	'''
	
	def __init__(self): 
		pass; 
		
	def cnn3d_graph(self, voxel, modelType = 0, activation = tf.nn.relu, phase_train = True): 
		if modelType == 0: 
			# Modefied 3D-CNN, 该网络结构不可使用,因为降采样太严重(降采样1/8)导致在预测时会出现较大误差 
			self.layer1     = self.conv3d_layer(voxel      ,  1, 16, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
						
			self.layer2     = self.conv3d_layer(self.layer1, 16, 32, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
			
			self.layer3     = self.conv3d_layer(self.layer2, 32, 64, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
			
			self.layer4     = self.conv3d_layer(self.layer3, 64, 64, 3, 3, 3, [1, 1, 1, 1, 1], name="layer4", activation=activation, phase_train=phase_train)
			
			self.objectness = self.conv3D_to_output(self.layer4, 64, 2, 3, 3, 3, [1, 1, 1, 1, 1], name="objectness", activation=None)
			
			self.cordinate  = self.conv3D_to_output(self.layer4, 64, 24, 3, 3, 3, [1, 1, 1, 1, 1], name="cordinate", activation=None)
			
			self.y          = tf.nn.softmax(self.objectness, dim=-1)
			
		elif modelType == 1: 
			# 3D-CNN(论文网络结构： 降采样1/4，即InputSize / OutputSize = 0.25)
			self.layer1       = self.conv3d_layer(voxel      ,  1, 10, 5, 5, 5, [1, 2, 2, 2, 1], name="layer1", activation=activation, phase_train=phase_train)
			
			self.layer2       = self.conv3d_layer(self.layer1, 10, 20, 5, 5, 5, [1, 2, 2, 2, 1], name="layer2", activation=activation, phase_train=phase_train)
			
			self.layer3       = self.conv3d_layer(self.layer2, 20, 30, 3, 3, 3, [1, 2, 2, 2, 1], name="layer3", activation=activation, phase_train=phase_train)
			
			base_shape        = self.layer2.get_shape().as_list()
			
			obj_output_shape  = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 2]
			
			cord_output_shape = [tf.shape(self.layer3)[0], base_shape[1], base_shape[2], base_shape[3], 24]
			
			self.objectness   = self.deconv3D_to_output(self.layer3, 30, 2, 3, 3, 3, [1, 2, 2, 2, 1], obj_output_shape, name="objectness", activation=None)
			
			self.cordinate    = self.deconv3D_to_output(self.layer3, 30, 24, 3, 3, 3, [1, 2, 2, 2, 1], cord_output_shape, name="cordinate", activation=None)
			
			self.y            = tf.nn.softmax(self.objectness, dim=-1)
		
	# batch Normalize 
	def batch_norm(self, inputs, phase_train = True, decay = 0.9, eps = 1e-5): 
		'''
		Inputs: 
			inputs: input data for last layer 
			phase_train: True / False, = True is train, = False is Test 
		Outputs: 
			norm data for next layer 
		'''
		gamma = tf.get_variable("gamma", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
		beta = tf.get_variable("beta", shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
		pop_mean = tf.get_variable("pop_mean", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
		pop_var = tf.get_variable("pop_var", trainable=False, shape=inputs.get_shape()[-1], dtype=tf.float32, initializer=tf.constant_initializer(1.0))
		axes = range(len(inputs.get_shape()) - 1)
		if phase_train == True:
			batch_mean, batch_var = tf.nn.moments(inputs, axes = [0, 1, 2, 3])
			train_mean = tf.assign(pop_mean, pop_mean * decay + batch_mean*(1 - decay))
			train_var = tf.assign(pop_var, pop_var * decay + batch_var * (1 - decay))
			with tf.control_dependencies([train_mean, train_var]):
				return tf.nn.batch_normalization(inputs, batch_mean, batch_var, beta, gamma, eps)
		else: 
			return tf.nn.batch_normalization(inputs, pop_mean, pop_var, beta, gamma, eps)
			
		
	# 3D Conv Layer 
	def conv3d_layer(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): 
		'''
		Inputs: 
			inputs: pre-Layer output 
			inputs_dims: pre-Layer output channels 
			outputs_dims: cur-Layer output channels 
			[length, height, width]: cur-Layer conv3d kernel size 
			stride: conv3d kernel move step in length/height/width axis
			activation: default use relu activation function 
			padding: conv3d 'padding' parameter 
		Outputs: 
			3D Conv. Layer outputs 	
		'''
		with tf.variable_scope("conv3D" + name): 
			# conv3d layer kernel 
			kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims], dtype = tf.float32, initializer=tf.truncated_normal_initializer(stddev=0.01))
			
			# conv3d layer bias 
			bias = tf.get_variable("bias", shape=[outputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.0))
		
			# conv3d 
			conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)
			
			bias = tf.nn.bias_add(conv, bias)
			
			if activation:
				bias = activation(bias, name="activation")
				
			bias = self.batch_norm(bias, phase_train)
			
		return bias
	
		
		
	# 3D Conv to Classification Layer 
	def conv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): 
		'''
		Inputs: 
			inputs: pre-Layer outputs 
			inputs_dims: pre-Layer output channels 
			outputs_dims: cur-Layer output channels 
			stride: conv3d kernel move step in length/height/width axis
			activation: default use relu activation function 
			padding: conv3d 'padding' parameter 
			outputs_shape: de-conv outputs shape  
		Outputs: 
			conv outputs 
		'''
		with tf.variable_scope("conv3D" + name):
			kernel = tf.get_variable("weights", shape=[length, height, width, inputs_dims, outputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.01))
			conv = tf.nn.conv3d(inputs, kernel, stride, padding=padding)
		return conv 
		
		
	# 3D Deconv. to Classification Layer 
	def deconv3D_to_output(self, inputs, inputs_dims, outputs_dims, height, width, length, stride, output_shape, activation=tf.nn.relu, padding="SAME", name="", phase_train = True): 
		'''
		Inputs: 
			inputs: pre-Layer outputs 
			inputs_dims: pre-Layer output channels 
			outputs_dims: cur-Layer output channels 
			stride: conv3d kernel move step in length/height/width axis
			activation: default use relu activation function 
			padding: conv3d 'padding' parameter 
			outputs_shape: de-conv outputs shape  
		Outputs: 
			de-conv outputs 
		'''
		with tf.variable_scope("deconv3D"+name):
			kernel = tf.get_variable("weights", shape=[length, height, width, outputs_dims, inputs_dims], dtype=tf.float32, initializer=tf.constant_initializer(0.01))
			deconv = tf.nn.conv3d_transpose(inputs, kernel, output_shape, stride, padding="SAME")
		
		return deconv 
		
	# define loss 
	def loss_Fun(self, lossType = 0, cord_loss_weight = 0.02): 
		'''
		Inputs: 
			lossType:  =  for difference loss Type 
			cord_loss_weight: 0.02 
		Outputs: 			
		'''
		if lossType == 0: 
			# print("g_obj")
			# print(self.cordinate.get_shape())
			g_obj = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list()[:4])
			g_cord = tf.placeholder(tf.float32, self.cordinate.get_shape().as_list())
			non_g_obj = tf.subtract(tf.ones_like(g_obj, dtype=tf.float32), g_obj )
				
			elosion = 0.00001
			y = self.y 
				
			is_obj_loss = -tf.reduce_sum(tf.multiply(g_obj , tf.log(y[:,:,:,:,0] + elosion)))         # object loss 
			non_obj_loss = tf.reduce_sum(tf.multiply(non_g_obj, tf.log(y[:, :, :, :, 0] + elosion)))  # non-object loss 
			
			cross_entropy = tf.add(is_obj_loss, non_obj_loss)
			obj_loss = cross_entropy
				
			cord_diff = tf.multiply(g_obj , tf.reduce_sum(tf.square(tf.subtract(self.cordinate, g_cord)), 4)) # cord loss 
			cord_loss = tf.multiply(tf.reduce_sum(cord_diff), cord_loss_weight)
				
			return tf.add(obj_loss, cord_loss), obj_loss, cord_loss, is_obj_loss, non_obj_loss, g_obj, g_cord, y 
	
	# Create Optimizer 
	def create_optimizer(self, all_loss, optType = "Adam", learnRate = 0.001): 
		'''
		Inputs: 
			all_loss: graph all_loss 
			lr: learn rate 
		Outputs: 
			optimizer 
		'''
		if optType == "Adam": 
			opt = tf.train.AdamOptimizer(learnRate)
			optimizer = opt.minimize(all_loss)
		
		return optimizer

2.2 数据预处理

'''Prepase KITTI data for 3D Object detection
Ref: 3D Fully Convolutional Network for Vehicle Detection in Point Cloud

Author: Shiwen He 
Date: 28 April 2018 

'''

import numpy as np 
from kitti_object import kitti_object as kittiReader
import kitti_util   

# lidar data => 3D Grid Voxel 

# filter lidar data by camera FoV 
def filter_camera_fov(pc): 
	'''
	Inputs: 
		pc: n x 3 
	Outputs: 
		filter_pc: m x 3, m <= 3 
	Notices: 
		FoV: from -45 degree to 45 degree  
	'''
	logic_fov = np.logical_and((pc[:, 1] < pc[:, 0] - 0.27), (-pc[:, 1] < pc[:, 0] - 0.27))

	filter_pc = pc[logic_fov]
	
	return filter_pc 
	

# filter lidar data by detection range 
def filter_lidar_range(pc, limitX, limitY, limitZ):
	''' 
	Inputs: 
		pc: n x 3, 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		filter_pc: m x 3, m <= n  
	'''
	logic_x = np.logical_and(pc[:, 0] >= limitX[0], pc[:, 0] < limitX[1])
	logic_y = np.logical_and(pc[:, 1] >= limitY[0], pc[:, 1] < limitY[1])
	logic_z = np.logical_and(pc[:, 2] >= limitZ[0], pc[:, 2] < limitZ[1])
	logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))
	
	filter_pc = pc[:, :3][logic_xyz]
	
	return  filter_pc
	
	
# filter center + corners 
def filter_center_corners(centers, corners, boxsizes, limitX, limitY, limitZ): 
	'''
	Inputs: 
		centers: n x 3 
		corners: n x 8 x 3 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		filter_centers: m x 3, m <= n 
		filter_corners: m x 3, m <= n  
	'''
	
	logic_x = np.logical_and(centers[:, 0] >= limitX[0], centers[:, 0] < limitX[1])
	logic_y = np.logical_and(centers[:, 1] >= limitY[0], centers[:, 1] < limitY[1])
	logic_z = np.logical_and(centers[:, 2] >= limitZ[0], centers[:, 2] < limitZ[1])
	logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))
	
	filter_centers_1 = centers[logic_xyz, :]
	filter_corners_1 = corners[logic_xyz, :, :]
	filter_boxsizes_1 = boxsizes[logic_xyz, :]
	
	shape_centers = filter_centers_1.shape; 
	
	filter_centers = np.zeros([shape_centers[0], 3])
	filter_corners = np.zeros([shape_centers[0], 8, 3]); 
	filter_boxsizes = np.zeros([shape_centers[0], 3]); 
	
	idx = 0
	for idx2 in range(shape_centers[0]): 
		logic_x = np.logical_and(filter_corners_1[idx2, :, 0] >= limitX[0], filter_corners_1[idx2, :, 0] < limitX[1])
		logic_y = np.logical_and(filter_corners_1[idx2, :, 1] >= limitY[0], filter_corners_1[idx2, :, 1] < limitY[1])
		logic_z = np.logical_and(filter_corners_1[idx2, :, 2] >= limitZ[0], filter_corners_1[idx2, :, 2] < limitZ[1])
		logic_xyz = np.logical_and(logic_x, np.logical_and(logic_y, logic_z))

		if logic_xyz.all(): 
			filter_centers[idx, :3] = filter_centers_1[idx2, :]
			filter_corners[idx, :8, :3] = filter_corners_1[idx2, :, :] 
			filter_boxsizes[idx, :3] = filter_boxsizes_1[idx2, :]
			idx += 1		
	if idx > 0:
		return filter_centers[:idx, :], filter_corners[:idx, :, :], filter_boxsizes[:idx, :]
	else:
		return None, None, None

def filter_label(object3Ds, objectType = 'Car'): 
	'''
	Inputs: 
		object3Ds:
		objectType:  
	Outputs: 
		centers, corners, rotatey 
		
	'''
	idx = 0
	data = np.zeros([50, 7]).astype(np.float32)
	for iter in object3Ds: 
		if iter.type == "DontCare": 
			continue;
					
		if iter.type == objectType: 
			# position 
			data[idx, 0] = iter.t[0]
			data[idx, 1] = iter.t[1]
			data[idx, 2] = iter.t[2]
			# size 
			data[idx, 3] = iter.h
			data[idx, 4] = iter.w
			data[idx, 5] = iter.l 
			# rotate 
			data[idx, 6] = iter.ry
			idx += 1         
	if idx > 0:
		return data[:idx, :3], data[:idx, 3:6], data[:idx, 6]
	else:
		return None, None, None
		
		
def proj_to_velo(calib_data):
	"""
	Inputs: 
		calib_data: 
	Outputs: 
		project matrix: from camera cordination to velodyne cordination
	"""
 
	rect            = calib_data.R0; # calib_data["R0_rect"].reshape(3, 3)
	velo_to_cam     = calib_data.V2C; # calib_data["Tr_velo_to_cam"].reshape(3, 4)
	inv_rect        = np.linalg.inv(rect)
	inv_velo_to_cam = np.linalg.pinv(velo_to_cam[:, :3])
	
	return np.dot(inv_velo_to_cam, inv_rect)


# corners_3d
def compute_3d_corners(centers, sizes, rotates): 
	''' 
	Inputs: 
			centers: 
			rotates: 
			sizes: 
	Outputs: 
			corners_3d:  n x 8 x 3 array in Lidar coord.
	'''
	
	# print(centers) 
	
	corners = []
	
	for place, rotate, sz in zip(centers, rotates, sizes):
		x, y, z = place
		h, w, l = sz
		if l > 10:
			continue

		corner = np.array([
			[x - l / 2., y - w / 2., z],
			[x + l / 2., y - w / 2., z],
			[x - l / 2., y + w / 2., z],
			[x - l / 2., y - w / 2., z + h],
			[x - l / 2., y + w / 2., z + h],
			[x + l / 2., y + w / 2., z],
			[x + l / 2., y - w / 2., z + h],
			[x + l / 2., y + w / 2., z + h],
		])

		corner -= np.array([x, y, z])

		rotate_matrix = np.array([
			[np.cos(rotate), -np.sin(rotate), 0],
			[np.sin(rotate), np.cos(rotate), 0],
			[0, 0, 1]
		])

		a = np.dot(corner, rotate_matrix.transpose())
		a += np.array([x, y, z])
		corners.append(a)
		
	corners_3d  = np.array(corners)   
	
	return corners_3d
	
# lidar data to 3D Grid Voxel 
def lidar_to_binary_voxel(pc, resolution, limitX, limitY, limitZ):
	''' 
	Inputs: 
		pc: n x 3, 
		resolution: 1 x 3, 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		voxel: shape is inputSize  
	'''
	
	voxel_pc = np.zeros_like(pc).astype(np.int32)
	 
	# Compute PointCloud Position in 3D Grid 
	voxel_pc[:, 0] = ((pc[:, 0] - limitX[0]) / resolution[0]).astype(np.int32)
	voxel_pc[:, 1] = ((pc[:, 1] - limitY[0]) / resolution[1]).astype(np.int32)
	voxel_pc[:, 2] = ((pc[:, 2] - limitZ[0]) / resolution[2]).astype(np.int32)
	
	# 3D Grid 
	voxel = np.zeros((int(round(limitX[1] - limitX[0]) / resolution[0]), int(round(limitY[1] - limitY[0]) / resolution[1]), \
	 int(round((limitZ[1] - limitZ[0]) / resolution[2])))) 
	
	# 3D Grid Value 
	voxel[voxel_pc[:, 0], voxel_pc[:, 1], voxel_pc[:, 2]] = 1
	
	return voxel 


# label center to 3D Grid Voxel Center(sphere) 
def center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ):
	''' 
	Inputs: 
		center:  n x 3 
		boxsize: n x 3
		scale: 1 x 1, = outputSize / inputSize
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		spheres: m x 3, m <= n  
	'''
	
	# from 3D Box's bottom center => 3D center 
	move_center = centers.copy(); 	

	print("centers")
	print(centers)
	print("boxsize")
	print(boxsize)
	
	move_center[:, 2] = centers[:, 2] + boxsize[:, 0] / 2;  

	
	# compute Label Center PointCloud Position in 3D Grid 
	spheres = np.zeros_like(move_center).astype(np.int32)
	
	spheres[:, 0] = ((move_center[:, 0] - limitX[0]) / resolution[0] * scale).astype(np.int32)
	spheres[:, 1] = ((move_center[:, 1] - limitY[0]) / resolution[1] * scale).astype(np.int32)
	spheres[:, 2] = ((move_center[:, 2] - limitZ[0]) / resolution[2] * scale).astype(np.int32)
	

	print("move_center")
	print(move_center)
	print("spheres")
	print(spheres)
	
	return spheres


# 3D Grid Voxel Center(sphere) to label center 
def sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ): 
	'''
	Inputs: 
		spheres: n x 3 
		scale: 1 x 1, = outputSize /  inputSize 
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2 
	Outputs: 
		centers: m x 3, m <= 3 
	'''
	centers = np.zeros_like(spheres).astype(np.float32); 
	
	centers[:, 0] = spheres[:, 0] * resolution[0] / scale + limitX[0]
	centers[:, 1] = spheres[:, 1] * resolution[1] / scale + limitY[0]
	centers[:, 2] = spheres[:, 2] * resolution[2] / scale + limitZ[0]
	
	return centers

# label corners to 3D Grid Voxel: corners - centers   
def corners_to_train(spheres, corners, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		spheres: n x 3
		corners: n x 8 x 3 
		scale: 1 x 1, = outputSize / inputSize
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2 
	Outputs: 
		train_corners: m x 3, m <= n 
	'''
	
	# 3D Grid Voxel Center => label center 
	centers = sphere_to_center(spheres, scale, resolution, limitX, limitY, limitZ)
	train_corners = np.zeros_like(corners).astype(np.float32)
	
	# train corners for regression loss 
	for index, (corner, center) in enumerate(zip(corners, centers)):
		train_corners[index] = corner - center  
	
	return train_corners
	
		
	
	
# create center and cordination for train 
def create_train_label(centers, corners, boxsize, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		centers: n x 3 
		corners: n x 8 x 3 
		boxsize: n x 3 
		scale: 1 x 1, outputSize / inputSize
		resolution: 1 x 3 
		limitX. limitY, limitZ: 1 x 2 
	Outputs: 
		train_centers: m x 3, m <= n 
		train_corners: m x 3, m <= n 
	'''
	
	train_centers = center_to_sphere(centers, boxsize, scale, resolution, limitX, limitY, limitZ)
	train_corners = corners_to_train(train_centers, corners, scale, resolution, limitX, limitY, limitZ)
	
	return train_centers, train_corners 
	

def create_obj_map(train_centers, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		centers: n x 3 
		scale: 1 x 1, outputSize / inputSize
		resolution: 1 x 3
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		obj_map: shape is scale * inputSize 
	'''
	
	# 3D Grid 
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))
	obj_map = np.zeros([sizeX, sizeY, sizeZ]) 
	
	# print("sizeX, sizeY, sizeZ")
	# print(sizeX, sizeY, sizeZ)
	
	# objectness map: label center in objectness map where value is 1  
	obj_map[train_centers[:,0], train_centers[:, 1], train_centers[:, 2]] = 1; 
	
	return obj_map  


def create_cord_map(train_centers, train_corners, scale, resolution, limitX, limitY, limitZ):
	'''
	Inputs: 
		train_centers: n x 3 
		train_corners: n x 8 x 3 
		scale: 1 x 1, outputSize / inputSize
		resolution: 1 x 3 
		limitX, limitY, limitZ: 1 x 2
	Outputs: 
		cord_map: shape is inputSize * scale   
	''' 
	# reshape train_corners:  n x 8 x 3 => n x 24 
	corners = train_corners.reshape(train_corners.shape[0], -1) 
	# 3D Grid 
	sizeX = int(round((limitX[1] - limitX[0]) / resolution[0] * scale))
	sizeY = int(round((limitY[1] - limitY[0]) / resolution[1] * scale))
	sizeZ = int(round((limitZ[1] - limitZ[0]) / resolution[2] * scale))
	sizeD = 24
	
	cord_map = np.zeros([sizeX, sizeY, sizeZ, sizeD]) 
	
	# print(train_centers)
	
	cord_map[train_centers[:,0], train_centers[:, 1], train_centers[:, 2]] = corners
	
	return cord_map 
	

# kitti data interface:  
class kitti_3DVoxel_interface(object): 
	
	def __init__(self, root_dir, objectType = 'Car', split='training', scale = 0.25, resolution = (0.2, 0.2, 0.2), limitX = (0, 80), limitY = (-40, 40), limitZ = (-2.5, 1.5)):
		'''
		Inputs: 
			case1 root_dir: train or val. data dir, train or val.'s file struct like: 
				root_dir->training->velodyne
				root_dir->training->calib
				root_dir->training->label_2 
			case2 root_dir: test data dir, test's file struct like: 
				root_dir->testing->velodyne
				root_dir->testing->calib 
		Outputs: 
				-None 
		'''
		self.root_dir   = root_dir
		self.split      = split
		self.object     = kittiReader(self.root_dir, self.split)
		self.objectType = objectType
		self.scale      = scale
		self.resolution = resolution    
		self.limitX     = limitX
		self.limitY     = limitY
		self.limitZ     = limitZ

	def read_kitti_data(self, idx = 0): 
		'''
		Inputs:
			idx: training or testing sample index
		Outputs:
			voxel    : inputSize
			obj_map  : scale * inputSize
			cord_map : scale * inputSize
		'''
	
		kitti_Object3Ds = None
		kitti_Lidar     = None 
		kitti_Calib     = None
	
		
		if self.split == 'training':
			# read Lidar data + Lidar Label + Calib data 
			kitti_Object3Ds = self.object.get_label_objects(idx);  
			kitti_Lidar     = self.object.get_lidar(idx);  
			kitti_Calib     = self.object.get_calibration(idx); 
			
			# lidar data filter 
			filter_fov = filter_camera_fov(kitti_Lidar) 
			filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)
		
			# label filter 
			centers, boxsizes, rotates = filter_label(kitti_Object3Ds, self.objectType)
			
		
			if centers is None:
				return None, None, None 
		
			# label center: Notice from camera Coordination to velo. Coordination  	
			if not(kitti_Calib is None): 
				proj_velo = proj_to_velo(kitti_Calib)[:, :3]
				centers = np.dot(centers, proj_velo.transpose())[:, :3] 
				
			
			# label corners: 
			corners = compute_3d_corners(centers, boxsizes, rotates)
	
			
			# print(corners)
			# print(corners.shape)
		
			# filter centers + corners 
			filter_centers, filter_corners, boxsizes = filter_center_corners(centers, corners, boxsizes, self.limitX, self.limitY, self.limitZ)
		
			# print(filter_centers)
			# print(filter_corners)
		
			if not(filter_centers is None): 
				# training center
				train_centers, train_corners = create_train_label(filter_centers, filter_corners, boxsizes, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)
				# print("filter_centers")
				# print(filter_centers)
				
				# print("train_centers")
				# print(train_centers)
				
				# obj_map / cord_map / voxel 
				obj_map = create_obj_map(train_centers, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)
				cord_map = create_cord_map(train_centers, train_corners, self.scale, self.resolution, self.limitX, self.limitY, self.limitZ)
				voxel = lidar_to_binary_voxel(filter_range, self.resolution, self.limitX, self.limitY, self.limitZ)
		
				return voxel, obj_map, cord_map
			else: 
				return None, None, None 
		elif self.split == 'testing':
			# read Lidar Data + Calib + Data 
			kitti_Lidar = self.object.get_lidar(idx);  
			kitti_Calib = self.object.get_calibration(idx); 
			
			# lidar data filter 
			filter_fov = filter_camera_fov(kitti_Lidar) 
			filter_range = filter_lidar_range(filter_fov, self.limitX, self.limitY, self.limitZ)
			
			voxel = lidar_to_binary_voxel(filter_range, self.resolution, self.limitX, self.limitY, self.limitZ)
			
			return voxel

				

if __name__ == '__main__':
	
	data_dir = "/home/hsw/桌面/PCL_API_Doc/frustum-pointnets-master/dataset"
	
	kitti_3DVoxel = kitti_3DVoxel_interface(data_dir, objectType = 'Car', split='training', scale = 0.25, resolution = (0.2, 0.2, 0.2), limitX = (0, 80), limitY = (-40, 40), limitZ = (-2.5, 1.5))
	
	sampleIdx = 195; 
	voxel, obj_map, cord_map = kitti_3DVoxel.read_kitti_data(sampleIdx)
	
	if not(voxel is None): 
		print(voxel.shape)
		print(obj_map.shape) 
		print(cord_map.shape)

2.3 KITTI数据读取相关

''' Helper class and functions for loading KITTI objects

Author: Charles R. Qi
Date: September 2017
'''
from __future__ import print_function

import os
import sys
import numpy as np
import cv2
from PIL import Image
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
ROOT_DIR = os.path.dirname(BASE_DIR)
sys.path.append(os.path.join(ROOT_DIR, 'mayavi'))
import kitti_util as utils

try:
    raw_input          # Python 2
except NameError:
    raw_input = input  # Python 3

# 3D static data 
class kitti_object(object):
    '''Load and parse object data into a usable format.'''
    
    def __init__(self, root_dir, split='training'):
        '''root_dir contains training and testing folders'''
        self.root_dir = root_dir
        self.split = split
        self.split_dir = os.path.join(root_dir, split)

        if split == 'training':
            self.num_samples = 7481
        elif split == 'testing':
            self.num_samples = 7518
        else:
            print('Unknown split: %s' % (split))
            exit(-1)
		
		# data dir 
        self.image_dir = os.path.join(self.split_dir, 'image_2')
        self.calib_dir = os.path.join(self.split_dir, 'calib')
        self.lidar_dir = os.path.join(self.split_dir, 'velodyne')
        self.label_dir = os.path.join(self.split_dir, 'label_2')

    def __len__(self):
        return self.num_samples
	# read image: return image  
    def get_image(self, idx):
        assert(idx=xmin) & \
        (pts_2d[:,1]=ymin)
    fov_inds = fov_inds & (pc_velo[:,0]>clip_distance)
    imgfov_pc_velo = pc_velo[fov_inds,:]
    if return_more:
        return imgfov_pc_velo, pts_2d, fov_inds
    else:
        return imgfov_pc_velo

def show_lidar_with_boxes(pc_velo, objects, calib,
                          img_fov=False, img_width=None, img_height=None): 
    ''' Show all LiDAR points.
        Draw 3d box in LiDAR point cloud (in velo coord system) '''
    if 'mlab' not in sys.modules: import mayavi.mlab as mlab
    from viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3d

    print(('All point num: ', pc_velo.shape[0]))
    fig = mlab.figure(figure=None, bgcolor=(0,0,0),
        fgcolor=None, engine=None, size=(1000, 500))
    if img_fov:
        pc_velo = get_lidar_in_image_fov(pc_velo, calib, 0, 0,
            img_width, img_height)
        print(('FOV point num: ', pc_velo.shape[0]))
    draw_lidar(pc_velo, fig=fig)

    for obj in objects:
        if obj.type=='DontCare':continue
        # Draw 3d bounding box
        box3d_pts_2d, box3d_pts_3d = utils.compute_box_3d(obj, calib.P) 
        box3d_pts_3d_velo = calib.project_rect_to_velo(box3d_pts_3d)
        # Draw heading arrow
        ori3d_pts_2d, ori3d_pts_3d = utils.compute_orientation_3d(obj, calib.P)
        ori3d_pts_3d_velo = calib.project_rect_to_velo(ori3d_pts_3d)
        x1,y1,z1 = ori3d_pts_3d_velo[0,:]
        x2,y2,z2 = ori3d_pts_3d_velo[1,:]
        draw_gt_boxes3d([box3d_pts_3d_velo], fig=fig)
        mlab.plot3d([x1, x2], [y1, y2], [z1,z2], color=(0.5,0.5,0.5),
            tube_radius=None, line_width=1, figure=fig)
    mlab.show(1)

def show_lidar_on_image(pc_velo, img, calib, img_width, img_height):
    ''' Project LiDAR points to image '''
    imgfov_pc_velo, pts_2d, fov_inds = get_lidar_in_image_fov(pc_velo,
        calib, 0, 0, img_width, img_height, True)
    imgfov_pts_2d = pts_2d[fov_inds,:]
    imgfov_pc_rect = calib.project_velo_to_rect(imgfov_pc_velo)

    import matplotlib.pyplot as plt
    cmap = plt.cm.get_cmap('hsv', 256)
    cmap = np.array([cmap(i) for i in range(256)])[:,:3]*255

    for i in range(imgfov_pts_2d.shape[0]):
        depth = imgfov_pc_rect[i,2]
        color = cmap[int(640.0/depth),:]
        cv2.circle(img, (int(np.round(imgfov_pts_2d[i,0])),
            int(np.round(imgfov_pts_2d[i,1]))),
            2, color=tuple(color), thickness=-1)
    Image.fromarray(img).show() 
    return img

def dataset_viz():
    dataset = kitti_object(os.path.join(ROOT_DIR, 'dataset/KITTI/object'))

    for data_idx in range(len(dataset)):
        # Load data from dataset
        objects = dataset.get_label_objects(data_idx)
        objects[0].print_object()
        img = dataset.get_image(data_idx)
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) 
        img_height, img_width, img_channel = img.shape
        print(('Image shape: ', img.shape))
        pc_velo = dataset.get_lidar(data_idx)[:,0:3]
        calib = dataset.get_calibration(data_idx)

        # Draw 2d and 3d boxes on image
        show_image_with_boxes(img, objects, calib, False)
        raw_input()
        # Show all LiDAR points. Draw 3d box in LiDAR point cloud
        show_lidar_with_boxes(pc_velo, objects, calib, True, img_width, img_height)
        raw_input()

if __name__=='__main__':
    import mayavi.mlab as mlab
    from viz_util import draw_lidar_simple, draw_lidar, draw_gt_boxes3d
    dataset_viz()

""" Helper methods for loading and parsing KITTI data.

Author: Charles R. Qi
Date: September 2017
"""
from __future__ import print_function

import numpy as np
import cv2
import os

class Object3d(object):
    ''' 3d object label '''
    def __init__(self, label_file_line):
        data = label_file_line.split(' ')
        data[1:] = [float(x) for x in data[1:]]

        # extract label, truncation, occlusion
        self.type = data[0] # 'Car', 'Pedestrian', ...
        self.truncation = data[1] # truncated pixel ratio [0..1]
        self.occlusion = int(data[2]) # 0=visible, 1=partly occluded, 2=fully occluded, 3=unknown
        self.alpha = data[3] # object observation angle [-pi..pi]

        # extract 2d bounding box in 0-based coordinates
        self.xmin = data[4] # left
        self.ymin = data[5] # top
        self.xmax = data[6] # right
        self.ymax = data[7] # bottom
        self.box2d = np.array([self.xmin,self.ymin,self.xmax,self.ymax])
        
        # extract 3d bounding box information
        self.h = data[8] # box height
        self.w = data[9] # box width
        self.l = data[10] # box length (in meters)
        self.t = (data[11],data[12],data[13]) # location (x,y,z) in camera coord.
        self.ry = data[14] # yaw angle (around Y-axis in camera coordinates) [-pi..pi]

    def print_object(self):
        print('Type, truncation, occlusion, alpha: %s, %d, %d, %f' % \
            (self.type, self.truncation, self.occlusion, self.alpha))
        print('2d bbox (x0,y0,x1,y1): %f, %f, %f, %f' % \
            (self.xmin, self.ymin, self.xmax, self.ymax))
        print('3d bbox h,w,l: %f, %f, %f' % \
            (self.h, self.w, self.l))
        print('3d bbox location, ry: (%f, %f, %f), %f' % \
            (self.t[0],self.t[1],self.t[2],self.ry))


class Calibration(object):
    ''' Calibration matrices and utils
        3d XYZ in .txt are in rect camera coord.
        2d box xy are in image2 coord
        Points in .bin are in Velodyne coord.

        y_image2 = P^2_rect * x_rect
        y_image2 = P^2_rect * R0_rect * Tr_velo_to_cam * x_velo
        x_ref = Tr_velo_to_cam * x_velo
        x_rect = R0_rect * x_ref

        P^2_rect = [f^2_u,  0,      c^2_u,  -f^2_u b^2_x;
                    0,      f^2_v,  c^2_v,  -f^2_v b^2_y;
                    0,      0,      1,      0]
                 = K * [1|t]

        image2 coord:
         ----> x-axis (u)
        |
        |
        v y-axis (v)

        velodyne coord:
        front x, left y, up z

        rect/ref camera coord:
        right x, down y, front z

        Ref (KITTI paper): http://www.cvlibs.net/publications/Geiger2013IJRR.pdf

        TODO(rqi): do matrix multiplication only once for each projection.
    '''
    def __init__(self, calib_filepath, from_video=False):
        if from_video:
            calibs = self.read_calib_from_video(calib_filepath)
        else:
            calibs = self.read_calib_file(calib_filepath)
        # Projection matrix from rect camera coord to image2 coord
        self.P = calibs['P2'] 
        self.P = np.reshape(self.P, [3,4])
        # Rigid transform from Velodyne coord to reference camera coord
        self.V2C = calibs['Tr_velo_to_cam']
        self.V2C = np.reshape(self.V2C, [3,4])
        self.C2V = inverse_rigid_trans(self.V2C)
        # Rotation from reference camera coord to rect camera coord
        self.R0 = calibs['R0_rect']
        self.R0 = np.reshape(self.R0,[3,3])

        # Camera intrinsics and extrinsics
        self.c_u = self.P[0,2]
        self.c_v = self.P[1,2]
        self.f_u = self.P[0,0]
        self.f_v = self.P[1,1]
        self.b_x = self.P[0,3]/(-self.f_u) # relative 
        self.b_y = self.P[1,3]/(-self.f_v)

    def read_calib_file(self, filepath):
        ''' Read in a calibration file and parse into a dictionary.
        Ref: https://github.com/utiasSTARS/pykitti/blob/master/pykitti/utils.py
        '''
        data = {}
        with open(filepath, 'r') as f:
            for line in f.readlines():
                line = line.rstrip()
                if len(line)==0: continue
                key, value = line.split(':', 1)
                # The only non-float values in these files are dates, which
                # we don't care about anyway
                try:
                    data[key] = np.array([float(x) for x in value.split()])
                except ValueError:
                    pass

        return data
    
    def read_calib_from_video(self, calib_root_dir):
        ''' Read calibration for camera 2 from video calib files.
            there are calib_cam_to_cam and calib_velo_to_cam under the calib_root_dir
        '''
        data = {}
        cam2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_cam_to_cam.txt'))
        velo2cam = self.read_calib_file(os.path.join(calib_root_dir, 'calib_velo_to_cam.txt'))
        Tr_velo_to_cam = np.zeros((3,4))
        Tr_velo_to_cam[0:3,0:3] = np.reshape(velo2cam['R'], [3,3])
        Tr_velo_to_cam[:,3] = velo2cam['T']
        data['Tr_velo_to_cam'] = np.reshape(Tr_velo_to_cam, [12])
        data['R0_rect'] = cam2cam['R_rect_00']
        data['P2'] = cam2cam['P_rect_02']
        return data

    def cart2hom(self, pts_3d):
        ''' Input: nx3 points in Cartesian
            Oupput: nx4 points in Homogeneous by pending 1
        '''
        n = pts_3d.shape[0]
        pts_3d_hom = np.hstack((pts_3d, np.ones((n,1))))
        return pts_3d_hom
 
    # =========================== 
    # ------- 3d to 3d ---------- 
    # =========================== 
    def project_velo_to_ref(self, pts_3d_velo):
        pts_3d_velo = self.cart2hom(pts_3d_velo) # nx4
        return np.dot(pts_3d_velo, np.transpose(self.V2C))

    def project_ref_to_velo(self, pts_3d_ref):
        pts_3d_ref = self.cart2hom(pts_3d_ref) # nx4
        return np.dot(pts_3d_ref, np.transpose(self.C2V))

    def project_rect_to_ref(self, pts_3d_rect):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(np.linalg.inv(self.R0), np.transpose(pts_3d_rect)))
    
    def project_ref_to_rect(self, pts_3d_ref):
        ''' Input and Output are nx3 points '''
        return np.transpose(np.dot(self.R0, np.transpose(pts_3d_ref)))
 
    def project_rect_to_velo(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx3 points in velodyne coord.
        ''' 
        pts_3d_ref = self.project_rect_to_ref(pts_3d_rect)
        return self.project_ref_to_velo(pts_3d_ref)

    def project_velo_to_rect(self, pts_3d_velo):
        pts_3d_ref = self.project_velo_to_ref(pts_3d_velo)
        return self.project_ref_to_rect(pts_3d_ref)

    # =========================== 
    # ------- 3d to 2d ---------- 
    # =========================== 
    def project_rect_to_image(self, pts_3d_rect):
        ''' Input: nx3 points in rect camera coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.cart2hom(pts_3d_rect)
        pts_2d = np.dot(pts_3d_rect, np.transpose(self.P)) # nx3
        pts_2d[:,0] /= pts_2d[:,2]
        pts_2d[:,1] /= pts_2d[:,2]
        return pts_2d[:,0:2]
    
    def project_velo_to_image(self, pts_3d_velo):
        ''' Input: nx3 points in velodyne coord.
            Output: nx2 points in image2 coord.
        '''
        pts_3d_rect = self.project_velo_to_rect(pts_3d_velo)
        return self.project_rect_to_image(pts_3d_rect)

    # =========================== 
    # ------- 2d to 3d ---------- 
    # =========================== 
    def project_image_to_rect(self, uv_depth):
        ''' Input: nx3 first two channels are uv, 3rd channel
                   is depth in rect camera coord.
            Output: nx3 points in rect camera coord.
        '''
        n = uv_depth.shape[0]
        x = ((uv_depth[:,0]-self.c_u)*uv_depth[:,2])/self.f_u + self.b_x
        y = ((uv_depth[:,1]-self.c_v)*uv_depth[:,2])/self.f_v + self.b_y
        pts_3d_rect = np.zeros((n,3))
        pts_3d_rect[:,0] = x
        pts_3d_rect[:,1] = y
        pts_3d_rect[:,2] = uv_depth[:,2]
        return pts_3d_rect

    def project_image_to_velo(self, uv_depth):
        pts_3d_rect = self.project_image_to_rect(uv_depth)
        return self.project_rect_to_velo(pts_3d_rect)

 
def rotx(t):
    ''' 3D Rotation about the x-axis. '''
    c = np.cos(t)
    s = np.sin(t)
    return np.array([[1,  0,  0],
                     [0,  c, -s],
                     [0,  s,  c]])


def roty(t):
    ''' Rotation about the y-axis. '''
    c = np.cos(t)
    s = np.sin(t)
    return np.array([[c,  0,  s],
                     [0,  1,  0],
                     [-s, 0,  c]])


def rotz(t):
    ''' Rotation about the z-axis. '''
    c = np.cos(t)
    s = np.sin(t)
    return np.array([[c, -s,  0],
                     [s,  c,  0],
                     [0,  0,  1]])


def transform_from_rot_trans(R, t):
    ''' Transforation matrix from rotation matrix and translation vector. '''
    R = R.reshape(3, 3)
    t = t.reshape(3, 1)
    return np.vstack((np.hstack([R, t]), [0, 0, 0, 1]))


def inverse_rigid_trans(Tr):
    ''' Inverse a rigid body transform matrix (3x4 as [R|t])
        [R'|-R't; 0|1]
    '''
    inv_Tr = np.zeros_like(Tr) # 3x4
    inv_Tr[0:3,0:3] = np.transpose(Tr[0:3,0:3])
    inv_Tr[0:3,3] = np.dot(-np.transpose(Tr[0:3,0:3]), Tr[0:3,3])
    return inv_Tr

def read_label(label_filename):
    lines = [line.rstrip() for line in open(label_filename)]
    objects = [Object3d(line) for line in lines]
    return objects

def load_image(img_filename):
    return cv2.imread(img_filename)

def load_velo_scan(velo_filename):
    scan = np.fromfile(velo_filename, dtype=np.float32)
    scan = scan.reshape((-1, 4))
    return scan

def project_to_image(pts_3d, P):
    ''' Project 3d points to image plane.

    Usage: pts_2d = projectToImage(pts_3d, P)
      input: pts_3d: nx3 matrix
             P:      3x4 projection matrix
      output: pts_2d: nx2 matrix

      P(3x4) dot pts_3d_extended(4xn) = projected_pts_2d(3xn)
      => normalize projected_pts_2d(2xn)

      <=> pts_3d_extended(nx4) dot P'(4x3) = projected_pts_2d(nx3)
          => normalize projected_pts_2d(nx2)
    '''
    n = pts_3d.shape[0]
    pts_3d_extend = np.hstack((pts_3d, np.ones((n,1))))
    print(('pts_3d_extend shape: ', pts_3d_extend.shape))
    pts_2d = np.dot(pts_3d_extend, np.transpose(P)) # nx3
    pts_2d[:,0] /= pts_2d[:,2]
    pts_2d[:,1] /= pts_2d[:,2]
    return pts_2d[:,0:2]
    

# corners_2d + corners_3d
def compute_box_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        bounding box into the image plane.
        Returns:
            corners_2d: (8,2) array in left image coord.
            corners_3d: (8,3) array in in rect camera coord.
    '''
    # compute rotational matrix around yaw axis
    R = roty(obj.ry)    

    # 3d bounding box dimensions
    l = obj.l;
    w = obj.w;
    h = obj.h;
    
    # 3d bounding box corners
    x_corners = [l/2,l/2,-l/2,-l/2,l/2,l/2,-l/2,-l/2];
    y_corners = [0,0,0,0,-h,-h,-h,-h];
    z_corners = [w/2,-w/2,-w/2,w/2,w/2,-w/2,-w/2,w/2];
    
    # rotate and translate 3d bounding box
    corners_3d = np.dot(R, np.vstack([x_corners,y_corners,z_corners]))
    #print corners_3d.shape
    corners_3d[0,:] = corners_3d[0,:] + obj.t[0];
    corners_3d[1,:] = corners_3d[1,:] + obj.t[1];
    corners_3d[2,:] = corners_3d[2,:] + obj.t[2];
    #print 'cornsers_3d: ', corners_3d 
    # only draw 3d bounding box for objs in front of the camera
    if np.any(corners_3d[2,:]<0.1):
        corners_2d = None
        return corners_2d, np.transpose(corners_3d)
    
    # project the 3d bounding box into the image plane
    corners_2d = project_to_image(np.transpose(corners_3d), P);
    #print 'corners_2d: ', corners_2d
    return corners_2d, np.transpose(corners_3d)


def compute_orientation_3d(obj, P):
    ''' Takes an object and a projection matrix (P) and projects the 3d
        object orientation vector into the image plane.
        Returns:
            orientation_2d: (2,2) array in left image coord.
            orientation_3d: (2,3) array in in rect camera coord.
    '''
    
    # compute rotational matrix around yaw axis
    R = roty(obj.ry)
   
    # orientation in object coordinate system
    orientation_3d = np.array([[0.0, obj.l],[0,0],[0,0]])
    
    # rotate and translate in camera coordinate system, project in image
    orientation_3d = np.dot(R, orientation_3d)
    orientation_3d[0,:] = orientation_3d[0,:] + obj.t[0]
    orientation_3d[1,:] = orientation_3d[1,:] + obj.t[1]
    orientation_3d[2,:] = orientation_3d[2,:] + obj.t[2]
    
    # vector behind image plane?
    if np.any(orientation_3d[2,:]<0.1):
      orientation_2d = None
      return orientation_2d, np.transpose(orientation_3d)
    
    # project orientation into the image plane
    orientation_2d = project_to_image(np.transpose(orientation_3d), P);
    return orientation_2d, np.transpose(orientation_3d)

def draw_projected_box3d(image, qs, color=(255,255,255), thickness=2):
    ''' Draw 3d bounding box in image
        qs: (8,3) array of vertices for the 3d box in following order:
            1 -------- 0
           /|         /|
          2 -------- 3 .
          | |        | |
          . 5 -------- 4
          |/         |/
          6 -------- 7
    '''
    qs = qs.astype(np.int32)
    for k in range(0,4):
       # Ref: http://docs.enthought.com/mayavi/mayavi/auto/mlab_helper_functions.html
       i,j=k,(k+1)%4
       # use LINE_AA for opencv3
       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)

       i,j=k+4,(k+1)%4 + 4
       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)

       i,j=k,k+4
       cv2.line(image, (qs[i,0],qs[i,1]), (qs[j,0],qs[j,1]), color, thickness, cv2.CV_AA)
    return image

3. 通过测试还在训练，但是我的硬件设备较差，所以，训练速度比较慢

你可能感兴趣的:(算法实现,KITTI数据集,tensorflow,深度学习,激光雷达,点云目标检测)

机器学习与深度学习间关系与区别 ℒℴѵℯ心·动ꦿ໊ོ꫞ 人工智能学习深度学习 python
一、机器学习概述定义机器学习（MachineLearning,ML）是一种通过数据驱动的方法，利用统计学和计算算法来训练模型，使计算机能够从数据中学习并自动进行预测或决策。机器学习通过分析大量数据样本，识别其中的模式和规律，从而对新的数据进行判断。其核心在于通过训练过程，让模型不断优化和提升其预测准确性。主要类型1.监督学习（SupervisedLearning）监督学习是指在训练数据集中包含输入
LLM 词汇表落难Coder LLMs NLP 大语言模型大模型 llama 人工智能
Contextwindow“上下文窗口”是指语言模型在生成新文本时能够回溯和参考的文本量。这不同于语言模型训练时所使用的大量数据集，而是代表了模型的“工作记忆”。较大的上下文窗口可以让模型理解和响应更复杂和更长的提示，而较小的上下文窗口可能会限制模型处理较长提示或在长时间对话中保持连贯性的能力。Fine-tuning微调是使用额外的数据进一步训练预训练语言模型的过程。这使得模型开始表示和模仿微调数
将cmd中命令输出保存为txt文本文件落难Coder Windows cmd window
最近深度学习本地的训练中我们常常要在命令行中运行自己的代码，无可厚非，我们有必要保存我们的炼丹结果，但是复制命令行输出到txt是非常麻烦的，其实Windows下的命令行为我们提供了相应的操作。其基本的调用格式就是：运行指令>输出到的文件名称或者具体保存路径测试下，我打开cmd并且ping一下百度：pingwww.baidu.com>./data.txt看下相同目录下data.txt的输出：如果你再
【目标检测数据集】卡车数据集1073张VOC+YOLO格式熬夜写代码的平头哥∰ 目标检测 YOLO 人工智能
数据集格式：PascalVOC格式+YOLO格式(不包含分割路径的txt文件，仅仅包含jpg图片以及对应的VOC格式xml文件和yolo格式txt文件)图片数量(jpg文件个数)：1073标注数量(xml文件个数)：1073标注数量(txt文件个数)：1073标注类别数：1标注类别名称:["truck"]每个类别标注的框数：truck框数=1120总框数：1120使用标注工具：labelImg标注
Faiss Tips：高效向量搜索与聚类的利器焦习娜Samantha
FaissTips：高效向量搜索与聚类的利器faiss_tipsSomeusefultipsforfaiss项目地址:https://gitcode.com/gh_mirrors/fa/faiss_tips项目介绍Faiss是由FacebookAIResearch开发的一个用于高效相似性搜索和密集向量聚类的库。它支持多种硬件平台，包括CPU和GPU，能够在海量数据集上实现快速的近似最近邻搜索（AN
番茄西红柿叶子病害分类数据集12882张11类别 futureflsl 数据集分类数据挖掘人工智能
数据集类型：图像分类用，不可用于目标检测无标注文件数据集格式：仅仅包含jpg图片，每个类别文件夹下面存放着对应图片图片数量(jpg文件个数)：12882分类类别数：11类别名称:["Bacterial_Spot_Bacteria","Early_Blight_Fungus","Healthy","Late_Blight_Water_Mold","Leaf_Mold_Fungus","Powdery
钢筋长度超限检测检数据集VOC+YOLO格式215张1类别 futureflsl 数据集 YOLO 深度学习机器学习
数据集格式：PascalVOC格式+YOLO格式(不包含分割路径的txt文件，仅仅包含jpg图片以及对应的VOC格式xml文件和yolo格式txt文件)图片数量(jpg文件个数)：215标注数量(xml文件个数)：215标注数量(txt文件个数)：215标注类别数：1标注类别名称:["iron"]每个类别标注的框数：iron框数=215总框数：215使用标注工具：labelImg标注规则：对类别进
Faiss：高效相似性搜索与聚类的利器网络·魚大数据 faiss
Faiss是一个针对大规模向量集合的相似性搜索库，由FacebookAIResearch开发。它提供了一系列高效的算法和数据结构，用于加速向量之间的相似性搜索，特别是在大规模数据集上。本文将介绍Faiss的原理、核心功能以及如何在实际项目中使用它。Faiss原理：近似最近邻搜索：Faiss的核心功能之一是近似最近邻搜索，它能够高效地在大规模数据集中找到与给定查询向量最相似的向量。这种搜索是近似的，
Python开发常用的三方模块如下：换个网名有点难 python 开发语言
Python是一门功能强大的编程语言，拥有丰富的第三方库，这些库为开发者提供了极大的便利。以下是100个常用的Python库，涵盖了多个领域：1、NumPy，用于科学计算的基础库。2、Pandas，提供数据结构和数据分析工具。3、Matplotlib，一个绘图库。4、Scikit-learn，机器学习库。5、SciPy，用于数学、科学和工程的库。6、TensorFlow，由Google开发的开源机
ES聚合分析原理与代码实例讲解光剑书架上的书大厂Offer收割机面试题简历程序员读书硅基计算碳基计算认知计算生物计算深度学习神经网络大数据 AIGC AGI LLM Java Python 架构设计 Agent 程序员实现财富自由
ES聚合分析原理与代码实例讲解1.背景介绍1.1问题的由来在大规模数据分析场景中，特别是在使用Elasticsearch（ES）进行数据存储和检索时，聚合分析成为了一个至关重要的功能。聚合分析允许用户对数据集进行细分和分组，以便深入探索数据的结构和模式。这在诸如实时监控、日志分析、业务洞察等领域具有广泛的应用。1.2研究现状目前，ES聚合分析已经成为现代大数据平台的核心组件之一。它支持多种类型的聚
K近邻算法_分类鸢尾花数据集 _feivirus_ 算法机器学习和数学分类机器学习 K近邻
importnumpyasnpimportpandasaspdfromsklearn.datasetsimportload_irisfromsklearn.model_selectionimporttrain_test_splitfromsklearn.metricsimportaccuracy_score1.数据预处理iris=load_iris()df=pd.DataFrame(data=ir
Python多线程实现大规模数据集高效转移 sand&wich 网络 python 服务器
背景在处理大规模数据集时，通常需要在不同存储设备、不同服务器或文件夹之间高效地传输数据。如果采用单线程传输方式，当数据量非常大时，整个过程会非常耗时。因此，通过多线程并行处理可以大幅提升数据传输效率。本文将分享一个基于Python多线程实现的高效数据传输工具，通过遍历源文件夹中的所有文件，将它们移动到目标文件夹。工具和库这个数据集转移工具主要依赖于以下Python标准库：os：用于文件系统操作，如
COCO 格式的数据集转化为 YOLO 格式的数据集 QYQY77 YOLO python
"""--json_path输入的json文件路径--save_path保存的文件夹名字，默认为当前目录下的labels。"""importosimportjsonfromtqdmimporttqdmimportargparseparser=argparse.ArgumentParser()parser.add_argument('--json_path',default='./instances
推荐3家毕业AI论文可五分钟一键生成！文末附免费教程！小猪包333 写论文人工智能 AI写作深度学习计算机视觉
在当前的学术研究和写作领域，AI论文生成器已经成为许多研究人员和学生的重要工具。这些工具不仅能够帮助用户快速生成高质量的论文内容，还能进行内容优化、查重和排版等操作。以下是三款值得推荐的AI论文生成器：千笔-AIPassPaper、懒人论文以及AIPaperPass。千笔-AIPassPaper千笔-AIPassPaper是一款基于深度学习和自然语言处理技术的AI写作助手，旨在帮助用户快速生成高质
AI大模型的架构演进与最新发展季风泯灭的季节 AI大模型应用技术二人工智能架构
随着深度学习的发展，AI大模型（LargeLanguageModels,LLMs）在自然语言处理、计算机视觉等领域取得了革命性的进展。本文将详细探讨AI大模型的架构演进，包括从Transformer的提出到GPT、BERT、T5等模型的历史演变，并探讨这些模型的技术细节及其在现代人工智能中的核心作用。一、基础模型介绍：Transformer的核心原理Transformer架构的背景在Transfo
[实践应用] 深度学习之模型性能评估指标 YuanDaima2048 深度学习工具使用深度学习人工智能损失函数性能评估 pytorch python 机器学习
文章总览：YuanDaiMa2048博客文章总览深度学习之模型性能评估指标分类任务回归任务排序任务聚类任务生成任务其他介绍在机器学习和深度学习领域，评估模型性能是一项至关重要的任务。不同的学习任务需要不同的性能指标来衡量模型的有效性。以下是对一些常见任务及其相应的性能评估指标的详细解释和总结。分类任务分类任务是指模型需要将输入数据分配到预定义的类别或标签中。以下是分类任务中常用的性能指标：准确率(
[实践应用] 深度学习之优化器 YuanDaima2048 深度学习工具使用 pytorch 深度学习人工智能机器学习 python 优化器
文章总览：YuanDaiMa2048博客文章总览深度学习之优化器1.随机梯度下降（SGD）2.动量优化（Momentum）3.自适应梯度（Adagrad）4.自适应矩估计（Adam）5.RMSprop总结其他介绍在深度学习中，优化器用于更新模型的参数，以最小化损失函数。常见的优化函数有很多种，下面是几种主流的优化器及其特点、原理和PyTorch实现：1.随机梯度下降（SGD）原理:随机梯度下降通过
生成式地图制图 Bwywb_3 深度学习机器学习深度学习生成对抗网络
生成式地图制图（GenerativeCartography）是一种利用生成式算法和人工智能技术自动创建地图的技术。它结合了传统的地理信息系统（GIS）技术与现代生成模型（如深度学习、GANs等），能够根据输入的数据自动生成符合需求的地图。这种方法在城市规划、虚拟环境设计、游戏开发等多个领域具有应用前景。主要特点：自动化生成：通过算法和模型，系统能够根据输入的地理或空间数据自动生成地图，而无需人工逐
[数据集][目标检测]汽车头部尾部检测数据集VOC+YOLO格式5319张3类别 FL1623863129 数据集目标检测汽车 YOLO
数据集制作单位：未来自主研究中心(FIRC)版权单位：未来自主研究中心(FIRC)版权声明：数据集仅仅供个人使用，不得在未授权情况下挂淘宝、咸鱼等交易网站公开售卖,由此引发的法律责任需自行承担数据集格式：PascalVOC格式+YOLO格式(不包含分割路径的txt文件，仅仅包含jpg图片以及对应的VOC格式xml文件和yolo格式txt文件)图片数量(jpg文件个数)：5319标注数量(xml文件
吴恩达深度学习笔记(30)-正则化的解释极客Array
正则化（Regularization）深度学习可能存在过拟合问题——高方差，有两个解决方法，一个是正则化，另一个是准备更多的数据，这是非常可靠的方法，但你可能无法时时刻刻准备足够多的训练数据或者获取更多数据的成本很高，但正则化通常有助于避免过拟合或减少你的网络误差。如果你怀疑神经网络过度拟合了数据，即存在高方差问题，那么最先想到的方法可能是正则化，另一个解决高方差的方法就是准备更多数据，这也是非常
Hadoop 傲雪凌霜，松柏长青后端大数据 hadoop 大数据分布式
ApacheHadoop是一个开源的分布式计算框架，主要用于处理海量数据集。它具有高度的可扩展性、容错性和高效的分布式存储与计算能力。Hadoop核心由四个主要模块组成，分别是HDFS（分布式文件系统）、MapReduce（分布式计算框架）、YARN（资源管理）和HadoopCommon（公共工具和库）。1.HDFS（HadoopDistributedFileSystem）HDFS是Hadoop生
个人学习笔记7-6：动手学深度学习pytorch版-李沐浪子L 深度学习深度学习笔记计算机视觉 python 人工智能神经网络 pytorch
#人工智能##深度学习##语义分割##计算机视觉##神经网络#计算机视觉13.11全卷积网络全卷积网络（fullyconvolutionalnetwork，FCN）采用卷积神经网络实现了从图像像素到像素类别的变换。引入l转置卷积（transposedconvolution）实现的，输出的类别预测与输入图像在像素级别上具有一一对应关系：通道维的输出即该位置对应像素的类别预测。13.11.1构造模型下
Hadoop架构 henan程序媛 hadoop 大数据分布式
一、案列分析1.1案例概述现在已经进入了大数据(BigData)时代，数以万计用户的互联网服务时时刻刻都在产生大量的交互，要处理的数据量实在是太大了，以传统的数据库技术等其他手段根本无法应对数据处理的实时性、有效性的需求。HDFS顺应时代出现，在解决大数据存储和计算方面有很多的优势。1.2案列前置知识点1.什么是大数据大数据是指无法在一定时间范围内用常规软件工具进行捕捉、管理和处理的大量数据集合，
[转载] NoSQL简介 weixin_30325793 大数据数据库运维
摘自“百度百科”。NoSQL，泛指非关系型的数据库。随着互联网web2.0网站的兴起，传统的关系数据库在应付web2.0网站，特别是超大规模和高并发的SNS类型的web2.0纯动态网站已经显得力不从心，暴露了很多难以克服的问题，而非关系型的数据库则由于其本身的特点得到了非常迅速的发展。NoSQL数据库的产生就是为了解决大规模数据集合多重数据种类带来的挑战，尤其是大数据应用难题。虽然NoSQL流行语
深度学习-点击率预估-研究论文2024-09-14速读 sp_fyf_2024 深度学习人工智能
深度学习-点击率预估-研究论文2024-09-14速读1.DeepTargetSessionInterestNetworkforClick-ThroughRatePredictionHZhong,JMa,XDuan,SGu,JYao-2024InternationalJointConferenceonNeuralNetworks,2024深度目标会话兴趣网络用于点击率预测摘要：这篇文章提出了一种新
python编写直方图和饼图 2301_80421078 python 开发语言
1.直方图#直方图的绘制#语法格式：plt.hist(x,bins),其中x:数据集；bins:统计数据的分布区间importmatplotlib.pyplotaspltimportpandasaspd#导入文件excel=pd.read_excel('成绩.xlsx')#print(excel)#避免乱码plt.rcParams['font.sans-serif']=['SimHei']x=ex
Cut, Paste and Learn方法解读 wangxinwei2000 深度学习人工智能
Abstract问题背景：标注数据的缺乏：在实例检测任务中，部署物体检测模型的一个主要障碍是缺乏大量标注数据。例如，在一个特定的厨房环境中找到包含实例的大型标注数据集是不太可能的。每当面对新的环境和新的物体实例时，都需要进行昂贵的数据收集和标注工作。研究贡献：解决方法：本文提出了一种简单的方法，可以以最小的努力生成大量标注的实例数据集。关键洞察：研究者的关键洞察是，仅仅确保“局部真实感”（patc
Python中判断两个字符串的内容是否相同 songyuc 《Python学习笔记》Python
1前言今天在划分数据集的时候，需要判断两个字符串的内容是否相同，这个之前查过，不过好像忘记了，所以想着再记录一下～2Python中判断两个字符串的内容是否相同使用“==”符号进行判断，这个判断是根据字符串中字符的ASCII进行判断的；在判断字符串内容是否相同时，不能使用“is”进行判断，因为is是判断变量的内存ID（即使用函数id(a)获得变量的内存ID）是否相同；
机器学习流形数据降维：UMAP 降维算法小嗷犬 Python 机器学习 #数据分析及可视化机器学习算法人工智能
✅作者简介：人工智能专业本科在读，喜欢计算机与编程，写博客记录自己的学习历程。个人主页：小嗷犬的个人主页个人网站：小嗷犬的技术小站个人信条：为天地立心，为生民立命，为往圣继绝学，为万世开太平。本文目录UMAP简介理论基础特点与优势应用场景在Python中使用UMAP安装umap-learn库使用UMAP可视化手写数字数据集UMAP简介UMAP（UniformManifoldApproximatio
损失函数与反向传播 Star_. PyTorch pytorch 深度学习 python
损失函数定义与作用损失函数(lossfunction)在深度学习领域是用来计算搭建模型预测的输出值和真实值之间的误差。1.损失函数越小越好2.计算实际输出与目标之间的差距3.为更新输出提供依据（反向传播)常见的损失函数回归常见的损失函数有：均方差（MeanSquaredError，MSE）、平均绝对误差（MeanAbsoluteErrorLoss，MAE）、HuberLoss是一种将MSE与MAE
HQL之投影查询归来朝歌 HQL Hibernate 查询语句投影查询
在HQL查询中，常常面临这样一个场景，对于多表查询，是要将一个表的对象查出来还是要只需要每个表中的几个字段，最后放在一起显示？针对上面的场景，如果需要将一个对象查出来： HQL语句写“from 对象”即可 Session session = HibernateUtil.openSession();
Spring整合redis bylijinnan redis
pom.xml <dependencies>  <dependency> <groupId>org.springframework.data</groupId> <artifactId>spring-data-redi
org.hibernate.NonUniqueResultException: query did not return a unique result: 2 0624chenhong Hibernate
参考：http://blog.csdn.net/qingfeilee/article/details/7052736 org.hibernate.NonUniqueResultException: query did not return a unique result: 2 在项目中出现了org.hiber
android动画效果不懂事的小屁孩 android动画
前几天弄alertdialog和popupwindow的时候，用到了android的动画效果，今天专门研究了一下关于android的动画效果，列出来，方便以后使用。 Android 平台提供了两类动画。一类是Tween动画，就是对场景里的对象不断的进行图像变化来产生动画效果（旋转、平移、放缩和渐变）。第二类就是 Frame动画，即顺序的播放事先做好的图像，与gif图片原理类似。
js delete 删除机理以及它的内存泄露问题的解决方案换个号韩国红果果 JavaScript
delete删除属性时只是解除了属性与对象的绑定，故当属性值为一个对象时，删除时会造成内存泄露（其实还未删除）举例： var person={name:{firstname:'bob'}} var p=person.name delete person.name p.firstname -->'bob' // 依然可以访问p.firstname，存在内存泄露
Oracle将零干预分析加入网络即服务计划蓝儿唯美 oracle
由Oracle通信技术部门主导的演示项目并没有在本月较早前法国南斯举行的行业集团TM论坛大会中获得嘉奖。但是，Oracle通信官员解雇致力于打造一个支持零干预分配和编制功能的网络即服务（NaaS）平台，帮助企业以更灵活和更适合云的方式实现通信服务提供商（CSP）的连接产品。这个Oracle主导的项目属于TM Forum Live!活动上展示的Catalyst计划的19个项目之一。Catalyst计
spring学习——springmvc（二） a-john springMVC
Spring MVC提供了非常方便的文件上传功能。 1，配置Spring支持文件上传： DispatcherServlet本身并不知道如何处理multipart的表单数据，需要一个multipart解析器把POST请求的multipart数据中抽取出来，这样DispatcherServlet就能将其传递给我们的控制器了。为了在Spring中注册multipart解析器，需要声明一个实现了Mul
POJ-2828-Buy Tickets aijuans ACM_POJ
POJ-2828-Buy Tickets http://poj.org/problem?id=2828 线段树，逆序插入 #include<iostream>#include<cstdio>#include<cstring>#include<cstdlib>using namespace std;#define N 200010struct
Java Ant build.xml详解 asia007 build.xml
1,什么是antant是构建工具2,什么是构建概念到处可查到，形象来说，你要把代码从某个地方拿来，编译，再拷贝到某个地方去等等操作，当然不仅与此，但是主要用来干这个3,ant的好处跨平台 --因为ant是使用java实现的，所以它跨平台使用简单--与ant的兄弟make比起来语法清晰--同样是和make相比功能强大--ant能做的事情很多，可能你用了很久，你仍然不知道它能有
android按钮监听器的四种技术百合不是茶 android xml配置监听器实现接口
android开发中经常会用到各种各样的监听器,android监听器的写法与java又有不同的地方; 1,activity中使用内部类实现接口 ,创建内部类实例使用add方法与java类似创建监听器的实例 myLis lis = new myLis(); 使用add方法给按钮添加监听器
软件架构师不等同于资深程序员 bijian1013 程序员架构师架构设计
本文的作者Armel Nene是ETAPIX Global公司的首席架构师，他居住在伦敦，他参与过的开源项目包括 Apache Lucene,，Apache Nutch， Liferay 和 Pentaho等。如今很多的公司
TeamForge Wiki Syntax & CollabNet User Information Center sunjing TeamForge How do Attachement Anchor Wiki Syntax
the CollabNet user information center http://help.collab.net/ How do I create a new Wiki page? A CollabNet TeamForge project can have any number of Wiki pages. All Wiki pages are linked, and
【Redis四】Redis数据类型 bit1129 redis
概述 Redis是一个高性能的数据结构服务器，称之为数据结构服务器的原因是，它提供了丰富的数据类型以满足不同的应用场景，本文对Redis的数据类型以及对这些类型可能的操作进行总结。 Redis常用的数据类型包括string、set、list、hash以及sorted set.Redis本身是K/V系统，这里的数据类型指的是value的类型，而不是key的类型，key的类型只有一种即string
SSH2整合-附源码白糖_ eclipse spring tomcat Hibernate Google
今天用eclipse终于整合出了struts2+hibernate+spring框架。我创建的是tomcat项目，需要有tomcat插件。导入项目以后，鼠标右键选择属性，然后再找到“tomcat”项，勾选一下“Is a tomcat project”即可。具体方法见源码里的jsp图片，sql也在源码里。补充1：项目中部分jar包不是最新版的，可能导
[转]开源项目代码的学习方法 braveCS 学习方法
转自： http://blog.sina.com.cn/s/blog_693458530100lk5m.html http://www.cnblogs.com/west-link/archive/2011/06/07/2074466.html 1）阅读features。以此来搞清楚该项目有哪些特性2）思考。想想如果自己来做有这些features的项目该如何构架3）下载并安装d
编程之美-子数组的最大和（二维） bylijinnan 编程之美
package beautyOfCoding; import java.util.Arrays; import java.util.Random; public class MaxSubArraySum2 { /** * 编程之美子数组之和的最大值（二维） */ private static final int ROW = 5; private stat
读书笔记-3 chengxuyuancsdn jquery笔记 resultMap配置 ibatis一对多配置
1、resultMap配置 2、ibatis一对多配置 3、jquery笔记 1、resultMap配置当<select resultMap="topic_data"> <resultMap id="topic_data">必须一一对应。 (1)<resultMap class="tblTopic&q
[物理与天文]物理学新进展 comsci
如果我们必须获得某种地球上没有的矿石,才能够进行某些能量输出装置的设计和建造,而要获得这种矿石,又必须首先进行深空探测,而要进行深空探测,又必须获得这种能量输出装置,这个矛盾的循环,会导致地球联盟在与宇宙文明建立关系的时候,陷入困境怎么办呢?
Oracle 11g新特性:Automatic Diagnostic Repository daizj oracle ADR
Oracle Database 11g的FDI（Fault Diagnosability Infrastructure）是自动化诊断方面的又一增强。 FDI的一个关键组件是自动诊断库（Automatic Diagnostic Repository-ADR）。在oracle 11g中，alert文件的信息是以xml的文件格式存在的，另外提供了普通文本格式的alert文件。这两份log文
简单排序:选择排序 dieslrae 选择排序
public void selectSort(int[] array){ int select; for(int i=0;i<array.length;i++){ select = i; for(int k=i+1;k<array.leng
C语言学习六指针的经典程序，互换两个数字 dcj3sjt126com c
示例程序，swap_1和swap_2都是错误的，推理从1开始推到2，2没完成，推到3就完成了 # include <stdio.h> void swap_1(int, int); void swap_2(int *, int *); void swap_3(int *, int *); int main(void) { int a = 3; int b =
php 5.4中php-fpm 的重启、终止操作命令 dcj3sjt126com PHP
php 5.4中php-fpm 的重启、终止操作命令: 查看php运行目录命令：which php/usr/bin/php 查看php-fpm进程数：ps aux | grep -c php-fpm 查看运行内存/usr/bin/php -i|grep mem 重启php-fpm/etc/init.d/php-fpm restart 在phpinfo()输出内容可以看到php
线程同步工具类 shuizhaosi888 同步工具类
同步工具类包括信号量（Semaphore）、栅栏（barrier）、闭锁（CountDownLatch）闭锁（CountDownLatch） public class RunMain { public long timeTasks(int nThreads, final Runnable task) throws InterruptedException { fin
bleeding edge是什么意思 haojinghua DI
不止一次，看到很多讲技术的文章里面出现过这个词语。今天终于弄懂了——通过朋友给的浏览软件，上了wiki。我再一次感到，没有辞典能像WiKi一样，给出这样体贴人心、一清二楚的解释了。为了表达我对WiKi的喜爱，只好在此一一中英对照，给大家上次课。 In computer science, bleeding edge is a term that
c中实现utf8和gbk的互转 jimmee c iconv utf8&gbk编码
#include <iconv.h> #include <stdlib.h> #include <stdio.h> #include <unistd.h> #include <fcntl.h> #include <string.h> #include <sys/stat.h> int code_c
大型分布式网站架构设计与实践 lilin530 应用服务器搜索引擎
1.大型网站软件系统的特点？ a.高并发，大流量。 b.高可用。 c.海量数据。 d.用户分布广泛，网络情况复杂。 e.安全环境恶劣。 f.需求快速变更，发布频繁。 g.渐进式发展。 2.大型网站架构演化发展历程？ a.初始阶段的网站架构。应用程序，数据库，文件等所有的资源都在一台服务器上。 b.应用服务器和数据服务器分离。 c.使用缓存改善网站性能。 d.使用应用
在代码中获取Android theme中的attr属性值 OliveExcel android theme
Android的Theme是由各种attr组合而成, 每个attr对应了这个属性的一个引用, 这个引用又可以是各种东西. 在某些情况下, 我们需要获取非自定义的主题下某个属性的内容 (比如拿到系统默认的配色colorAccent), 操作方式举例一则: int defaultColor = 0xFF000000; int[] attrsArray = { andorid.r.
基于Zookeeper的分布式共享锁 roadrunners zookeeper 分布式共享锁
首先，说说我们的场景，订单服务是做成集群的，当两个以上结点同时收到一个相同订单的创建指令，这时并发就产生了，系统就会重复创建订单。等等......场景。这时，分布式共享锁就闪亮登场了。共享锁在同一个进程中是很容易实现的，但在跨进程或者在不同Server之间就不好实现了。Zookeeper就很容易实现。具体的实现原理官网和其它网站也有翻译，这里就不在赘述了。官
两个容易被忽略的MySQL知识 tomcat_oracle mysql
1、varchar(5)可以存储多少个汉字，多少个字母数字？　　相信有好多人应该跟我一样，对这个已经很熟悉了，根据经验我们能很快的做出决定，比如说用varchar(200)去存储url等等，但是，即使你用了很多次也很熟悉了，也有可能对上面的问题做出错误的回答。　　这个问题我查了好多资料，有的人说是可以存储5个字符，2.5个汉字（每个汉字占用两个字节的话），有的人说这个要区分版本，5.0
zoj 3827 Information Entropy(水题) 阿尔萨斯 format
题目链接：zoj 3827 Information Entropy 题目大意：三种底，计算和。解题思路：调用库函数就可以直接算了，不过要注意Pi = 0的时候，不过它题目里居然也讲了。。。limp→0+plogb(p)=0，因为p是logp的高阶。 #include <cstdio> #include <cstring> #include <cmath&