通过这个文件获取相机的translation和rotation, camera_intrinsic
calibrated_sensor {
"token": <str> -- Unique record identifier.
"sensor_token": <str> -- Foreign key pointing to the sensor type.
"translation": <float> [3] -- Coordinate system origin in meters: x, y, z.
"rotation": <float> [4] -- Coordinate system orientation as quaternion: w, x, y, z.
"camera_intrinsic": <float> [3, 3] -- Intrinsic camera calibration. Empty for sensors that are not cameras.
sample_data {
"token": <str> -- Unique record identifier.
"sample_token": <str> -- Foreign key. Sample to which this sample_data is associated.
"ego_pose_token": <str> -- Foreign key.
"calibrated_sensor_token": <str> -- Foreign key.
"filename": <str> -- Relative path to data-blob on disk.
"fileformat": <str> -- Data file format.
"width": <int> -- If the sample data is an image, this is the image width in pixels.
"height": <int> -- If the sample data is an image, this is the image height in pixels.
"timestamp": <int> -- Unix time stamp.
"is_key_frame": <bool> -- True if sample_data is part of key_frame, else False.
"next": <str> -- Foreign key. Sample data from the same sensor that follows this in time. Empty if end of scene.
"prev": <str> -- Foreign key. Sample data from the same sensor that precedes this in time. Empty if start of scene.
size: bbox的wlh
sample_annotation {
"translation": <float> [3] -- Bounding box location in meters as center_x, center_y, center_z.
"size": <float> [3] -- Bounding box size in meters as width, length, height.
"rotation": <float> [4] -- Bounding box orientation as quaternion: w, x, y, z.
# Fetch the boxes and the camera intrinsic matrix for this image; the data
# path (first return value) is not needed here.
_, boxes, camera_intrinsic = nusc.get_sample_data(
image_token, box_vis_level=BoxVisibility.ANY)
# Copy the intrinsics into the top-left 3x3 corner of a 4x4 identity and keep
# the first three rows, which leaves a 3x4 matrix whose last column is zero.
calib = np.eye(4, dtype=np.float32)
calib[:3, :3] = camera_intrinsic
calib = calib[:3]
# NOTE(review): this listing is flattened (no indentation, docstring quotes
# missing) and appears to omit lines: there is no `else:` ahead of the
# `cam_intrinsic = None` / `imsize = None` fallback or ahead of
# `boxes = self.get_boxes(...)`, nothing follows the two "Move box to ..."
# comments, and nothing is ever appended to `box_list`. Confirm against the
# upstream source before relying on it.
def get_sample_data(self, sample_data_token: str,
box_vis_level: BoxVisibility = BoxVisibility.ANY,
selected_anntokens: List[str] = None,
use_flat_vehicle_coordinates: bool = False) -> \
Tuple[str, List[Box], np.array]:
Returns the data path as well as all annotations related to that sample_data.
Note that the boxes are transformed into the current sensor's coordinate frame.
:param sample_data_token: Sample_data token.
:param box_vis_level: If sample_data is an image, this sets required visibility for boxes.
:param selected_anntokens: If provided only return the selected annotation.
:param use_flat_vehicle_coordinates: Instead of the current sensor's coordinate frame, use ego frame which is
aligned to z-plane in the world.
:return: (data_path, boxes, camera_intrinsic )
# Retrieve sensor & pose records
sd_record = self.get('sample_data', sample_data_token)
# 1. calibrated_sensor
cs_record = self.get('calibrated_sensor', sd_record['calibrated_sensor_token'])
sensor_record = self.get('sensor', cs_record['sensor_token'])
pose_record = self.get('ego_pose', sd_record['ego_pose_token'])
data_path = self.get_sample_data_path(sample_data_token)
if sensor_record['modality'] == 'camera':
# 2. Read the camera intrinsic matrix (cam_intrinsic) from the calibrated_sensor record.
cam_intrinsic = np.array(cs_record['camera_intrinsic'])
imsize = (sd_record['width'], sd_record['height'])
cam_intrinsic = None
imsize = None
# Retrieve all sample annotations and map to sensor coordinate system.
if selected_anntokens is not None:
boxes = list(map(self.get_box, selected_anntokens))
boxes = self.get_boxes(sample_data_token)
# Make list of Box objects including coord system transforms.
box_list = []
for box in boxes:
if use_flat_vehicle_coordinates:
# Move box to ego vehicle coord system parallel to world z plane.
yaw = Quaternion(pose_record['rotation']).yaw_pitch_roll[0]
box.rotate(Quaternion(scalar=np.cos(yaw / 2), vector=[0, 0, np.sin(yaw / 2)]).inverse)
# Move box to ego vehicle coord system.
# Move box to sensor coord system.
if sensor_record['modality'] == 'camera' and not \
box_in_image(box, cam_intrinsic, imsize, vis_level=box_vis_level):
return data_path, box_list, cam_intrinsic
# Heading of the box: rotate the unit x-axis by the box orientation and read
# the yaw from the x/z components of the result.
# NOTE(review): the first operand of np.dot was lost in this listing; it is
# restored as `box.rotation_matrix` to match the note that `box` is a Box
# rotated by its quaternion -- confirm against the upstream converter.
v = np.dot(box.rotation_matrix, np.array([1, 0, 0]))
yaw = -np.arctan2(v[2], v[0])
# Observation angle evaluated at the horizontal centre of the 2D box, using
# the principal point cx = K[0, 2] and focal length fx = K[0, 0].
alpha = _rot_y2alpha(yaw, (bbox[0] + bbox[2]) / 2, camera_intrinsic[0, 2], camera_intrinsic[0, 0])
# Store the 2D box as [x, y, width, height] together with its area.
ann['bbox'] = [bbox[0], bbox[1], bbox[2] - bbox[0], bbox[3] - bbox[1]]
ann['area'] = (bbox[2] - bbox[0]) * (bbox[3] - bbox[1])
ann['alpha'] = alpha
这里的box为Box类。用的是四元数旋转矩阵做的三维空间转换。其中orientation: Quaternion是sample_annotation.json文件中读取的rotation。
def _rot_y2alpha(rot_y, x, cx, fx):
Get rotation_y by alpha + theta - 180
alpha : Observation angle of object, ranging [-pi..pi]
x : Object center x to the camera center (x-W/2), in pixels
rotation_y : Rotation ry around Y-axis in camera coordinates [-pi..pi]
alpha = rot_y - np.arctan2(x - cx, fx)
if alpha > np.pi:
alpha -= 2 * np.pi
if alpha < -np.pi:
alpha += 2 * np.pi
return alpha
# Project the 3D point (x, y, z) with the camera matrix `calib` to obtain its
# position (x', y') in the 2D image. The point used is the box center shifted
# by half the box height in the negative y direction.
# NOTE(review): the coordinate operands were garbled to `[0]`, `[1]`, `[2]` in
# this listing; they are restored as `box.center[...]` -- confirm upstream.
amodel_center = project_to_image(
    np.array([box.center[0], box.center[1] - box.wlh[2] / 2, box.center[2]],
             np.float32).reshape(1, 3), calib)[0].tolist()
def project_to_image(pts_3d, P):
    """Project 3D points into the image with a 3x4 projection matrix.

    :param pts_3d: Array of shape (n, 3), one 3D point per row.
    :param P: Projection matrix of shape (3, 4).
    :return: Array of shape (n, 2) with the projected pixel coordinates.
    """
    # Append a column of ones to obtain homogeneous coordinates (n x 4).
    ones = np.ones((pts_3d.shape[0], 1), dtype=np.float32)
    pts_3d_homo = np.concatenate([pts_3d, ones], axis=1)
    # (3 x 4) . (4 x n) -> (3 x n), transposed back to one point per row.
    pts_2d = np.dot(P, pts_3d_homo.transpose(1, 0)).transpose(1, 0)
    # Perspective divide by the third (depth) component.
    pts_2d = pts_2d[:, :2] / pts_2d[:, 2:]
    return pts_2d
# One annotation record per 3D box.
# NOTE(review): the 'location' entries were garbled to `[0]`, `[1]`, `[2]` and
# the closing brace was missing in this listing; both are restored here with
# `box.center[...]` -- confirm against the upstream converter.
ann = {
    'id': num_anns,
    'image_id': num_images,
    'category_id': category_id,
    # wlh is (width, length, height); 'dim' is stored as (height, width, length).
    'dim': [box.wlh[2], box.wlh[0], box.wlh[1]],
    'location': [box.center[0], box.center[1], box.center[2]],
    'occluded': 0,
    'truncated': 0,
    'rotation_y': yaw,
    'amodel_center': amodel_center,
    'iscrowd': 0,
    'track_id': track_id,
    'attributes': ATTRIBUTE_TO_ID[att],
    'velocity': vel
}
# 2D bounding box of the 3D box for a 1600x900 image. A deep copy is passed
# because project_kitti_box_to_image translates the box it receives in place.
bbox = KittiDB.project_kitti_box_to_image(
copy.deepcopy(box), camera_intrinsic, imsize=(1600, 900))
def project_kitti_box_to_image(box: Box, p_left: np.ndarray, imsize: Tuple[int, int]) \
        -> Union[None, Tuple[int, int, int, int]]:
    """
    Projects 3D box into KITTI image FOV.

    The box is translated in place (the defensive ``box.copy()`` is disabled),
    so callers that still need the original must pass a copy.

    :param box: 3D box in KITTI reference frame.
    :param p_left: Projection matrix handed to view_points.
    :param imsize: (width, height). Image size.
    :return: (xmin, ymin, xmax, ymax). Bounding box in image plane or None if box is not in the image.
    """
    # KITTI defines the box center as the bottom center of the object.
    # We use the true center, so we need to adjust half height in negative y direction.
    box.translate(np.array([0, -box.wlh[2] / 2, 0]))

    # Keep only the corners that lie in front of the camera (positive depth).
    corners = np.array([corner for corner in box.corners().T if corner[2] > 0]).T
    if len(corners) == 0:
        return None

    # Project the remaining 3D corners to pixel coordinates and take the
    # axis-aligned extent of the projected points as the 2D box.
    imcorners = view_points(corners, p_left, normalize=True)[:2]
    bbox = (np.min(imcorners[0]), np.min(imcorners[1]), np.max(imcorners[0]), np.max(imcorners[1]))

    # Crop bbox to prevent it extending outside image: x values are limited by
    # the width and y values by the height. (ymin was previously clamped with
    # the width, which is wrong whenever width != height.)
    width, height = imsize[0], imsize[1]
    bbox_crop = tuple(max(0, b) for b in bbox)
    bbox_crop = (min(width, bbox_crop[0]),
                 min(height, bbox_crop[1]),
                 min(width, bbox_crop[2]),
                 min(height, bbox_crop[3]))

    # Detect if a cropped box is empty.
    if bbox_crop[0] >= bbox_crop[2] or bbox_crop[1] >= bbox_crop[3]:
        return None

    return bbox_crop
def view_points(points: np.ndarray, view: np.ndarray, normalize: bool) -> np.ndarray:
    """
    Helper that maps 3d points to a 2d plane. It can be used to implement both perspective and
    orthographic projections. It first applies the dot product between the points and the view.
    By convention, the view should be such that the data is projected onto the first 2 axis.
    It then optionally applies a normalization along the third dimension.

    For a perspective projection the view should be a 3x3 camera matrix, and normalize=True
    For an orthographic projection with translation the view is a 3x4 matrix and normalize=False
    For an orthographic projection without translation the view is a 3x3 matrix (optionally 3x4 with
    last columns all zeros) and normalize=False

    :param points: Matrix of shape (3, n), where each point (x, y, z) is along each column.
    :param view: Matrix with at most 4 rows and 4 columns defining an arbitrary projection.
        The projection should be such that the corners are projected onto the first 2 axis.
    :param normalize: Whether to normalize the remaining coordinate (along the third axis).
    :return: Matrix of shape (3, n) with the mapped points. If normalize=False, the third
        coordinate is the height.
    """
    assert view.shape[0] <= 4
    assert view.shape[1] <= 4
    assert points.shape[0] == 3

    # Embed the view in a 4x4 identity so that any view up to 4x4 can be applied
    # to homogeneous points.
    viewpad = np.eye(4)
    viewpad[:view.shape[0], :view.shape[1]] = view

    nbr_points = points.shape[1]

    # Do operation in homogenous coordinates.
    points = np.concatenate((points, np.ones((1, nbr_points))))
    # The key step: matrix product of the padded view with the points.
    points = np.dot(viewpad, points)
    points = points[:3, :]

    if normalize:
        # Divide every row by the third row (depth).
        points = points / points[2:3, :].repeat(3, 0).reshape(3, nbr_points)

    return points
# NOTE(review): this listing is flattened and appears to omit lines: the
# right-hand side of the first assignment (after the trailing backslash) and
# the end of the cv2.warpAffine(...) call are missing. Confirm against the
# upstream source before relying on it.
# NOTE(review): `input_meta={}` is a mutable default argument; the visible body
# only reads from it.
def pre_process(self, image, scale, input_meta={}):
resized_image, c, s, inp_width, inp_height, height, width = \
trans_input = get_affine_transform(c, s, 0, [inp_width, inp_height])
# The output resolution is the input resolution divided by the down ratio.
out_height = inp_height // self.opt.down_ratio
out_width = inp_width // self.opt.down_ratio
trans_output = get_affine_transform(c, s, 0, [out_width, out_height])
# Apply the affine warp to obtain the fixed-size (800x448) network input.
inp_image = cv2.warpAffine(
resized_image, trans_input, (inp_width, inp_height),
# Normalize: scale to [0, 1], subtract the mean, divide by the std.
inp_image = ((inp_image / 255. - self.mean) / self.std).astype(np.float32)
# HWC -> 1 x 3 x H x W.
images = inp_image.transpose(2, 0, 1).reshape(1, 3, inp_height, inp_width)
if self.opt.flip_test:
# Add a second batch entry with the last (width) axis reversed.
images = np.concatenate((images, images[:, :, :, ::-1]), axis=0)
images = torch.from_numpy(images)
# Use the calibration from input_meta when present, else the default one.
meta = {'calib': np.array(input_meta['calib'], dtype=np.float32) \
if 'calib' in input_meta else \
self._get_default_calib(width, height)}
meta.update({'c': c, 's': s, 'height': height, 'width': width,
'out_height': out_height, 'out_width': out_width,
'inp_height': inp_height, 'inp_width': inp_width,
'trans_input': trans_input, 'trans_output': trans_output})
if 'pre_dets' in input_meta:
meta['pre_dets'] = input_meta['pre_dets']
if 'cur_dets' in input_meta:
meta['cur_dets'] = input_meta['cur_dets']
return images, meta
def process(self, images, pre_images=None, pre_hms=None,
            pre_inds=None, return_time=False):
    """Run the model on a batch and decode its output heads into detections.

    :param images: Input passed to ``self.model`` as the current frame(s).
    :param pre_images: Optional previous-frame input forwarded to the model.
    :param pre_hms: Optional previous heatmaps forwarded to the model.
    :param pre_inds: Stored unchanged in the output under ``'pre_inds'``.
    :param return_time: If True, also return the timestamp taken after the
        forward pass.
    :return: ``(output, dets)`` or ``(output, dets, forward_time)`` where
        ``dets`` maps each decoded key to a numpy array.
    """
    with torch.no_grad():
        # The model returns a sequence; only its last element is used.
        # Heads: ['hm', 'reg', 'wh', 'dep', 'rot', 'dim', 'amodel_offset', 'pre_inds']
        output = self.model(images, pre_images, pre_hms)[-1]
        output = self._sigmoid_output(output)
        output.update({'pre_inds': pre_inds})
        if self.opt.flip_test:
            output = self._flip_output(output)
        forward_time = time.time()

        # Decoded keys: ['scores', 'clses', 'xs', 'ys', 'cts', 'bbox', 'dep',
        # 'rot', 'dim', 'amodel_offset', 'pre_inds']
        dets = generic_decode(output, K=self.opt.K, opt=self.opt)
        for k in dets:
            dets[k] = dets[k].detach().cpu().numpy()
    if return_time:
        return output, dets, forward_time
    return output, dets
def _sigmoid_output(self, output):
if 'hm' in output:
output['hm'] = output['hm'].sigmoid_()
if 'hm_hp' in output:
output['hm_hp'] = output['hm_hp'].sigmoid_()
if 'dep' in output:
output['dep'] = 1. / (output['dep'].sigmoid() + 1e-6) - 1.
output['dep'] *= self.opt.depth_scale
return output
def generic_decode(output, K=100, opt=None):
    """Decode the network heads into the top-K detections per image.

    :param output: Dict of head name -> tensor. Must contain ``'hm'`` for
        anything to be decoded; ``'reg'``, ``'wh'``, ``'ltrb'`` and the
        regression heads are used when present.
    :param K: Number of peaks kept per image.
    :param opt: Options object; only ``opt.zero_tracking`` is read here.
    :return: Dict with ``'scores'``, ``'clses'``, ``'xs'``, ``'ys'``, ``'cts'``,
        optionally ``'bboxes'``, and one (batch, K, -1) entry per regression
        head found in ``output``. Empty dict when ``'hm'`` is missing.
    """
    if not ('hm' in output):
        return {}

    # `opt` defaults to None, so guard the attribute access.
    if opt is not None and opt.zero_tracking:
        output['tracking'] *= 0

    heat = output['hm']
    batch, cat, height, width = heat.size()

    # NMS followed by top-K on 'hm' yields ['scores', 'clses', 'xs', 'ys', 'cts'].
    heat = _nms(heat)
    scores, inds, clses, ys0, xs0 = _topk(heat, K=K)

    clses = clses.view(batch, K)
    scores = scores.view(batch, K)
    bboxes = None
    cts = torch.cat([xs0.unsqueeze(2), ys0.unsqueeze(2)], dim=2)
    ret = {'scores': scores, 'clses': clses.float(),
           'xs': xs0, 'ys': ys0, 'cts': cts}
    if 'reg' in output:
        # Refine the peak positions with the regressed offsets.
        reg = output['reg']
        reg = _tranpose_and_gather_feat(reg, inds)
        reg = reg.view(batch, K, 2)
        xs = xs0.view(batch, K, 1) + reg[:, :, 0:1]
        ys = ys0.view(batch, K, 1) + reg[:, :, 1:2]
    else:
        # Without an offset head, use the cell centre.
        xs = xs0.view(batch, K, 1) + 0.5
        ys = ys0.view(batch, K, 1) + 0.5

    if 'wh' in output:
        wh = output['wh']
        wh = _tranpose_and_gather_feat(wh, inds)  # B x K x (F)
        # wh = wh.view(batch, K, -1)
        wh = wh.view(batch, K, 2)
        wh[wh < 0] = 0
        if wh.size(2) == 2 * cat:  # cat spec
            wh = wh.view(batch, K, -1, 2)
            cats = clses.view(batch, K, 1, 1).expand(batch, K, 1, 2)
            wh = wh.gather(2, cats.long()).squeeze(2)  # B x K x 2
        # Box corners as centre -/+ half of the predicted size.
        bboxes = torch.cat([xs - wh[..., 0:1] / 2,
                            ys - wh[..., 1:2] / 2,
                            xs + wh[..., 0:1] / 2,
                            ys + wh[..., 1:2] / 2], dim=2)
        ret['bboxes'] = bboxes
        # print('ret bbox', ret['bboxes'])

    if 'ltrb' in output:
        # Box corners as peak position plus the four regressed side offsets.
        ltrb = output['ltrb']
        ltrb = _tranpose_and_gather_feat(ltrb, inds)  # B x K x 4
        ltrb = ltrb.view(batch, K, 4)
        bboxes = torch.cat([xs0.view(batch, K, 1) + ltrb[..., 0:1],
                            ys0.view(batch, K, 1) + ltrb[..., 1:2],
                            xs0.view(batch, K, 1) + ltrb[..., 2:3],
                            ys0.view(batch, K, 1) + ltrb[..., 3:4]], dim=2)
        ret['bboxes'] = bboxes

    # Gather every remaining regression head at the peak indices.
    regression_heads = ['tracking', 'dep', 'rot', 'dim', 'amodel_offset',
                        'nuscenes_att', 'velocity']

    for head in regression_heads:
        if head in output:
            ret[head] = _tranpose_and_gather_feat(
                output[head], inds).view(batch, K, -1)

    return ret
output['hm'], batch['hm'], batch['ind'], batch['mask'], batch['cat']
output['reg'], batch['reg_mask'], batch['ind'], batch['reg']
output['wh'], batch['wh_mask'], batch['ind'], batch['wh']
output['dep'], batch['dep_mask'], batch['ind'], batch['dep']
output['dim'], batch['dim_mask'], batch['ind'], batch['dim']
output['amodel_offset'], batch['amodel_offset_mask'], batch['ind'], batch['amodel_offset']
The bin classification (rotbin) is trained with softmax and the angular
residuals (rotres) are trained with L1 loss:
output['rot'], batch['rot_mask'], batch['ind'], batch['rotbin'], batch['rotres']
### init samples
self._init_ret(ret, gt_det)
calib = self._get_calib(img_info, width, height)
# Map the annotation's category_id through self.cat_ids to an integer class id.
cls_id = int(self.cat_ids[ann['category_id']])
# Two boxes are derived from the annotated 2D box using the output transform.
bbox, bbox_amodal = self._get_bbox_output(
ann['bbox'], trans_output, height, width)
# NOTE(review): this listing is flattened and appears to omit lines: the
# `if h <= 0 or w <= 0:` test has no visible body, and the
# `np.array([...], dtype=np.float32))` expression after draw_umich_gaussian is
# the tail of a call whose beginning is missing. Confirm against the upstream
# source before relying on it.
def _add_instance(
self, ret, gt_det, k, cls_id, bbox, bbox_amodal, ann, trans_output,
aug_s, calib, pre_cts=None, track_ids=None):
# Height and width of the 2D box (bbox is x1, y1, x2, y2).
h, w = bbox[3] - bbox[1], bbox[2] - bbox[0]
if h <= 0 or w <= 0:
radius = gaussian_radius((math.ceil(h), math.ceil(w)))
radius = max(0, int(radius))
# Box centre, and the same centre truncated to integer coordinates.
ct = np.array(
[(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2], dtype=np.float32)
ct_int = ct.astype(np.int32)
# Class ids are 1-based here; the stored category is 0-based.
ret['cat'][k] = cls_id - 1
ret['mask'][k] = 1
if 'wh' in ret:
ret['wh'][k] = 1. * w, 1. * h
ret['wh_mask'][k] = 1
# Flattened index of the integer centre in a map of width opt.output_w.
ret['ind'][k] = ct_int[1] * self.opt.output_w + ct_int[0]
# Sub-pixel offset lost by truncating the centre.
ret['reg'][k] = ct - ct_int
ret['reg_mask'][k] = 1
draw_umich_gaussian(ret['hm'][cls_id - 1], ct_int, radius)
np.array([ct[0] - w / 2, ct[1] - h / 2,
ct[0] + w / 2, ct[1] + h / 2], dtype=np.float32))
gt_det['clses'].append(cls_id - 1)
if 'rot' in self.opt.heads:
self._add_rot(ret, ann, k, gt_det)
if 'dep' in self.opt.heads:
if 'depth' in ann:
ret['dep_mask'][k] = 1
# The depth target is multiplied by the augmentation scale aug_s.
ret['dep'][k] = ann['depth'] * aug_s
if 'dim' in self.opt.heads:
if 'dim' in ann:
ret['dim_mask'][k] = 1
ret['dim'][k] = ann['dim']
if 'amodel_offset' in self.opt.heads:
if 'amodel_center' in ann:
# Offset of the transformed amodel center from the integer 2D centre.
amodel_center = affine_transform(ann['amodel_center'], trans_output)
ret['amodel_offset_mask'][k] = 1
ret['amodel_offset'][k] = amodel_center - ct_int
gt_det['amodel_offset'].append([0, 0])
def _add_rot(self, ret, ann, k, gt_det):
if 'alpha' in ann:
ret['rot_mask'][k] = 1
alpha = ann['alpha']
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
ret['rotbin'][k, 0] = 1
ret['rotres'][k, 0] = alpha - (-0.5 * np.pi)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
ret['rotbin'][k, 1] = 1
ret['rotres'][k, 1] = alpha - (0.5 * np.pi)
def _alpha_to_8(self, alpha):
ret = [0, 0, 0, 1, 0, 0, 0, 1]
if alpha < np.pi / 6. or alpha > 5 * np.pi / 6.:
r = alpha - (-0.5 * np.pi)
ret[1] = 1
ret[2], ret[3] = np.sin(r), np.cos(r)
if alpha > -np.pi / 6. or alpha < -5 * np.pi / 6.:
r = alpha - (0.5 * np.pi)
ret[5] = 1
ret[6], ret[7] = np.sin(r), np.cos(r)
return ret
def post_process(self, dets, meta, scale=1):
    """Convert decoded detections to per-object dicts for a single image.

    Delegates to ``generic_post_process`` with the values stored in ``meta``,
    remembers the calibration in ``self.this_calib`` and, for a test scale
    other than 1, divides the ``'bbox'`` and ``'hps'`` entries by that scale.

    :param dets: Decoded detections as produced by ``process``.
    :param meta: Dict produced by ``pre_process`` (keys ``'c'``, ``'s'``,
        ``'out_height'``, ``'out_width'``, ``'calib'``, ``'height'``, ``'width'``).
    :param scale: Test-time image scale.
    :return: The detections of the first (only) image.
    """
    dets = generic_post_process(
        self.opt, dets, [meta['c']], [meta['s']],
        meta['out_height'], meta['out_width'], self.opt.num_classes,
        [meta['calib']], meta['height'], meta['width'])
    self.this_calib = meta['calib']

    if scale != 1:
        for det in dets[0]:
            for key in ('bbox', 'hps'):
                if key in det:
                    det[key] = (np.array(det[key], np.float32) / scale).tolist()
    return dets[0]
# NOTE(review): this listing is flattened and appears to omit lines: the
# score-threshold test has no visible body, the 'tracking'
# transform_preds_with_trans(...) call is never closed, there is no `else:`
# ahead of the `bbox = item['bbox']` fallback, and nothing is appended to
# `preds` or `ret`. Confirm against the upstream source before relying on it.
def generic_post_process(
opt, dets, c, s, h, w, num_classes, calibs=None, height=-1, width=-1):
if not ('scores' in dets):
return [{}], [{}]
ret = []
for i in range(len(dets['scores'])):
preds = []
# Inverse affine transform from the (w, h) output map back to the image.
trans = get_affine_transform(
c[i], s[i], 0, (w, h), inv=1).astype(np.float32)
for j in range(len(dets['scores'][i])):
if dets['scores'][i][j] < opt.out_thresh:
item = {}
item['score'] = dets['scores'][i][j]
# Decoded classes are 0-based; reported classes are 1-based.
item['class'] = int(dets['clses'][i][j]) + 1
item['ct'] = transform_preds_with_trans(
(dets['cts'][i][j]).reshape(1, 2), trans).reshape(2)
if 'tracking' in dets:
tracking = transform_preds_with_trans(
(dets['tracking'][i][j] + dets['cts'][i][j]).reshape(1, 2),
item['tracking'] = tracking - item['ct']
if 'bboxes' in dets:
bbox = transform_preds_with_trans(
dets['bboxes'][i][j].reshape(2, 2), trans).reshape(4)
item['bbox'] = bbox
if 'dep' in dets and len(dets['dep'][i]) > j:
item['dep'] = dets['dep'][i][j]
if 'dim' in dets and len(dets['dim'][i]) > j:
# item['dim'] = dets['dim'][i][j]
# Negative dimensions are clamped to zero.
item['dim'] = np.maximum(dets['dim'][i][j], 0)
# Recover alpha from the 'rot' head: arctan2() -/+ 0.5 * np.pi.
if 'rot' in dets and len(dets['rot'][i]) > j:
item['alpha'] = get_alpha(dets['rot'][i][j:j+1])[0]
if 'rot' in dets and 'dep' in dets and 'dim' in dets \
and len(dets['dep'][i]) > j:
if 'amodel_offset' in dets and len(dets['amodel_offset'][i]) > j:
ct_output = dets['bboxes'][i][j].reshape(2, 2).mean(axis=0)
amodel_ct_output = ct_output + dets['amodel_offset'][i][j]
ct = transform_preds_with_trans(
amodel_ct_output.reshape(1, 2), trans).reshape(2).tolist()
bbox = item['bbox']
ct = [(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2]
item['ct'] = ct
# Compute loc and rot_y from center, alpha, dim, dep and calib.
item['loc'], item['rot_y'] = ddd2locrot(
ct, item['alpha'], item['dim'], item['dep'], calibs[i])
# Final item keys: ['score', 'class', 'ct', 'bbox', 'dep', 'dim', 'alpha', 'loc', 'rot_y']
return ret
def get_alpha(rot):
    """Decode the 8-value two-bin rotation output into an observation angle.

    :param rot: Array of shape (B, 8) laid out as
        ``[bin1_cls[0], bin1_cls[1], bin1_sin, bin1_cos,
        bin2_cls[0], bin2_cls[1], bin2_sin, bin2_cos]``.
    :return: Array of shape (B,) with alpha taken from bin 1 where
        ``bin1_cls[1] > bin2_cls[1]`` and from bin 2 otherwise.
    """
    # return rot[:, 0]
    idx = rot[:, 1] > rot[:, 5]
    # Each bin predicts the residual around its centre (-pi/2 and +pi/2).
    alpha1 = np.arctan2(rot[:, 2], rot[:, 3]) + (-0.5 * np.pi)
    alpha2 = np.arctan2(rot[:, 6], rot[:, 7]) + (0.5 * np.pi)
    return alpha1 * idx + alpha2 * (1 - idx)
def ddd2locrot(center, alpha, dim, depth, calib):
    """Recover the 3D location and the rotation around Y for a single object.

    :param center: 2D point (x, y) in pixels.
    :param alpha: Observation angle of the object.
    :param dim: Object dimensions; ``dim[0]`` is added (halved) to the y coordinate.
    :param depth: Depth of the object.
    :param calib: Camera matrix; ``calib[0, 2]`` and ``calib[0, 0]`` are used
        as cx and fx.
    :return: ``(locations, rotation_y)``.
    """
    # single image
    # Lift the 2D point to 3D using the depth and the calibration.
    locations = unproject_2d_to_3d(center, depth, calib)
    # Shift by half of dim[0] along y.
    locations[1] += dim[0] / 2
    # Convert the observation angle into the rotation around the Y-axis.
    rotation_y = alpha2rot_y(alpha, center[0], calib[0, 2], calib[0, 0])
    return locations, rotation_y
center, depth, calib获取loc,即3D的中心点
def unproject_2d_to_3d(pt_2d, depth, P):
    """Lift a pixel with known depth back to a 3D point.

    :param pt_2d: Pixel coordinates (x, y).
    :param depth: Scalar depth of the point.
    :param P: Projection matrix of shape (3, 4).
    :return: float32 array of shape (3,) holding (x, y, z).
    """
    z = depth - P[2, 3]
    x = (pt_2d[0] * depth - P[0, 3] - P[0, 2] * z) / P[0, 0]
    y = (pt_2d[1] * depth - P[1, 3] - P[1, 2] * z) / P[1, 1]
    pt_3d = np.array([x, y, z], dtype=np.float32).reshape(3)
    return pt_3d
alpha, center, calib获取rot_y
def alpha2rot_y(alpha, x, cx, fx):
    """Convert an observation angle into a rotation around the camera Y-axis.

    Computes ``rot_y = alpha + arctan2(x - cx, fx)`` and applies a single wrap
    of 2*pi when the result leaves [-pi, pi].

    :param alpha: Observation angle of object, ranging [-pi..pi].
    :param x: Object center x in pixels.
    :param cx: Principal point x of the camera, in pixels.
    :param fx: Focal length along x, in pixels.
    :return: Rotation ry around Y-axis in camera coordinates [-pi..pi].
    """
    rot_y = alpha + np.arctan2(x - cx, fx)
    # One correction is enough when alpha is already in [-pi, pi].
    if rot_y > np.pi:
        rot_y -= 2 * np.pi
    if rot_y < -np.pi:
        rot_y += 2 * np.pi
    return rot_y
# NOTE(review): this listing is flattened and appears to omit lines: there is
# no `else:` ahead of the cv2.imread fallback, the first
# `if self.opt.show_track_color:` has only comments as its body, and the two
# conditional expressions ending in `else \` (for `cl` and `sc`) are missing
# their continuation, as is the start of the call that ends in
# `no_bbox=True, img_id=img_id)`. Confirm against the upstream source.
def add_3d_detection(
self, image_or_path, flipped, dets, calib, show_txt=False,
vis_thresh=0.3, img_id='det'):
# Accept either an image array (copied) or a path to read from disk.
if isinstance(image_or_path, np.ndarray):
self.imgs[img_id] = image_or_path.copy()
self.imgs[img_id] = cv2.imread(image_or_path)
# thickness = 1
if self.opt.show_track_color:
# self.imgs[img_id] = (self.imgs[img_id] * 0.5 + \
# np.ones_like(self.imgs[img_id]) * 255 * 0.5).astype(np.uint8)
# thickness = 3
if flipped:
# Mirror the image horizontally.
self.imgs[img_id] = self.imgs[img_id][:, ::-1].copy()
for item in dets:
# Only draw detections above the threshold that carry full 3D information.
if item['score'] > vis_thresh \
and 'dim' in item and 'loc' in item and 'rot_y' in item:
cl = (self.colors[int(item['class']) - 1, 0, 0]).tolist() \
if not self.opt.show_track_color else \
if self.theme == 'white' and not self.opt.show_track_color:
cl = (255 - np.array(cl)).tolist()
if self.opt.tango_color:
cl = (255 - tango_color_dark[int(item['class']) - 1, 0, 0]).tolist()
dim = item['dim']
loc = item['loc']
rot_y = item['rot_y']
# Skip objects whose z coordinate is not greater than 1.
if loc[2] > 1:
box_3d = compute_box_3d(dim, loc, rot_y)
box_2d = project_to_image(box_3d, calib)
self.imgs[img_id] = draw_box_3d(
self.imgs[img_id], box_2d.astype(np.int32), cl,
same_color=self.opt.show_track_color or self.opt.qualitative)
if self.opt.show_track_color or self.opt.qualitative:
# Axis-aligned 2D extent of the projected 3D corners.
bbox = [box_2d[:,0].min(), box_2d[:,1].min(),
box_2d[:,0].max(), box_2d[:,1].max()]
sc = int(item['tracking_id']) if self.opt.show_track_color else \
bbox, item['class'] - 1, sc, no_bbox=True, img_id=img_id)
if self.opt.show_track_color:
self.add_arrow([(bbox[0] + bbox[2]) / 2, (bbox[1] + bbox[3]) / 2],
item['tracking'], img_id=img_id)
def compute_box_3d(dim, location, rotation_y):
    """Return the eight corners of a 3D box placed at ``location``.

    :param dim: Three box dimensions, forwarded to ``comput_corners_3d``.
    :param location: Three coordinates added to every corner.
    :param rotation_y: Rotation around the Y-axis, in radians.
    :return: Array of shape (8, 3).
    """
    corners_3d = comput_corners_3d(dim, rotation_y)
    # Broadcast the location over all eight corners.
    corners_3d = corners_3d + np.array(location, dtype=np.float32).reshape(1, 3)
    return corners_3d
def comput_corners_3d(dim, rotation_y):
    """Return the eight corners of a 3D box rotated around the Y-axis.

    The corners are expressed relative to the centre of the y = 0 face: y
    takes the values 0 and -h, while x and z are symmetric around zero.

    :param dim: Three box dimensions read as ``h, w, l = dim[0], dim[1], dim[2]``.
    :param rotation_y: Rotation around the Y-axis, in radians.
    :return: float32 array of shape (8, 3).
    """
    c, s = np.cos(rotation_y), np.sin(rotation_y)
    # Rotation matrix around the Y-axis.
    R = np.array([[c, 0, s], [0, 1, 0], [-s, 0, c]], dtype=np.float32)
    l, w, h = dim[2], dim[1], dim[0]
    x_corners = [l/2, l/2, -l/2, -l/2, l/2, l/2, -l/2, -l/2]
    y_corners = [0, 0, 0, 0, -h, -h, -h, -h]
    z_corners = [w/2, -w/2, -w/2, w/2, w/2, -w/2, -w/2, w/2]

    corners = np.array([x_corners, y_corners, z_corners], dtype=np.float32)
    # (3 x 3) . (3 x 8) -> (3 x 8), transposed to one corner per row.
    corners_3d = np.dot(R, corners).transpose(1, 0)
    return corners_3d
