3.3 、gt.yml
python compute_gt_poses.py LINEMOD/duck2
python register_scene.py LINEMOD/duck2
python register_segmented.py LINEMOD/duck2
python create_label_files.py LINEMOD/duck2
代码参考bop_toolkit: https://github.com/thodan/bop_toolkit/tree/master
import yaml
import numpy as np
import struct
import math
import trimesh
def load_ply(path):
    """Loads a 3D mesh model from a PLY file.

    Both ASCII and binary (little- or big-endian) PLY files are handled.
    Only triangular faces are supported.

    :param path: Path to a PLY file.
    :return: The loaded model given by a dictionary with items:
     - 'pts' (nx3 ndarray)
     - 'normals' (nx3 ndarray), optional
     - 'colors' (nx3 ndarray), optional
     - 'faces' (mx3 ndarray), optional
     - 'texture_uv' (nx2 ndarray), optional
     - 'texture_uv_face' (mx6 ndarray), optional
     - 'texture_file' (string), optional
    :raises ValueError: If the header has no 'end_header' line, if a face is
      not a triangle, or if a face does not carry exactly six UV coordinates.
    """
    # Only triangular faces are supported.
    face_n_corners = 3

    n_pts = 0
    n_faces = 0
    pt_props = []
    face_props = []
    is_binary = False
    # Byte-order prefix passed to struct for binary payloads. Using an explicit
    # prefix (instead of the native order) keeps the result independent of the
    # machine that runs the script.
    byte_order = '<'
    header_vertex_section = False
    header_face_section = False
    texture_file = None

    # Formats for the binary case: PLY type name -> (struct code, byte size).
    formats = {
        'char': ('b', 1), 'int8': ('b', 1),
        'uchar': ('B', 1), 'uint8': ('B', 1),
        'short': ('h', 2), 'int16': ('h', 2),
        'ushort': ('H', 2), 'uint16': ('H', 2),
        'int': ('i', 4), 'int32': ('i', 4),
        'uint': ('I', 4), 'uint32': ('I', 4),
        'float': ('f', 4), 'float32': ('f', 4),
        'double': ('d', 8), 'float64': ('d', 8),
    }

    # Vertex properties that are copied into the returned model.
    load_props = ('x', 'y', 'z', 'nx', 'ny', 'nz',
                  'red', 'green', 'blue', 'texture_u', 'texture_v')

    with open(path, 'rb') as f:

        # Read the header.
        while True:
            raw_line = f.readline()
            if not raw_line:
                # readline() returns b'' forever at EOF, so without this check
                # a truncated header would loop endlessly.
                raise ValueError(
                    "PLY header has no 'end_header' line: {}".format(path))

            # Strip the newline character(s).
            line = raw_line.decode('utf8').rstrip('\n').rstrip('\r')

            if line.startswith('comment TextureFile'):
                texture_file = line.split()[-1]
            elif line.startswith('element vertex'):
                n_pts = int(line.split()[-1])
                header_vertex_section = True
                header_face_section = False
            elif line.startswith('element face'):
                n_faces = int(line.split()[-1])
                header_vertex_section = False
                header_face_section = True
            elif line.startswith('element'):  # Some other element.
                header_vertex_section = False
                header_face_section = False
            elif line.startswith('property') and header_vertex_section:
                # (name of the property, data type)
                pt_props.append((line.split()[-1], line.split()[-2]))
            elif line.startswith('property list') and header_face_section:
                elems = line.split()
                if elems[-1] in ('vertex_indices', 'vertex_index'):
                    # (name of the property, data type)
                    face_props.append(('n_corners', elems[2]))
                    for i in range(face_n_corners):
                        face_props.append(('ind_' + str(i), elems[3]))
                elif elems[-1] == 'texcoord':
                    # (name of the property, data type)
                    face_props.append(('texcoord', elems[2]))
                    for i in range(face_n_corners * 2):
                        face_props.append(
                            ('texcoord_ind_' + str(i), elems[3]))
                else:
                    print('Warning: Not supported face property: ' + elems[-1])
            elif line.startswith('format'):
                if 'binary' in line:
                    is_binary = True
                    if 'big_endian' in line:
                        byte_order = '>'
            elif line.startswith('end_header'):
                break

        # Prepare data structures.
        model = {}
        if texture_file is not None:
            model['texture_file'] = texture_file
        model['pts'] = np.zeros((n_pts, 3), np.float64)
        if n_faces > 0:
            model['faces'] = np.zeros((n_faces, face_n_corners), np.float64)

        pt_props_names = {p[0] for p in pt_props}
        face_props_names = {p[0] for p in face_props}

        is_normal = {'nx', 'ny', 'nz'}.issubset(pt_props_names)
        if is_normal:
            model['normals'] = np.zeros((n_pts, 3), np.float64)

        is_color = {'red', 'green', 'blue'}.issubset(pt_props_names)
        if is_color:
            model['colors'] = np.zeros((n_pts, 3), np.float64)

        is_texture_pt = {'texture_u', 'texture_v'}.issubset(pt_props_names)
        if is_texture_pt:
            model['texture_uv'] = np.zeros((n_pts, 2), np.float64)

        is_texture_face = 'texcoord' in face_props_names
        if is_texture_face:
            model['texture_uv_face'] = np.zeros((n_faces, 6), np.float64)

        # Load vertices.
        for pt_id in range(n_pts):
            prop_vals = {}
            if is_binary:
                # Every property must be read to advance the file position,
                # even the ones that are not stored.
                for prop in pt_props:
                    code, size = formats[prop[1]]
                    val = struct.unpack(byte_order + code, f.read(size))[0]
                    if prop[0] in load_props:
                        prop_vals[prop[0]] = val
            else:
                elems = f.readline().decode('utf8').rstrip(
                    '\n').rstrip('\r').split()
                for prop_id, prop in enumerate(pt_props):
                    if prop[0] in load_props:
                        prop_vals[prop[0]] = elems[prop_id]

            model['pts'][pt_id, 0] = float(prop_vals['x'])
            model['pts'][pt_id, 1] = float(prop_vals['y'])
            model['pts'][pt_id, 2] = float(prop_vals['z'])

            if is_normal:
                model['normals'][pt_id, 0] = float(prop_vals['nx'])
                model['normals'][pt_id, 1] = float(prop_vals['ny'])
                model['normals'][pt_id, 2] = float(prop_vals['nz'])

            if is_color:
                model['colors'][pt_id, 0] = float(prop_vals['red'])
                model['colors'][pt_id, 1] = float(prop_vals['green'])
                model['colors'][pt_id, 2] = float(prop_vals['blue'])

            if is_texture_pt:
                model['texture_uv'][pt_id, 0] = float(prop_vals['texture_u'])
                model['texture_uv'][pt_id, 1] = float(prop_vals['texture_v'])

        # Load faces.
        for face_id in range(n_faces):
            prop_vals = {}
            if is_binary:
                for prop in face_props:
                    code, size = formats[prop[1]]
                    val = struct.unpack(byte_order + code, f.read(size))[0]
                    if prop[0] == 'n_corners':
                        if val != face_n_corners:
                            raise ValueError(
                                'Only triangular faces are supported.')
                    elif prop[0] == 'texcoord':
                        if val != face_n_corners * 2:
                            raise ValueError(
                                'Wrong number of UV face coordinates.')
                    else:
                        prop_vals[prop[0]] = val
            else:
                elems = f.readline().decode('utf8').rstrip(
                    '\n').rstrip('\r').split()
                for prop_id, prop in enumerate(face_props):
                    if prop[0] == 'n_corners':
                        if int(elems[prop_id]) != face_n_corners:
                            raise ValueError(
                                'Only triangular faces are supported.')
                    elif prop[0] == 'texcoord':
                        if int(elems[prop_id]) != face_n_corners * 2:
                            raise ValueError(
                                'Wrong number of UV face coordinates.')
                    else:
                        prop_vals[prop[0]] = elems[prop_id]

            model['faces'][face_id, 0] = int(prop_vals['ind_0'])
            model['faces'][face_id, 1] = int(prop_vals['ind_1'])
            model['faces'][face_id, 2] = int(prop_vals['ind_2'])

            if is_texture_face:
                for i in range(6):
                    model['texture_uv_face'][face_id, i] = float(
                        prop_vals['texcoord_ind_{}'.format(i)])

    return model
def calc_pts_diameter(pts):
    """Calculates the diameter of a set of 3D points (i.e. the maximum distance
    between any two points in the set).

    :param pts: nx3 ndarray with 3D points.
    :return: The calculated diameter as a float, or -1.0 if pts has no rows.
    """
    diameter = -1.0
    for pt_id in range(pts.shape[0]):
        # Each pair is visited once: point pt_id is compared only with itself
        # and the points that follow it. Broadcasting subtracts the single row
        # from every remaining row.
        pts_diff = pts[pt_id:, :] - pts[pt_id, :]
        max_dist = math.sqrt((pts_diff * pts_diff).sum(axis=1).max())
        if max_dist > diameter:
            diameter = max_dist
    return diameter
def distance(point_one, point_two):
    """Returns the Euclidean distance between two 3D points.

    Only the first three coordinates of each point are used.
    """
    return ((point_one[0] - point_two[0]) ** 2 +
            (point_one[1] - point_two[1]) ** 2 +
            (point_one[2] - point_two[2]) ** 2) ** 0.5


def max_distance(points):
    """Returns the largest distance between any two points in the sequence.

    Every unordered pair is examined. Pairing only neighbouring entries (as
    zip(points, points[1:]) does) misses the farthest pair whenever those two
    points are not adjacent in the list.

    This is a pure-Python O(n^2) scan; for large vertex sets the NumPy-based
    calc_pts_diameter is the faster choice.

    :param points: Sequence of 3D points (each indexable by 0, 1, 2).
    :return: The maximum pairwise distance, or 0.0 for fewer than two points.
    """
    from itertools import combinations

    return max(
        (distance(p1, p2) for p1, p2 in combinations(points, 2)),
        default=0.0)
def calc_pts_diameter1(path):
    """Computes a model diameter from the vertices of a mesh file.

    The mesh is loaded with trimesh and its vertex list is handed to
    max_distance.

    :param path: Path to a mesh file readable by trimesh.load.
    :return: The value returned by max_distance for the mesh vertices.
    """
    mesh = trimesh.load(path)
    return max_distance(mesh.vertices.tolist())
if __name__ == "__main__":
    obj_ids = [1]
    model_lpath = r"models/obj_01.ply"
    # NOTE(review): the output path was referenced but never defined in the
    # original script. 'models/models_info.yml' is an assumption (next to the
    # model file) - confirm the location the downstream tooling expects.
    models_info_path = r"models/models_info.yml"

    models_info = {}
    for obj_id in obj_ids:
        print('Processing model of object {}...'.format(obj_id))

        # NOTE(review): the same model file is loaded for every id in obj_ids;
        # derive the path from obj_id if more objects are added.
        model = load_ply(model_lpath)

        # Axis-aligned bounding box: minimum corner and extent per axis.
        ref_pt = model['pts'].min(axis=0).flatten()
        size = model['pts'].max(axis=0) - ref_pt

        # Calculated diameter.
        diameter = calc_pts_diameter(model['pts'])
        #diameter = calc_pts_diameter1(model_lpath)

        # Plain Python floats so that yaml.dump emits ordinary scalars.
        models_info[obj_id] = {
            'min_x': float(ref_pt[0]),
            'min_y': float(ref_pt[1]),
            'min_z': float(ref_pt[2]),
            'size_x': float(size[0]),
            'size_y': float(size[1]),
            'size_z': float(size[2]),
            'diameter': float(diameter)
        }

    # Save the calculated info about the object models.
    #inout.save_json(dp_model['models_info_path'], models_info)
    with open(models_info_path, 'w') as f:
        yaml.dump(models_info, f)
import numpy as np
import os
import json
import yaml
# Writes the per-image camera info (intrinsics and depth scale) for every
# pose file found in the 'transforms' directory, once as JSON and once as YAML.
pose = 'transforms'
savepath = 'scene_camera.json'
s2 = 'info.yml'
camepath = 'intrinsics.json'
obj_id = 1

with open(camepath, 'r') as f:
    content = f.read()
loadData = json.loads(content)

# Row-major 3x3 intrinsic matrix [fx 0 ppx; 0 fy ppy; 0 0 1], flattened.
data = np.zeros(9)
data[0] = float(loadData['fx'])
data[2] = float(loadData['ppx'])
data[4] = float(loadData['fy'])
data[5] = float(loadData['ppy'])
data[8] = 1.0
#data = np.loadtxt(camepath).flatten()

files = os.listdir(pose)
scene_gt_info = {}
l = len(files)  # Not used below; kept from the original script.
for name in files:
    # The image id is the numeric file-name stem, e.g. '12.npy' -> 12.
    i = int(name.split(".")[0])#int(name.split(".")[0][4:])
    # The same intrinsics are stored for every image.
    scene_gt_info[i] = {
        'cam_K': [float(e) for e in data],
        'depth_scale': float(loadData['depth_scale']),
    }

# One image entry per line, sorted by image id. The '{', ',', newline and '}'
# writes were missing from the pasted script and are restored here; without
# them the output is not valid JSON.
with open(savepath, 'w') as f:
    if isinstance(scene_gt_info, dict):
        f.write('{\n')
        content_sorted = sorted(scene_gt_info.items(), key=lambda x: x[0])
        for elem_id, (k, v) in enumerate(content_sorted):
            f.write(' \"{}\": {}'.format(k, json.dumps(v, sort_keys=True)))
            if elem_id != len(scene_gt_info) - 1:
                f.write(',')
            f.write('\n')
        f.write('}')

with open(s2, 'w') as f:
    yaml.dump(scene_gt_info, f)
import numpy as np
import os
import json
# Converts the pose matrices stored in the 'transforms' directory into a
# scene_gt.json file with one list of ground-truth entries per image.
pose = r'transforms'
savepath = r'scene_gt.json'
obj_id = 1

files = os.listdir(pose)
scene_gt_info = {}
for name in files:
    # The image id is the numeric file-name stem, e.g. '12.npy' -> 12.
    i = int(name.split(".")[0])
    scene_gt_info[i] = []

    # NOTE(review): assumes each file holds a transform whose upper-left 3x3
    # block is the rotation and whose last column holds the translation -
    # confirm against the script that writes 'transforms'.
    loadData = np.load(os.path.join(pose, name))
    r = loadData[0:3, 0:3].flatten()
    t = loadData[0:3, -1].flatten()

    # A list of dicts per image is what load_scene_gt iterates over.
    scene_gt_info[i].append({
        'cam_R_m2c': [float(e) for e in r],
        'cam_t_m2c': [float(e) for e in t],
        'obj_id': int(obj_id)
    })

# One image entry per line, sorted by image id. The '{', ',', newline and '}'
# writes were missing from the pasted script and are restored here; without
# them the output is not valid JSON.
with open(savepath, 'w') as f:
    if isinstance(scene_gt_info, dict):
        f.write('{\n')
        content_sorted = sorted(scene_gt_info.items(), key=lambda x: x[0])
        for elem_id, (k, v) in enumerate(content_sorted):
            f.write(' \"{}\": {}'.format(k, json.dumps(v, sort_keys=True)))
            if elem_id != len(scene_gt_info) - 1:
                f.write(',')
            f.write('\n')
        f.write('}')
参考scripts/calc_gt_info.py,需要使用from bop_toolkit_lib import renderer_vispy
import os
import numpy as np
import yaml
import json
import imageio
from bop_toolkit_lib import renderer_vispy
def _camera_as_numpy(camera):
if 'cam_K' in camera.keys():
camera['cam_K'] = \
np.array(camera['cam_K'], np.float64).reshape((3, 3))
if 'cam_R_w2c' in camera.keys():
camera['cam_R_w2c'] = \
np.array(camera['cam_R_w2c'], np.float64).reshape((3, 3))
if 'cam_t_w2c' in camera.keys():
camera['cam_t_w2c'] = \
np.array(camera['cam_t_w2c'], np.float64).reshape((3, 1))
return camera
def load_json(path, keys_to_int=False):
    """Loads content of a JSON file.

    :param path: Path to the JSON file.
    :param keys_to_int: If True, every object key that consists of digits
      (optionally preceded by '-') is converted to int, at all nesting levels.
    :return: Content of the loaded JSON file.
    """
    # Keys to integers.
    def convert_keys_to_int(x):
        return {int(k) if k.lstrip('-').isdigit() else k: v
                for k, v in x.items()}

    with open(path, 'r') as f:
        # The file must be parsed exactly once: a second json.load on the
        # same handle would start at end-of-file and fail.
        if keys_to_int:
            content = json.load(f, object_hook=convert_keys_to_int)
        else:
            content = json.load(f)

    return content
def load_scene_camera(path):
    """Loads a JSON file with per-image camera information.

    Top-level keys are converted to int where possible and every per-image
    entry is passed through _camera_as_numpy.

    :param path: Path to the JSON file.
    :return: Dictionary mapping image id to the converted camera dictionary.
    """
    scene_camera = load_json(path, keys_to_int=True)

    # Only values of existing keys are replaced, so iterating the dict while
    # assigning is safe.
    for im_id in scene_camera:
        scene_camera[im_id] = _camera_as_numpy(scene_camera[im_id])
    return scene_camera
def load_scene_gt(path):
    """Loads content of a JSON file with ground-truth annotations.

    See docs/bop_datasets_format.md for details.

    :param path: Path to the JSON file.
    :return: Dictionary with the loaded content, where 'cam_R_m2c' entries
      are 3x3 and 'cam_t_m2c' entries are 3x1 float64 ndarrays.
    """
    scene_gt = load_json(path, keys_to_int=True)

    # Each image maps to a list of annotation dicts; convert them in place.
    for im_gt in scene_gt.values():
        for gt in im_gt:
            if 'cam_R_m2c' in gt:
                gt['cam_R_m2c'] = np.array(
                    gt['cam_R_m2c'], np.float64).reshape((3, 3))
            if 'cam_t_m2c' in gt:
                gt['cam_t_m2c'] = np.array(
                    gt['cam_t_m2c'], np.float64).reshape((3, 1))
    return scene_gt
def depth_im_to_dist_im_fast(depth_im, K):
    """Converts a depth image to a distance image.

    :param depth_im: hxw ndarray with the input depth image, where
      depth_im[y, x] is the Z coordinate of the 3D point [X, Y, Z] that
      projects to pixel [x, y], or 0 if there is no such 3D point (this is a
      typical output of the Kinect-like sensors).
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :return: hxw ndarray with the distance image, where dist_im[y, x] is the
      distance from the camera center to the 3D point [X, Y, Z] that projects
      to pixel [x, y], or 0 if there is no such 3D point.
    """
    xs, ys = np.meshgrid(
        np.arange(depth_im.shape[1]), np.arange(depth_im.shape[0]))

    # Per-pixel X/Z and Y/Z ratios from the pinhole model.
    pre_Xs = (xs - K[0, 2]) / np.float64(K[0, 0])
    pre_Ys = (ys - K[1, 2]) / np.float64(K[1, 1])

    # Distance = sqrt(X^2 + Y^2 + Z^2) with X = pre_Xs * Z and Y = pre_Ys * Z.
    # Pixels with Z == 0 therefore stay 0.
    dist_im = np.sqrt(
        np.multiply(pre_Xs, depth_im) ** 2 +
        np.multiply(pre_Ys, depth_im) ** 2 +
        depth_im.astype(np.float64) ** 2)
    return dist_im
def estimate_visib_mask(d_test, d_model, delta, visib_mode='bop19'):
    """Estimates a mask of the visible object surface.

    :param d_test: Distance image of a scene in which the visibility is
      estimated.
    :param d_model: Rendered distance image of the object model.
    :param delta: Tolerance used in the visibility test.
    :param visib_mode: Visibility mode:
      1) 'bop18' - Object is considered NOT VISIBLE at pixels with missing
         depth.
      2) 'bop19' - Object is considered VISIBLE at pixels with missing depth.
         This allows to use the VSD pose error function also on shiny objects,
         which are typically not captured well by the depth sensors. A possible
         problem with this mode is that some invisible parts can be considered
         visible. However, the shadows of missing depth measurements, where
         this problem is expected to appear and which are often present at
         depth discontinuities, are typically relatively narrow and therefore
         this problem is less significant.
    :return: Visibility mask.
    :raises ValueError: If visib_mode is neither 'bop18' nor 'bop19'.
    """
    assert (d_test.shape == d_model.shape)

    if visib_mode == 'bop18':
        mask_valid = np.logical_and(d_test > 0, d_model > 0)
        d_diff = d_model.astype(np.float32) - d_test.astype(np.float32)
        visib_mask = np.logical_and(d_diff <= delta, mask_valid)

    elif visib_mode == 'bop19':
        d_diff = d_model.astype(np.float32) - d_test.astype(np.float32)
        # Pixels without a depth measurement (d_test == 0) count as visible.
        visib_mask = np.logical_and(
            np.logical_or(d_diff <= delta, d_test == 0), d_model > 0)

    else:
        raise ValueError('Unknown visibility mode.')

    return visib_mask
def clip_pt_to_im(pt, im_size):
    """Clips a 2D point to the image frame.

    :param pt: 2D point (x, y).
    :param im_size: Image size (width, height).
    :return: Clipped 2D point [x, y] with 0 <= x <= width - 1 and
      0 <= y <= height - 1.
    """
    x = min(max(pt[0], 0), im_size[0] - 1)
    y = min(max(pt[1], 0), im_size[1] - 1)
    return [x, y]
def calc_2d_bbox(xs, ys, im_size=None, clip=False):
    """Calculates 2D bounding box of the given set of 2D points.

    :param xs: 1D ndarray with x-coordinates of 2D points.
    :param ys: 1D ndarray with y-coordinates of 2D points.
    :param im_size: Image size (width, height) (used for optional clipping).
    :param clip: Whether to clip the bounding box (default == False).
    :return: 2D bounding box (x, y, w, h), where (x, y) is the top-left corner
      and (w, h) is width and height of the bounding box.
    """
    bb_min = [xs.min(), ys.min()]
    bb_max = [xs.max(), ys.max()]
    if clip:
        # Clipping needs the image size; im_size is ignored otherwise.
        assert (im_size is not None)
        bb_min = clip_pt_to_im(bb_min, im_size)
        bb_max = clip_pt_to_im(bb_max, im_size)
    return [bb_min[0], bb_min[1],
            bb_max[0] - bb_min[0], bb_max[1] - bb_min[1]]
# Renders the object in every ground-truth pose, estimates its visibility in
# the captured depth image, and writes pose + bounding box per image to gt.yml.
obj_id = 1
datasets_path = r"LINEMOD/duck2"
depth_tpath = datasets_path + "/depth"
model_fpath = r"LINEMOD/duck2/duck2.ply"
scene_camera_tpath = r"LINEMOD/duck2/scene_camera.json"
# The leading '/' of the original path made this the only absolute path in the
# list and pointed outside the dataset directory.
scene_gt_tpath = r"LINEMOD/duck2/scene_gt.json"
scene_gt_info_path = r"LINEMOD/duck2/gt.yml"
scene_ids = list(range(1, 16))
rgb_ext = '.jpg'
gray_ext = '.png'
depth_ext = '.png'
im_width, im_height = [640, 480]

# Tolerance used in the visibility test. It was never defined in the original
# script; 15 is the default of bop_toolkit's scripts/calc_gt_info.py.
# NOTE(review): that default is in millimetres - confirm the depth images and
# the model use the same unit.
delta = 15

# The renderer has a larger canvas for generation of masks of truncated objects.
ren_width, ren_height = 3 * im_width, 3 * im_height
ren_cx_offset, ren_cy_offset = im_width, im_height
ren = renderer_vispy.RendererVispy(ren_width, ren_height, mode='depth')
ren.add_object(obj_id, model_fpath)

#for scene_id in scene_ids:
# Load scene info and ground-truth poses.
scene_camera = load_scene_camera(scene_camera_tpath)
scene_gt = load_scene_gt(scene_gt_tpath)

scene_gt_info = {}
im_ids = sorted(scene_gt.keys())
for im_counter, im_id in enumerate(im_ids):

    # Load depth image.
    # NOTE(review): bop_toolkit multiplies the depth by the camera's
    # 'depth_scale' at this point; this script uses the raw values - confirm
    # that is intended.
    depth_fpath = depth_tpath + "/" + str(im_id) + depth_ext
    d = imageio.imread(depth_fpath)
    depth = d.astype(np.float32)

    K = scene_camera[im_id]['cam_K']
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    im_size = (depth.shape[1], depth.shape[0])

    scene_gt_info[im_id] = []
    for gt_id, gt in enumerate(scene_gt[im_id]):
        r = gt['cam_R_m2c'].flatten()
        t = gt['cam_t_m2c'].flatten()

        # Render depth image of the object model in the ground-truth pose.
        # The principal point is shifted so that the real image sits in the
        # centre of the enlarged canvas.
        depth_gt_large = ren.render_object(
            gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'],
            fx, fy, cx + ren_cx_offset, cy + ren_cy_offset)['depth']
        depth_gt = depth_gt_large[
            ren_cy_offset:(ren_cy_offset + im_height),
            ren_cx_offset:(ren_cx_offset + im_width)]

        # Convert depth images to distance images.
        dist_gt = depth_im_to_dist_im_fast(depth_gt, K)
        dist_im = depth_im_to_dist_im_fast(depth, K)

        # Estimation of the visibility mask.
        visib_gt = estimate_visib_mask(
            dist_im, dist_gt, delta, visib_mode='bop19')

        # Mask of the object in the GT pose.
        obj_mask_gt_large = depth_gt_large > 0
        obj_mask_gt = dist_gt > 0

        # Number of pixels in the visible part of the object silhouette.
        px_count_visib = visib_gt.sum()

        # Bounding box of the whole object silhouette
        # (including the truncated part).
        bbox = [-1, -1, -1, -1]
        if px_count_visib > 0:
            ys, xs = obj_mask_gt_large.nonzero()
            # Back from canvas coordinates to image coordinates.
            ys -= ren_cy_offset
            xs -= ren_cx_offset
            bbox = calc_2d_bbox(xs, ys, im_size)

        # Store the calculated info.
        scene_gt_info[im_id].append({
            'cam_R_m2c': [float(e) for e in r],
            'cam_t_m2c': [float(e) for e in t],
            'bbox_obj': [int(e) for e in bbox],
            'obj_id': int(obj_id)
        })

# Save the info for the current scene.
if not os.path.exists(os.path.dirname(scene_gt_info_path)):
    os.makedirs(os.path.dirname(scene_gt_info_path))
with open(scene_gt_info_path, 'w') as f:
    yaml.dump(scene_gt_info, f)