制作步骤:
目录
1、采集图片
2、ObjectDatasetTools
2.1、安装
2.2、生成transforms.npy文件
2.3、生成点云模型
2.4、生成mask和label
3、用代码补全剩下文件
3.1、models_info.yml
3.2、info.yml
3.3、gt.yml
严格按照linemod的格式制作数据集
linemod
--data
----01
------depth
------mask
------rgb
------gt.yml
------info.yml
--models
----models_info.yml
----obj_01.ply
--segnet_results
这块就不多说了,用深度相机采集rgb和depth图,和相机内参intrinsics.json
https://github.com/F2Wang/ObjectDatasetTools
按照installation安装就行,然后在ObjectDatasetTools目录中创建LINEMOD/duck2文件夹,这个里面放JPEGImages和depth和intrinsics.json
这个是所有图片到第一帧的转换关系,4×4
python compute_gt_poses.py LINEMOD/duck2
方法一:手动
python register_scene.py LINEMOD/duck2
这个生成的是点云,需要用meshlab手动处理成三角化(带面片)的网格文件才行,具体步骤参考:《记录:6d位姿数据集-meshlab简单使用-修改单位度量》(CSDN博客)
方法二:自动
python register_segmented.py LINEMOD/duck2
这个自动分割,但效果不太好,最后还是用的方法一手动处理了一下
python create_label_files.py LINEMOD/duck2
可以看一下mask是否正确,若不正确就是前面的registeredScene.ply有问题
代码参考bop_toolkit: https://github.com/thodan/bop_toolkit/tree/master
参考scripts/calc_model_info.py
import yaml
import numpy as np
import struct
import math
import trimesh
def load_ply(path):
    """Loads a 3D mesh model from a PLY file.

    Binary files are unpacked with struct's default (native) byte order; the
    endianness declared in the header is not checked. Only the binary property
    types listed in `formats` below are supported.

    :param path: Path to a PLY file.
    :return: The loaded model given by a dictionary with items:
     - 'pts' (nx3 ndarray)
     - 'normals' (nx3 ndarray), optional
     - 'colors' (nx3 ndarray), optional
     - 'faces' (mx3 ndarray), optional
     - 'texture_uv' (nx2 ndarray), optional
     - 'texture_uv_face' (mx6 ndarray), optional
     - 'texture_file' (string), optional
    :raises ValueError: If the file ends before 'end_header', if a face is not
      a triangle, or if a face does not carry exactly six UV coordinates.
    """
    # Only triangular faces are supported.
    face_n_corners = 3

    n_pts = 0
    n_faces = 0
    pt_props = []
    face_props = []
    is_binary = False
    header_vertex_section = False
    header_face_section = False
    texture_file = None

    # Formats for the binary case: PLY type name -> (struct code, byte size).
    formats = {
        'float': ('f', 4),
        'double': ('d', 8),
        'int': ('i', 4),
        'uchar': ('B', 1)
    }

    # Vertex properties copied into the model. Any other declared property is
    # still read (to stay aligned with the data) but its value is dropped.
    load_props = ['x', 'y', 'z', 'nx', 'ny', 'nz',
                  'red', 'green', 'blue', 'texture_u', 'texture_v']

    # The context manager closes the file even when parsing raises.
    with open(path, 'rb') as f:

        # Read the header.
        while True:
            raw_line = f.readline()
            if not raw_line:
                # readline() returns b'' at EOF; without this check a file
                # lacking 'end_header' would loop forever.
                raise ValueError(
                    'Unexpected end of file in PLY header: ' + str(path))

            # Strip the newline character(s).
            line = raw_line.decode('utf8').rstrip('\n').rstrip('\r')

            if line.startswith('comment TextureFile'):
                texture_file = line.split()[-1]
            elif line.startswith('element vertex'):
                n_pts = int(line.split()[-1])
                header_vertex_section = True
                header_face_section = False
            elif line.startswith('element face'):
                n_faces = int(line.split()[-1])
                header_vertex_section = False
                header_face_section = True
            elif line.startswith('element'):  # Some other element.
                header_vertex_section = False
                header_face_section = False
            elif line.startswith('property') and header_vertex_section:
                # (name of the property, data type)
                pt_props.append((line.split()[-1], line.split()[-2]))
            elif line.startswith('property list') and header_face_section:
                elems = line.split()
                if elems[-1] == 'vertex_indices' or elems[-1] == 'vertex_index':
                    # (name of the property, data type)
                    face_props.append(('n_corners', elems[2]))
                    for i in range(face_n_corners):
                        face_props.append(('ind_' + str(i), elems[3]))
                elif elems[-1] == 'texcoord':
                    # (name of the property, data type)
                    face_props.append(('texcoord', elems[2]))
                    for i in range(face_n_corners * 2):
                        face_props.append(('texcoord_ind_' + str(i), elems[3]))
                else:
                    print('Warning: Not supported face property: ' + elems[-1])
            elif line.startswith('format'):
                if 'binary' in line:
                    is_binary = True
            elif line.startswith('end_header'):
                break

        # Prepare data structures.
        model = {}
        if texture_file is not None:
            model['texture_file'] = texture_file
        model['pts'] = np.zeros((n_pts, 3), np.float64)
        if n_faces > 0:
            model['faces'] = np.zeros((n_faces, face_n_corners), np.float64)

        pt_props_names = [p[0] for p in pt_props]
        face_props_names = [p[0] for p in face_props]

        is_normal = False
        if {'nx', 'ny', 'nz'}.issubset(set(pt_props_names)):
            is_normal = True
            model['normals'] = np.zeros((n_pts, 3), np.float64)

        is_color = False
        if {'red', 'green', 'blue'}.issubset(set(pt_props_names)):
            is_color = True
            model['colors'] = np.zeros((n_pts, 3), np.float64)

        is_texture_pt = False
        if {'texture_u', 'texture_v'}.issubset(set(pt_props_names)):
            is_texture_pt = True
            model['texture_uv'] = np.zeros((n_pts, 2), np.float64)

        is_texture_face = False
        if {'texcoord'}.issubset(set(face_props_names)):
            is_texture_face = True
            model['texture_uv_face'] = np.zeros((n_faces, 6), np.float64)

        # Load vertices.
        for pt_id in range(n_pts):
            prop_vals = {}
            if is_binary:
                for prop in pt_props:
                    fmt = formats[prop[1]]
                    read_data = f.read(fmt[1])
                    val = struct.unpack(fmt[0], read_data)[0]
                    if prop[0] in load_props:
                        prop_vals[prop[0]] = val
            else:
                elems = f.readline().decode('utf8').rstrip('\n').rstrip('\r').split()
                for prop_id, prop in enumerate(pt_props):
                    if prop[0] in load_props:
                        prop_vals[prop[0]] = elems[prop_id]

            model['pts'][pt_id, 0] = float(prop_vals['x'])
            model['pts'][pt_id, 1] = float(prop_vals['y'])
            model['pts'][pt_id, 2] = float(prop_vals['z'])

            if is_normal:
                model['normals'][pt_id, 0] = float(prop_vals['nx'])
                model['normals'][pt_id, 1] = float(prop_vals['ny'])
                model['normals'][pt_id, 2] = float(prop_vals['nz'])

            if is_color:
                model['colors'][pt_id, 0] = float(prop_vals['red'])
                model['colors'][pt_id, 1] = float(prop_vals['green'])
                model['colors'][pt_id, 2] = float(prop_vals['blue'])

            if is_texture_pt:
                model['texture_uv'][pt_id, 0] = float(prop_vals['texture_u'])
                model['texture_uv'][pt_id, 1] = float(prop_vals['texture_v'])

        # Load faces.
        for face_id in range(n_faces):
            prop_vals = {}
            if is_binary:
                for prop in face_props:
                    fmt = formats[prop[1]]
                    val = struct.unpack(fmt[0], f.read(fmt[1]))[0]
                    if prop[0] == 'n_corners':
                        if val != face_n_corners:
                            raise ValueError('Only triangular faces are supported.')
                    elif prop[0] == 'texcoord':
                        if val != face_n_corners * 2:
                            raise ValueError('Wrong number of UV face coordinates.')
                    else:
                        prop_vals[prop[0]] = val
            else:
                elems = f.readline().decode('utf8').rstrip('\n').rstrip('\r').split()
                for prop_id, prop in enumerate(face_props):
                    if prop[0] == 'n_corners':
                        if int(elems[prop_id]) != face_n_corners:
                            raise ValueError('Only triangular faces are supported.')
                    elif prop[0] == 'texcoord':
                        if int(elems[prop_id]) != face_n_corners * 2:
                            raise ValueError('Wrong number of UV face coordinates.')
                    else:
                        prop_vals[prop[0]] = elems[prop_id]

            model['faces'][face_id, 0] = int(prop_vals['ind_0'])
            model['faces'][face_id, 1] = int(prop_vals['ind_1'])
            model['faces'][face_id, 2] = int(prop_vals['ind_2'])

            if is_texture_face:
                for i in range(6):
                    model['texture_uv_face'][face_id, i] = float(
                        prop_vals['texcoord_ind_{}'.format(i)])

    return model
def calc_pts_diameter(pts):
    """Calculates the diameter of a set of 3D points, i.e. the largest
    distance between any two points of the set.

    :param pts: nx3 ndarray with 3D points.
    :return: The calculated diameter (-1.0 when the set is empty).
    """
    n_pts = pts.shape[0]
    largest = -1.0
    for start in range(n_pts):
        # Offsets from point `start` to itself and to every later point;
        # pairs with earlier points were handled in previous iterations.
        offsets = pts[start:, :] - pts[start, :]
        farthest = math.sqrt(np.square(offsets).sum(axis=1).max())
        if farthest > largest:
            largest = farthest
    return largest
def distance(point_one, point_two):
    """Returns the Euclidean distance between two 3D points.

    Only the first three coordinates of each point are used.
    """
    dx = point_one[0] - point_two[0]
    dy = point_one[1] - point_two[1]
    dz = point_one[2] - point_two[2]
    return (dx ** 2 + dy ** 2 + dz ** 2) ** 0.5
def max_distance(points):
    """Returns the largest Euclidean distance between any two of the points.

    The previous implementation only compared each point with its successor
    in the list, which underestimates the diameter of a point set.

    :param points: Sequence of 3D points; the first three coordinates of each
      point are used.
    :return: Maximum pairwise distance as a float (0.0 for a single point).
    :raises ValueError: If `points` is empty.
    """
    if len(points) == 0:
        raise ValueError('max_distance() requires at least one point.')

    pts = np.asarray(points, dtype=np.float64)[:, :3]

    # Compare every point with all later points; squared distances are
    # enough for finding the maximum, so the root is taken once at the end.
    best_sq = 0.0
    for i in range(pts.shape[0] - 1):
        diff = pts[i + 1:] - pts[i]
        best_sq = max(best_sq, float((diff * diff).sum(axis=1).max()))
    return math.sqrt(best_sq)
def calc_pts_diameter1(path):
    """Alternative diameter computation: loads the mesh at `path` with
    trimesh and passes its vertices (as a plain list) to max_distance().

    :param path: Path to the mesh file.
    :return: The value returned by max_distance() for the mesh vertices.
    """
    vertex_list = trimesh.load(path).vertices.tolist()
    return max_distance(vertex_list)
if __name__ == "__main__":
    obj_ids = [1]
    model_lpath = r"models/obj_01.ply"
    models_info_path = r"models/models_info.yml"

    models_info = {}
    for obj_id in obj_ids:
        print('Processing model of object {}...'.format(obj_id))
        vertices = load_ply(model_lpath)['pts']

        # Axis-aligned bounding box: minimum corner and extent per axis.
        ref_pt = vertices.min(axis=0).flatten()
        size = vertices.max(axis=0) - ref_pt
        print(ref_pt)
        print(size)

        # Largest distance between any two model vertices.
        diameter = calc_pts_diameter(vertices)
        print(diameter)

        min_x, min_y, min_z = (float(v) for v in ref_pt)
        size_x, size_y, size_z = (float(v) for v in size)
        models_info[obj_id] = {
            'min_x': min_x, 'min_y': min_y, 'min_z': min_z,
            'size_x': size_x, 'size_y': size_y, 'size_z': size_z,
            'diameter': float(diameter)
        }

    print(yaml.dump(models_info))

    # Save the calculated info about the object models.
    with open(models_info_path, 'w') as f:
        yaml.dump(models_info, f)
生成info.yml的同时生成scene_camera.json,后面生成gt.yml时会用到。
import numpy as np
import os
import json
import yaml
# Inputs and outputs of this step, relative to the working directory.
pose = 'transforms'  # folder whose file names carry the frame ids
savepath = 'scene_camera.json'
s2 = 'info.yml'
camepath = 'intrinsics.json'
obj_id = 1

# Camera intrinsics stored as JSON with keys fx, fy, ppx, ppy, depth_scale.
with open(camepath, 'r') as f:
    loadData = json.load(f)

# Flattened row-major 3x3 matrix: [fx 0 ppx; 0 fy ppy; 0 0 1].
data = np.zeros(9)
data[0] = float(loadData['fx'])
data[2] = float(loadData['ppx'])
data[4] = float(loadData['fy'])
data[5] = float(loadData['ppy'])
data[8] = 1.0
depth_scale = float(loadData['depth_scale'])

# One identical camera record per frame found in the `pose` folder.
scene_gt_info = {}
for name in os.listdir(pose):
    try:
        i = int(name.split(".")[0])
    except ValueError:
        # Stray entries (hidden files, sub-folders, ...) carry no frame id.
        print('Warning: Skipping file without a numeric frame id: ' + name)
        continue
    # A fresh list per frame keeps yaml.dump from emitting anchors/aliases,
    # which it does when the same list object is referenced repeatedly.
    scene_gt_info[i] = {
        'cam_K': [float(e) for e in data],
        'depth_scale': depth_scale,
    }

# scene_camera.json: one frame per line, ordered by frame id.
entries = [
    ' \"{}\": {}'.format(k, json.dumps(v, sort_keys=True))
    for k, v in sorted(scene_gt_info.items(), key=lambda x: x[0])
]
with open(savepath, 'w') as f:
    f.write('{\n')
    f.write(',\n'.join(entries))
    if entries:
        f.write('\n')
    f.write('}')

# info.yml holds the same per-frame records in YAML form.
with open(s2, 'w') as f:
    yaml.dump(scene_gt_info, f)
先生成scene_gt.json,后面会用
import numpy as np
import os
import json
# Inputs and outputs of this step, relative to the working directory.
pose = r'transforms'  # folder with one pose matrix per frame: <frame id>.npy
savepath = r'scene_gt.json'
obj_id = 1

scene_gt_info = {}
for name in os.listdir(pose):
    try:
        i = int(name.split(".")[0])
    except ValueError:
        # Stray entries (hidden files, sub-folders, ...) carry no frame id.
        print('Warning: Skipping file without a numeric frame id: ' + name)
        continue

    loadData = np.load(os.path.join(pose, name))
    # Upper-left 3x3 block is the rotation, last column the translation.
    r = loadData[0:3, 0:3].flatten()
    t = loadData[0:3, -1].flatten()
    # NOTE(review): the translation is written in whatever unit the
    # transforms use; confirm it matches the unit the gt consumer expects.
    scene_gt_info[i] = [{
        'cam_R_m2c': [float(e) for e in r],
        'cam_t_m2c': [float(e) for e in t],
        'obj_id': int(obj_id)
    }]

# scene_gt.json: one frame per line, ordered by frame id.
entries = [
    ' \"{}\": {}'.format(k, json.dumps(v, sort_keys=True))
    for k, v in sorted(scene_gt_info.items(), key=lambda x: x[0])
]
with open(savepath, 'w') as f:
    f.write('{\n')
    f.write(',\n'.join(entries))
    if entries:
        f.write('\n')
    f.write('}')
参考scripts/calc_gt_info.py,需要使用from bop_toolkit_lib import renderer_vispy
import os
import numpy as np
import yaml
import json
import imageio
from bop_toolkit_lib import renderer_vispy
def _camera_as_numpy(camera):
if 'cam_K' in camera.keys():
camera['cam_K'] = \
np.array(camera['cam_K'], np.float64).reshape((3, 3))
if 'cam_R_w2c' in camera.keys():
camera['cam_R_w2c'] = \
np.array(camera['cam_R_w2c'], np.float64).reshape((3, 3))
if 'cam_t_w2c' in camera.keys():
camera['cam_t_w2c'] = \
np.array(camera['cam_t_w2c'], np.float64).reshape((3, 1))
return camera
def load_json(path, keys_to_int=False):
    """Loads content of a JSON file.

    :param path: Path to the JSON file.
    :param keys_to_int: If True, object keys that consist of digits
      (optionally preceded by '-') are converted to integers, at every
      nesting level.
    :return: Content of the loaded JSON file.
    """
    def _as_int_keys(obj):
        # Applied by the decoder to every JSON object it builds.
        converted = {}
        for key, value in obj.items():
            if key.lstrip('-').isdigit():
                key = int(key)
            converted[key] = value
        return converted

    hook = _as_int_keys if keys_to_int else None
    with open(path, 'r') as f:
        return json.load(f, object_hook=hook)
def load_scene_camera(path):
    """Loads per-image camera parameters from a JSON file.

    Image ids are converted to integers and each camera record is passed
    through _camera_as_numpy().

    :param path: Path to the JSON file.
    :return: Dictionary mapping image id to its camera record.
    """
    scene_camera = load_json(path, keys_to_int=True)
    for im_id, camera in scene_camera.items():
        scene_camera[im_id] = _camera_as_numpy(camera)
    return scene_camera
def load_scene_gt(path):
    """Loads content of a JSON file with ground-truth annotations.

    In every annotation, 'cam_R_m2c' is converted to a 3x3 and 'cam_t_m2c'
    to a 3x1 float64 array (when present).

    See docs/bop_datasets_format.md for details.

    :param path: Path to the JSON file.
    :return: Dictionary with the loaded content.
    """
    array_shapes = (('cam_R_m2c', (3, 3)), ('cam_t_m2c', (3, 1)))

    scene_gt = load_json(path, keys_to_int=True)
    for im_gt in scene_gt.values():
        for gt in im_gt:
            for key, shape in array_shapes:
                if key in gt:
                    gt[key] = np.array(gt[key], np.float64).reshape(shape)
    return scene_gt
def depth_im_to_dist_im_fast(depth_im, K):
    """Converts a depth image to a distance image.

    :param depth_im: hxw ndarray with the input depth image, where
      depth_im[y, x] is the Z coordinate of the 3D point [X, Y, Z] that
      projects to pixel [x, y], or 0 if there is no such 3D point.
    :param K: 3x3 ndarray with an intrinsic camera matrix.
    :return: hxw ndarray with the distance image, where dist_im[y, x] is the
      distance from the camera center to the 3D point [X, Y, Z] that projects
      to pixel [x, y], or 0 if there is no such 3D point.
    """
    height, width = depth_im.shape[0], depth_im.shape[1]
    cols, rows = np.meshgrid(np.arange(width), np.arange(height))

    # Per-pixel X/Z and Y/Z ratios given by the intrinsics.
    ray_x = (cols - K[0, 2]) / np.float64(K[0, 0])
    ray_y = (rows - K[1, 2]) / np.float64(K[1, 1])

    x_sq = np.multiply(ray_x, depth_im) ** 2
    y_sq = np.multiply(ray_y, depth_im) ** 2
    z_sq = depth_im.astype(np.float64) ** 2
    return np.sqrt(x_sq + y_sq + z_sq)
def estimate_visib_mask(d_test, d_model, delta, visib_mode='bop19'):
    """Estimates a mask of the visible object surface.

    :param d_test: Distance image of a scene in which the visibility is
      estimated.
    :param d_model: Rendered distance image of the object model.
    :param delta: Tolerance used in the visibility test.
    :param visib_mode: Visibility mode:
      1) 'bop18' - Object is considered NOT VISIBLE at pixels with missing
         depth.
      2) 'bop19' - Object is considered VISIBLE at pixels with missing depth.
         This allows the visibility test to be used on shiny objects, which
         are typically not captured well by depth sensors, at the cost of
         some invisible parts possibly being counted as visible.
    :return: Visibility mask.
    :raises ValueError: If `visib_mode` is neither 'bop18' nor 'bop19'.
    """
    assert (d_test.shape == d_model.shape)

    if visib_mode not in ('bop18', 'bop19'):
        raise ValueError('Unknown visibility mode.')

    # A model pixel passes when it is not further than `delta` behind the
    # measured surface.
    d_diff = d_model.astype(np.float32) - d_test.astype(np.float32)
    within_tolerance = d_diff <= delta
    model_present = d_model > 0

    if visib_mode == 'bop18':
        both_valid = np.logical_and(d_test > 0, model_present)
        return np.logical_and(within_tolerance, both_valid)

    # 'bop19': missing measurements (d_test == 0) count as visible.
    passes = np.logical_or(within_tolerance, d_test == 0)
    return np.logical_and(passes, model_present)
def clip_pt_to_im(pt, im_size):
    """Clips a 2D point to the image frame.

    :param pt: 2D point (x, y).
    :param im_size: Image size (width, height).
    :return: Clipped 2D point [x, y] with 0 <= x <= width - 1 and
      0 <= y <= height - 1.
    """
    x_limit = im_size[0] - 1
    y_limit = im_size[1] - 1
    x = min(max(pt[0], 0), x_limit)
    y = min(max(pt[1], 0), y_limit)
    return [x, y]
def calc_2d_bbox(xs, ys, im_size=None, clip=False):
    """Calculates 2D bounding box of the given set of 2D points.

    :param xs: 1D ndarray with x-coordinates of 2D points.
    :param ys: 1D ndarray with y-coordinates of 2D points.
    :param im_size: Image size (width, height) (used for optional clipping).
    :param clip: Whether to clip the bounding box (default == False).
    :return: 2D bounding box [x, y, w, h], where (x, y) is the top-left
      corner and (w, h) is width and height of the bounding box.
    """
    top_left = [xs.min(), ys.min()]
    bottom_right = [xs.max(), ys.max()]
    if clip:
        assert (im_size is not None)
        top_left = clip_pt_to_im(top_left, im_size)
        bottom_right = clip_pt_to_im(bottom_right, im_size)

    x0, y0 = top_left
    x1, y1 = bottom_right
    return [x0, y0, x1 - x0, y1 - y0]
# PARAMETERS.
obj_id = 1
datasets_path = r"LINEMOD/duck2"
depth_tpath = datasets_path + "/depth"
model_fpath = r"LINEMOD/duck2/duck2.ply"
renderer_type = 'vispy'
scene_camera_tpath = r"LINEMOD/duck2/scene_camera.json"
# Relative like every other path here; a leading "/" would make it absolute.
scene_gt_tpath = r"LINEMOD/duck2/scene_gt.json"
scene_gt_info_path = r"LINEMOD/duck2/gt.yml"
# NOTE(review): delta is compared against differences between the measured
# and the rendered distance image, so it must be expressed in the unit
# shared by the depth images and cam_t_m2c - confirm.
delta = 1
scene_ids = list(range(1, 16))
rgb_ext = '.jpg'
gray_ext = '.png'
depth_ext = '.png'
im_width, im_height = [640, 480]

# The renderer has a larger canvas for generation of masks of truncated objects.
ren_width, ren_height = 3 * im_width, 3 * im_height
ren_cx_offset, ren_cy_offset = im_width, im_height
ren = renderer_vispy.RendererVispy(ren_width, ren_height, mode='depth')
ren.add_object(obj_id, model_fpath)

# Load scene info and ground-truth poses.
scene_camera = load_scene_camera(scene_camera_tpath)
scene_gt = load_scene_gt(scene_gt_tpath)

scene_gt_info = {}
for im_id in sorted(scene_gt.keys()):

    # Load depth image.
    depth_fpath = depth_tpath + "/" + str(im_id) + depth_ext
    depth = imageio.imread(depth_fpath).astype(np.float32)

    K = scene_camera[im_id]['cam_K']
    fx, fy, cx, cy = K[0, 0], K[1, 1], K[0, 2], K[1, 2]
    im_size = (depth.shape[1], depth.shape[0])

    # The measured distance image depends only on the frame, so it is
    # computed once instead of once per annotation.
    dist_im = depth_im_to_dist_im_fast(depth, K)

    scene_gt_info[im_id] = []
    for gt in scene_gt[im_id]:
        r = gt['cam_R_m2c'].flatten()
        t = gt['cam_t_m2c'].flatten()

        # Render depth image of the object model in the ground-truth pose.
        # The principal point is shifted so the original image sits in the
        # middle of the enlarged canvas.
        depth_gt_large = ren.render_object(
            gt['obj_id'], gt['cam_R_m2c'], gt['cam_t_m2c'],
            fx, fy, cx + ren_cx_offset, cy + ren_cy_offset)['depth']
        depth_gt = depth_gt_large[
            ren_cy_offset:(ren_cy_offset + im_height),
            ren_cx_offset:(ren_cx_offset + im_width)]

        # Convert the rendered depth image to a distance image.
        dist_gt = depth_im_to_dist_im_fast(depth_gt, K)

        # Estimation of the visibility mask.
        visib_gt = estimate_visib_mask(
            dist_im, dist_gt, delta, visib_mode='bop19')

        # Mask of the object in the GT pose on the enlarged canvas.
        obj_mask_gt_large = depth_gt_large > 0

        # Number of pixels in the visible part of the object silhouette.
        px_count_visib = visib_gt.sum()

        # Bounding box of the whole object silhouette
        # (including the truncated part).
        bbox = [-1, -1, -1, -1]
        if px_count_visib > 0:
            ys, xs = obj_mask_gt_large.nonzero()
            # Back from canvas coordinates to image coordinates.
            ys -= ren_cy_offset
            xs -= ren_cx_offset
            bbox = calc_2d_bbox(xs, ys, im_size)

        # Store the calculated info.
        # NOTE(review): LINEMOD-style gt.yml files may name the box field
        # 'obj_bb' rather than 'bbox_obj' - confirm what the consumer reads.
        scene_gt_info[im_id].append({
            'cam_R_m2c': [float(e) for e in r],
            'cam_t_m2c': [float(e) for e in t],
            'bbox_obj': [int(e) for e in bbox],
            'obj_id': int(gt['obj_id'])
        })

# Save the info for the current scene.
out_dir = os.path.dirname(scene_gt_info_path)
if out_dir:
    os.makedirs(out_dir, exist_ok=True)
with open(scene_gt_info_path, 'w') as f:
    yaml.dump(scene_gt_info, f)
我发现我好像把事情弄复杂了:其实未知的信息只有bbox_obj,它就是目标的矩形包围框。上面的代码是从渲染出来的depth里计算它的,而ObjectDatasetTools里面的get_BBs.py是从mask里计算的,我觉得两种方式算出来的结果应该是一样的。其他字段都是已经知道的,拼在一起就行。