Paper Link: Compressing Volumetric Radiance Fields to 1 MB
Code Link: VQRF
python run_final.py --config ./configs/debug/debug_ship.py --eval_ssim --eval_lpips_vgg --eval_lpips_alex --render_test --fully_vq --render_fine --importance_prune 0.999 --importance_include 0.6
# load setup
parser = config_parser()
args = parser.parse_args()
cfg = mmcv.Config.fromfile(args.config)
# init environment
if torch.cuda.is_available():
    torch.set_default_tensor_type('torch.cuda.FloatTensor')
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
seed_everything()
# load images / poses / camera settings / data split
data_dict = load_everything(args=args, cfg=cfg)
# export scene bbox and camera poses in 3d for debugging and visualization
if args.export_bbox_and_cams_only:
    print('Export bbox and cameras...')
    xyz_min, xyz_max = compute_bbox_by_cam_frustrm(args=args, cfg=cfg, **data_dict)
    poses, HW, Ks, i_train = data_dict['poses'], data_dict['HW'], data_dict['Ks'], data_dict['i_train']
    near, far = data_dict['near'], data_dict['far']
    if data_dict['near_clip'] is not None:
        near = data_dict['near_clip']
    cam_lst = []
    for c2w, (H, W), K in zip(poses[i_train], HW[i_train], Ks[i_train]):
        rays_o, rays_d, viewdirs = dvgo.get_rays_of_a_view(
            H, W, K, c2w, cfg.data.ndc, inverse_y=cfg.data.inverse_y,
            flip_x=cfg.data.flip_x, flip_y=cfg.data.flip_y,)
        cam_o = rays_o[0,0].cpu().numpy()
        cam_d = rays_d[[0,0,-1,-1],[0,-1,0,-1]].cpu().numpy()
        cam_lst.append(np.array([cam_o, *(cam_o+cam_d*max(near, far*0.05))]))
    np.savez_compressed(args.export_bbox_and_cams_only,
        xyz_min=xyz_min.cpu().numpy(), xyz_max=xyz_max.cpu().numpy(),
        cam_lst=np.array(cam_lst))
    print('done')
    sys.exit()
if args.export_coarse_only:
    print('Export coarse visualization...')
    with torch.no_grad():
        ckpt_path = os.path.join(cfg.basedir, cfg.expname, 'coarse_last.tar')
        model = utils.load_model(dvgo.DirectVoxGO, ckpt_path).to(device)
        alpha = model.activate_density(model.density.get_dense_grid()).squeeze().cpu().numpy()
        rgb = torch.sigmoid(model.k0.get_dense_grid()).squeeze().permute(1,2,3,0).cpu().numpy()
    np.savez_compressed(args.export_coarse_only, alpha=alpha, rgb=rgb)
    print('done')
    sys.exit()
# train
if not args.render_only:
    train(args, cfg, data_dict)
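Both export branches above just dump plain .npz files, so they can be sanity-checked with NumPy before moving on. A minimal sketch (the file names are whatever was passed to --export_bbox_and_cams_only / --export_coarse_only, shown here as placeholders):

import numpy as np

# placeholder paths; substitute the arguments used on the command line
cams = np.load('cam_bbox.npz')
print(cams['xyz_min'], cams['xyz_max'])   # scene bounding box corners
print(cams['cam_lst'].shape)              # (num_train_views, 5, 3): camera origin + 4 frustum corner points

coarse = np.load('coarse_geo.npz')
print(coarse['alpha'].shape, coarse['rgb'].shape)   # dense coarse alpha grid and color grid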
Loaded blender (400, 800, 800, 4) torch.Size([160, 4, 4]) [800, 800, 1111.1110311937682] /MULT/datasets/nerf-data/nerf_synthetic/ship
train: start
def train(args, cfg, data_dict):
    # init
    print('train: start')
    eps_time = time.time()
    os.makedirs(os.path.join(cfg.basedir, cfg.expname), exist_ok=True)
    with open(os.path.join(cfg.basedir, cfg.expname, 'args.txt'), 'w') as file:
        for arg in sorted(vars(args)):
            attr = getattr(args, arg)
            file.write('{} = {}\n'.format(arg, attr))
    cfg.dump(os.path.join(cfg.basedir, cfg.expname, 'config.py'))
    # coarse geometry searching (only works for inward bounded scenes)
    # ....
    # fine detail reconstruction
    # ....
    # vq finetune
    # ....
    eps_time = time.time() - eps_time
    eps_time_str = f'{eps_time//3600:02.0f}:{eps_time//60%60:02.0f}:{eps_time%60:02.0f}'
    print('train: finish (eps time', eps_time_str, ')')
# coarse geometry searching (only works for inward bounded scenes)
eps_coarse = time.time()
xyz_min_coarse, xyz_max_coarse = compute_bbox_by_cam_frustrm(args=args, cfg=cfg, **data_dict)
if not os.path.exists(os.path.join(cfg.basedir, cfg.expname, f'coarse_last.tar')):
    if cfg.coarse_train.N_iters > 0:
        scene_rep_reconstruction(
            args=args, cfg=cfg,
            cfg_model=cfg.coarse_model_and_render, cfg_train=cfg.coarse_train,
            xyz_min=xyz_min_coarse, xyz_max=xyz_max_coarse,
            data_dict=data_dict, stage='coarse')
        eps_coarse = time.time() - eps_coarse
        eps_time_str = f'{eps_coarse//3600:02.0f}:{eps_coarse//60%60:02.0f}:{eps_coarse%60:02.0f}'
        print('train: coarse geometry searching in', eps_time_str)
        coarse_ckpt_path = os.path.join(cfg.basedir, cfg.expname, f'coarse_last.tar')
    else:
        print('train: skip coarse geometry searching')
        coarse_ckpt_path = None
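compute_bbox_by_cam_frustrm is not reproduced here; conceptually it just takes the union of all training camera frustums, i.e. the per-axis min/max over the near- and far-plane endpoints of every training ray. A rough sketch of that idea (not the repo's exact implementation):

import torch

def bbox_from_frustums(rays_o_list, rays_d_list, near, far):
    """Axis-aligned bbox covering the near/far endpoints of all training rays.
    rays_o_list / rays_d_list: per-view tensors of shape (H, W, 3)."""
    xyz_min = torch.full((3,), float('inf'))
    xyz_max = torch.full((3,), float('-inf'))
    for rays_o, rays_d in zip(rays_o_list, rays_d_list):
        for t in (near, far):
            pts = (rays_o + rays_d * t).reshape(-1, 3)
            xyz_min = torch.minimum(xyz_min, pts.amin(0))
            xyz_max = torch.maximum(xyz_max, pts.amax(0))
    return xyz_min, xyz_max

For the ship scene this produces the roughly symmetric box printed in the log below.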
compute_bbox_by_cam_frustrm: start
compute_bbox_by_cam_frustrm: xyz_min tensor([-3.0168, -3.0026, -2.5941])
compute_bbox_by_cam_frustrm: xyz_max tensor([3.0162, 3.0171, 2.3367])
compute_bbox_by_cam_frustrm: finish
scene_rep_reconstruction (coarse): train from scratch
scene_rep_reconstruction (coarse): use dense voxel grid
dvgo: set density bias shift to tensor([-13.8155], device='cpu')
dvgo: voxel_size tensor(0.0559)
dvgo: world_size tensor([107, 107, 88])
dvgo: voxel_size_base tensor(0.0559)
dvgo: voxel_size_ratio tensor(1.)
initialization finished
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet not exist
get_training_rays: start
get_training_rays: finish (eps time: 0.03049182891845703 sec)
dvgo: voxel_count_views start
dvgo: voxel_count_views finish (eps time: 9.162418603897095 sec)
scene_rep_reconstruction (coarse): iter 500 / Loss: 0.222001418 / PSNR: 5.29 / Eps: 00:00:07
scene_rep_reconstruction (coarse): iter 1000 / Loss: 0.008908743 / PSNR: 19.80 / Eps: 00:00:15
scene_rep_reconstruction (coarse): iter 1500 / Loss: 0.007740345 / PSNR: 22.33 / Eps: 00:00:20
scene_rep_reconstruction (coarse): iter 2000 / Loss: 0.007822300 / PSNR: 22.66 / Eps: 00:00:27
scene_rep_reconstruction (coarse): iter 2500 / Loss: 0.007116472 / PSNR: 22.82 / Eps: 00:00:34
scene_rep_reconstruction (coarse): iter 3000 / Loss: 0.007190717 / PSNR: 22.88 / Eps: 00:00:41
scene_rep_reconstruction (coarse): iter 3500 / Loss: 0.006909440 / PSNR: 22.94 / Eps: 00:00:48
scene_rep_reconstruction (coarse): iter 4000 / Loss: 0.007247489 / PSNR: 22.99 / Eps: 00:00:54
scene_rep_reconstruction (coarse): iter 4500 / Loss: 0.006814179 / PSNR: 23.03 / Eps: 00:01:01
scene_rep_reconstruction (coarse): iter 5000 / Loss: 0.006830918 / PSNR: 23.06 / Eps: 00:01:08
100%|████████████████████████████████████████| 5000/5000 [01:07<00:00, 73.73it/s]
scene_rep_reconstruction (coarse): saved checkpoints at ./logs/debug/debug_ship/coarse_last.tar
train: coarse geometry searching in 00:01:18
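The "set density bias shift" values in the log come from DVGO's soft-plus density parameterization: the density grid starts at zero and a constant shift is chosen so that every voxel's initial alpha equals a small alpha_init (1e-6 in the coarse config, 1e-2 in the fine config, per the DVGO defaults). A quick check of those numbers, assuming the standard DVGO formula:

import numpy as np

def act_shift(alpha_init):
    # want 1 - exp(-softplus(0 + shift)) == alpha_init for a unit step,
    # hence shift = log(1 / (1 - alpha_init) - 1)
    return np.log(1 / (1 - alpha_init) - 1)

print(act_shift(1e-6))  # ≈ -13.8155 (coarse stage, logged above)
print(act_shift(1e-2))  # ≈ -4.5951  (fine stage, logged below)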
# fine detail reconstruction
eps_fine = time.time()
if cfg.coarse_train.N_iters == 0:
    xyz_min_fine, xyz_max_fine = xyz_min_coarse.clone(), xyz_max_coarse.clone()
else:
    xyz_min_fine, xyz_max_fine = compute_bbox_by_coarse_geo(
        model_class=dvgo.DirectVoxGO, model_path=coarse_ckpt_path,
        thres=cfg.fine_model_and_render.bbox_thres)
if not os.path.exists(os.path.join(cfg.basedir, cfg.expname, f'fine_last.tar')):
    scene_rep_reconstruction(
        args=args, cfg=cfg,
        cfg_model=cfg.fine_model_and_render, cfg_train=cfg.fine_train,
        xyz_min=xyz_min_fine, xyz_max=xyz_max_fine,
        data_dict=data_dict, stage='fine',
        coarse_ckpt_path=coarse_ckpt_path)
eps_fine = time.time() - eps_fine
eps_time_str = f'{eps_fine//3600:02.0f}:{eps_fine//60%60:02.0f}:{eps_fine%60:02.0f}'
print('train: fine detail reconstruction in', eps_time_str)
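The fine-stage grid resolution seen in the log below follows from the voxel budget and the bbox found by the coarse stage: DVGO picks a cubic voxel whose volume splits the (slightly enlarged) bounding box into num_voxels cells. A hedged reconstruction of that convention using the ship bbox logged below (num_voxels = 160**3 and world_bound_scale = 1.05 are assumed defaults):

import torch

xyz_min = torch.tensor([-1.3663, -1.2989, -0.7238])
xyz_max = torch.tensor([ 1.3657,  1.3134,  0.6931])

# enlarge the box by world_bound_scale = 1.05 around its center
center = (xyz_min + xyz_max) / 2
half = (xyz_max - xyz_min) / 2 * 1.05
xyz_min, xyz_max = center - half, center + half

num_voxels = 160**3
voxel_size = ((xyz_max - xyz_min).prod() / num_voxels).pow(1/3)
world_size = ((xyz_max - xyz_min) / voxel_size).long()
print(voxel_size, world_size)   # ≈ 0.0142 and [202, 193, 104], matching the final fine grid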
compute_bbox_by_coarse_geo: start
dvgo: set density bias shift to tensor([-13.8155], device='cpu')
dvgo: voxel_size tensor(0.0559)
dvgo: world_size tensor([107, 107, 88])
dvgo: voxel_size_base tensor(0.0559)
dvgo: voxel_size_ratio tensor(1.)
initialization finished
compute_bbox_by_coarse_geo: xyz_min tensor([-1.3663, -1.2989, -0.7238])
compute_bbox_by_coarse_geo: xyz_max tensor([1.3657, 1.3134, 0.6931])
compute_bbox_by_coarse_geo: finish (eps time: 0.03095865249633789 secs)
scene_rep_reconstruction (fine): train from scratch
scene_rep_reconstruction (fine): use dense voxel grid
dvgo: set density bias shift to tensor([-4.5951], device='cpu')
dvgo: voxel_size tensor(0.0358)
dvgo: world_size tensor([80, 76, 41])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(2.5198)
dvgo: feature voxel grid DenseGrid(
channels=12, world_size=[80, 76, 41]
(trilinear_interpolation): TrilinearIntepolation()
)
dvgo: mlp Sequential(
(0): Linear(in_features=39, out_features=128, bias=True)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): ReLU(inplace=True)
)
(3): Linear(in_features=128, out_features=3, bias=True)
)
initialization finished
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet lr 0.001
get_training_rays_in_maskcache_sampling: start
get_training_rays_in_maskcache_sampling: ratio tensor(0.6117)
get_training_rays_in_maskcache_sampling: finish (eps time: 4.257694721221924 sec)
scene_rep_reconstruction (fine): iter 500 / Loss: 0.005494572 / PSNR: 20.61 / Eps: 00:00:11
5%|████████▊ | 999/20000 [00:21<05:51, 54.12it/s]dvgo: scale_volume_grid start
dvgo: voxel_size tensor(0.0284)
dvgo: world_size tensor([101, 96, 52])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(2.)
dvgo: scale_volume_grid scale world_size from [80, 76, 41] to [101, 96, 52]
dvgo: scale_volume_grid finish
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet lr 0.001
scene_rep_reconstruction (fine): iter 1000 / Loss: 0.010388801 / PSNR: 23.40 / Eps: 00:00:22
scene_rep_reconstruction (fine): iter 1500 / Loss: 0.003825413 / PSNR: 23.80 / Eps: 00:00:43
10%|█████████████████▎ | 1992/20000 [00:56<02:30, 119.98it/s]dvgo: scale_volume_grid start
dvgo: voxel_size tensor(0.0225)
dvgo: world_size tensor([127, 121, 66])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(1.5874)
dvgo: scale_volume_grid scale world_size from [101, 96, 52] to [127, 121, 66]
dvgo: scale_volume_grid finish
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet lr 0.001
scene_rep_reconstruction (fine): iter 2000 / Loss: 0.008117034 / PSNR: 24.49 / Eps: 00:00:56
scene_rep_reconstruction (fine): iter 2500 / Loss: 0.003130975 / PSNR: 24.65 / Eps: 00:01:10
15%|██████████████████████████▏ | 2999/20000 [01:24<08:18, 34.09it/s]dvgo: scale_volume_grid start
dvgo: voxel_size tensor(0.0179)
dvgo: world_size tensor([160, 153, 83])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(1.2599)
dvgo: scale_volume_grid scale world_size from [127, 121, 66] to [160, 153, 83]
dvgo: scale_volume_grid finish
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet lr 0.001
scene_rep_reconstruction (fine): iter 3000 / Loss: 0.006550554 / PSNR: 25.32 / Eps: 00:01:25
scene_rep_reconstruction (fine): iter 3500 / Loss: 0.002785042 / PSNR: 25.26 / Eps: 00:01:41
20%|██████████████████████████████████▉ | 3998/20000 [01:56<07:52, 33.87it/s]dvgo: scale_volume_grid start
dvgo: voxel_size tensor(0.0142)
dvgo: world_size tensor([202, 193, 104])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(1.)
dvgo: scale_volume_grid scale world_size from [160, 153, 83] to [202, 193, 104]
dvgo: scale_volume_grid finish
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet lr 0.001
scene_rep_reconstruction (fine): iter 4000 / Loss: 0.005459503 / PSNR: 25.94 / Eps: 00:01:57
scene_rep_reconstruction (fine): iter 4500 / Loss: 0.002446621 / PSNR: 25.89 / Eps: 00:02:14
scene_rep_reconstruction (fine): iter 5000 / Loss: 0.002263900 / PSNR: 26.69 / Eps: 00:02:32
scene_rep_reconstruction (fine): iter 5500 / Loss: 0.002342039 / PSNR: 26.97 / Eps: 00:02:48
scene_rep_reconstruction (fine): iter 6000 / Loss: 0.002092283 / PSNR: 27.17 / Eps: 00:03:05
scene_rep_reconstruction (fine): iter 6500 / Loss: 0.002112625 / PSNR: 27.32 / Eps: 00:03:21
scene_rep_reconstruction (fine): iter 7000 / Loss: 0.001854514 / PSNR: 27.46 / Eps: 00:03:37
scene_rep_reconstruction (fine): iter 7500 / Loss: 0.001792918 / PSNR: 27.59 / Eps: 00:03:49
scene_rep_reconstruction (fine): iter 8000 / Loss: 0.001842595 / PSNR: 27.66 / Eps: 00:04:05
scene_rep_reconstruction (fine): iter 8500 / Loss: 0.001811145 / PSNR: 27.78 / Eps: 00:04:21
scene_rep_reconstruction (fine): iter 9000 / Loss: 0.001662094 / PSNR: 27.85 / Eps: 00:04:37
scene_rep_reconstruction (fine): iter 9500 / Loss: 0.001807427 / PSNR: 27.90 / Eps: 00:04:53
scene_rep_reconstruction (fine): iter 10000 / Loss: 0.001646313 / PSNR: 28.18 / Eps: 00:05:11
scene_rep_reconstruction (fine): iter 10500 / Loss: 0.001576317 / PSNR: 28.19 / Eps: 00:05:27
scene_rep_reconstruction (fine): iter 11000 / Loss: 0.001669034 / PSNR: 28.23 / Eps: 00:05:42
scene_rep_reconstruction (fine): iter 11500 / Loss: 0.001828440 / PSNR: 28.25 / Eps: 00:05:58
scene_rep_reconstruction (fine): iter 12000 / Loss: 0.001539308 / PSNR: 28.29 / Eps: 00:06:14
scene_rep_reconstruction (fine): iter 12500 / Loss: 0.001411486 / PSNR: 28.33 / Eps: 00:06:26
scene_rep_reconstruction (fine): iter 13000 / Loss: 0.001546775 / PSNR: 28.38 / Eps: 00:06:42
scene_rep_reconstruction (fine): iter 13500 / Loss: 0.001718764 / PSNR: 28.40 / Eps: 00:06:58
scene_rep_reconstruction (fine): iter 14000 / Loss: 0.001626032 / PSNR: 28.43 / Eps: 00:07:14
scene_rep_reconstruction (fine): iter 14500 / Loss: 0.001404640 / PSNR: 28.54 / Eps: 00:07:21
scene_rep_reconstruction (fine): iter 15000 / Loss: 0.001561022 / PSNR: 28.67 / Eps: 00:07:27
scene_rep_reconstruction (fine): iter 15500 / Loss: 0.001633905 / PSNR: 28.67 / Eps: 00:07:32
scene_rep_reconstruction (fine): iter 16000 / Loss: 0.001533279 / PSNR: 28.68 / Eps: 00:07:39
scene_rep_reconstruction (fine): iter 16500 / Loss: 0.001540987 / PSNR: 28.66 / Eps: 00:07:45
scene_rep_reconstruction (fine): iter 17000 / Loss: 0.001619225 / PSNR: 28.68 / Eps: 00:07:52
scene_rep_reconstruction (fine): iter 17500 / Loss: 0.001415056 / PSNR: 28.72 / Eps: 00:07:58
scene_rep_reconstruction (fine): iter 18000 / Loss: 0.001404170 / PSNR: 28.74 / Eps: 00:08:04
scene_rep_reconstruction (fine): iter 18500 / Loss: 0.001701137 / PSNR: 28.75 / Eps: 00:08:11
scene_rep_reconstruction (fine): iter 19000 / Loss: 0.001484122 / PSNR: 28.75 / Eps: 00:08:17
scene_rep_reconstruction (fine): iter 19500 / Loss: 0.001484496 / PSNR: 28.90 / Eps: 00:08:26
scene_rep_reconstruction (fine): iter 20000 / Loss: 0.001495765 / PSNR: 28.94 / Eps: 00:08:32
100%|█████████████████████████████████████████| 20000/20000 [08:32<00:00, 39.06it/s]
scene_rep_reconstruction (fine): saved checkpoints at ./logs/debug/debug_ship/fine_last.tar
train: fine detail reconstruction in 00:08:38
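The scale_volume_grid messages at iterations 1000/2000/3000/4000 are DVGO's progressive scaling: training starts at a reduced grid resolution and roughly doubles the number of voxels at each listed iteration, which is why voxel_size shrinks by about 2^(1/3) each time (0.0358 → 0.0284 → 0.0225 → 0.0179 → 0.0142). Upscaling a dense grid is essentially trilinear resampling; a minimal sketch, assuming a plain (1, C, D, H, W) tensor:

import torch
import torch.nn.functional as F

def scale_volume_grid(grid, new_world_size):
    """Resample a dense (1, C, D, H, W) voxel grid to a new resolution.
    Conceptual sketch only; the repo also rebuilds the optimizer afterwards (see the log above)."""
    return F.interpolate(grid, size=tuple(new_world_size), mode='trilinear', align_corners=True)

k0 = torch.randn(1, 12, 80, 76, 41)            # feature grid at the initial fine resolution
k0_up = scale_volume_grid(k0, (101, 96, 52))   # first pg_scale step from the log
print(k0_up.shape)                             # torch.Size([1, 12, 101, 96, 52])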
vq_finetune(
    args=args, cfg=cfg,
    cfg_model=cfg.vq_model_and_render, cfg_train=cfg.vq_train,
    xyz_min=xyz_min_fine, xyz_max=xyz_max_fine,
    data_dict=data_dict, stage='vq',
    load_ckpt_path=os.path.join(cfg.basedir, cfg.expname, f'fine_last.tar'))
eps_fine = time.time() - eps_fine
eps_time_str = f'{eps_fine//3600:02.0f}:{eps_fine//60%60:02.0f}:{eps_fine%60:02.0f}'
print('train: fine VQ finetune reconstruction in', eps_time_str)
eps_time = time.time() - eps_time
eps_time_str = f'{eps_time//3600:02.0f}:{eps_time//60%60:02.0f}:{eps_time%60:02.0f}'
print('train: finish (eps time', eps_time_str, ')')
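This is where the two command-line thresholds come into play. As I read the flags and the log below, each voxel carries an importance score (its accumulated blending weight over the training rays, loaded from importance.pth later on), and the sorted scores are split three ways: voxels outside the top importance_prune = 0.999 importance mass are pruned, voxels inside the top importance_include = 0.6 mass are kept un-quantized, and the band in between is vector-quantized against a shared codebook. A hedged sketch of that split (not the repo's exact fully_vq_reformat code):

import torch

def cdf_three_split(importance, importance_prune=0.999, importance_include=0.6):
    """Return boolean masks (prune, keep_raw, vq) over the flattened voxel importance."""
    flat = importance.flatten()
    order = torch.argsort(flat, descending=True)
    cdf = torch.cumsum(flat[order], dim=0) / flat.sum()

    keep_n = int((cdf <= importance_prune).sum())    # voxels covering 99.9% of the importance
    raw_n = int((cdf <= importance_include).sum())   # top voxels covering 60% of the importance

    keep_raw = torch.zeros_like(flat, dtype=torch.bool)
    keep_raw[order[:raw_n]] = True
    kept = torch.zeros_like(keep_raw)
    kept[order[:keep_n]] = True

    prune = ~kept            # dropped entirely
    vq = kept & ~keep_raw    # replaced by codebook indices
    return prune.view_as(importance), keep_raw.view_as(importance), vq.view_as(importance)

With the ship grid this reproduces the ratios printed below: about 15% of the voxels survive pruning and only about 2% stay un-quantized.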
scene_rep_reconstruction (vq fintune): reload from ./logs/debug/debug_ship/fine_last.tar
dvgo: set density bias shift to tensor([-4.5951], device='cpu')
dvgo: voxel_size tensor(0.0142)
dvgo: world_size tensor([202, 193, 104])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(1.)
dvgo: feature voxel grid DenseGrid(
channels=12, world_size=[202, 193, 104]
(trilinear_interpolation): TrilinearIntepolation()
)
dvgo: mlp Sequential(
(0): Linear(in_features=39, out_features=128, bias=True)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): ReLU(inplace=True)
)
(3): Linear(in_features=128, out_features=3, bias=True)
)
{'voxel_size_ratio': tensor(1.), 'density_factor': 1, 'mpi_depth': 128, 'nearest': False, 'pre_act_density': False, 'in_act_density': False, 'bbox_thres': 0.001, 'maskout_near_cam_vox': False, 'world_bound_scale': 1.05, 'stepsize': 0.5, 'use_vq': True, 'use_cosine_sim': False, 'codebook_size': 4096}
initialization finished
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet lr 0.001
get_training_rays_in_maskcache_sampling: start
get_training_rays_in_maskcache_sampling: ratio tensor(0.5469)
get_training_rays_in_maskcache_sampling: finish (eps time: 7.172531843185425 sec)
load exsited importance calculation
start cdf three split
15.08% of most important points contribute over 99.90% importance
2.13% of most important points contribute over 60.00% importance
100%|██████████████████████████████████████████| 1000/1000 [00:24<00:00, 41.00it/s]
start fully vector quantize
caculate vq features
start fully vector quantize
100%|██████████████████████████████████████████| 495/495 [00:01<00:00, 383.86it/s]
start cdf three split
start cdf three split
15.08% of most important points contribute over 99.90% importance
2.13% of most important points contribute over 60.00% importance
finish fully vector quantize
create_optimizer_or_freeze_model: param density lr 0.1
create_optimizer_or_freeze_model: param k0 lr 0.1
create_optimizer_or_freeze_model: param rgbnet lr 0.001
scene_rep_reconstruction (vq): iter 500 / Loss: 0.001846973 / PSNR: 27.54 / Eps: 00:00:44
scene_rep_reconstruction (vq): iter 1000 / Loss: 0.001876028 / PSNR: 27.58 / Eps: 00:00:59
scene_rep_reconstruction (vq): iter 1500 / Loss: 0.001770007 / PSNR: 27.62 / Eps: 00:01:15
scene_rep_reconstruction (vq): iter 2000 / Loss: 0.001691894 / PSNR: 27.67 / Eps: 00:01:30
scene_rep_reconstruction (vq): iter 2500 / Loss: 0.001821004 / PSNR: 27.73 / Eps: 00:01:45
scene_rep_reconstruction (vq): iter 3000 / Loss: 0.001950841 / PSNR: 27.76 / Eps: 00:02:01
scene_rep_reconstruction (vq): iter 3500 / Loss: 0.001997649 / PSNR: 27.78 / Eps: 00:02:16
scene_rep_reconstruction (vq): iter 4000 / Loss: 0.001961108 / PSNR: 27.83 / Eps: 00:02:32
scene_rep_reconstruction (vq): iter 4500 / Loss: 0.001623710 / PSNR: 27.96 / Eps: 00:02:50
scene_rep_reconstruction (vq): iter 5000 / Loss: 0.001759083 / PSNR: 28.02 / Eps: 00:03:05
scene_rep_reconstruction (vq): iter 5500 / Loss: 0.001650459 / PSNR: 28.03 / Eps: 00:03:20
scene_rep_reconstruction (vq): iter 6000 / Loss: 0.001752717 / PSNR: 28.03 / Eps: 00:03:35
scene_rep_reconstruction (vq): iter 6500 / Loss: 0.001810986 / PSNR: 28.04 / Eps: 00:03:51
scene_rep_reconstruction (vq): iter 7000 / Loss: 0.001690005 / PSNR: 28.06 / Eps: 00:04:07
scene_rep_reconstruction (vq): iter 7500 / Loss: 0.001634079 / PSNR: 28.11 / Eps: 00:04:23
scene_rep_reconstruction (vq): iter 8000 / Loss: 0.001607755 / PSNR: 28.09 / Eps: 00:04:38
scene_rep_reconstruction (vq): iter 8500 / Loss: 0.001773191 / PSNR: 28.11 / Eps: 00:04:53
scene_rep_reconstruction (vq): iter 9000 / Loss: 0.001653836 / PSNR: 28.25 / Eps: 00:05:10
scene_rep_reconstruction (vq): iter 9500 / Loss: 0.001498284 / PSNR: 28.24 / Eps: 00:05:25
100%|███████████████████████████████████████████████| 9999/9999 [05:14<00:00, 31.77it/s]
scene_rep_reconstruction (vq): saved checkpoints at ./logs/debug/debug_ship/vq_last.tar
train: fine VQ finetune reconstruction in 00:05:50
train: finish (eps time 00:06:00 )
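For the middle band, each voxel's 12-channel feature is replaced by the index of its nearest entry in a learned codebook (codebook_size = 4096 in the kwargs above, i.e. 12 bits per quantized voxel plus one shared codebook). A minimal nearest-code assignment, leaving aside how the codebook itself is trained:

import torch

def assign_codes(features, codebook):
    """features: (N, C) voxel features, codebook: (K, C); returns (N,) code indices."""
    # squared L2 distance to every code without materializing an (N, K, C) tensor
    d = (features.pow(2).sum(1, keepdim=True)
         - 2 * features @ codebook.t()
         + codebook.pow(2).sum(1))
    return d.argmin(dim=1)

features = torch.randn(1000, 12)
codebook = torch.randn(4096, 12)
idx = assign_codes(features, codebook)
dequantized = codebook[idx]   # what the renderer sees after lookup

The kwargs above also expose use_cosine_sim, so codes can presumably be matched by cosine similarity instead of L2 distance; it is False in this run.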
# load fine model for rendering
if args.render_test or args.render_train or args.render_video:
    ckpt_path_fine = os.path.join(cfg.basedir, cfg.expname, 'fine_last.tar')
    ckpt_path_vq = os.path.join(cfg.basedir, cfg.expname, 'vq_last.tar')
    if cfg.data.ndc:
        model_class = dmpigo.DirectMPIGO
    elif cfg.data.unbounded_inward:
        model_class = dcvgo.DirectContractedVoxGO
    else:
        model_class = dvgo.DirectVoxGO
    model_fine = utils.load_model(model_class, ckpt_path_fine).to(device)
    model_vq = utils.load_model(model_class, ckpt_path_vq).to(device)
    stepsize = cfg.fine_model_and_render.stepsize
    fine_render_viewpoints_kwargs = {
        'model': model_fine,
        'ndc': cfg.data.ndc,
        'render_kwargs': {
            'near': data_dict['near'],
            'far': data_dict['far'],
            'bg': 1 if cfg.data.white_bkgd else 0,
            'stepsize': stepsize,
            'inverse_y': cfg.data.inverse_y,
            'flip_x': cfg.data.flip_x,
            'flip_y': cfg.data.flip_y,
            'render_depth': True,
        },
    }
    vq_render_viewpoints_kwargs = {
        'model': model_vq,
        'ndc': cfg.data.ndc,
        'render_kwargs': {
            'near': data_dict['near'],
            'far': data_dict['far'],
            'bg': 1 if cfg.data.white_bkgd else 0,
            'stepsize': stepsize,
            'inverse_y': cfg.data.inverse_y,
            'flip_x': cfg.data.flip_x,
            'flip_y': cfg.data.flip_y,
            'render_depth': True,
        },
    }
    model_fine.eval()
    saving_dict = {'k0': model_fine.k0.grid,
                   'density': model_fine.density.grid,
                   'rgbnet': model_fine.rgbnet}
    saving_path = os.path.join(os.path.dirname(ckpt_path_fine), 'fine_model.pth')
    torch.save(saving_dict, saving_path)
    os.system(f"zip {saving_path+'.zip'} {saving_path}")
    model_vq.eval()
    model_vq.importance = torch.load(os.path.join(cfg.basedir, cfg.expname, 'importance.pth'))
    model_vq.fully_vq_reformat(args.importance_prune, args.importance_include,
                               save_path=os.path.join(cfg.basedir, cfg.expname))
    model_vq.mask_cache.mask[:] = True
    model_vq.update_occupancy_cache()
dvgo: set density bias shift to tensor([-4.5951], device='cpu')
dvgo: voxel_size tensor(0.0142)
dvgo: world_size tensor([202, 193, 104])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(1.)
dvgo: feature voxel grid DenseGrid(
channels=12, world_size=[202, 193, 104]
(trilinear_interpolation): TrilinearIntepolation()
)
dvgo: mlp Sequential(
(0): Linear(in_features=39, out_features=128, bias=True)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): ReLU(inplace=True)
)
(3): Linear(in_features=128, out_features=3, bias=True)
)
initialization finished
dvgo: set density bias shift to tensor([-4.5951], device='cpu')
dvgo: voxel_size tensor(0.0142)
dvgo: world_size tensor([202, 193, 104])
dvgo: voxel_size_base tensor(0.0142)
dvgo: voxel_size_ratio tensor(1.)
dvgo: feature voxel grid DenseGrid(
channels=12, world_size=[202, 193, 104]
(trilinear_interpolation): TrilinearIntepolation()
)
dvgo: mlp Sequential(
(0): Linear(in_features=39, out_features=128, bias=True)
(1): ReLU(inplace=True)
(2): Sequential(
(0): Linear(in_features=128, out_features=128, bias=True)
(1): ReLU(inplace=True)
)
(3): Linear(in_features=128, out_features=3, bias=True)
)
{'voxel_size_ratio': tensor(1.), 'density_factor': 1, 'codebook_size': 4096, 'use_cosine_sim': False, 'use_vq': True}
initialization finished
/bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)
updating: logs/debug/debug_ship/fine_model.pth (deflated 49%)
start fully vector quantize
caculate vq features
start fully vector quantize
100%|████████████████████████████████████████| 495/495 [00:01<00:00, 429.03it/s]
start cdf three split
start cdf three split
15.08% of most important points contribute over 99.90% importance
2.13% of most important points contribute over 60.00% importance
/bin/bash: /opt/conda/lib/libtinfo.so.6: no version information available (required by /bin/bash)
updating: logs/debug/debug_ship/extreme_saving/ (stored 0%)
updating: logs/debug/debug_ship/extreme_saving/codebook.npz (deflated 0%)
updating: logs/debug/debug_ship/extreme_saving/vq_indexs.npz (deflated 0%)
updating: logs/debug/debug_ship/extreme_saving/non_vq_grid.npz (deflated 0%)
updating: logs/debug/debug_ship/extreme_saving/metadata.npz (stored 0%)
updating: logs/debug/debug_ship/extreme_saving/non_prune_density.npz (deflated 0%)
updating: logs/debug/debug_ship/extreme_saving/rgbnet.npz (deflated 0%)
updating: logs/debug/debug_ship/extreme_saving/keep_mask.npz (deflated 0%)
updating: logs/debug/debug_ship/extreme_saving/non_prune_mask.npz (deflated 1%)
finish fully vector quantize
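The files zipped under extreme_saving/ above appear to be the entire compressed scene: the codebook, the per-voxel VQ indices, the un-quantized voxels, the density of non-pruned voxels, the masks, and the small rgbnet MLP. A quick way to check the on-disk footprint against the paper's ~1 MB target:

import os

save_dir = './logs/debug/debug_ship/extreme_saving'
total = sum(os.path.getsize(os.path.join(save_dir, f)) for f in os.listdir(save_dir))
print(f'{total / 2**20:.2f} MiB')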
# render trainset and eval
if args.render_train:
    if args.render_fine:
        testsavedir = os.path.join(cfg.basedir, cfg.expname, f'render_train_fine_last')
        os.makedirs(testsavedir, exist_ok=True)
        print('All results are dumped into', testsavedir)
        rgbs, depths, bgmaps = render_viewpoints(
            render_poses=data_dict['poses'][data_dict['i_train']],
            HW=data_dict['HW'][data_dict['i_train']],
            Ks=data_dict['Ks'][data_dict['i_train']],
            gt_imgs=[data_dict['images'][i].cpu().numpy() for i in data_dict['i_train']],
            savedir=testsavedir, dump_images=args.dump_images,
            eval_ssim=args.eval_ssim, eval_lpips_alex=args.eval_lpips_alex, eval_lpips_vgg=args.eval_lpips_vgg,
            **fine_render_viewpoints_kwargs)
        imageio.mimwrite(os.path.join(testsavedir, 'video.rgb.mp4'), utils.to8b(rgbs), fps=30, quality=8)
        imageio.mimwrite(os.path.join(testsavedir, 'video.depth.mp4'), utils.to8b(1 - depths / np.max(depths)), fps=30, quality=8)
    testsavedir = os.path.join(cfg.basedir, cfg.expname, f'render_train_vq_last')
    os.makedirs(testsavedir, exist_ok=True)
    print('All results are dumped into', testsavedir)
    rgbs, depths, bgmaps = render_viewpoints(
        render_poses=data_dict['poses'][data_dict['i_train']],
        HW=data_dict['HW'][data_dict['i_train']],
        Ks=data_dict['Ks'][data_dict['i_train']],
        gt_imgs=[data_dict['images'][i].cpu().numpy() for i in data_dict['i_train']],
        savedir=testsavedir, dump_images=args.dump_images,
        eval_ssim=args.eval_ssim, eval_lpips_alex=args.eval_lpips_alex, eval_lpips_vgg=args.eval_lpips_vgg,
        **vq_render_viewpoints_kwargs)
    imageio.mimwrite(os.path.join(testsavedir, 'video.rgb.mp4'), utils.to8b(rgbs), fps=30, quality=8)
    imageio.mimwrite(os.path.join(testsavedir, 'video.depth.mp4'), utils.to8b(1 - depths / np.max(depths)), fps=30, quality=8)
# render testset and eval
if args.render_test:
    if args.render_fine:
        testsavedir = os.path.join(cfg.basedir, cfg.expname, f'render_test_fine_last')
        os.makedirs(testsavedir, exist_ok=True)
        print('All results are dumped into', testsavedir)
        rgbs, depths, bgmaps = render_viewpoints(
            render_poses=data_dict['poses'][data_dict['i_test']],
            HW=data_dict['HW'][data_dict['i_test']],
            Ks=data_dict['Ks'][data_dict['i_test']],
            gt_imgs=[data_dict['images'][i].cpu().numpy() for i in data_dict['i_test']],
            savedir=testsavedir, dump_images=args.dump_images,
            eval_ssim=args.eval_ssim, eval_lpips_alex=args.eval_lpips_alex, eval_lpips_vgg=args.eval_lpips_vgg,
            **fine_render_viewpoints_kwargs)
        imageio.mimwrite(os.path.join(testsavedir, 'video.rgb.mp4'), utils.to8b(rgbs), fps=30, quality=8)
        imageio.mimwrite(os.path.join(testsavedir, 'video.depth.mp4'), utils.to8b(1 - depths / np.max(depths)), fps=30, quality=8)
    testsavedir = os.path.join(cfg.basedir, cfg.expname, f'render_test_vq_last')
    os.makedirs(testsavedir, exist_ok=True)
    print('All results are dumped into', testsavedir)
    rgbs, depths, bgmaps = render_viewpoints(
        render_poses=data_dict['poses'][data_dict['i_test']],
        HW=data_dict['HW'][data_dict['i_test']],
        Ks=data_dict['Ks'][data_dict['i_test']],
        gt_imgs=[data_dict['images'][i].cpu().numpy() for i in data_dict['i_test']],
        savedir=testsavedir, dump_images=args.dump_images,
        eval_ssim=args.eval_ssim, eval_lpips_alex=args.eval_lpips_alex, eval_lpips_vgg=args.eval_lpips_vgg,
        **vq_render_viewpoints_kwargs)
    imageio.mimwrite(os.path.join(testsavedir, 'video.rgb.mp4'), utils.to8b(rgbs), fps=30, quality=8)
    imageio.mimwrite(os.path.join(testsavedir, 'video.depth.mp4'), utils.to8b(1 - depths / np.max(depths)), fps=30, quality=8)
# render video
if args.render_video:
    if args.render_fine:
        testsavedir = os.path.join(cfg.basedir, cfg.expname, f'render_video_fine_last')
        os.makedirs(testsavedir, exist_ok=True)
        print('All results are dumped into', testsavedir)
        rgbs, depths, bgmaps = render_viewpoints(
            render_poses=data_dict['render_poses'],
            HW=data_dict['HW'][data_dict['i_test']][[0]].repeat(len(data_dict['render_poses']), 0),
            Ks=data_dict['Ks'][data_dict['i_test']][[0]].repeat(len(data_dict['render_poses']), 0),
            render_factor=args.render_video_factor,
            render_video_flipy=args.render_video_flipy,
            render_video_rot90=args.render_video_rot90,
            savedir=testsavedir, dump_images=args.dump_images,
            **fine_render_viewpoints_kwargs)
        imageio.mimwrite(os.path.join(testsavedir, 'video.rgb.mp4'), utils.to8b(rgbs), fps=30, quality=8)
        import matplotlib.pyplot as plt
        depths_vis = depths * (1-bgmaps) + bgmaps
        dmin, dmax = np.percentile(depths_vis[bgmaps < 0.1], q=[5, 95])
        depth_vis = plt.get_cmap('rainbow')(1 - np.clip((depths_vis - dmin) / (dmax - dmin), 0, 1)).squeeze()[..., :3]
        imageio.mimwrite(os.path.join(testsavedir, 'video.depth.mp4'), utils.to8b(depth_vis), fps=30, quality=8)
    testsavedir = os.path.join(cfg.basedir, cfg.expname, f'render_video_vq_last')
    os.makedirs(testsavedir, exist_ok=True)
    print('All results are dumped into', testsavedir)
    rgbs, depths, bgmaps = render_viewpoints(
        render_poses=data_dict['render_poses'],
        HW=data_dict['HW'][data_dict['i_test']][[0]].repeat(len(data_dict['render_poses']), 0),
        Ks=data_dict['Ks'][data_dict['i_test']][[0]].repeat(len(data_dict['render_poses']), 0),
        render_factor=args.render_video_factor,
        render_video_flipy=args.render_video_flipy,
        render_video_rot90=args.render_video_rot90,
        savedir=testsavedir, dump_images=args.dump_images,
        **vq_render_viewpoints_kwargs)
    imageio.mimwrite(os.path.join(testsavedir, 'video.rgb.mp4'), utils.to8b(rgbs), fps=30, quality=8)
    import matplotlib.pyplot as plt
    depths_vis = depths * (1-bgmaps) + bgmaps
    dmin, dmax = np.percentile(depths_vis[bgmaps < 0.1], q=[5, 95])
    depth_vis = plt.get_cmap('rainbow')(1 - np.clip((depths_vis - dmin) / (dmax - dmin), 0, 1)).squeeze()[..., :3]
    imageio.mimwrite(os.path.join(testsavedir, 'video.depth.mp4'), utils.to8b(depth_vis), fps=30, quality=8)
print('Done')
All results are dumped into ./logs/debug/debug_ship/render_test_fine_last
0%| | 0/200 [00:00, ?it/s]
Testing (800, 800, 3)
init_lpips: lpips_alex
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.8/site-packages/lpips/weights/v0.1/alex.pth
init_lpips: lpips_vgg
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.8/site-packages/lpips/weights/v0.1/vgg.pth
100%|███████████████████████████████████████| 200/200 [06:42<00:00, 2.01s/it]
Testing psnr 29.087039160728455 (avg)
Testing ssim 0.8778315343072913 (avg)
Testing lpips (vgg) 0.1602732403203845 (avg)
Testing lpips (alex) 0.1163811163417995 (avg)
All results are dumped into ./logs/debug/debug_ship/render_test_vq_last
0%| | 0/200 [00:00, ?it/s]
Testing (800, 800, 3)
100%|███████████████████████████████████████| 200/200 [04:43<00:00, 1.42s/it]
Testing psnr 29.144386565685274 (avg)
Testing ssim 0.8766373125894208 (avg)
Testing lpips (vgg) 0.1602870875597 (avg)
Testing lpips (alex) 0.1128024041466415 (avg)
Done
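For reference, the reported PSNR is the mean over per-image PSNRs, each computed from the MSE between the rendered and ground-truth test image:

import numpy as np

def psnr(render, gt):
    mse = np.mean((render - gt) ** 2)   # images in [0, 1]
    return -10.0 * np.log10(mse)

On this run the VQ model (29.14 dB PSNR, 0.877 SSIM) matches the uncompressed fine model (29.09 dB, 0.878) on the test set while needing only the small extreme_saving archive instead of the full dense grids, which is the trade-off the paper is after.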