Paper: Masked Wavelet Representation for Compact Neural Radiance Fields
Code: masked_wavelet_nerf
conda create -n MaskDWT python=3.8
conda activate MaskDWT
pip install torch torchvision
pip install tqdm scikit-image opencv-python configargparse lpips imageio-ffmpeg kornia tensorboard
pip install plyfile
pytorch_wavelets包
git clone https://github.com/fbcotter/pytorch_wavelets
cd pytorch_wavelets
pip install .
cd ..
python train.py --config=configs/drums.txt --use_mask --mask_weight=1e-10 --grid_bit=8 --use_dwt --dwt_level=4
log:
Namespace(L1_weight_inital=0.0, L1_weight_rest=0.0, N_vis=5, N_voxel_final=27000000, N_voxel_init=2097156, Ortho_weight=0.0, TV_weight_app=0.0, TV_weight_density=0.0, accumulate_decay=0.998, add_timestamp=0, alpha_mask_thre=0.0001, alpha_offset=0.0001, basedir='./log', batch_size=4096, ckpt=None, compress=0, compress_levelwise=0, config='configs/drums.txt', data_dim_color=27, datadir='/MULT/datasets/nerf-data_wn/nerf_synthetic/drums', dataset_name='blender', decompress=0, decompress_and_validate=1, decompress_levelwise=0, density_shift=-10, distance_scale=25, downsample_test=1.0, downsample_train=1.0, dwt_level=4, expname='tensorf_drums_VM', export_mesh=0, fea2denseAct='softplus', fea_pe=2, featureC=128, grid_bit=8, idx_view=0, lindisp=False, lr_basis=0.001, lr_decay_iters=-1, lr_decay_target_ratio=0.1, lr_init=0.02, lr_upsample_reset=1, mask_weight=1e-10, model_name='TensorVMSplit', nSamples=1000000.0, n_iters=30000, n_lamb_sh=[48, 48, 48], n_lamb_sigma=[16, 16, 16], ndc_ray=0, perturb=1.0, pos_pe=0, progress_refresh_rate=10, reconstruct_mask=1, render_only=0, render_path=0, render_test=1, render_train=0, rm_weight_mask_thre=0.0001, shadingMode='MLP_Fea', step_ratio=0.5, trans_func='bior4.4', update_AlphaMask_list=[2000, 4000, 6000, 11000, 16000, 21000, 26000], upsamp_list=[2000, 3000, 4000, 5500, 7000], use_dwt=True, use_mask=True, view_pe=2, vis_every=10000, weight_decay=0.0, white_bkgd=False, with_depth=False)
Loading data train (100): 100%|██████████| 100/100 [00:03<00:00, 32.88it/s]
Loading data test (200): 100%|██████████████████| 200/200 [00:05<00:00, 36.57it/s]
aabb tensor([-1.5000, -1.5000, -1.5000, 1.5000, 1.5000, 1.5000], device='cuda:0')
grid size [128, 128, 128]
sampling step size: tensor(0.0118, device='cuda:0')
sampling number: 440
pos_pe 0 view_pe 2 fea_pe 2
MLPRender_Fea(
(mlp): Sequential(
(0): Linear(in_features=150, out_features=128, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=128, out_features=128, bias=True)
(3): ReLU(inplace=True)
(4): Linear(in_features=128, out_features=3, bias=True)
)
)
24.340526580810547MB
lr decay 0.1 30000
=> filtering rays ...
Ray filtering done! takes 0.7676050662994385 s. ray mask ratio: 0.9997473955154419
initial Ortho_reg_weight 0.0
initial L1_reg_weight 0.0
initial TV_weight density: 0.0 appearance: 0.0
Iteration 02000: train_psnr = 22.89 test_psnr = 0.00 mse = 0.005108: 7%|██▊ | 2000/30000 [02:40<31:53, 14.63it/s]
bbox: (tensor([-1.5000, -0.8386, -0.7677], device='cuda:0'), tensor([1.2402, 1.0748, 1.3583], device='cuda:0')) alpha rest %6.792450
=> shrinking ...
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size (112, 80, 80)
sampling step size: tensor(0.0126, device='cuda:0')
sampling number: 314
continuing L1_reg_weight 0.0
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size [176, 128, 144]
sampling step size: tensor(0.0076, device='cuda:0')
sampling number: 522
upsamping to [176, 128, 144]
reset lr to initial
Iteration 03000: train_psnr = 22.08 test_psnr = 0.00 mse = 0.005758: 10%|████▏ | 3000/30000 [03:55<32:06, 14.01it/s]
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size [208, 144, 160]
sampling step size: tensor(0.0067, device='cuda:0')
sampling number: 595
upsamping to [208, 144, 160]
reset lr to initial
Iteration 04000: train_psnr = 23.58 test_psnr = 0.00 mse = 0.004786: 13%|█████▌ | 4000/30000 [05:06<30:28, 14.22it/s]
bbox: (tensor([-1.5000, -0.8386, -0.7677], device='cuda:0'), tensor([1.2402, 1.0748, 1.3583], device='cuda:0')) alpha rest %12.232906
=> filtering rays ...
Ray filtering done! takes 5.870962142944336 s. ray mask ratio: 0.7819146513938904
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size [256, 176, 192]
sampling step size: tensor(0.0055, device='cuda:0')
sampling number: 725
upsamping to [256, 176, 192]
reset lr to initial
Iteration 05500: train_psnr = 23.60 test_psnr = 0.00 mse = 0.004178: 18%|███████▋ | 5500/30000 [07:07<28:56, 14.11it/s]
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size [304, 208, 240]
sampling step size: tensor(0.0045, device='cuda:0')
sampling number: 875
upsamping to [304, 208, 240]
reset lr to initial
Iteration 06000: train_psnr = 23.56 test_psnr = 0.00 mse = 0.004612: 20%|████████▍ | 6000/30000 [07:42<28:22, 14.10it/s]
bbox: (tensor([-1.1292, -0.7554, -0.5809], device='cuda:0'), tensor([1.1316, 0.9731, 1.2693], device='cuda:0')) alpha rest %5.504399
Iteration 07000: train_psnr = 24.14 test_psnr = 0.00 mse = 0.004012: 23%|█████████▊ | 7000/30000 [08:58<23:16, 16.47it/s]
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size [368, 256, 272]
sampling step size: tensor(0.0038, device='cuda:0')
sampling number: 1042
upsamping to [368, 256, 272]
reset lr to initial
Iteration 09990: train_psnr = 25.00 test_psnr = 0.00 mse = 0.003241: 33%|█████████████▉ | 9990/30000 [12:43<30:44, 10.85it/s]
5it [00:48, 9.69s/it]
Iteration 11000: train_psnr = 25.29 test_psnr = 24.78 mse = 0.002538: 37%|██████████████▋ | 11000/30000 [15:06<28:46, 11.00it/s]
bbox: (tensor([-1.1292, -0.7554, -0.5809], device='cuda:0'), tensor([1.1316, 0.9639, 1.2515], device='cuda:0')) alpha rest %4.673168
Iteration 16000: train_psnr = 26.04 test_psnr = 24.78 mse = 0.002895: 53%|█████████████████████▎ | 16000/30000 [20:58<12:55, 18.05it/s]
bbox: (tensor([-1.1292, -0.7554, -0.5542], device='cuda:0'), tensor([1.1316, 0.9639, 1.2604], device='cuda:0')) alpha rest %4.489262
Iteration 19990: train_psnr = 26.42 test_psnr = 24.78 mse = 0.002086: 67%|██████████████████████████▋ | 19990/30000 [24:59<10:43, 15.56it/s]
5it [00:28, 5.71s/it]
Iteration 21000: train_psnr = 26.28 test_psnr = 25.53 mse = 0.002177: 70%|████████████████████████████ | 21000/30000 [26:29<08:40, 17.29it/s]
bbox: (tensor([-1.1292, -0.7554, -0.5631], device='cuda:0'), tensor([1.1316, 0.9731, 1.2515], device='cuda:0')) alpha rest %4.423907
Iteration 26000: train_psnr = 26.57 test_psnr = 25.53 mse = 0.002366: 87%|██████████████████████████████████▋ | 26000/30000 [31:23<03:39, 18.22it/s]
bbox: (tensor([-1.1654, -0.7554, -0.5631], device='cuda:0'), tensor([1.1316, 0.9731, 1.2515], device='cuda:0')) alpha rest %4.378057
Iteration 29990: train_psnr = 26.80 test_psnr = 25.53 mse = 0.001910: 100%|███████████████████████████████████████▉| 29990/30000 [36:07<00:00, 13.43it/s]
5it [00:37, 7.53s/it]
Iteration 29990: train_psnr = 26.80 test_psnr = 25.53 mse = 0.001910: 100%|████████████████████████████████████████| 30000/30000 [36:47<00:00, 13.59it/s]
total: 16.317MB (G (8bit): 16.164MB) (N: 0.153027MB)
non-masked ratio: 0.0282
masked_total: 0.609MB (G (8bit): 0.456MB) (N: 0.153027MB)
bbox: (tensor([-1.1292, -0.7554, -0.5631], device='cuda:0'), tensor([1.1316, 0.9731, 1.2515], device='cuda:0')) alpha rest %4.379857
0it [00:00, ?it/s]
init_lpips: lpips_alex
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.8/site-packages/lpips/weights/v0.1/alex.pth
init_lpips: lpips_vgg
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.8/site-packages/lpips/weights/v0.1/vgg.pth
200it [27:00, 8.10s/it]
=> tensorf_drums_VM test all psnr: 25.4535889506069 <========================
python compress.py --compress=1 --compress_levelwise=1 --ckpt=log/tensorf_drums_VM/tensorf_drums_VM.th
log:
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size [368, 256, 272]
sampling step size: tensor(0.0038, device='cuda:0')
sampling number: 1042
pos_pe 0 view_pe 2 fea_pe 2
MLPRender_Fea(
(mlp): Sequential(
(0): Linear(in_features=150, out_features=128, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=128, out_features=128, bias=True)
(3): ReLU(inplace=True)
(4): Linear(in_features=128, out_features=3, bias=True)
)
)
=> Grid + Mask + MLP (mb): 0.9569406509399414 <============
=> kwargs (mb): 0.0012617111206054688 <============
encoding done.
python compress.py --decompress=1 --decompress_levelwise=1 --config=configs/drums.txt --ckpt=log/tensorf_drums_VM/params.th
log:
aabb tensor([-1.5000, -0.8386, -0.7677, 1.2402, 1.0748, 1.3583], device='cuda:0')
grid size [368, 256, 272]
sampling step size: tensor(0.0038, device='cuda:0')
sampling number: 1042
pos_pe 0 view_pe 2 fea_pe 2
MLPRender_Fea(
(mlp): Sequential(
(0): Linear(in_features=150, out_features=128, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=128, out_features=128, bias=True)
(3): ReLU(inplace=True)
(4): Linear(in_features=128, out_features=3, bias=True)
)
)
bbox: (tensor([-1.1292, -0.7554, -0.5631], device='cuda:0'), tensor([1.1316, 0.9731, 1.2515], device='cuda:0')) alpha rest %4.379857
model loaded.
Loading data test (200): 100%|██████████████████████████| 200/200 [00:05<00:00, 33.45it/s]
0it [00:00, ?it/s]
init_lpips: lpips_alex
Setting up [LPIPS] perceptual loss: trunk [alex], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.8/site-packages/lpips/weights/v0.1/alex.pth
init_lpips: lpips_vgg
Setting up [LPIPS] perceptual loss: trunk [vgg], v[0.1], spatial [off]
Loading model from: /opt/conda/lib/python3.8/site-packages/lpips/weights/v0.1/vgg.pth
200it [27:00, 8.10s/it]
=> tensorf_drums_VM test all psnr: 25.45358887581467 <============
sh drums.sh
drums.sh:
#!/bin/bash
python train.py --config=configs/drums.txt --use_mask --mask_weight=1e-10 --grid_bit=8 --use_dwt --dwt_level=4
python compress.py --compress=1 --compress_levelwise=1 --ckpt=log/tensorf_drums_VM/tensorf_drums_VM.th
python compress.py --decompress=1 --decompress_levelwise=1 --config=configs/drums.txt --ckpt=log/tensorf_drums_VM/params.th
复现结果:
(nerf_synthetic dataset)