【code】Dynamic NeRF | generate_data

源代码:Dynamic NeRF | generate_data 用来从video中按固定步长抽取图像(最多约100帧),并用Mask R-CNN产生背景掩码

torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)使用预训练权重的maskrcnn

os.path.basename()返回path最后的文件名

os.path.splitext("文件路径") 分离文件名与扩展名,返回(文件名, 扩展名)元组

imageio.get_reader()使用imageio库来读取视频;可以用fps = reader.get_meta_data()['fps']获取帧率

np.ceil()向上取整,比如2.3取为3

torchvision.transforms.functional.to_tensor(img) 将PIL image 或者 numpy ndarray 转换为 tensor,对uint8图像会把取值从[0, 255]归一化到[0.0, 1.0],并把维度从(H, W, C)变为(C, H, W)

torch.FloatTensor(H, W).fill_(1.0) 创建一个形状为(H, W)的float32 tensor,并用1.0原地填充,等价于torch.ones(H, W)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def multi_view_multi_time(args):
    """
    Generating multi view multi time data

    Reads the video at ``args.videopath``, keeps a strided subset of its
    frames (at most 100) and writes, under ``<args.data_dir>/<video name>/``:

    * ``images/NNN.png``              -- the selected frames as PNG,
    * ``images_colmap/NNN.jpg``       -- the same frames as JPEG,
    * ``background_mask/NNN.jpg.png`` -- a coarse mask that is 255 on the
      background and 0 wherever Mask R-CNN detected a person.

    Args:
        args: object exposing ``videopath`` (path of the input video) and
            ``data_dir`` (root output directory).

    Raises:
        ValueError: if no frame could be read from the video.
    """
    # Move the model to the shared `device` instead of hard-coding .cuda(),
    # so the CPU fallback chosen above actually works.
    # NOTE: newer torchvision releases prefer `weights=...` over `pretrained=True`.
    Maskrcnn = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True).to(device).eval()

    # Used both as the minimum detection score and as the per-pixel mask cut-off.
    threshold = 0.5
    person_label = 1  # label id treated as "person" by the detector

    videoname = os.path.splitext(os.path.basename(args.videopath))[0]

    # Read every frame of the video; the reader is closed when the block exits.
    with imageio.get_reader(args.videopath) as reader:
        imgs = [frame for frame in reader]

    if not imgs:
        raise ValueError(f"No frames could be read from {args.videopath!r}")

    imgs = np.array(imgs)
    num_frames, H, W, _ = imgs.shape
    # Subsample with a fixed stride so that at most 100 frames remain,
    # e.g. 112 frames -> stride 2 -> 56 frames kept.
    imgs = imgs[::int(np.ceil(num_frames / 100))]

    image_dir = os.path.join(args.data_dir, videoname, 'images')
    colmap_dir = os.path.join(args.data_dir, videoname, 'images_colmap')
    mask_dir = os.path.join(args.data_dir, videoname, 'background_mask')
    for out_dir in (image_dir, colmap_dir, mask_dir):
        create_dir(out_dir)

    for idx, img in enumerate(imgs):
        print(idx)
        frame_name = str(idx).zfill(3)

        # Save the frame in both PNG and JPEG form.
        imageio.imwrite(os.path.join(image_dir, frame_name + '.png'), img)
        imageio.imwrite(os.path.join(colmap_dir, frame_name + '.jpg'), img)

        # Get coarse background mask
        # RGB array ----> Tensor ----> device
        img = torchvision.transforms.functional.to_tensor(img).to(device)
        background_mask = torch.ones(H, W, dtype=torch.float32, device=device)

        # Predict instance masks; inference only, so skip autograd bookkeeping.
        with torch.no_grad():
            objPredictions = Maskrcnn([img])[0]

        # Zero out every pixel covered by a confidently detected person.
        for score, label, mask in zip(objPredictions['scores'],
                                      objPredictions['labels'],
                                      objPredictions['masks']):
            if score.item() > threshold and label.item() == person_label:
                background_mask[mask[0] > threshold] = 0.0

        # Binarise to {0, 255} uint8 before writing to disk.
        background_mask_np = ((background_mask.cpu().numpy() > 0.1) * 255).astype(np.uint8)
        imageio.imwrite(os.path.join(mask_dir, frame_name + '.jpg.png'), background_mask_np)

你可能感兴趣的:(NeRF,笔记,python,numpy,深度学习)