源代码:Dynamic NeRF | generate_data 用来从 video 中按固定间隔抽取图像(间隔为 ceil(总帧数/100),即最多保留 100 帧),并用 Mask R-CNN 产生背景掩码
torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True)使用预训练权重的maskrcnn
os.path.basename()返回path最后的文件名
os.path.splitext("文件路径") 分离文件名与扩展名,返回 (文件名, 扩展名) 二元组
imageio.get_reader()使用imageio库来读取视频;可以用 fps = reader.get_meta_data()['fps'] 获取帧率
np.ceil()向上取整,比如2.3取为3
torchvision.transforms.functional.to_tensor(img) 将 PIL Image 或者 numpy.ndarray 转换为 tensor;对 uint8 图像会把取值从[0, 255]归一化到[0.0, 1.0],并把维度从 (H, W, C) 变为 (C, H, W)
torch.FloatTensor(H, W).fill_(1.0) 传入尺寸时创建一个形状为 (H, W) 的未初始化 float32 tensor,再用 1.0 原地填充(等价于 torch.ones(H, W));torch.FloatTensor 传入 list、numpy 数组时才是类型转换,将其转化为 tensor
# Prefer the GPU when CUDA is usable; otherwise run everything on the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
def multi_view_multi_time(args):
    """Generate multi-view multi-time data from a single video.

    Reads every frame of ``args.videopath``, keeps every k-th frame with
    ``k = ceil(num_frames / 100)`` (so at most 100 frames survive), and for
    each kept frame writes, under ``<args.data_dir>/<video name>/``:

    * ``images/NNN.png``              - the frame itself
    * ``images_colmap/NNN.jpg``       - the same frame as JPEG
    * ``background_mask/NNN.jpg.png`` - uint8 mask, 255 for background and 0
      wherever Mask R-CNN detects a person with score above 0.5

    Args:
        args: Object exposing ``videopath`` (path of the input video) and
            ``data_dir`` (root directory for the generated data).

    Raises:
        ValueError: If no frame can be read from the video.
    """
    # Use the module-level ``device`` (not a hard-coded .cuda()) so the model
    # lives on the same device as the tensors fed to it below, including on
    # CPU-only machines.
    maskrcnn = torchvision.models.detection.maskrcnn_resnet50_fpn(pretrained=True).to(device).eval()
    threshold = 0.5

    videoname, _ext = os.path.splitext(os.path.basename(args.videopath))

    # Read the whole video into memory, always releasing the reader.
    frames = []
    reader = imageio.get_reader(args.videopath)
    try:
        for frame in reader:
            frames.append(frame)
    finally:
        reader.close()

    if not frames:
        raise ValueError('no frames could be read from {!r}'.format(args.videopath))

    imgs = np.array(frames)
    num_frames, H, W, _ = imgs.shape

    # Subsample with a fixed stride, e.g. 112 frames -> stride 2 -> 56 frames.
    stride = int(np.ceil(num_frames / 100))
    imgs = imgs[::stride]

    out_root = os.path.join(args.data_dir, videoname)
    image_dir = os.path.join(out_root, 'images')
    colmap_dir = os.path.join(out_root, 'images_colmap')
    mask_dir = os.path.join(out_root, 'background_mask')
    for directory in (image_dir, colmap_dir, mask_dir):
        create_dir(directory)

    # Pure inference: no_grad avoids keeping autograd state for every frame.
    with torch.no_grad():
        for idx, img in enumerate(imgs):
            print(idx)
            stem = str(idx).zfill(3)

            # Write the frame both as PNG and as JPEG.
            imageio.imwrite(os.path.join(image_dir, stem + '.png'), img)
            imageio.imwrite(os.path.join(colmap_dir, stem + '.jpg'), img)

            # Get coarse background mask: RGB frame -> tensor -> device.
            frame_tensor = torchvision.transforms.functional.to_tensor(img).to(device)
            background_mask = torch.ones(H, W, device=device)

            # Predict instances and zero out every confident "person" mask.
            predictions = maskrcnn([frame_tensor])[0]
            detections = zip(predictions['scores'], predictions['labels'], predictions['masks'])
            for score, label, mask in detections:
                if score.item() > threshold and label.item() == 1:  # person
                    background_mask[mask[0] > threshold] = 0.0

            background_mask_np = ((background_mask.cpu().numpy() > 0.1) * 255).astype(np.uint8)
            # NOTE(review): the '.jpg.png' suffix presumably pairs each mask
            # with images_colmap/NNN.jpg via COLMAP's '<image name>.png' mask
            # naming - confirm before changing it.
            imageio.imwrite(os.path.join(mask_dir, stem + '.jpg.png'), background_mask_np)