There are already plenty of blog posts explaining MVSNet online, and you can pick whichever you like to study. What matters more, though, is reading and understanding the original paper and running the code yourself!
MVSNet server environment setup and testing: https://blog.csdn.net/qq_43307074/article/details/128011842
[Paper summary and translation] MVSNet: Depth Inference for Unstructured Multi-view Stereo (ECCV 2018): https://blog.csdn.net/qq_43307074/article/details/127876458
# Homography warping: project the src feature map into the ref view using the ref and src projection matrices, producing a feature volume
def homo_warping(src_fea, src_proj, ref_proj, depth_values):
# src_fea: [B, C, H, W], features of the src image (C is already 32 here)
# src_proj: [B, 4, 4], projection matrix of the src image
# ref_proj: [B, 4, 4], projection matrix of the reference image
# depth_values: [B, Ndepth], array of depth hypotheses
# out: [B, C, Ndepth, H, W]
batch, channels = src_fea.shape[0], src_fea.shape[1]
num_depth = depth_values.shape[1]
height, width = src_fea.shape[2], src_fea.shape[3]
# disable gradient tracking to save memory and computation
with torch.no_grad():
# src_proj @ ref_proj^{-1}: the relative transform from the ref camera to the src camera
# torch.matmul(input, other, out=None): matrix product of the two tensors
proj = torch.matmul(src_proj, torch.inverse(ref_proj))
# top-left 3x3 block: the rotation part, [B, 3, 3]
rot = proj[:, :3, :3]
# top three rows of the last column: the translation part, [B, 3, 1]
trans = proj[:, :3, 3:4]
# build a pixel-coordinate grid with the ref image's dimensions; these coordinates are what we project into the src view to look up features
# y: Size([128, 160])
# y: tensor([[ 0., 0., 0., ..., 0., 0., 0.],
# [ 1., 1., 1., ..., 1., 1., 1.],
# [ 2., 2., 2., ..., 2., 2., 2.],
# ...,
# [125., 125., 125., ..., 125., 125., 125.],
# [126., 126., 126., ..., 126., 126., 126.],
# [127., 127., 127., ..., 127., 127., 127.]], device='cuda:0')
# x: Size([128, 160])
# x: tensor([[ 0., 1., 2., ..., 157., 158., 159.],
# [ 0., 1., 2., ..., 157., 158., 159.],
# [ 0., 1., 2., ..., 157., 158., 159.],
# ...,
# [ 0., 1., 2., ..., 157., 158., 159.],
# [ 0., 1., 2., ..., 157., 158., 159.],
# [ 0., 1., 2., ..., 157., 158., 159.]], device='cuda:0')
y, x = torch.meshgrid([torch.arange(0, height, dtype=torch.float32, device=src_fea.device),
torch.arange(0, width, dtype=torch.float32, device=src_fea.device)])
# make sure the tensors are contiguous in memory (storage order matches row-major traversal; ops like transpose share memory with the original tensor)
y, x = y.contiguous(), x.contiguous()
# flatten to a vector of length H*W
# x: Size([20480])
# x: tensor([ 0., 1., 2., ..., 157., 158., 159.], device='cuda:0')
y, x = y.view(height * width), x.view(height * width)
# torch.ones_like(x): a tensor of ones with the same size as x (the homogeneous coordinate)
# torch.stack: [3, H*W], i.e. Size([3, 20480])
# xyz: tensor([[ 0., 1., 2., ..., 157., 158., 159.],
# [ 0., 0., 0., ..., 127., 127., 127.],
# [ 1., 1., 1., ..., 1., 1., 1.]], device='cuda:0')
xyz = torch.stack((x, y, torch.ones_like(x)))
# unsqueeze: add a leading dimension -> [1, 3, H*W]
# repeat: tile along the new batch dimension -> [B, 3, H*W]
xyz = torch.unsqueeze(xyz, 0).repeat(batch, 1, 1)
# [B, 3, H*W]: first apply the rotation to the homogeneous pixel coordinates
rot_xyz = torch.matmul(rot, xyz)
# [B, 3, Ndepth, H*W]: then add the Ndepth dimension and scale each copy of the rotated coordinates by its depth hypothesis
# rot_depth_xyz: Size([4, 3, 192, 20480])
# depth_values.view(batch, 1, num_depth,1): Size([4, 1, 192, 1])
rot_depth_xyz = rot_xyz.unsqueeze(2).repeat(1, 1, num_depth, 1) * depth_values.view(batch, 1, num_depth, 1)
# rotated (depth-scaled) coordinates + translation -> projected coordinates in the src camera
# [B, 3, Ndepth, H*W]
proj_xyz = rot_depth_xyz + trans.view(batch, 3, 1, 1)
# divide x and y by z (perspective division) to get pixel coordinates
# [B, 2, Ndepth, H*W]
proj_xy = proj_xyz[:, :2, :, :] / proj_xyz[:, 2:3, :, :]
# F.grid_sample expects grid values in [-1, 1]
# [B, Ndepth, H*W]: normalize x by the image width
# proj_x_normalized: Size([4, 192, 20480])
proj_x_normalized = proj_xy[:, 0, :, :] / ((width - 1) / 2) - 1
# normalize y by the image height
proj_y_normalized = proj_xy[:, 1, :, :] / ((height - 1) / 2) - 1
# stack the normalized x and y back together
# [B, Ndepth, H*W, 2]
proj_xy = torch.stack((proj_x_normalized, proj_y_normalized), dim=3)
grid = proj_xy
# sample the src feature map at the warped coordinates (bilinear interpolation); the sampled values are the warped features
# warped_src_fea: Size([4, 32, 24576, 160])
warped_src_fea = F.grid_sample(src_fea, grid.view(batch, num_depth * height, width, 2), mode='bilinear',
padding_mode='zeros')
# separate out the depth dimension that was folded into the height dimension in the previous step
# warped_src_fea: Size([4, 32, 192, 128, 160])
warped_src_fea = warped_src_fea.view(batch, channels, num_depth, height, width)
# the result can be read as a feature volume: the src features projected into the ref view under each depth hypothesis
# [B, C, Ndepth, H, W]
return warped_src_fea
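# --- Illustrative sketch (not part of the original code): a quick shape check for homo_warping.
# It assumes the imports used throughout this post (import torch; import torch.nn.functional as F).
# With identical src/ref projection matrices the relative transform is the identity, so the warp simply
# resamples the src features at (roughly) their own pixel locations for every depth plane.
def _demo_homo_warping():
    B, C, H, W, D = 1, 32, 8, 10, 4
    src_fea = torch.rand(B, C, H, W)
    proj = torch.eye(4).unsqueeze(0).repeat(B, 1, 1)              # identity "extrinsics * intrinsics"
    depth_values = torch.linspace(425.0, 935.0, D).unsqueeze(0)   # [B, D] depth hypotheses
    warped = homo_warping(src_fea, proj, proj, depth_values)
    print(warped.shape)   # torch.Size([1, 32, 4, 8, 10]) == [B, C, Ndepth, H, W]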
# Depth regression (soft argmin): weight the 192 depth hypotheses by the probabilities predicted by the network and take the expectation
# p: probability volume [B, D, H, W]
# depth_values: discrete depth values [B, D]
def depth_regression(p, depth_values):
depth_values = depth_values.view(*depth_values.shape, 1, 1)
# the sum over the depth-hypothesis dimension removes it; the expectation is the final estimated depth map
depth = torch.sum(p * depth_values, 1)
return depth
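# --- Illustrative sketch (not part of the original code): depth_regression is a soft argmin, i.e. the
# expectation of the depth hypotheses under the probability volume. With a one-hot probability it returns
# exactly the selected hypothesis; with a uniform distribution it returns the mean depth.
def _demo_depth_regression():
    B, D, H, W = 1, 4, 2, 2
    depth_values = torch.tensor([[425.0, 595.0, 765.0, 935.0]])   # [B, D]
    one_hot = torch.zeros(B, D, H, W)
    one_hot[:, 2] = 1.0                                           # all probability mass on hypothesis 2
    print(depth_regression(one_hot, depth_values))                # 765 everywhere
    uniform = torch.full((B, D, H, W), 1.0 / D)
    print(depth_regression(uniform, depth_values))                # 680 everywhere (the mean depth)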
# Feature extraction network
# Maps the 3-channel RGB image to 32-channel features and downsamples by a factor of 4
# Input:  [3, H, W]
# Output: [32, H/4, W/4], e.g. (32, 128, 160) for a 512x640 input
class FeatureNet(nn.Module):
def __init__(self):
super(FeatureNet, self).__init__()
self.inplanes = 32
self.conv0 = ConvBnReLU(3, 8, 3, 1, 1)
self.conv1 = ConvBnReLU(8, 8, 3, 1, 1)
self.conv2 = ConvBnReLU(8, 16, 5, 2, 2)
self.conv3 = ConvBnReLU(16, 16, 3, 1, 1)
self.conv4 = ConvBnReLU(16, 16, 3, 1, 1)
self.conv5 = ConvBnReLU(16, 32, 5, 2, 2)
self.conv6 = ConvBnReLU(32, 32, 3, 1, 1)
self.feature = nn.Conv2d(32, 32, 3, 1, 1)
def forward(self, x):
x = self.conv1(self.conv0(x))
x = self.conv4(self.conv3(self.conv2(x)))
x = self.feature(self.conv6(self.conv5(x)))
return x
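# --- Illustrative sketch (not part of the original code): checking FeatureNet's 4x downsampling.
# It assumes ConvBnReLU (Conv2d + BatchNorm2d + ReLU with the given kernel/stride/padding) is defined
# as in the repo's module.py, and that H and W are divisible by 4.
def _demo_featurenet():
    net = FeatureNet().eval()
    with torch.no_grad():
        feat = net(torch.rand(1, 3, 512, 640))   # [B, 3, H, W]
    print(feat.shape)                            # torch.Size([1, 32, 128, 160]) == [B, 32, H/4, W/4]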
# Cost volume regularization network
# 3D convolutions downsample the volume, transposed 3D convolutions upsample it again, and the encoder
# volume at each scale is added to the corresponding decoder volume (skip connections)
# Input:  [B, C, D, H/4, W/4]
# Output: [B, 1, D, H/4, W/4], e.g. (B, 1, 192, 128, 160)
class CostRegNet(nn.Module):
def __init__(self):
super(CostRegNet, self).__init__()
self.conv0 = ConvBnReLU3D(32, 8)
self.conv1 = ConvBnReLU3D(8, 16, stride=2)
self.conv2 = ConvBnReLU3D(16, 16)
self.conv3 = ConvBnReLU3D(16, 32, stride=2)
self.conv4 = ConvBnReLU3D(32, 32)
self.conv5 = ConvBnReLU3D(32, 64, stride=2)
self.conv6 = ConvBnReLU3D(64, 64)
self.conv7 = nn.Sequential(
nn.ConvTranspose3d(64, 32, kernel_size=3, padding=1, output_padding=1, stride=2, bias=False),
nn.BatchNorm3d(32),
nn.ReLU(inplace=True))
self.conv9 = nn.Sequential(
nn.ConvTranspose3d(32, 16, kernel_size=3, padding=1, output_padding=1, stride=2, bias=False),
nn.BatchNorm3d(16),
nn.ReLU(inplace=True))
self.conv11 = nn.Sequential(
nn.ConvTranspose3d(16, 8, kernel_size=3, padding=1, output_padding=1, stride=2, bias=False),
nn.BatchNorm3d(8),
nn.ReLU(inplace=True))
self.prob = nn.Conv3d(8, 1, 3, stride=1, padding=1)
def forward(self, x):
conv0 = self.conv0(x)
conv2 = self.conv2(self.conv1(conv0))
conv4 = self.conv4(self.conv3(conv2))
x = self.conv6(self.conv5(conv4))
x = conv4 + self.conv7(x)
x = conv2 + self.conv9(x)
x = conv0 + self.conv11(x)
x = self.prob(x)
return x
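# --- Illustrative sketch (not part of the original code): CostRegNet is a 3D U-Net, so D, H and W must
# all be divisible by 8 for the three stride-2 encoder levels to line up with the three transposed-conv
# decoder levels in the skip connections. A small dummy volume keeps the memory footprint low.
def _demo_costregnet():
    net = CostRegNet().eval()
    with torch.no_grad():
        out = net(torch.rand(1, 32, 8, 16, 16))   # [B, C=32, D, H/4, W/4]
    print(out.shape)                              # torch.Size([1, 1, 8, 16, 16]) == [B, 1, D, H/4, W/4]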
# Depth map refinement network (residual learning)
# Input:  [B, 4, H/4, W/4], 4 channels = 3 (image) + 1 (depth)
# Output: [B, 1, H/4, W/4], e.g. (B, 1, 128, 160)
class RefineNet(nn.Module):
def __init__(self):
super(RefineNet, self).__init__()
self.conv1 = ConvBnReLU(4, 32)
self.conv2 = ConvBnReLU(32, 32)
self.conv3 = ConvBnReLU(32, 32)
self.res = ConvBnReLU(32, 1)
def forward(self, img, depth_init):
concat = torch.cat((img, depth_init), dim=1)
depth_residual = self.res(self.conv3(self.conv2(self.conv1(concat))))
depth_refined = depth_init + depth_residual
return depth_refined
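# --- Illustrative sketch (not part of the original code): RefineNet learns a residual on top of the
# initial depth, so its output is depth_init + predicted_residual at the same [B, 1, h, w] resolution.
# It assumes ConvBnReLU from module.py; img and depth_init must share the same spatial size here.
def _demo_refinenet():
    net = RefineNet().eval()
    img = torch.rand(1, 3, 128, 160)          # reference image resized to the depth-map resolution
    depth_init = torch.rand(1, 1, 128, 160)   # initial depth map with an explicit channel dimension
    with torch.no_grad():
        refined = net(img, depth_init)
    print(refined.shape)                      # torch.Size([1, 1, 128, 160])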
class MVSNet(nn.Module):
def __init__(self, refine=True):
super(MVSNet, self).__init__()
self.refine = refine
self.feature = FeatureNet()
self.cost_regularization = CostRegNet()
# depth map refinement network (only built when refine=True)
if self.refine:
self.refine_network = RefineNet()
# Inputs: images, projection matrices (intrinsics/extrinsics), sampled depth values
def forward(self, imgs, proj_matrices, depth_values):
# torch.unbind(): slices the tensor along the given dimension and returns the slices as a tuple
imgs = torch.unbind(imgs, 1)
proj_matrices = torch.unbind(proj_matrices, 1)
# the image list and the projection-matrix list must have the same length
assert len(imgs) == len(proj_matrices), "Different number of images and projection matrices"
img_height, img_width = imgs[0].shape[2], imgs[0].shape[3]
num_depth = depth_values.shape[1]
num_views = len(imgs)
# step 1. feature extraction
# Input:  each image [B, 3, H, W]
# Output: feature map [B, 32, H/4, W/4], e.g. (B, 32, 128, 160)
features = [self.feature(img) for img in imgs]
# ref is the reference image, src are the source images
ref_feature, src_features = features[0], features[1:]
ref_proj, src_projs = proj_matrices[0], proj_matrices[1:]
# step 2. build the cost volume by differentiable homography
# the ref features and the warped src features are aggregated into the raw cost volume via their variance
# ref_volume:Size([4, 32, 192, 128, 160])
ref_volume = ref_feature.unsqueeze(2).repeat(1, 1, num_depth, 1, 1)
volume_sum = ref_volume
volume_sq_sum = ref_volume ** 2
# del removes the variable name, not (necessarily) the underlying data
del ref_volume
# zip(): pairs corresponding elements of the sequences into tuples
for src_fea, src_proj in zip(src_features, src_projs):
# homography warping
warped_volume = homo_warping(src_fea, src_proj, ref_proj, depth_values)
if self.training:
volume_sum = volume_sum + warped_volume
volume_sq_sum = volume_sq_sum + warped_volume ** 2
else:
# TODO: this is only a temporal solution to save memory, better way?
volume_sum += warped_volume
# .pow_(2): in-place square
# the memory of warped_volume has been modified
volume_sq_sum += warped_volume.pow_(2)
del warped_volume
# aggregate multiple feature volumes by variance
# the cost volume is the per-element variance over all views, computed with the identity Var(X) = E[X^2] - (E[X])^2
# final cost volume: [B, 32, 192, H/4, W/4]
volume_variance = volume_sq_sum.div_(num_views).sub_(volume_sum.div_(num_views).pow_(2))
# step 3. cost volume regularization
# the regularization network aggregates information further; the output is [B, 1, 192, H/4, W/4]
cost_reg = self.cost_regularization(volume_variance)
# cost_reg = F.upsample(cost_reg, [num_depth * 4, img_height, img_width], mode='trilinear')
# squeeze removes the singleton channel dimension -> [B, 192, H/4, W/4]
cost_reg = cost_reg.squeeze(1)
# softmax along the depth dimension turns the scores into a probability distribution over depth, giving the probability volume
prob_volume = F.softmax(cost_reg, dim=1)
# regress the depth map
depth = depth_regression(prob_volume, depth_values=depth_values)
with torch.no_grad():
# photometric confidence: a confidence map with the same resolution as the depth map, used later for photometric consistency filtering
# In short: sum the probability over the four depth hypotheses around the estimated depth; depth regression
# over the hypothesis indices gives the integer index of the best depth, and gather picks the summed
# probability at that index out of the 192 hypothesis layers
# F.pad: the pad argument holds six values that pad the last three dimensions of the input
# prob_volume_sum4: Size([4, 192, 128, 160])
# prob_volume: Size([4, 192, 128, 160])
# depth_index: Size([4, 1, 128, 160])
prob_volume_sum4 = 4 * F.avg_pool3d(F.pad(prob_volume.unsqueeze(1), pad=(0, 0, 0, 0, 1, 2)), (4, 1, 1), stride=1, padding=0).squeeze(1)
depth_index = depth_regression(prob_volume, depth_values=torch.arange(num_depth, device=prob_volume.device, dtype=torch.float)).long()
# photometric_confidence:Size([4, 128, 160])
photometric_confidence = torch.gather(prob_volume_sum4, 1, depth_index.unsqueeze(1)).squeeze(1)
# step 4. depth map refinement
# feed the reference image together with the estimated depth into the residual refinement network to get a refined depth map
if not self.refine:
return {"depth": depth, "photometric_confidence": photometric_confidence}
else:
# Note: the original call passed a single concatenated tensor, which does not match RefineNet.forward(img, depth_init);
# here the image is resized to the depth-map resolution and the depth gets an explicit channel dimension
refined_depth = self.refine_network(F.interpolate(imgs[0], size=depth.shape[-2:], mode='bilinear', align_corners=False), depth.unsqueeze(1))
return {"depth": depth, "refined_depth": refined_depth, "photometric_confidence": photometric_confidence}
# Supervised learning: the loss is simply the smooth L1 distance between the estimated and ground-truth depth maps
# The only subtlety is the dataset's mask, which finally comes into play here: only pixels with mask > 0.5
# (the white region in the visualizations) have valid depth and are used to compute the loss
def mvsnet_loss(depth_est, depth_gt, mask):
mask = mask > 0.5
return F.smooth_l1_loss(depth_est[mask], depth_gt[mask], size_average=True)
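# --- Illustrative sketch (not part of the original code): how the masked loss behaves. Only pixels with
# mask > 0.5 enter the loss; size_average=True is the legacy spelling of reduction='mean' in newer PyTorch,
# so the two calls below should give the same value.
def _demo_mvsnet_loss():
    depth_est = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    depth_gt = torch.tensor([[1.5, 2.0], [0.0, 0.0]])
    mask = torch.tensor([[1.0, 1.0], [0.0, 0.0]])   # the bottom row has no valid ground truth
    print(mvsnet_loss(depth_est, depth_gt, mask))
    print(F.smooth_l1_loss(depth_est[mask > 0.5], depth_gt[mask > 0.5], reduction='mean'))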
# parse arguments and check
args = parser.parse_args()
if args.resume:
assert args.mode == "train"
assert args.loadckpt is None
if args.testpath is None:
args.testpath = args.trainpath
# seed the CPU random number generator
# so that the random initialization of the network weights is identical across runs and results are reproducible
torch.manual_seed(args.seed)
# torch.cuda.manual_seed() seeds the current GPU
torch.cuda.manual_seed(args.seed)
# create a logger for the "train" and "testall" modes
if args.mode == "train":
# check whether the path is a directory
if not os.path.isdir(args.logdir):
# create the directory
os.mkdir(args.logdir)
# strftime() formats the date/time
current_time_str = str(datetime.datetime.now().strftime('%Y%m%d_%H%M%S'))
print("current time", current_time_str)
# build the SummaryWriter (tensorboardX is used for visualization)
print("creating new summary file")
logger = SummaryWriter(args.logdir)
# sys.argv[] holds the command-line arguments; sys.argv[0] is the path of the script itself
# sys.argv[1:] is everything from the second argument onwards
print("argv:", sys.argv[1:])
print_args(args)
# build the MVSDataset and DataLoaders
# training uses dtu_yao.py, evaluation uses dtu_yao_eval.py
MVSDataset = find_dataset_def(args.dataset)
train_dataset = MVSDataset(args.trainpath, args.trainlist, "train", 3, args.numdepth, args.interval_scale)
test_dataset = MVSDataset(args.testpath, args.testlist, "test", 5, args.numdepth, args.interval_scale)
# drop_last (bool, optional): whether to drop the last incomplete batch when the sample count is not divisible by the batch size (default: False)
TrainImgLoader = DataLoader(train_dataset, args.batch_size, shuffle=True, num_workers=8, drop_last=True)
TestImgLoader = DataLoader(test_dataset, args.batch_size, shuffle=False, num_workers=4, drop_last=False)
# set up MVSNet, mvsnet_loss and the optimizer
model = MVSNet(refine=False)
# wrap the model in DataParallel to speed up training on multiple GPUs
if args.mode in ["train", "test"]:
model = nn.DataParallel(model)
model.cuda()
# loss function
model_loss = mvsnet_loss
# optimizer
optimizer = optim.Adam(model.parameters(), lr=args.lr, betas=(0.9, 0.999), weight_decay=args.wd)
start_epoch = 0
# if there is a previous run, resume training from the latest (or a specified) checkpoint and load its parameters
if (args.mode == "train" and args.resume) or (args.mode == "test" and not args.loadckpt):
saved_models = [fn for fn in os.listdir(args.logdir) if fn.endswith(".ckpt")]
saved_models = sorted(saved_models, key=lambda x: int(x.split('_')[-1].split('.')[0]))
# use the latest checkpoint file
loadckpt = os.path.join(args.logdir, saved_models[-1])
print("resuming", loadckpt)
state_dict = torch.load(loadckpt)
model.load_state_dict(state_dict['model'])
optimizer.load_state_dict(state_dict['optimizer'])
start_epoch = state_dict['epoch'] + 1
elif args.loadckpt:
# load checkpoint file specified by args.loadckpt
print("loading model {}".format(args.loadckpt))
state_dict = torch.load(args.loadckpt)
model.load_state_dict(state_dict['model'])
print("start at epoch {}".format(start_epoch))
print('Number of model parameters: {}'.format(sum([p.data.nelement() for p in model.parameters()])))
# training function
def train():
# milestones for dynamically adjusting the learning rate
milestones = [int(epoch_idx) for epoch_idx in args.lrepochs.split(':')[0].split(',')]
lr_gamma = 1 / float(args.lrepochs.split(':')[1])
# MultiStepLR is a very common schedule: at each milestone it multiplies the current learning rate by gamma
lr_scheduler = torch.optim.lr_scheduler.MultiStepLR(optimizer, milestones, gamma=lr_gamma, last_epoch=start_epoch - 1)
# train for args.epochs epochs
for epoch_idx in range(start_epoch, args.epochs):
print('Epoch {}:'.format(epoch_idx))
# scheduler.step() advances the schedule and adjusts the learning rate
# for MultiStepLR the learning rate is lowered whenever the epoch counter reaches one of the milestones
lr_scheduler.step()
global_step = len(TrainImgLoader) * epoch_idx
# train on each batch
for batch_idx, sample in enumerate(TrainImgLoader):
# timing
start_time = time.time()
# compute the current global step
global_step = len(TrainImgLoader) * epoch_idx + batch_idx
# log every summary_freq steps (summary_freq is positive)
# %: modulo, the remainder of the division
# ==: equality test, True when the operands are equal
do_summary = global_step % args.summary_freq == 0
# train_sample() returns the loss plus the scalar and image summaries for logging
loss, scalar_outputs, image_outputs = train_sample(sample, detailed_summary=do_summary)
# log the loss
if do_summary:
save_scalars(logger, 'train', scalar_outputs, global_step)
save_images(logger, 'train', image_outputs, global_step)
del scalar_outputs, image_outputs
print(
'Epoch {}/{}, Iter {}/{}, train loss = {:.3f}, time = {:.3f}'.format(epoch_idx, args.epochs, batch_idx,
len(TrainImgLoader), loss,
time.time() - start_time))
# save the model after every save_freq epochs
# torch.save(state, path)
# state can be a dict holding the parameters
# path is the absolute path plus the file name, e.g. '/home/q/Desktop/modelpara.pth'
if (epoch_idx + 1) % args.save_freq == 0:
torch.save({
'epoch': epoch_idx,
'model': model.state_dict(),
'optimizer': optimizer.state_dict()},
"{}/model_{:0>6}.ckpt".format(args.logdir, epoch_idx))
# validate after each training epoch
# accumulate the per-batch metrics so their averages can be written out as 'fulltest'
avg_test_scalars = DictAverageMeter()
for batch_idx, sample in enumerate(TestImgLoader):
start_time = time.time()
global_step = len(TrainImgLoader) * epoch_idx + batch_idx
do_summary = global_step % args.summary_freq == 0
loss, scalar_outputs, image_outputs = test_sample(sample, detailed_summary=do_summary)
if do_summary:
save_scalars(logger, 'test', scalar_outputs, global_step)
save_images(logger, 'test', image_outputs, global_step)
avg_test_scalars.update(scalar_outputs)
del scalar_outputs, image_outputs
print('Epoch {}/{}, Iter {}/{}, test loss = {:.3f}, time = {:.3f}'.format(epoch_idx, args.epochs, batch_idx,
len(TestImgLoader), loss,
time.time() - start_time))
save_scalars(logger, 'fulltest', avg_test_scalars.mean(), global_step)
print("avg_test_scalars:", avg_test_scalars.mean())
# gc.collect()
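# --- Illustrative sketch (not part of the original code): how the lrepochs string drives MultiStepLR.
# For example, lrepochs = "10,12,14:2" means: at epochs 10, 12 and 14 multiply the learning rate by
# gamma = 1/2. The snippet below steps a throwaway optimizer through 16 epochs and prints the lr.
def _demo_multistep_lr(lrepochs="10,12,14:2", base_lr=0.001, epochs=16):
    milestones = [int(e) for e in lrepochs.split(':')[0].split(',')]
    gamma = 1 / float(lrepochs.split(':')[1])
    opt = optim.Adam([nn.Parameter(torch.zeros(1))], lr=base_lr)
    sched = torch.optim.lr_scheduler.MultiStepLR(opt, milestones, gamma=gamma)
    for epoch in range(epochs):
        print(epoch, opt.param_groups[0]['lr'])   # 0.001 for epochs 0-9, then halved at 10, 12 and 14
        opt.step()
        sched.step()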
def test():
avg_test_scalars = DictAverageMeter()
for batch_idx, sample in enumerate(TestImgLoader):
start_time = time.time()
loss, scalar_outputs, image_outputs = test_sample(sample, detailed_summary=True)
avg_test_scalars.update(scalar_outputs)
del scalar_outputs, image_outputs
print('Iter {}/{}, test loss = {:.3f}, time = {:.3f}'.format(batch_idx, len(TestImgLoader), loss,
time.time() - start_time))
if batch_idx % 100 == 0:
print("Iter {}/{}, test results = {}".format(batch_idx, len(TestImgLoader), avg_test_scalars.mean()))
print("final", avg_test_scalars)
def train_sample(sample, detailed_summary=False):
# training mode
model.train()
# zero the optimizer's gradients before this iteration
optimizer.zero_grad()
# move every Tensor in the sample onto the GPU
sample_cuda = tocuda(sample)
# ground-truth depth map
depth_gt = sample_cuda["depth"]
# the mask filters out pixels without ground-truth depth so they do not contribute to the loss
mask = sample_cuda["mask"]
# run the model to estimate the depth map
outputs = model(sample_cuda["imgs"], sample_cuda["proj_matrices"], sample_cuda["depth_values"])
# depth estimate produced by MVSNet
depth_est = outputs["depth"]
# loss between the estimated and ground-truth depth; the mask selects only the pixels that have a valid depth value
loss = model_loss(depth_est, depth_gt, mask)
# backward pass: compute the gradients
loss.backward()
# update the network parameters using the gradients
optimizer.step()
# record the training loss
scalar_outputs = {"loss": loss}
# depth_est * mask: estimated depth with the invalid (no ground truth) pixels zeroed out
# depth_gt: ground-truth depth map
# ref_img: the reference image whose depth is being estimated
# mask: binary 0/1 mask, 1 where a ground-truth depth value exists
image_outputs = {"depth_est": depth_est * mask, "depth_gt": sample["depth"],
"ref_img": sample["imgs"][:, 0],
"mask": sample["mask"]}
if detailed_summary:
# absolute difference between the prediction and the ground truth
image_outputs["errormap"] = (depth_est - depth_gt).abs() * mask
# mean absolute depth error: mean[abs(est - gt)]
scalar_outputs["abs_depth_error"] = AbsDepthError_metrics(depth_est, depth_gt, mask > 0.5)
# fraction of the scene where the depth error exceeds 2 mm: mean[abs(est - gt) > threshold]
scalar_outputs["thres2mm_error"] = Thres_metrics(depth_est, depth_gt, mask > 0.5, 2)
scalar_outputs["thres4mm_error"] = Thres_metrics(depth_est, depth_gt, mask > 0.5, 4)
scalar_outputs["thres8mm_error"] = Thres_metrics(depth_est, depth_gt, mask > 0.5, 8)
return tensor2float(loss), tensor2float(scalar_outputs), image_outputs
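# --- Illustrative sketch (not part of the original code): AbsDepthError_metrics and Thres_metrics come
# from the repo's utils.py and are not shown in this post. Conceptually they reduce to something like the
# two helpers below: the mean absolute error over valid pixels, and the fraction of valid pixels whose
# absolute error exceeds a threshold (the 2/4/8 mm values above). Treat these as approximations, not the
# repo's exact implementation.
def _abs_depth_error(depth_est, depth_gt, mask):
    return (depth_est[mask] - depth_gt[mask]).abs().mean()
def _threshold_error(depth_est, depth_gt, mask, threshold):
    err = (depth_est[mask] - depth_gt[mask]).abs()
    return (err > threshold).float().mean()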
The rest of the code, with equally detailed comments, is available at the link below. It runs under PyTorch; temp.py generates random images and camera parameters so you can quickly test the code and study the network.
MVSNet code with detailed comments (PyTorch): https://download.csdn.net/download/qq_43307074/87416295?spm=1001.2014.3001.5503