Original link:
GitHub - fangchangma/sparse-to-dense.pytorch: ICRA 2018 "Sparse-to-Dense: Depth Prediction from Sparse Depth Samples and a Single Image" (PyTorch Implementation)
The first column is the original scene image, the second is the sparse depth, the third is the dense (ground-truth) depth, and the fourth is the model's prediction.
The model supports three training modes: 1. the RGB image (column 1) as input with column 3 as the label; 2. columns 1 and 2 merged into a 4-channel RGBD input with column 3 as the label; 3. column 2 alone as input with column 3 as the label.
Column 2 is generated by sampling column 3. Two sampling strategies are provided in the dataloaders/dense_to_sparse.py script, so to run your own dataset you only need to prepare the column-1 (RGB) and column-3 (dense depth) data; a sketch of how the sparsifier is applied is shown right below.
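For reference, here is a minimal sketch (with placeholder array shapes and sample count) of how a sparsifier turns the dense depth into the sparse input; it mirrors the create_sparse_depth() logic in dataloaders/dataloader.py shown further below and the UniformSampling construction used in main.py.
import numpy as np
from dataloaders.dense_to_sparse import UniformSampling

rgb = np.random.rand(228, 304, 3)            # stand-in for a real RGB frame
dense_depth = np.random.rand(228, 304) * 10  # stand-in for the dense depth (column 3)

sparsifier = UniformSampling(num_samples=200, max_depth=np.inf)
mask_keep = sparsifier.dense_to_sparse(rgb, dense_depth)  # boolean mask of kept pixels

sparse_depth = np.zeros(dense_depth.shape)
sparse_depth[mask_keep] = dense_depth[mask_keep]  # column 2: zeros everywhere except the sampled pixels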
The dataset is about 30 GB, which is fairly large. I downloaded it and re-shared it on Baidu Netdisk: https://pan.baidu.com/s/1SzQhDVZBJSy9gnMkr4UCMQ (extraction code: tpql).
After extraction, the nyudepthv2 directory contains train and val subfolders full of .h5 files:
The .h5 files are just a storage format; internally they are organized like a dictionary. The code already provides a loader (the h5_loader function in dataloaders/dataloader.py) that reads the rgb and the corresponding depth data; a sketch of the expected layout is shown below.
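A minimal sketch of what h5_loader() expects, useful for checking a downloaded sample; the file path below is a placeholder.
# Each .h5 sample holds an 'rgb' dataset stored channels-first (3, H, W) and a
# 'depth' dataset of shape (H, W); h5_loader() transposes rgb to (H, W, 3).
import h5py

# placeholder path; point it at any extracted sample
with h5py.File('data/nyudepthv2/val/official/00001.h5', 'r') as f:
    print(list(f.keys()))          # ['depth', 'rgb']
    rgb = f['rgb'][:]              # (3, 480, 640)
    depth = f['depth'][:]          # (480, 640), metric depth
    print(rgb.shape, depth.shape)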
The project does evaluation through its evaluate mode. After downloading the data and the pretrained model, simply create a data folder and put the extracted dataset in it, then run python main.py --evaluate model_best.pth from the command line. This automatically creates a results folder and produces a stitched comparison image, comparison_7.png.
What I need is the same kind of output as the val path, but run in batch on my own data, which requires changes in quite a few places on top of the val code. All of the modified scripts are pasted below.
(1) Data loading
nyu_dataloader.py
import numpy as np
import dataloaders.transforms as transforms
from dataloaders.dataloader import MyDataloader
iheight, iwidth = 480, 640 # raw image size
class NYUDataset(MyDataloader):
def __init__(self, root, type, sparsifier=None, modality='rgb'):
super(NYUDataset, self).__init__(root, type, sparsifier, modality)
self.output_size = (228, 304)
def train_transform(self, rgb, depth):
s = np.random.uniform(1.0, 1.5) # random scaling
depth_np = depth / s
angle = np.random.uniform(-5.0, 5.0) # random rotation degrees
do_flip = np.random.uniform(0.0, 1.0) < 0.5 # random horizontal flip
# perform 1st step of data augmentation
transform = transforms.Compose([
transforms.Resize(250.0 / iheight), # this is for computational efficiency, since rotation can be slow
transforms.Rotate(angle),
transforms.Resize(s),
transforms.CenterCrop(self.output_size),
transforms.HorizontalFlip(do_flip)
])
rgb_np = transform(rgb)
rgb_np = self.color_jitter(rgb_np) # random color jittering
rgb_np = np.asfarray(rgb_np, dtype='float') / 255
depth_np = transform(depth_np)
return rgb_np, depth_np
def val_transform(self, rgb, depth):
depth_np = depth
transform = transforms.Compose([
transforms.Resize(240.0 / iheight),
transforms.CenterCrop(self.output_size),
])
rgb_np = transform(rgb)
rgb_np = np.asfarray(rgb_np, dtype='float') / 255
depth_np = transform(depth_np)
return rgb_np, depth_np
def test_transform(self, rgb, depth):
depth_np = depth
transform = transforms.Compose([
transforms.Resize(240.0 / iheight),
transforms.CenterCrop(self.output_size),
])
rgb_np = transform(rgb)
rgb_np = np.asfarray(rgb_np, dtype='float') / 255
depth_np = transform(depth_np)
return rgb_np, depth_np
dataloader.py
import os
import os.path
import numpy as np
import torch.utils.data as data
import h5py
import dataloaders.transforms as transforms
IMG_EXTENSIONS = ['.h5',]
def is_image_file(filename):
return any(filename.endswith(extension) for extension in IMG_EXTENSIONS)
def find_classes(dir):
classes = [d for d in os.listdir(dir) if os.path.isdir(os.path.join(dir, d))]
classes.sort()
class_to_idx = {classes[i]: i for i in range(len(classes))}
return classes, class_to_idx
def make_dataset(dir, class_to_idx):
images = []
dir = os.path.expanduser(dir)
for target in sorted(os.listdir(dir)):
d = os.path.join(dir, target)
if not os.path.isdir(d):
continue
for root, _, fnames in sorted(os.walk(d)):
for fname in sorted(fnames):
if is_image_file(fname):
path = os.path.join(root, fname)
item = (path, class_to_idx[target])
images.append(item)
return images
def h5_loader(path):
h5f = h5py.File(path, "r")
rgb = np.array(h5f['rgb'])
rgb = np.transpose(rgb, (1, 2, 0))
depth = np.array(h5f['depth'])
return rgb, depth
# def rgb2grayscale(rgb):
# return rgb[:,:,0] * 0.2989 + rgb[:,:,1] * 0.587 + rgb[:,:,2] * 0.114
to_tensor = transforms.ToTensor()
class MyDataloader(data.Dataset):
modality_names = ['rgb', 'rgbd', 'd'] # , 'g', 'gd'
color_jitter = transforms.ColorJitter(0.4, 0.4, 0.4)
def __init__(self, root, type, sparsifier=None, modality='rgb', loader=h5_loader):
classes, class_to_idx = find_classes(root)
imgs = make_dataset(root, class_to_idx)
assert len(imgs)>0, "Found 0 images in subfolders of: " + root + "\n"
print("Found {} images in {} folder.".format(len(imgs), type))
self.root = root
self.imgs = imgs
self.classes = classes
self.class_to_idx = class_to_idx
if type == 'train':
self.transform = self.train_transform
elif type == 'val':
self.transform = self.val_transform
elif type == 'test':
self.transform = self.test_transform
else:
            raise (RuntimeError("Invalid dataset type: " + type + "\n"
                                "Supported dataset types are: train, val, test"))
self.loader = loader
self.sparsifier = sparsifier
assert (modality in self.modality_names), "Invalid modality type: " + modality + "\n" + \
"Supported dataset types are: " + ''.join(self.modality_names)
self.modality = modality
self.mark = type
def train_transform(self, rgb, depth):
raise (RuntimeError("train_transform() is not implemented. "))
    def val_transform(self, rgb, depth):
        raise (RuntimeError("val_transform() is not implemented."))
    def test_transform(self, rgb, depth):
        raise (RuntimeError("test_transform() is not implemented."))
def create_sparse_depth(self, rgb, depth):
if self.sparsifier is None:
return depth
else:
mask_keep = self.sparsifier.dense_to_sparse(rgb, depth)
sparse_depth = np.zeros(depth.shape)
sparse_depth[mask_keep] = depth[mask_keep]
return sparse_depth
def create_rgbd(self, rgb, depth):
sparse_depth = self.create_sparse_depth(rgb, depth)
rgbd = np.append(rgb, np.expand_dims(sparse_depth, axis=2), axis=2)
return rgbd
def __getraw__(self, index):
"""
Args:
index (int): Index
Returns:
tuple: (rgb, depth) the raw data.
"""
path, target = self.imgs[index]
rgb, depth = self.loader(path)
_, name = os.path.split(path)
name = name.split('.')[0]
return rgb, depth, name
def __getitem__(self, index):
rgb, depth, name = self.__getraw__(index)
if self.transform is not None:
rgb_np, depth_np = self.transform(rgb, depth)
else:
raise(RuntimeError("transform not defined"))
# color normalization
# rgb_tensor = normalize_rgb(rgb_tensor)
# rgb_np = normalize_np(rgb_np)
if self.modality == 'rgb':
input_np = rgb_np
elif self.modality == 'rgbd':
input_np = self.create_rgbd(rgb_np, depth_np)
elif self.modality == 'd':
input_np = self.create_sparse_depth(rgb_np, depth_np)
input_tensor = to_tensor(input_np)
while input_tensor.dim() < 3:
input_tensor = input_tensor.unsqueeze(0)
if self.mark == 'test':
depth_tensor = name
else:
depth_tensor = to_tensor(depth_np)
depth_tensor = depth_tensor.unsqueeze(0)
return input_tensor, depth_tensor
def __len__(self):
return len(self.imgs)
(2) Parameters and utilities
utils.py
import os
import cv2
import torch
import shutil
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
cmap = plt.cm.viridis
def parse_command():
model_names = ['resnet18', 'resnet50']
loss_names = ['l1', 'l2']
data_names = ['nyudepthv2', 'kitti']
from dataloaders.dense_to_sparse import UniformSampling, SimulatedStereo
sparsifier_names = [x.name for x in [UniformSampling, SimulatedStereo]]
from models import Decoder
decoder_names = Decoder.names
from dataloaders.dataloader import MyDataloader
modality_names = MyDataloader.modality_names
import argparse
parser = argparse.ArgumentParser(description='Sparse-to-Dense')
parser.add_argument('--arch', '-a', metavar='ARCH', default='resnet18', choices=model_names,
help='model architecture: ' + ' | '.join(model_names) + ' (default: resnet18)')
parser.add_argument('--data', metavar='DATA', default='nyudepthv2',
choices=data_names,
help='dataset: ' + ' | '.join(data_names) + ' (default: nyudepthv2)')
parser.add_argument('--modality', '-m', metavar='MODALITY', default='rgb', choices=modality_names,
help='modality: ' + ' | '.join(modality_names) + ' (default: rgb)')
parser.add_argument('-s', '--num-samples', default=0, type=int, metavar='N',
help='number of sparse depth samples (default: 0)')
parser.add_argument('--max-depth', default=-1.0, type=float, metavar='D',
help='cut-off depth of sparsifier, negative values means infinity (default: inf [m])')
parser.add_argument('--sparsifier', metavar='SPARSIFIER', default=UniformSampling.name, choices=sparsifier_names,
help='sparsifier: ' + ' | '.join(sparsifier_names) + ' (default: ' + UniformSampling.name + ')')
parser.add_argument('--decoder', '-d', metavar='DECODER', default='deconv2', choices=decoder_names,
help='decoder: ' + ' | '.join(decoder_names) + ' (default: deconv2)')
    parser.add_argument('-j', '--workers', default=0, type=int, metavar='N',
                        help='number of data loading workers (default: 0)')
parser.add_argument('--epochs', default=15, type=int, metavar='N',
help='number of total epochs to run (default: 15)')
parser.add_argument('-c', '--criterion', metavar='LOSS', default='l1', choices=loss_names,
help='loss function: ' + ' | '.join(loss_names) + ' (default: l1)')
    parser.add_argument('-b', '--batch-size', default=2, type=int, help='mini-batch size (default: 2)')
parser.add_argument('--lr', '--learning-rate', default=0.01, type=float,
metavar='LR', help='initial learning rate (default 0.01)')
parser.add_argument('--momentum', default=0.9, type=float, metavar='M',
help='momentum')
parser.add_argument('--weight-decay', '--wd', default=1e-4, type=float,
metavar='W', help='weight decay (default: 1e-4)')
parser.add_argument('--print-freq', '-p', default=10, type=int,
metavar='N', help='print frequency (default: 10)')
parser.add_argument('--resume', default='', type=str, metavar='PATH',
help='path to latest checkpoint (default: none)')
parser.add_argument('-e', '--evaluate', dest='evaluate', type=str, default='',
help='evaluate model on validation set')
parser.add_argument('-t', '--test', dest='test', type=str, default='',
help='test model on test set')
parser.add_argument('--no-pretrain', dest='pretrained', action='store_false',
help='not to use ImageNet pre-trained weights')
parser.set_defaults(pretrained=True)
args = parser.parse_args()
if args.modality == 'rgb' and args.num_samples != 0:
print("number of samples is forced to be 0 when input modality is rgb")
args.num_samples = 0
if args.modality == 'rgb' and args.max_depth != 0.0:
print("max depth is forced to be 0.0 when input modality is rgb/rgbd")
args.max_depth = 0.0
return args
def save_checkpoint(state, is_best, epoch, output_directory):
checkpoint_filename = os.path.join(output_directory, 'checkpoint-' + str(epoch) + '.pth.tar')
torch.save(state, checkpoint_filename)
if is_best:
best_filename = os.path.join(output_directory, 'model_best.pth.tar')
shutil.copyfile(checkpoint_filename, best_filename)
if epoch > 0:
prev_checkpoint_filename = os.path.join(output_directory, 'checkpoint-' + str(epoch-1) + '.pth.tar')
if os.path.exists(prev_checkpoint_filename):
os.remove(prev_checkpoint_filename)
def adjust_learning_rate(optimizer, epoch, lr_init):
"""Sets the learning rate to the initial LR decayed by 10 every 5 epochs"""
lr = lr_init * (0.1 ** (epoch // 5))
for param_group in optimizer.param_groups:
param_group['lr'] = lr
def get_output_directory(args):
output_directory = os.path.join('results',
'{}.sparsifier={}.samples={}.modality={}.arch={}.decoder={}.criterion={}.lr={}.bs={}.pretrained={}'.
format(args.data, args.sparsifier, args.num_samples, args.modality, \
args.arch, args.decoder, args.criterion, args.lr, args.batch_size, \
args.pretrained))
return output_directory
def colored_depthmap(depth, d_min=None, d_max=None):
if d_min is None:
d_min = np.min(depth)
if d_max is None:
d_max = np.max(depth)
depth_relative = (depth - d_min) / (d_max - d_min)
return 255 * cmap(depth_relative)[:,:,:3] # H, W, C
def merge_into_row(input, depth_target, depth_pred):
rgb = 255 * np.transpose(np.squeeze(input.cpu().numpy()), (1,2,0)) # H, W, C
depth_target_cpu = np.squeeze(depth_target.cpu().numpy())
depth_pred_cpu = np.squeeze(depth_pred.data.cpu().numpy())
d_min = min(np.min(depth_target_cpu), np.min(depth_pred_cpu))
d_max = max(np.max(depth_target_cpu), np.max(depth_pred_cpu))
depth_target_col = colored_depthmap(depth_target_cpu, d_min, d_max)
depth_pred_col = colored_depthmap(depth_pred_cpu, d_min, d_max)
img_merge = np.hstack([rgb, depth_target_col, depth_pred_col])
return img_merge
def merge_into_row_with_gt(input, depth_input, depth_target, depth_pred):
rgb = 255 * np.transpose(np.squeeze(input.cpu().numpy()), (1,2,0)) # H, W, C
depth_input_cpu = np.squeeze(depth_input.cpu().numpy())
depth_target_cpu = np.squeeze(depth_target.cpu().numpy())
depth_pred_cpu = np.squeeze(depth_pred.data.cpu().numpy())
d_min = min(np.min(depth_input_cpu), np.min(depth_target_cpu), np.min(depth_pred_cpu))
d_max = max(np.max(depth_input_cpu), np.max(depth_target_cpu), np.max(depth_pred_cpu))
depth_input_col = colored_depthmap(depth_input_cpu, d_min, d_max)
depth_target_col = colored_depthmap(depth_target_cpu, d_min, d_max)
depth_pred_col = colored_depthmap(depth_pred_cpu, d_min, d_max)
img_merge = np.hstack([rgb, depth_input_col, depth_target_col, depth_pred_col])
return img_merge
def add_row(img_merge, row):
return np.vstack([img_merge, row])
def save_image(img_merge, filename):
img_merge = Image.fromarray(img_merge.astype('uint8'))
img_merge.save(filename)
def stretch_img(pred):
    # colorize the predicted depth and upscale it to 1280x720 for visualization
    depth_pred_cpu = np.squeeze(pred.data.cpu().numpy())
    d_min = np.min(depth_pred_cpu)
    d_max = np.max(depth_pred_cpu)
    depth_pred_cpu = cv2.resize(depth_pred_cpu, (1280, 720))
    depth_pred_col = colored_depthmap(depth_pred_cpu, d_min, d_max)
    return depth_pred_col
(3) Main function
main.py
import os
import time
import csv
import numpy as np
import torch
import torch.backends.cudnn as cudnn
import torch.optim
cudnn.benchmark = True
from models import ResNet
from metrics import AverageMeter, Result
from dataloaders.dense_to_sparse import UniformSampling, SimulatedStereo
import criteria
import utils
from PIL import Image
torch.nn.Module.dump_patches = True
args = utils.parse_command()
print(args)
fieldnames = ['mse', 'rmse', 'absrel', 'lg10', 'mae',
'delta1', 'delta2', 'delta3',
'data_time', 'gpu_time']
best_result = Result()
best_result.set_to_worst()
def create_data_loaders(args):
# Data loading code
print("=> creating data loaders ...")
traindir = os.path.join('data', args.data, 'train')
valdir = os.path.join('data', args.data, 'val')
testdir = os.path.join('data', args.data, 'test')
train_loader = None
val_loader = None
test_loader = None
# sparsifier is a class for generating random sparse depth input from the ground truth
sparsifier = None
max_depth = args.max_depth if args.max_depth >= 0.0 else np.inf
if args.sparsifier == UniformSampling.name:
sparsifier = UniformSampling(num_samples=args.num_samples, max_depth=max_depth)
elif args.sparsifier == SimulatedStereo.name:
sparsifier = SimulatedStereo(num_samples=args.num_samples, max_depth=max_depth)
if args.data == 'nyudepthv2':
from dataloaders.nyu_dataloader import NYUDataset
if args.evaluate:
val_dataset = NYUDataset(valdir, type='val',
modality=args.modality, sparsifier=sparsifier)
# set batch size to be 1 for validation
val_loader = torch.utils.data.DataLoader(val_dataset,
batch_size=1, shuffle=False, num_workers=args.workers,
pin_memory=True)
elif args.test:
test_dataset = NYUDataset(testdir, type='test',
modality=args.modality, sparsifier=sparsifier)
test_loader = torch.utils.data.DataLoader(test_dataset,
batch_size=1, shuffle=False, num_workers=args.workers,
pin_memory=True)
else:
train_dataset = NYUDataset(traindir, type='train',
modality=args.modality, sparsifier=sparsifier)
elif args.data == 'kitti':
from dataloaders.kitti_dataloader import KITTIDataset
if not args.evaluate:
train_dataset = KITTIDataset(traindir, type='train',
modality=args.modality, sparsifier=sparsifier)
val_dataset = KITTIDataset(valdir, type='val',
modality=args.modality, sparsifier=sparsifier)
# set batch size to be 1 for validation
val_loader = torch.utils.data.DataLoader(val_dataset,
batch_size=1, shuffle=False, num_workers=args.workers,
pin_memory=True)
else:
raise RuntimeError('Dataset not found.' +
'The dataset must be either of nyudepthv2 or kitti.')
# put construction of train loader here, for those who are interested in testing only
if not args.evaluate and not args.test:
train_loader = torch.utils.data.DataLoader(
train_dataset, batch_size=args.batch_size, shuffle=True,
num_workers=args.workers, pin_memory=True, sampler=None,
worker_init_fn=lambda work_id:np.random.seed(work_id))
# worker_init_fn ensures different sampling patterns for each data loading thread
print("=> data loaders created.")
return train_loader, val_loader, test_loader
test_save_path = './results/'
def main():
global args, best_result, output_directory, train_csv, test_csv
# evaluation mode
start_epoch = 0
if args.evaluate:
assert os.path.isfile(args.evaluate), \
"=> no best model found at '{}'".format(args.evaluate)
print("=> loading best model '{}'".format(args.evaluate))
checkpoint = torch.load(args.evaluate)
output_directory = os.path.dirname(args.evaluate)
args = checkpoint['args']
start_epoch = checkpoint['epoch'] + 1
best_result = checkpoint['best_result']
model = checkpoint['model']
print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
args.test = ''
args.evaluate = True
_, val_loader, _ = create_data_loaders(args)
validate(val_loader, model, checkpoint['epoch'], write_to_file=False)
return
elif args.test:
assert os.path.isfile(args.test), \
"=> no best model found at '{}'".format(args.test)
print("=> loading best model '{}'".format(args.test))
checkpoint = torch.load(args.test)
output_directory = os.path.dirname(args.test)
args = checkpoint['args']
start_epoch = checkpoint['epoch'] + 1
best_result = checkpoint['best_result']
model = checkpoint['model']
print("=> loaded best model (epoch {})".format(checkpoint['epoch']))
args.test = True
_, _, test_loader = create_data_loaders(args)
test(test_loader, model, test_save_path)
return
# optionally resume from a checkpoint
elif args.resume:
chkpt_path = args.resume
assert os.path.isfile(chkpt_path), \
"=> no checkpoint found at '{}'".format(chkpt_path)
print("=> loading checkpoint '{}'".format(chkpt_path))
checkpoint = torch.load(chkpt_path)
args = checkpoint['args']
start_epoch = checkpoint['epoch'] + 1
best_result = checkpoint['best_result']
model = checkpoint['model']
optimizer = checkpoint['optimizer']
output_directory = os.path.dirname(os.path.abspath(chkpt_path))
print("=> loaded checkpoint (epoch {})".format(checkpoint['epoch']))
train_loader, val_loader, test_loader = create_data_loaders(args)
args.resume = True
# create new model
else:
train_loader, val_loader, test_loader = create_data_loaders(args)
print("=> creating Model ({}-{}) ...".format(args.arch, args.decoder))
in_channels = len(args.modality)
if args.arch == 'resnet50':
model = ResNet(layers=50, decoder=args.decoder, output_size=train_loader.dataset.output_size,
in_channels=in_channels, pretrained=args.pretrained)
elif args.arch == 'resnet18':
model = ResNet(layers=18, decoder=args.decoder, output_size=train_loader.dataset.output_size,
in_channels=in_channels, pretrained=args.pretrained)
print("=> model created.")
optimizer = torch.optim.SGD(model.parameters(), args.lr, \
momentum=args.momentum, weight_decay=args.weight_decay)
# model = torch.nn.DataParallel(model).cuda() # for multi-gpu training
model = model.cuda()
# define loss function (criterion) and optimizer
if args.criterion == 'l2':
criterion = criteria.MaskedMSELoss().cuda()
elif args.criterion == 'l1':
criterion = criteria.MaskedL1Loss().cuda()
# create results folder, if not already exists
output_directory = utils.get_output_directory(args)
if not os.path.exists(output_directory):
os.makedirs(output_directory)
train_csv = os.path.join(output_directory, 'train.csv')
test_csv = os.path.join(output_directory, 'test.csv')
best_txt = os.path.join(output_directory, 'best.txt')
# create new csv files with only header
if not args.resume:
with open(train_csv, 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
with open(test_csv, 'w') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writeheader()
for epoch in range(start_epoch, args.epochs):
utils.adjust_learning_rate(optimizer, epoch, args.lr)
train(train_loader, model, criterion, optimizer, epoch) # train for one epoch
result, img_merge = validate(val_loader, model, epoch) # evaluate on validation set
# remember best rmse and save checkpoint
is_best = result.rmse < best_result.rmse
if is_best:
best_result = result
with open(best_txt, 'w') as txtfile:
txtfile.write("epoch={}\nmse={:.3f}\nrmse={:.3f}\nabsrel={:.3f}\nlg10={:.3f}\nmae={:.3f}\ndelta1={:.3f}\nt_gpu={:.4f}\n".
format(epoch, result.mse, result.rmse, result.absrel, result.lg10, result.mae, result.delta1, result.gpu_time))
if img_merge is not None:
img_filename = output_directory + '/comparison_best.png'
utils.save_image(img_merge, img_filename)
utils.save_checkpoint({
'args': args,
'epoch': epoch,
'arch': args.arch,
'model': model,
'best_result': best_result,
'optimizer' : optimizer,
}, is_best, epoch, output_directory)
def train(train_loader, model, criterion, optimizer, epoch):
average_meter = AverageMeter()
model.train() # switch to train mode
end = time.time()
for i, (input, target) in enumerate(train_loader):
input, target = input.cuda(), target.cuda()
torch.cuda.synchronize()
data_time = time.time() - end
# compute pred
end = time.time()
pred = model(input)
loss = criterion(pred, target)
optimizer.zero_grad()
loss.backward() # compute gradient and do SGD step
optimizer.step()
torch.cuda.synchronize()
gpu_time = time.time() - end
# measure accuracy and record loss
result = Result()
result.evaluate(pred.data, target.data)
average_meter.update(result, gpu_time, data_time, input.size(0))
end = time.time()
if (i + 1) % args.print_freq == 0:
print('=> output: {}'.format(output_directory))
print('Train Epoch: {0} [{1}/{2}]\t'
't_Data={data_time:.3f}({average.data_time:.3f}) '
't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
'MAE={result.mae:.2f}({average.mae:.2f}) '
'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
'REL={result.absrel:.3f}({average.absrel:.3f}) '
'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
epoch, i+1, len(train_loader), data_time=data_time,
gpu_time=gpu_time, result=result, average=average_meter.average()))
avg = average_meter.average()
with open(train_csv, 'a') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writerow({'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel, 'lg10': avg.lg10,
'mae': avg.mae, 'delta1': avg.delta1, 'delta2': avg.delta2, 'delta3': avg.delta3,
'gpu_time': avg.gpu_time, 'data_time': avg.data_time})
def validate(val_loader, model, epoch, write_to_file=True):
average_meter = AverageMeter()
model.eval() # switch to evaluate mode
end = time.time()
for i, (input, target) in enumerate(val_loader):
input, target = input.cuda(), target.cuda()
torch.cuda.synchronize()
data_time = time.time() - end
# compute output
end = time.time()
with torch.no_grad():
pred = model(input)
torch.cuda.synchronize()
gpu_time = time.time() - end
# measure accuracy and record loss
result = Result()
result.evaluate(pred.data, target.data)
average_meter.update(result, gpu_time, data_time, input.size(0))
end = time.time()
# save 8 images for visualization
skip = 50
if args.modality == 'd':
img_merge = None
else:
if args.modality == 'rgb':
rgb = input
elif args.modality == 'rgbd':
rgb = input[:,:3,:,:]
depth = input[:,3:,:,:]
if i == 0:
if args.modality == 'rgbd':
img_merge = utils.merge_into_row_with_gt(rgb, depth, target, pred)
else:
img_merge = utils.merge_into_row(rgb, target, pred)
elif (i < 8*skip) and (i % skip == 0):
if args.modality == 'rgbd':
row = utils.merge_into_row_with_gt(rgb, depth, target, pred)
else:
row = utils.merge_into_row(rgb, target, pred)
img_merge = utils.add_row(img_merge, row)
elif i == 8*skip:
filename = output_directory + '/comparison_' + str(epoch) + '.png'
utils.save_image(img_merge, filename)
if (i+1) % args.print_freq == 0:
print('Test: [{0}/{1}]\t'
't_GPU={gpu_time:.3f}({average.gpu_time:.3f})\n\t'
'RMSE={result.rmse:.2f}({average.rmse:.2f}) '
'MAE={result.mae:.2f}({average.mae:.2f}) '
'Delta1={result.delta1:.3f}({average.delta1:.3f}) '
'REL={result.absrel:.3f}({average.absrel:.3f}) '
'Lg10={result.lg10:.3f}({average.lg10:.3f}) '.format(
i+1, len(val_loader), gpu_time=gpu_time, result=result, average=average_meter.average()))
avg = average_meter.average()
print('\n*\n'
'RMSE={average.rmse:.3f}\n'
'MAE={average.mae:.3f}\n'
'Delta1={average.delta1:.3f}\n'
'REL={average.absrel:.3f}\n'
'Lg10={average.lg10:.3f}\n'
't_GPU={time:.3f}\n'.format(
average=avg, time=avg.gpu_time))
if write_to_file:
with open(test_csv, 'a') as csvfile:
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writerow({'mse': avg.mse, 'rmse': avg.rmse, 'absrel': avg.absrel, 'lg10': avg.lg10,
'mae': avg.mae, 'delta1': avg.delta1, 'delta2': avg.delta2, 'delta3': avg.delta3,
'data_time': avg.data_time, 'gpu_time': avg.gpu_time})
return avg, img_merge
def test(test_loader, model, save_path):
average_meter = AverageMeter()
model.eval() # switch to evaluate mode
for i, (input, target) in enumerate(test_loader):
input, name = input.cuda(), target
torch.cuda.synchronize()
# compute output
end = time.time()
with torch.no_grad():
pred = model(input)
torch.cuda.synchronize()
        pred1 = utils.stretch_img(pred)
save_to_file = os.path.join(save_path, name[0] + '.png')
utils.save_image(pred1, save_to_file)
save_to_tif = os.path.join(save_path, name[0] + '_ori.tiff')
depth_pred_cpu = np.squeeze(pred.data.cpu().numpy())
img = Image.fromarray(depth_pred_cpu)
img = img.resize((1280, 720))
img.save(save_to_tif)
if __name__ == '__main__':
main()
(4) Testing
After making the changes above, create the test folder (data/nyudepthv2/test) and put your data in it; the loader expects .h5 files inside at least one subfolder (see the packing sketch after the command below).
Then run the following command:
python main.py --test model_best.pth
The white (grayscale) image is the raw predicted depth, and the color image is its visualization; the save location is set by the test_save_path variable in main.py (test_save_path = './results/').
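If your own test data starts from plain images, a hypothetical packing helper like the following produces .h5 files in the layout the loader expects (a subfolder under data/nyudepthv2/test containing .h5 files with 'rgb' and 'depth' entries). The folder name, dtypes, and the zero placeholder depth are my assumptions, not part of the original project; inputs should ideally be 640x480 like NYU, since the transforms assume iheight = 480.
# Hypothetical helper: pack one RGB image into an .h5 file that
# make_dataset()/h5_loader() can consume in test mode.
import os
import h5py
import numpy as np
from PIL import Image

def pack_test_image(image_path, out_dir='data/nyudepthv2/test/my_scenes'):
    os.makedirs(out_dir, exist_ok=True)
    rgb = np.array(Image.open(image_path).convert('RGB'))   # (H, W, 3), uint8; ideally 480x640
    rgb = np.transpose(rgb, (2, 0, 1))                       # h5_loader expects (3, H, W)
    depth = np.zeros(rgb.shape[1:], dtype=np.float32)        # ground truth unknown at test time
    name = os.path.splitext(os.path.basename(image_path))[0]
    with h5py.File(os.path.join(out_dir, name + '.h5'), 'w') as f:
        f.create_dataset('rgb', data=rgb)
        f.create_dataset('depth', data=depth)

pack_test_image('my_image.jpg')  # placeholder file name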