1. 先看一下 README
一些来源如下:
[Clément Godard](http://www0.cs.ucl.ac.uk/staff/C.Godard/), [Oisin Mac Aodha](http://vision.caltech.edu/~macaodha/), [Michael Firman](http://www.michaelfirman.co.uk) and [Gabriel J. Brostow](http://www0.cs.ucl.ac.uk/staff/g.brostow/)
> [ICCV 2019 (arXiv pdf)](https://arxiv.org/abs/1806.01260)
本机环境:Ubuntu 20.04,pytorch=1.7.1,torchvision=0.8.0a0,cv2=3.4.2,torch.cuda.is_available()=False
## 预测单张图像的深度:test_simple.py
就这三部分,先来看 parse_args 函数。
import argparse #导入argparse模块
def parse_args(argv=None):
    """Parse the command-line options of the single-image depth test script.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) argparse falls back to ``sys.argv[1:]``, which is
            what the original zero-argument call did.

    Returns:
        argparse.Namespace: with the attributes ``image_path``,
        ``model_name``, ``ext``, ``no_cuda`` and ``pred_metric_depth``.
    """
    # The ArgumentParser object holds the information needed to turn the
    # command line into Python values.
    parser = argparse.ArgumentParser(
        description='Simple testing funtion for Monodepthv2 models.')

    # Each add_argument() call tells the parser how to take one command-line
    # string and convert it into an attribute of the returned namespace.
    parser.add_argument('--image_path', type=str,
                        help='path to a test image or folder of images', required=True)
    # --model_name is not marked required here, so it is None when omitted.
    parser.add_argument('--model_name', type=str,
                        help='name of a pretrained model to use',
                        choices=[
                            "mono_640x192",
                            "stereo_640x192",
                            "mono+stereo_640x192",
                            "mono_no_pt_640x192",
                            "stereo_no_pt_640x192",
                            "mono+stereo_no_pt_640x192",
                            "mono_1024x320",
                            "stereo_1024x320",
                            "mono+stereo_1024x320"])
    parser.add_argument('--ext', type=str,
                        help='image extension to search for in folder', default="jpg")
    parser.add_argument("--no_cuda",
                        help='if set, disables CUDA',
                        action='store_true')
    parser.add_argument("--pred_metric_depth",
                        help='if set, predicts metric depth instead of disparity. (This only '
                             'makes sense for stereo-trained KITTI models).',
                        action='store_true')

    return parser.parse_args(argv)
# parser.parse_args() 会检查命令行,把每个参数转换为适当的类型,然后调用相应的操作。
更多信息见argparse --- 命令行选项、参数和子命令解析器 — Python 3.7.13 文档
再来看 test_simple 函数
import xxx
def test_simple(args):
    """Predict disparity (or metric depth) for a single image or a folder of images.

    For every input image two files are written into the input's directory:
    a ``.npy`` array (``*_disp.npy``, or ``*_depth.npy`` when
    ``args.pred_metric_depth`` is set) and a magma-colormapped
    ``*_disp.jpeg`` visualisation.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``model_name``, ``no_cuda``, ``pred_metric_depth``,
            ``image_path`` and ``ext``.

    Raises:
        AssertionError: If ``args.model_name`` is ``None``.
        Exception: If ``args.image_path`` is neither a file nor a directory.
    """
    # Make sure a model was selected on the command line.
    assert args.model_name is not None, \
        "You must specify the --model_name parameter; see README.md for an example"

    # Run on the GPU only when CUDA is available and --no_cuda was not given;
    # otherwise everything stays on the CPU.
    if torch.cuda.is_available() and not args.no_cuda:
        device = torch.device("cuda")
    else:
        device = torch.device("cpu")

    # Metric depth is only meaningful for models trained with stereo pairs,
    # so warn (but keep going) for any other model name.
    if args.pred_metric_depth and "stereo" not in args.model_name:
        print("Warning: The --pred_metric_depth flag only makes sense for stereo-trained KITTI "
              "models. For mono-trained models, output depths will not in metric space.")

    # Helper defined outside this block (in utils, per the accompanying
    # notes); going by its name it fetches the model when it is missing.
    download_model_if_doesnt_exist(args.model_name)
    model_path = os.path.join("models", args.model_name)
    print("-> Loading model from ", model_path)
    encoder_path = os.path.join(model_path, "encoder.pth")
    depth_decoder_path = os.path.join(model_path, "depth.pth")

    # LOADING PRETRAINED MODEL
    print(" Loading pretrained encoder")
    # NOTE(review): presumably an 18-layer ResNet built without pretrained
    # weights -- confirm against networks.ResnetEncoder.
    encoder = networks.ResnetEncoder(18, False)
    # map_location remaps the stored tensors onto the chosen device, so a
    # checkpoint can be loaded on a CPU-only machine.
    loaded_dict_enc = torch.load(encoder_path, map_location=device)

    # extract the height and width of image that this model was trained with
    feed_height = loaded_dict_enc['height']
    feed_width = loaded_dict_enc['width']
    # The checkpoint carries extra entries (such as 'height' and 'width')
    # besides the weights; keep only the keys the encoder actually has so
    # that load_state_dict() accepts the dictionary.
    filtered_dict_enc = {k: v for k, v in loaded_dict_enc.items() if k in encoder.state_dict()}
    encoder.load_state_dict(filtered_dict_enc)
    encoder.to(device)
    encoder.eval()

    print(" Loading pretrained decoder")
    # num_ch_enc: number of channels of the encoder.
    depth_decoder = networks.DepthDecoder(
        num_ch_enc=encoder.num_ch_enc, scales=range(4))

    loaded_dict = torch.load(depth_decoder_path, map_location=device)
    depth_decoder.load_state_dict(loaded_dict)

    depth_decoder.to(device)
    depth_decoder.eval()

    # FINDING INPUT IMAGES
    if os.path.isfile(args.image_path):
        # Only testing on a single image
        paths = [args.image_path]
        output_directory = os.path.dirname(args.image_path)
    elif os.path.isdir(args.image_path):
        # Searching folder for images
        paths = glob.glob(os.path.join(args.image_path, '*.{}'.format(args.ext)))
        output_directory = args.image_path
    else:
        raise Exception("Can not find args.image_path: {}".format(args.image_path))

    print("-> Predicting on {:d} test images".format(len(paths)))

    # The visualisation below is written as "*_disp.jpeg", so both spellings
    # must be skipped; otherwise a second run with --ext jpeg would feed the
    # previous run's outputs back in as inputs.
    skip_suffixes = ("_disp.jpg", "_disp.jpeg")

    # PREDICTING ON EACH IMAGE IN TURN
    with torch.no_grad():
        for idx, image_path in enumerate(paths):

            if image_path.endswith(skip_suffixes):
                # don't try to predict disparity for a disparity image!
                continue

            # Load image and preprocess
            input_image = pil.open(image_path).convert('RGB')
            original_width, original_height = input_image.size
            input_image = input_image.resize((feed_width, feed_height), pil.LANCZOS)
            # unsqueeze(0) adds the leading batch dimension.
            input_image = transforms.ToTensor()(input_image).unsqueeze(0)

            # PREDICTION
            input_image = input_image.to(device)
            features = encoder(input_image)
            outputs = depth_decoder(features)

            disp = outputs[("disp", 0)]
            # Resize the prediction back to the original image size for the
            # colormapped visualisation.
            disp_resized = torch.nn.functional.interpolate(
                disp, (original_height, original_width), mode="bilinear", align_corners=False)

            # Saving numpy file
            output_name = os.path.splitext(os.path.basename(image_path))[0]
            # Note: the saved array is derived from `disp` (network
            # resolution), not from `disp_resized`.
            scaled_disp, depth = disp_to_depth(disp, 0.1, 100)
            if args.pred_metric_depth:
                name_dest_npy = os.path.join(output_directory, "{}_depth.npy".format(output_name))
                metric_depth = STEREO_SCALE_FACTOR * depth.cpu().numpy()
                np.save(name_dest_npy, metric_depth)
            else:
                name_dest_npy = os.path.join(output_directory, "{}_disp.npy".format(output_name))
                np.save(name_dest_npy, scaled_disp.cpu().numpy())

            # Saving colormapped depth image
            disp_resized_np = disp_resized.squeeze().cpu().numpy()
            # Cap the colour scale at the 95th percentile of the disparity.
            vmax = np.percentile(disp_resized_np, 95)
            normalizer = mpl.colors.Normalize(vmin=disp_resized_np.min(), vmax=vmax)
            mapper = cm.ScalarMappable(norm=normalizer, cmap='magma')
            # Drop the alpha channel and convert to 8-bit RGB.
            colormapped_im = (mapper.to_rgba(disp_resized_np)[:, :, :3] * 255).astype(np.uint8)
            im = pil.fromarray(colormapped_im)

            name_dest_im = os.path.join(output_directory, "{}_disp.jpeg".format(output_name))
            im.save(name_dest_im)

            print(" Processed {:d} of {:d} images - saved predictions to:".format(
                idx + 1, len(paths)))
            print(" - {}".format(name_dest_im))
            print(" - {}".format(name_dest_npy))

    print('-> Done!')
以上是关于测试部分的代码