Source code: Dynamic NeRF | generate_depth
The script mainly uses the MiDaS model to predict depth.
Main entry point: parse the arguments ----- build the input/output paths ----- set the GPU/cuDNN options ----- compute the depth maps
torch.backends.cudnn.enabled / torch.backends.cudnn.benchmark: enable cuDNN and let it auto-tune its algorithms, which speeds up GPU computation
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--dataset_path", type=str, help='Dataset path')
    parser.add_argument('--model', help="restore midas checkpoint")
    args = parser.parse_args()

    input_path = os.path.join(args.dataset_path, 'images')
    output_path = os.path.join(args.dataset_path, 'disp')
    output_img_path = os.path.join(args.dataset_path, 'disp_png')
    create_dir(output_path)
    create_dir(output_img_path)

    # set torch options
    # speed up GPU computation with cuDNN auto-tuning
    torch.backends.cudnn.enabled = True
    torch.backends.cudnn.benchmark = True

    # compute depth maps
    run(input_path, output_path, output_img_path, args.model)
**Resize()** resizes a sample to the given size; its parameters are documented below:
"""
Args:
width (int): desired output width
height (int): desired output height
resize_target (bool, optional):
True: Resize the full sample (image, mask, target).
False: Resize image only.
Defaults to True.
keep_aspect_ratio (bool, optional):
True: Keep the aspect ratio of the input sample.
Output sample might not have the given width and height, and
resize behaviour depends on the parameter 'resize_method'.
Defaults to False.
ensure_multiple_of (int, optional):
Output width and height is constrained to be multiple of this parameter.
Defaults to 1.
resize_method (str, optional):
"lower_bound": Output will be at least as large as the given size.
"upper_bound": Output will be at max as large as the given size. (Output size might be smaller than given size.)
"minimal": Scale as least as possible. (Output size might be smaller than given size.)
Defaults to "lower_bound".
"""
run(): load the network ----- load the input images ----- run the model ----- save the outputs
torch.from_numpy() converts a NumPy array into a torch tensor
.cpu().numpy() moves a tensor to the CPU and converts it back to a NumPy array
model.forward() runs the forward pass of the network
.unsqueeze(0) adds a dimension (0 means it is inserted at the first position, i.e. a batch dimension)
torch.nn.functional.interpolate() resamples a tensor; here it upsamples the prediction back to the original image resolution (see the sketch after this list)
Finally the prediction is normalized before being written out.
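The following minimal, self-contained sketch reproduces this NumPy-to-tensor round-trip with dummy data instead of the MiDaS model; the shapes mirror what run() does.

import numpy as np
import torch
import torch.nn.functional as F

img_input = np.random.rand(3, 1056, 1920).astype(np.float32)  # dummy CHW image, as produced by the transform

sample = torch.from_numpy(img_input).unsqueeze(0)  # (3, H, W) -> (1, 3, H, W): add a batch dimension
prediction = sample.mean(dim=1)                     # stand-in for model.forward(sample): shape (1, H, W)

prediction = (
    F.interpolate(
        prediction.unsqueeze(1),                    # (1, H, W) -> (1, 1, H, W): add a channel dimension
        size=[1080, 1920],                          # resize back to the original image resolution
        mode="bicubic",
        align_corners=False,
    )
    .squeeze()                                      # (1, 1, 1080, 1920) -> (1080, 1920)
    .cpu()
    .numpy()                                        # tensor -> NumPy array
)
print(prediction.shape)                             # (1080, 1920)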
def run(input_path, output_path, output_img_path, model_path):
    """Run MonoDepthNN to compute depth maps.

    Args:
        input_path (str): path to input folder
        output_path (str): path to output folder
        model_path (str): path to saved model
    """
    print("initialize")

    # select device
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("device: %s" % device)

    # load network
    model = MidasNet(model_path, non_negative=True)

    # use the resolution of the first input image as the network input size
    sh = cv2.imread(sorted(glob.glob(os.path.join(input_path, "*")))[0]).shape
    net_w, net_h = sh[1], sh[0]
    resize_mode = "upper_bound"

    transform = Compose(
        [
            Resize(
                net_w,
                net_h,
                resize_target=None,         # resize the image only
                keep_aspect_ratio=True,     # keep the aspect ratio of the input
                ensure_multiple_of=32,      # constrain output width/height to multiples of 32
                resize_method=resize_mode,  # output is at most as large as the given size
                image_interpolation_method=cv2.INTER_CUBIC,  # bicubic interpolation over a 4x4 pixel neighborhood
            ),
            NormalizeImage(mean=[0.485, 0.456, 0.406],
                           std=[0.229, 0.224, 0.225]),  # normalize with the given mean and std
            PrepareForNet(),  # prepare the sample as network input
        ]
    )

    model.eval()      # switch the model to evaluation mode
    model.to(device)  # move the model to the GPU (if available)

    # get input
    img_names = sorted(glob.glob(os.path.join(input_path, "*")))
    num_images = len(img_names)

    # create output folder
    os.makedirs(output_path, exist_ok=True)

    print("start processing")
    for ind, img_name in enumerate(img_names):
        print("  processing {} ({}/{})".format(img_name, ind + 1, num_images))

        # input
        img = read_image(img_name)
        img_input = transform({"image": img})["image"]

        # compute
        with torch.no_grad():
            sample = torch.from_numpy(img_input).to(device).unsqueeze(0)  # NumPy -> tensor, add a batch dimension
            prediction = model.forward(sample)  # forward pass
            prediction = (
                torch.nn.functional.interpolate(
                    prediction.unsqueeze(1),
                    size=[net_h, net_w],
                    mode="bicubic",      # bicubic upsampling back to the original resolution
                    align_corners=False,
                )
                .squeeze()   # drop the batch/channel dimensions
                .cpu()
                .numpy()
            )

        # output
        filename = os.path.join(
            output_path, os.path.splitext(os.path.basename(img_name))[0]
        )
        print(filename + '.npy')
        np.save(filename + '.npy', prediction.astype(np.float32))

        depth_min = prediction.min()
        depth_max = prediction.max()
        max_val = (2 ** (8 * 2)) - 1  # 65535, the maximum value of a 16-bit PNG

        # normalize to [0, max_val] before writing the 16-bit PNG
        if depth_max - depth_min > np.finfo("float").eps:
            out = max_val * (prediction - depth_min) / (depth_max - depth_min)
        else:
            out = np.zeros(prediction.shape, dtype=prediction.dtype)

        cv2.imwrite(os.path.join(output_img_path, os.path.splitext(os.path.basename(img_name))[0] + '.png'), out.astype("uint16"))
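For a quick check, the saved outputs can be loaded back as follows. The dataset path and the frame name 00000 are only hypothetical examples; the 16-bit PNG holds the per-image min/max-normalized disparity, while the .npy file keeps the raw float values.

import os
import cv2
import numpy as np

dataset_path = 'path/to/scene'  # hypothetical dataset root
disp = np.load(os.path.join(dataset_path, 'disp', '00000.npy'))          # raw prediction, float32, (H, W)
disp_png = cv2.imread(os.path.join(dataset_path, 'disp_png', '00000.png'),
                      cv2.IMREAD_UNCHANGED)                               # normalized uint16, (H, W)

print(disp.dtype, disp.shape)
print(disp_png.dtype, disp_png.min(), disp_png.max())  # uint16, values up to 65535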
Reading the image
read_image() mainly converts the image from BGR with values in (0, 255) to RGB with values in (0, 1).
cv2.imread() loads images in BGR order with values in the range (0, 255)
cv2.cvtColor() converts between color spaces
def read_image(path):
    """Read image and output RGB image (0-1).

    Args:
        path (str): path to file

    Returns:
        array: RGB image (0-1)
    """
    img = cv2.imread(path)

    # if the image is grayscale, convert it to 3-channel BGR first
    if img.ndim == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)

    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB) / 255.0

    return img
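A quick sanity check of read_image (the image path is only a hypothetical example): the result is an RGB float array with values in [0, 1].

img = read_image('path/to/scene/images/00000.jpg')  # hypothetical path
print(img.shape)                        # (H, W, 3)
print(img.dtype, img.min(), img.max())  # float64, values within [0, 1]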