MiddleBury与SceneFlow 数据集相机参数与pfm文件解析

MiddleBury相机参数(calib.txt)

cam0=[1758.23 0 953.34; 0 1758.23 552.29; 0 0 1]
cam1=[1758.23 0 953.34; 0 1758.23 552.29; 0 0 1]
doffs=0
baseline=111.53
width=1920
height=1080
ndisp=290
isint=0
vmin=75
vmax=262

  • cam0,1: 相机内参:[f 0 cx; 0 f cy; 0 0 1]
  •    f: 焦距
  • cx, cy: 主点坐标
  • doffs: 主点在各自图像坐标系的x坐标差:doffs = cx1 - cx0 (here always == 0)
  • baseline: 基线长度(单位:mm)
  • width, height:图像尺寸
  • ndisp: 视差范围(视差级数的保守上界):$d \in [0, 1, \ldots, ndisp-1]$
  • vmin, vmax: 最小、最大视差的紧致边界,仅用于颜色可视化(立体匹配算法不应使用这两个值);

SceneFlow相机参数

相机内参:

大多数的场景模拟焦距为35mm的相机成像,这些视图下的相机内参为:
$$\begin{bmatrix} f_x & 0.0 & c_x\\ 0.0 & f_y & c_y\\ 0.0 & 0.0 & 1.0 \end{bmatrix}=\begin{bmatrix} 1050.0 & 0.0 & 479.5\\ 0.0 & 1050.0 & 269.5\\ 0.0 & 0.0 & 1.0 \end{bmatrix}$$

某些场景下模拟焦距为15.0mm的相机成像,这些场景下的相机内参为:
$$\begin{bmatrix} f_x & 0.0 & c_x\\ 0.0 & f_y & c_y\\ 0.0 & 0.0 & 1.0 \end{bmatrix}=\begin{bmatrix} 450.0 & 0.0 & 479.5\\ 0.0 & 450.0 & 269.5\\ 0.0 & 0.0 & 1.0 \end{bmatrix}$$

相机外参(camera_data.txt):

Frame 1\n
L 0.000180073388037 -4.3711384734e-08 -0.999999880791 47.8838249108 -0.999999880791 -6.77291538409e-08 -0.000180141098099 39.5000647903 -2.96019105055e-15 1.0 -1.11432655103e-07 1.0 0.0 0.0 0.0 1.0\n
R 0.000180073388037 -4.3711384734e-08 -0.999999880791 47.8840049842 -0.999999880791 -6.77291538409e-08 -0.000180141098099 38.5000649095 -2.96019105055e-15 1.0 -1.11432655103e-07 1.0 0.0 0.0 0.0 1.0\n
\n
Frame 2\n

4*4的矩阵,按行展开

MiddleBury(SceneFlow)视差图pfm文件

pfm文件由两部分组成:

PFM header(文件头区域)

PF
1920 1080
-0.003922
元数据

  1. Identifier Line:“Pf” :单通道灰度图像 ,“PF” :3通道RGB图像
  2. Dimensions Line:图像尺寸
  3. Scale Factor / Endianness: scale<0表示按小端(低)字节序存储(即低位字节存放在内存的低地址端,高位字节存放在内存的高地址端),scale>0表示按大端(高)字节序存储(高位字节存放在内存的低地址端,低位字节存放在内存的高地址端);scale的绝对值为全局缩放因子,视差图转换为深度图时需要使用缩放因子:$Z = \frac{baseline \cdot f}{\frac{d}{|scale|} + doffs}$

数据集文件读取

from pathlib import Path
import numpy as np
import csv
import re
import cv2


# 读取相机参数
def read_calib(calib_file_path):
    """Parse a ``key=value`` calibration text file into a dict of strings.

    Every non-blank line is expected to look like ``key=value``. Keys and
    values are returned as whitespace-stripped strings; converting them to
    numbers is left to the caller.

    Args:
        calib_file_path: Path of the calibration file (e.g. ``calib.txt``).

    Returns:
        dict mapping each key to its raw value string. If a key appears
        more than once, the first occurrence is kept.

    Raises:
        ValueError: If a non-blank line has no ``=`` separator.
    """
    calib = {}
    # newline='' is what the csv module requires when it is handed a file.
    with open(calib_file_path, 'r', newline='') as calib_file:
        rows = csv.reader(calib_file, delimiter='=')
        for line_no, row in enumerate(rows, start=1):
            # csv yields [] for an empty line; unpacking it used to crash.
            if not any(cell.strip() for cell in row):
                continue
            if len(row) < 2:
                raise ValueError(
                    f"Line {line_no} of calibration file is not 'key=value': "
                    f"{row[0]!r}"
                )
            attr = row[0].strip()
            # Re-join so a value that itself contains '=' is kept intact.
            value = '='.join(row[1:]).strip()
            calib.setdefault(attr, value)
    return calib


def read_pfm(pfm_file_path, display=True):
    """Read a PFM (Portable Float Map) file.

    The file starts with three text lines -- the magic (``PF`` for three
    channels, ``Pf`` for one), ``width height``, and a scale whose sign
    encodes the byte order -- followed by raw 32-bit floats.

    Args:
        pfm_file_path: Path of the ``.pfm`` file.
        display: When true (the default, which keeps the previous
            behaviour) the image is previewed through ``show`` before
            returning. Pass ``False`` for headless / batch use.

    Returns:
        ``(data, [(height, width, channels), scale])`` where ``data`` is the
        flat float array exactly as stored in the file (not reshaped, not
        flipped) and ``scale`` is the absolute value of the header scale.

    Raises:
        Exception: If the magic or the dimension line is malformed.
        ValueError: If the payload size does not match the header.
    """
    with open(pfm_file_path, 'rb') as pfm_file:
        magic = pfm_file.readline().decode().rstrip()
        if magic == 'PF':
            channels = 3
        elif magic == 'Pf':
            channels = 1
        else:
            # Anything else used to be read silently as one channel.
            raise Exception("Malformed PFM header.")

        dim_match = re.match(r'^(\d+)\s+(\d+)\s*$',
                             pfm_file.readline().decode('utf-8'))
        if not dim_match:
            raise Exception("Malformed PFM header.")
        width, height = map(int, dim_match.groups())

        scale = float(pfm_file.readline().decode().rstrip())
        if scale < 0:
            endian = '<'  # little endian
            scale = -scale
        else:
            endian = '>'  # big endian

        disparity = np.fromfile(pfm_file, endian + 'f')

    expected = height * width * channels
    if disparity.size != expected:
        raise ValueError(
            f"PFM payload holds {disparity.size} floats, "
            f"header announces {expected}."
        )

    if display:
        # The preview is flipped vertically and cast to uint8 without any
        # rescaling, so it is only a rough visual check.
        img = np.flipud(disparity.reshape(height, width, channels))
        show(img.astype('uint8'), "disparity")

    return disparity, [(height, width, channels), scale]


def create_depth_map(pfm_file_path, calib=None, dtype='uint8'):
    """Convert a PFM disparity map into a depth map.

    Args:
        pfm_file_path: Path of the disparity ``.pfm`` file.
        calib: Calibration dict as returned by ``read_calib``; the string
            entries ``'cam0'``, ``'baseline'`` and ``'doffs'`` are read.
        dtype: dtype of the returned array. The default ``'uint8'`` keeps
            the previous return type, but it cannot represent depths above
            255; pass e.g. ``'float32'`` to keep the computed values.

    Returns:
        Array of shape ``(height, width, channels)``, flipped vertically
        relative to the file order, in the same length unit as
        ``calib['baseline']``.

    Raises:
        Exception: If ``calib`` is ``None``.
    """
    # Validate first so a missing calibration fails before any file I/O.
    if calib is None:
        raise Exception("Loss calibration information.")

    disparity, [shape, scale] = read_pfm(pfm_file_path)

    # The focal length is the first entry of "[f 0 cx; 0 f cy; 0 0 1]".
    fx = float(calib['cam0'].strip().lstrip('[').split()[0])
    base_line = float(calib['baseline'])
    doffs = float(calib['doffs'])

    # depth = f * baseline / (d / |scale| + doffs), where doffs is the
    # x-offset between the two cameras' principal points (cx1 - cx0).
    # NOTE(review): the Middlebury documentation states
    # Z = baseline * f / (d + doffs) without the |scale| division --
    # confirm that dividing by the PFM scale is intended here.
    depth_map = fx * base_line / (disparity / scale + doffs)
    depth_map = np.flipud(depth_map.reshape(shape))
    return depth_map.astype(dtype)


def show(img, win_name='image'):
    """Display ``img`` in an OpenCV window named ``win_name``.

    The window is created with ``cv2.WINDOW_NORMAL``, the call then waits
    on ``cv2.waitKey()`` and finally destroys the window again.

    Args:
        img: Image array to display.
        win_name: Title / identifier of the window.

    Raises:
        Exception: If ``img`` is ``None``.
    """
    # Guard clause: refuse a missing image before touching any GUI state.
    if img is None:
        raise Exception("Can't display an empty image.")

    cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)
    cv2.imshow(win_name, img)
    cv2.waitKey()
    cv2.destroyWindow(win_name)



def main():
    """Demo: build and display the left depth map of one dataset scene.

    Reads ``calib.txt`` and ``disp0.pfm`` from the hard-coded scene
    directory, converts the disparity to depth and shows the result.
    """
    scene_dir = Path(r'./middlebury2021/data/ladder1')

    # Calibration first, then the depth map derived from the disparity file.
    calib = read_calib(scene_dir / 'calib.txt')
    depth_map_left = create_depth_map(scene_dir / 'disp0.pfm', calib)

    show(depth_map_left, "depth_map")


# Run the demo only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()

你可能感兴趣的:(基于图像的双目立体视觉三维重建,计算机视觉,python)