SLAM Practice: 3D-2D Pose Estimation (a Simple VO)

1. Introduction
3D-2D pose estimation (here meaning a simple VO) is considerably harder than the 2D-2D case, but the core idea is still fairly simple. First, estimate R and t from the initial two frames via 2D-2D epipolar geometry; the resulting t carries no scale information, it is only a relative value. Then triangulate the feature points matched between those two frames to recover their depths, which are likewise relative. Next, match feature points between the second and third frames, keep those that already have a depth, and solve for R and t with PnP. Using this R and t, triangulate all feature points again to recover depths, and repeat.
Put simply:
1. Solve for R, t between frames 1 and 2 via 2D-2D epipolar geometry.
2. Triangulate the matched feature points to recover depths.
3. Match feature points between frames 2 and 3, and solve for R, t with PnP using the points that have depth.
4. Triangulate all feature points again to recover depths.
5. Repeat steps 3 and 4 for every new frame (a minimal skeleton of this loop is sketched below).
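As a minimal sketch of that loop (it leans on the helper functions find_feature_matches, pose_estimation_2d2d, and triangulation implemented in Section 2; the PnP step is abbreviated to comments, so treat it as an outline rather than a drop-in program):

#include <opencv2/opencv.hpp>
using namespace cv;
using namespace std;

// helpers implemented in Section 2
void find_feature_matches(const Mat&, const Mat&, vector<KeyPoint>&, vector<KeyPoint>&, vector<DMatch>&);
void pose_estimation_2d2d(const vector<KeyPoint>&, const vector<KeyPoint>&, const vector<DMatch>&, Mat&, Mat&);
void triangulation(const vector<KeyPoint>&, const vector<KeyPoint>&, const vector<DMatch>&, const Mat&, const Mat&, vector<Point3d>&);

void vo_outline(const vector<String>& files)
{
    Mat prev = imread(files[0]), curr = imread(files[1]);
    vector<KeyPoint> kp1, kp2;
    vector<DMatch> matches;
    find_feature_matches(prev, curr, kp1, kp2, matches);
    Mat R, t;
    pose_estimation_2d2d(kp1, kp2, matches, R, t);  // step 1: R, t up to scale
    vector<Point3d> pts3d;
    triangulation(kp1, kp2, matches, R, t, pts3d);  // step 2: relative depths
    for (size_t k = 2; k < files.size(); ++k) {     // steps 3-5
        Mat next = imread(files[k]);
        find_feature_matches(curr, next, kp1, kp2, matches);
        // step 3: keep matches whose previous point has depth, solvePnP -> new R, t
        // step 4: triangulate again with the previous pose and the new pose
        curr = next;                                // the current frame becomes the previous one
    }
}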

2. Implementation

#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include "opencv2/imgcodecs/legacy/constants_c.h"
#include <opencv2/imgcodecs.hpp>
#include "math.h"
// #include "extra.h" // used in opencv2 
using namespace std;
using namespace cv;



void ransacmatch(vector<KeyPoint>& querykpt, vector<KeyPoint>& trainkpt, vector<DMatch>& match1, vector<DMatch>& matchransac)
{
    // RANSAC consistency check on the matches
    vector<Point2f> queryp(match1.size()), trainp(match1.size()); // coordinates of the matched point pairs
    // extract the point pairs from the keypoints
    for (int i = 0; i < match1.size(); ++i)
    {
        queryp[i] = querykpt[match1[i].queryIdx].pt; // match1[i].queryIdx is the index into the query keypoints
        trainp[i] = trainkpt[match1[i].trainIdx].pt;
    }
    // filter the matched pairs with RANSAC (the inlier mask must be 8-bit)
    vector<uchar> mask(queryp.size());
    findHomography(queryp, trainp, RANSAC, 3, mask);
    for (int j = 0; j < mask.size(); ++j)
    {
        if (mask[j])
        {
            matchransac.push_back(match1[j]);
        }
    }
}


// Euclidean distance between two keypoints in pixel coordinates
double distance1(const KeyPoint& k1, const KeyPoint& k2) {
    return sqrt(pow(k1.pt.x - k2.pt.x, 2) + pow(k1.pt.y - k2.pt.y, 2));
}
void find_feature_matches(
    const Mat& img_1, const Mat& img_2,
    std::vector<KeyPoint>& keypoints_1,
    std::vector<KeyPoint>& keypoints_2,
    std::vector< DMatch >& matches);

void pose_estimation_2d2d(
    const std::vector<KeyPoint>& keypoints_1,
    const std::vector<KeyPoint>& keypoints_2,
    const std::vector< DMatch >& matches,
    Mat& R, Mat& t);

void triangulation(
    const vector<KeyPoint>& keypoint_1,
    const vector<KeyPoint>& keypoint_2,
    const std::vector< DMatch >& matches,
    const Mat& R, const Mat& t,
    vector<Point3d>& points
);
void triangulation(
    const vector<KeyPoint>& keypoint_1,
    const vector<KeyPoint>& keypoint_2,
    const std::vector< DMatch >& matches,
    const Mat& R1, const Mat& t1,
    const Mat& R2, const Mat& t2,
    vector<Point3d>& points
);


// convert pixel coordinates to normalized camera coordinates
Point2f pixel2cam(const Point2d& p, const Mat& K);

int main(int argc, char** argv)
{
    //if (argc != 3)
    //{
    //    cout << "usage: triangulation img1 img2" << endl;
    //    return 1;
    //}
    //-- read the images
    //Mat img_1 = imread("20.bmp", CV_LOAD_IMAGE_COLOR); // color image mode
    //Mat img_2 = imread("21.bmp", CV_LOAD_IMAGE_COLOR);
    string folder_path = "E:/Data/video2/*.bmp";
    vector<cv::String> file_names;
    glob(folder_path, file_names);
    cv::Mat img_1, img_2;

    img_1 = imread(file_names[0]);
    img_2 = imread(file_names[1]);
    vector<KeyPoint> keypoints_1, keypoints_2, keypoints_tmp;
    vector<DMatch> matches;
    find_feature_matches(img_1, img_2, keypoints_1, keypoints_2, matches);
    cout << "一共找到了" << matches.size() << "组匹配点" << endl;

    //-- estimate the motion between the two images
    Mat R, t;
    pose_estimation_2d2d(keypoints_1, keypoints_2, matches, R, t);

    //-- triangulation
    vector<Point3d> points;
    vector<Point3d> points_tmp;
    triangulation(keypoints_1, keypoints_2, matches, R, t, points);

    //points now holds the triangulated 3D coordinates with depth
    //store these depths matched against the earlier feature points: record the correspondence between keypoints_2 and points
    //index table recording the point-to-point relations
    vector<Point3d> preFrame2d3d;

    //for (int i = 0; i < matches.size(); i++) {
    //    cout << "keypoints_2: " << keypoints_2[matches[i].trainIdx].pt << ", corresponding 3D point " << points[i] << endl;
    //    Point3d p(matches[i].trainIdx, i, 1);
    //    preFrame2d3d.push_back(p);
    //}
    //keypoints_tmp = keypoints_2;
    //points_tmp = points;
    // Maintain an index table preFrame2d3d here,
    // a vector holding all feature points of the previous frame, and a vector points holding the depths.
    // Initialization is complete at this point. ----------------------------------------
    // Read the third frame:
    //Mat img_3 = imread("22.bmp", CV_LOAD_IMAGE_COLOR);
    // Match the third frame against the second. keypoints_1: previous frame, keypoints_2: current frame.
    //keypoints_1.clear();
    //keypoints_2.clear();
    //matches.clear();
    //find_feature_matches(img_2, img_3, keypoints_1, keypoints_2, matches);
    // Find the points shared between keypoints_1 and keypoints_tmp,
    // then use them to locate the corresponding points in keypoints_2.
    //for (int i = 0; i < preFrame2d3d.size(); i++) {
    //    int m = preFrame2d3d[i].x;
    //    int flag = 0; // match counter
    //    double dist = 5.0; // best distance so far
    //    int index = 0; // index of the best match
    //    int k2index = 0;
    //    for (int j = 0; j < matches.size(); j++) {
    //        if (distance1(keypoints_tmp[m], keypoints_1[matches[j].queryIdx]) < 2)
    //        {
    //            // treat as the same feature point
    //            if (distance1(keypoints_tmp[m], keypoints_1[matches[j].queryIdx]) < dist) {
    //                dist = distance1(keypoints_tmp[m], keypoints_1[matches[j].queryIdx]);
    //                index = matches[j].queryIdx;
    //                k2index = matches[j].trainIdx;
    //            }
    //            flag++;
    //        }
    //    }
    //    if (flag == 0) {
    //        // no matching point found
    //        preFrame2d3d[i].z = 0;
    //    }
    //    else {
    //        // record the matched feature point
    //        if(index == preFrame2d3d[i].x)
    //            preFrame2d3d[i].z = k2index;
    //        else
    //            preFrame2d3d[i].z = 0;
    //    }
    //}
    // In preFrame2d3d: x is the index into keypoints_tmp, y the index into points_tmp, z the index into keypoints_2.
    //
    //for (int i = 0; i < preFrame2d3d.size(); i++) {
    //    cout << "索引表" << preFrame2d3d[i] << endl;
    //}
    //
    // Solve for R, t with solvePnP from the corresponding keypoints_2 points and the 3D points.
    // 1. PnP solving:
    //Mat K = (Mat_<double>(3, 3) << 753.3, 0, 316.6, 0, 753.3, 251.7, 0, 0, 1);
    //vector<Point3f> pts_3d;
    //vector<Point2f> pts_2d;
    //for (Point3d m : preFrame2d3d)
    //{
    //    if (m.z != 0) {
    //        int d = points_tmp[m.y].z;
    //        if (d == 0 || d < 0)   // bad depth
    //            continue;
    //        Point2d p1 = pixel2cam(keypoints_tmp[m.x].pt, K);

    //        pts_3d.push_back(Point3f(p1.x * d, p1.y * d, d));
    //        pts_2d.push_back(keypoints_2[m.z].pt);
    //    }
    //}
    //Mat r1, t1;
    //solvePnP(pts_3d, pts_2d, K, Mat(), r1, t1, false); // OpenCV's PnP solver; EPNP, DLS, etc. are also available
    //Mat R1;
    //cv::Rodrigues(r1, R1); // r1 is a rotation vector; convert it to a matrix via the Rodrigues formula
    //cout << "R1 is:" << R1 << endl;
    //cout << "t1 is:" << t1 << endl;

    // Triangulate keypoints_1 and keypoints_2 again with the new R, t to get updated points.
    // The feature points were already computed above, so triangulate those again;
    // this time the previous frame's r, t are known, so the projection matrices differ from the first triangulation.
    // triangulation is overloaded: R, t are the previous pose, R1, t1 the current one.
    //points.clear();
    //triangulation(keypoints_1, keypoints_2, matches, R, t, R1, t1, points);
    // Update the index table preFrame2d3d.
    // This completes the processing of the current frame.


    // Process all remaining images in the folder (file_names was already filled by glob above).
    //string folder_path = "E:/carData/video2/*.bmp";
    //vector<cv::String> file_names;
    //glob(folder_path, file_names);
    for (int i = 0; i < file_names.size(); i++) {
        if (i < 2) {
            continue;
        }
        int pos = file_names[i].find_last_of("\\");
        string img_name(file_names[i].substr(pos + 1));
        cout << img_name << std::endl;
       

        // start of the per-frame processing
        preFrame2d3d.clear();
        for (int j = 0; j < matches.size(); j++) {
            cout << "keypoints_2: " << keypoints_2[matches[j].trainIdx].pt << ", corresponding 3D point " << points[j] << endl;
            Point3d p(matches[j].trainIdx, j, 1);
            preFrame2d3d.push_back(p);
        }
        keypoints_tmp.clear();
        points_tmp.clear();
        keypoints_tmp = keypoints_2;
        points_tmp = points;

        // Maintain the index table preFrame2d3d:
        // a vector holding all feature points of the previous frame, and a vector points holding the depths.
        // Initialization is complete at this point. ----------------------------------------
        // Read the next frame:
        Mat img_3 = imread(file_names[i]);
        // Match the new frame against the previous one. keypoints_1: previous frame, keypoints_2: current frame.
        keypoints_1.clear();
        keypoints_2.clear();
        matches.clear();
        find_feature_matches(img_2, img_3, keypoints_1, keypoints_2, matches);
        // Find the points shared between keypoints_1 and keypoints_tmp,
        // then use them to locate the corresponding points in keypoints_2.
        for (int j = 0; j < preFrame2d3d.size(); j++) {
            int m = preFrame2d3d[j].x;
            int flag = 0; // match counter
            double dist = 10000.0; // best distance so far
            int index = 0; // index of the best match
            int k2index = 0;
            for (int k = 0; k < matches.size(); k++) {
                if (distance1(keypoints_tmp[m], keypoints_1[matches[k].queryIdx]) < 2)
                {
                    // treat as the same feature point
                    if (distance1(keypoints_tmp[m], keypoints_1[matches[k].queryIdx]) < dist) {
                        dist = distance1(keypoints_tmp[m], keypoints_1[matches[k].queryIdx]);
                        index = matches[k].queryIdx;
                        k2index = matches[k].trainIdx;
                    }
                    flag++;
                }
            }
            if (flag == 0) {
                // no matching point found
                preFrame2d3d[j].z = 0;
            }
            else {
                // record the matched feature point
                if (index == preFrame2d3d[j].x)
                    preFrame2d3d[j].z = k2index;
                else
                    preFrame2d3d[j].z = 0;
            }
        }
        //In preFrame2d3d: x is the index into keypoints_tmp (the previous frame's feature points),
        //y the index of the previous frame's 3D point in points_tmp, z the index into keypoints_2 of the current frame

        for (int j = 0; j < preFrame2d3d.size(); j++) {
            cout << "index table " << preFrame2d3d[j] << endl;
        }

        // Solve for R, t with solvePnP from the corresponding keypoints_2 points and the 3D points.
        // 1. PnP solving
        Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
        vector<Point3f> pts_3d;
        vector<Point2f> pts_2d;
        for (Point3d m : preFrame2d3d)
        {
            if (m.z != 0) {
                double d = points_tmp[(int)m.y].z; // keep the depth as double; truncating to int would discard points with d < 1
                if (d <= 0)   // bad depth
                    continue;
                Point2d p1 = pixel2cam(keypoints_tmp[(int)m.x].pt, K);

                pts_3d.push_back(Point3f(p1.x * d, p1.y * d, d));
                pts_2d.push_back(keypoints_2[(int)m.z].pt);
            }
        }
        if (pts_3d.size() < 4) { // solvePnP needs at least 4 correspondences
            cout << "too few points with depth for PnP, skipping this frame" << endl;
            continue;
        }
        Mat r1, t1;
        solvePnP(pts_3d, pts_2d, K, Mat(), r1, t1, false); // OpenCV's PnP solver; EPNP, DLS, etc. are also available
        Mat R1;
        cv::Rodrigues(r1, R1); // r1 is a rotation vector; convert it to a matrix via the Rodrigues formula
        cout << "R1 is:" << R1 << endl;
        cout << "t1 is:" << t1 << endl;

        // Triangulate keypoints_1 and keypoints_2 again with the new pose to get updated points.
        // The feature points were already matched above, so triangulate those again;
        // this time the previous frame's pose is known, so the projection matrices differ from the first triangulation.
        // triangulation is overloaded: R, t are the previous pose, R1, t1 the current one.
        points.clear();
        triangulation(keypoints_1, keypoints_2, matches, R, t, R1, t1, points);
        // make the current frame and its pose the "previous" ones for the next iteration
        R = R1.clone();
        t = t1.clone();
        img_2 = img_3.clone();
    }



    //-- verify the reprojection relation between the triangulated points and the feature points
    //Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
    //for (int i = 0; i < matches.size(); i++)
    //{
    //    Point2d pt1_cam = pixel2cam(keypoints_1[matches[i].queryIdx].pt, K);
    //    Point2d pt1_cam_3d(
    //        points[i].x / points[i].z,
    //        points[i].y / points[i].z
    //    );

    //    cout << "point in the first camera frame: " << pt1_cam << endl;
    //    cout << "point projected from 3D " << pt1_cam_3d << ", d=" << points[i].z << endl;

    //    // the second image
    //    Point2f pt2_cam = pixel2cam(keypoints_2[matches[i].trainIdx].pt, K);
    //    Mat pt2_trans = R * (Mat_<double>(3, 1) << points[i].x, points[i].y, points[i].z) + t;
    //    pt2_trans /= pt2_trans.at<double>(2, 0);
    //    cout << "point in the second camera frame: " << pt2_cam << endl;
    //    cout << "point reprojected from second frame: " << pt2_trans.t() << endl;
    //    cout << endl;
    //}

    return 0;
}

void find_feature_matches(const Mat& img_1, const Mat& img_2,
    std::vector<KeyPoint>& keypoints_1,
    std::vector<KeyPoint>& keypoints_2,
    std::vector< DMatch >& matches)
{
    Ptr<Feature2D> f2d = SIFT::create(1000);
    vector<KeyPoint> trainkeypoints, querykeypoints;
    f2d->detect(img_1, querykeypoints);
    f2d->detect(img_2, trainkeypoints);
    Mat querydescriptions, traindescriptions;
    f2d->compute(img_1, querykeypoints, querydescriptions);
    f2d->compute(img_2, trainkeypoints, traindescriptions); // FLANN needs CV_32F descriptors
    querydescriptions.convertTo(querydescriptions, CV_32F);
    traindescriptions.convertTo(traindescriptions, CV_32F);

    FlannBasedMatcher matcher;
    vector<vector<DMatch>> matchePoints;
    //matcher.match(imageDesc1, imageDesc2, matchePoints, Mat());
    matcher.knnMatch(querydescriptions, traindescriptions, matchePoints, 2);

    std::vector<DMatch> match1;
    std::vector<DMatch> match2;
    std::vector<DMatch> ransac_matchers;

    // Lowe's ratio test: use a stricter threshold when there are many candidate matches
    double th = 0.0;
    if (matchePoints.size() > 300) {
        th = 0.5;
    }
    else
    {
        th = 0.9;
    }

    for (int i = 0; i < matchePoints.size(); i++)
    {
        match1.push_back(matchePoints[i][0]);
        match2.push_back(matchePoints[i][1]);
        if (matchePoints[i][0].distance < th * matchePoints[i][1].distance)
            matches.push_back(matchePoints[i][0]);
    }

    // RANSAC consistency check
    ransacmatch(querykeypoints, trainkeypoints, matches, ransac_matchers);
    Mat img_matches1, img_matches2, img_matches3, img_matches4;
    //drawMatches(img_1, querykeypoints, img_2, trainkeypoints, match1, img_matches1);
    //drawMatches(img_1, querykeypoints, img_2, trainkeypoints, match2, img_matches2);
    drawMatches(img_1, querykeypoints, img_2, trainkeypoints, matches, img_matches3);
    drawMatches(img_1, querykeypoints, img_2, trainkeypoints, ransac_matchers, img_matches4);
    //imshow("1", img_matches1);
    //imshow("2", img_matches2);
    imshow("now", img_matches3);
    imshow("ransac_matchers", img_matches4);
    keypoints_1 = querykeypoints;
    keypoints_2 = trainkeypoints;
    matches = ransac_matchers;
    waitKey(0);

}

void pose_estimation_2d2d(
    const std::vector<KeyPoint>& keypoints_1,
    const std::vector<KeyPoint>& keypoints_2,
    const std::vector< DMatch >& matches,
    Mat& R, Mat& t)
{
    // camera intrinsics (replace with your camera's calibrated values)
    Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);

    //-- convert the matched points into vector<Point2f> form
    vector<Point2f> points1;
    vector<Point2f> points2;

    for (int i = 0; i < (int)matches.size(); i++)
    {
        points1.push_back(keypoints_1[matches[i].queryIdx].pt);
        points2.push_back(keypoints_2[matches[i].trainIdx].pt);
    }

    //-- compute the fundamental matrix
    Mat fundamental_matrix;
    fundamental_matrix = findFundamentalMat(points1, points2, FM_RANSAC);
    cout << "fundamental_matrix is " << endl << fundamental_matrix << endl;

    //-- compute the essential matrix
    Point2d principal_point(300, 300);   // camera principal point, must be consistent with K
    int focal_length = 600;              // camera focal length, must be consistent with K
    Mat essential_matrix;
    essential_matrix = findEssentialMat(points1, points2, focal_length, principal_point);
    cout << "essential_matrix is " << endl << essential_matrix << endl;

    //-- compute the homography matrix
    Mat homography_matrix;
    homography_matrix = findHomography(points1, points2, RANSAC, 3);
    cout << "homography_matrix is " << endl << homography_matrix << endl;

    //-- recover the rotation and translation from the essential matrix
    recoverPose(essential_matrix, points1, points2, R, t, focal_length, principal_point);
    cout << "R is " << endl << R << endl;
    cout << "t is " << endl << t << endl;
}

void triangulation(
    const vector< KeyPoint >& keypoint_1,
    const vector< KeyPoint >& keypoint_2,
    const std::vector< DMatch >& matches,
    const Mat& R, const Mat& t,
    vector< Point3d >& points)
{
    Mat T1 = (Mat_<float>(3, 4) <<
        1, 0, 0, 0,
        0, 1, 0, 0,
        0, 0, 1, 0);
    Mat T2 = (Mat_<float>(3, 4) <<
        R.at<double>(0, 0), R.at<double>(0, 1), R.at<double>(0, 2), t.at<double>(0, 0),
        R.at<double>(1, 0), R.at<double>(1, 1), R.at<double>(1, 2), t.at<double>(1, 0),
        R.at<double>(2, 0), R.at<double>(2, 1), R.at<double>(2, 2), t.at<double>(2, 0)
        );

    Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
    vector<Point2f> pts_1, pts_2;
    for (DMatch m : matches)
    {
        // convert pixel coordinates to camera coordinates
        pts_1.push_back(pixel2cam(keypoint_1[m.queryIdx].pt, K));
        pts_2.push_back(pixel2cam(keypoint_2[m.trainIdx].pt, K));
    }

    Mat pts_4d;
    cv::triangulatePoints(T1, T2, pts_1, pts_2, pts_4d);

    // convert to non-homogeneous coordinates
    for (int i = 0; i < pts_4d.cols; i++)
    {
        Mat x = pts_4d.col(i);
        x /= x.at<float>(3, 0); // normalize the homogeneous coordinate
        Point3d p(
            x.at<float>(0, 0),
            x.at<float>(1, 0),
            x.at<float>(2, 0)
        );
        points.push_back(p);
        //cout << "p is" << p << endl;
        //cout << "pts1 is" << pts_1[i] << endl;
        //cout << "pts2 is" << pts_2[i] << endl;
    }
}

void triangulation(
    const vector< KeyPoint >& keypoint_1,
    const vector< KeyPoint >& keypoint_2,
    const std::vector< DMatch >& matches,
    const Mat& R1, const Mat& t1,
    const Mat& R2, const Mat& t2,
    vector< Point3d >& points)
{
    Mat T1 = (Mat_<float>(3, 4) <<
        R1.at<double>(0, 0), R1.at<double>(0, 1), R1.at<double>(0, 2), t1.at<double>(0, 0),
        R1.at<double>(1, 0), R1.at<double>(1, 1), R1.at<double>(1, 2), t1.at<double>(1, 0),
        R1.at<double>(2, 0), R1.at<double>(2, 1), R1.at<double>(2, 2), t1.at<double>(2, 0)
        );
    Mat T2 = (Mat_<float>(3, 4) <<
        R2.at<double>(0, 0), R2.at<double>(0, 1), R2.at<double>(0, 2), t2.at<double>(0, 0),
        R2.at<double>(1, 0), R2.at<double>(1, 1), R2.at<double>(1, 2), t2.at<double>(1, 0),
        R2.at<double>(2, 0), R2.at<double>(2, 1), R2.at<double>(2, 2), t2.at<double>(2, 0)
        );

    Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1); // same intrinsics as used everywhere else
    vector<Point2f> pts_1, pts_2;
    for (DMatch m : matches)
    {
        // convert pixel coordinates to camera coordinates
        pts_1.push_back(pixel2cam(keypoint_1[m.queryIdx].pt, K));
        pts_2.push_back(pixel2cam(keypoint_2[m.trainIdx].pt, K));
    }

    Mat pts_4d;
    cv::triangulatePoints(T1, T2, pts_1, pts_2, pts_4d);

    // convert to non-homogeneous coordinates
    for (int i = 0; i < pts_4d.cols; i++)
    {
        Mat x = pts_4d.col(i);
        x /= x.at<float>(3, 0); // normalize the homogeneous coordinate
        Point3d p(
            x.at<float>(0, 0),
            x.at<float>(1, 0),
            x.at<float>(2, 0)
        );
        points.push_back(p);
        //cout << "p is" << p << endl;
        //cout << "pts1 is" << pts_1[i] << endl;
        //cout << "pts2 is" << pts_2[i] << endl;
    }
}


Point2f pixel2cam(const Point2d& p, const Mat& K)
{
    return Point2f
    (
        (p.x - K.at<double>(0, 2)) / K.at<double>(0, 0),
        (p.y - K.at<double>(1, 2)) / K.at<double>(1, 1)
    );
}

3. Summary
A few points deserve attention.
1. What coordinates exactly do we feed to solvePnP?
The 2D points are simply the pixel coordinates.
The 3D points are built from normalized coordinates: convert the pixel coordinates to normalized camera coordinates, multiply by the depth, and use the depth itself as z, as in the sketch below.
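For concreteness, a minimal sketch of how one 3D-2D correspondence is assembled in this program (it reuses the pixel2cam helper from the listing above; the function and variable names are illustrative):

// Build one 3D-2D pair for solvePnP: pt_prev is the previous frame's pixel
// coordinate, d its triangulated depth, pt_curr the current frame's pixel coordinate.
void addPnPCorrespondence(const Point2f& pt_prev, const Point2f& pt_curr, double d, const Mat& K,
                          vector<Point3f>& pts_3d, vector<Point2f>& pts_2d)
{
    Point2d pn = pixel2cam(pt_prev, K);               // pixel -> normalized camera coordinates
    pts_3d.push_back(Point3f(pn.x * d, pn.y * d, d)); // back-project using the known depth
    pts_2d.push_back(pt_curr);                        // the 2D input stays in pixel coordinates
}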
2. Which projection matrices do we pass to the triangulation?
There are two cases:
First, for the triangulation right after 2D-2D estimation, both projection matrices are 3x4: one is [1 0 0 0; 0 1 0 0; 0 0 1 0], the other is [R, t] built from the solved R and t (note that t is a column vector).
Second, for the triangulation after 3D-2D estimation, both projection matrices are 3x4 matrices of the form [R, t]. The difference is that the first is built from the previously solved R, t, while the second is built from the R, t just solved by solvePnP. A small helper capturing this is sketched below.
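A helper like the following (an illustrative sketch mirroring what the two triangulation overloads above do inline) makes the two cases explicit; triangulatePoints receives the matrices as CV_32F:

// Build a 3x4 CV_32F projection matrix [R | t] from double-typed R (3x3) and t (3x1).
Mat composeProjection(const Mat& R, const Mat& t)
{
    Mat T(3, 4, CV_32F);
    for (int r = 0; r < 3; ++r) {
        for (int c = 0; c < 3; ++c)
            T.at<float>(r, c) = (float)R.at<double>(r, c);
        T.at<float>(r, 3) = (float)t.at<double>(r, 0);
    }
    return T;
}
// Case 1 (after 2D-2D): T1 = [I | 0],                           T2 = composeProjection(R, t)
// Case 2 (after 3D-2D): T1 = composeProjection(R_prev, t_prev), T2 = composeProjection(R_curr, t_curr)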
3. This program also has one obvious weakness: every time new feature points are extracted, they must be matched against the earlier points that already have depth. Feature points are not plentiful to begin with, so the surviving matches can easily be too few to solve the problem reliably. This is a design flaw; in practice, it shows up as soon as the images change significantly between frames. One improvement would be to abandon descriptor matching and track feature points with LK optical flow instead, and to introduce keyframes so that features need to be re-detected less often; the results should be much better. A rough sketch of the LK idea follows; I'll flesh it out when I have time.
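As a rough sketch of that direction (not part of the program above; prev_gray, curr_gray, and prev_pts are assumed to come from the surrounding pipeline):

// Track the previous frame's feature points into the current frame with pyramidal LK.
vector<Point2f> curr_pts;
vector<uchar> status;   // 1 where a point was tracked successfully
vector<float> err;
calcOpticalFlowPyrLK(prev_gray, curr_gray, prev_pts, curr_pts, status, err);
// Keep only the successfully tracked points; their indices stay aligned with
// prev_pts, so the depth bookkeeping table is much easier to maintain than
// with descriptor matching.
vector<Point2f> tracked_prev, tracked_curr;
for (size_t i = 0; i < status.size(); ++i) {
    if (status[i]) {
        tracked_prev.push_back(prev_pts[i]);
        tracked_curr.push_back(curr_pts[i]);
    }
}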
