1. Introduction
3D-2D pose estimation (here, a simple VO) is considerably harder than the 2D-2D case, but the main idea is straightforward. First, run 2D-2D pose estimation on the first two frames to obtain R and t; this t carries no absolute scale, it is only a relative value. Then triangulate the feature points matched between those two frames to recover their depths, which are relative as well. Next, match feature points between the second and third frames, keep the matches that already have a depth, solve for R and t with PnP, use that R and t to triangulate all the feature points again, and so on in a loop.
Put simply (a stripped-down code sketch follows the list):
1. Solve for R, t between frames 1 and 2 with 2D-2D pose estimation.
2. Triangulate the matched feature points to recover depth.
3. Match feature points between frames 2 and 3, then solve for R, t with PnP using the points that already have depth.
4. Triangulate all the feature points again to recover depth.
5. Repeat steps 3 and 4.
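Here is a minimal sketch of that loop, reusing the helper functions defined in Section 2 (find_feature_matches, pose_estimation_2d2d, the two triangulation overloads). The bookkeeping that remembers which keypoint carries which depth is elided here; the full listing below is mostly that bookkeeping.

// Sketch only -- the collection of 3D-2D pairs is elided; see the full listing in Section 2.
vector<cv::String> files;
glob("E:/Data/video2/*.bmp", files);
Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
vector<KeyPoint> k1, k2;
vector<DMatch> matches;
vector<Point3d> points;
Mat R, t;
Mat prev = imread(files[0]), cur = imread(files[1]);
find_feature_matches(prev, cur, k1, k2, matches);          // step 1
pose_estimation_2d2d(k1, k2, matches, R, t);               // R, t (t only up to scale)
triangulation(k1, k2, matches, R, t, points);              // step 2: relative depths
for (size_t i = 2; i < files.size(); ++i) {
    prev = cur;
    cur = imread(files[i]);
    find_feature_matches(prev, cur, k1, k2, matches);      // step 3
    vector<Point3f> pts_3d;  // 3D points of matches that already carry a depth
    vector<Point2f> pts_2d;  // their pixel positions in the current frame
    // ...fill pts_3d / pts_2d from 'points' via the index table...
    Mat r1, R1, t1;
    solvePnP(pts_3d, pts_2d, K, Mat(), r1, t1, false);
    Rodrigues(r1, R1);
    triangulation(k1, k2, matches, R, t, R1, t1, points);  // step 4: new depths
    R = R1; t = t1;                                        // step 5: repeat
}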
2. Implementation
#include <iostream>
#include <opencv2/core/core.hpp>
#include <opencv2/features2d/features2d.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/calib3d/calib3d.hpp>
#include <opencv2/imgcodecs.hpp>
#include "opencv2/imgcodecs/legacy/constants_c.h"
#include "math.h"
// #include "extra.h" // used in opencv2
using namespace std;
using namespace cv;
void ransacmatch(vector<KeyPoint>& querykpt, vector<KeyPoint>& trainkpt, vector<DMatch>& match1, vector<DMatch>& matchransac)
{
//RANSAC consistency check
vector<Point2f>queryp(match1.size()), trainp(match1.size());//coordinates of the matched point pairs
//collect the point pairs from the keypoints
for (int i = 0; i < match1.size(); ++i)
{
queryp[i] = querykpt[match1[i].queryIdx].pt;//match1[i].queryIdx is the index of the query keypoint in this match
trainp[i] = trainkpt[match1[i].trainIdx].pt;
}
//filter the matched pairs with RANSAC
vector<uchar> mask(queryp.size());//findHomography's inlier mask is of type uchar
findHomography(queryp, trainp, RANSAC, 3, mask);
for (int j = 0; j < mask.size(); ++j)
{
if (mask[j])
{
matchransac.push_back(match1[j]);
}
}
}
//Euclidean pixel distance between two keypoints
double distance1(KeyPoint k1, KeyPoint k2) {
return sqrt(pow(k1.pt.x - k2.pt.x, 2) + pow(k1.pt.y - k2.pt.y, 2));
}
void find_feature_matches(
const Mat& img_1, const Mat& img_2,
std::vector<KeyPoint>& keypoints_1,
std::vector<KeyPoint>& keypoints_2,
std::vector< DMatch >& matches);
void pose_estimation_2d2d(
const std::vector<KeyPoint>& keypoints_1,
const std::vector<KeyPoint>& keypoints_2,
const std::vector< DMatch >& matches,
Mat& R, Mat& t);
void triangulation(
const vector<KeyPoint>& keypoint_1,
const vector<KeyPoint>& keypoint_2,
const std::vector< DMatch >& matches,
const Mat& R, const Mat& t,
vector<Point3d>& points
);
void triangulation(
const vector<KeyPoint>& keypoint_1,
const vector<KeyPoint>& keypoint_2,
const std::vector< DMatch >& matches,
const Mat& R1, const Mat& t1,
const Mat& R2, const Mat& t2,
vector<Point3d>& points
);
// convert pixel coordinates to normalized camera coordinates
Point2f pixel2cam(const Point2d& p, const Mat& K);
int main(int argc, char** argv)
{
//if (argc != 3)
//{
// cout << "usage: triangulation img1 img2" << endl;
// return 1;
//}
//-- read the images
//Mat img_1 = imread("20.bmp", CV_LOAD_IMAGE_COLOR);//color mode
//Mat img_2 = imread("21.bmp", CV_LOAD_IMAGE_COLOR);
string folder_path = "E:/Data/video2/*.bmp";
vector<cv::String> file_names;
glob(folder_path, file_names);
cv::Mat img_1, img_2;
img_1 = imread(file_names[0]);
img_2 = imread(file_names[1]);
vector<KeyPoint> keypoints_1, keypoints_2, keypoints_tmp;
vector<DMatch> matches;
find_feature_matches(img_1, img_2, keypoints_1, keypoints_2, matches);
cout << "一共找到了" << matches.size() << "组匹配点" << endl;
//-- 估计两张图像间运动
Mat R, t;
pose_estimation_2d2d(keypoints_1, keypoints_2, matches, R, t);
//-- triangulation
vector<Point3d> points;
vector<Point3d> points_tmp;
triangulation(keypoints_1, keypoints_2, matches, R, t, points);
//points now holds the triangulated 3D coordinates with (relative) depth
//store how these depths pair up with the feature points, i.e. record the
//correspondence between keypoints_2 and points, point by point
vector<Point3d> preFrame2d3d;
//here we maintain the index table preFrame2d3d, together with a vector holding
//the previous frame's keypoints (keypoints_tmp) and a vector holding their
//depths (points_tmp):
//x = index into keypoints_tmp, y = index into points_tmp,
//z = index into keypoints_2 of the current frame (0 means no match was found)
//initialization ends here; from the third frame on, the loop below repeats these steps:
//1. match the new frame against the previous one (keypoints_1 = previous frame,
//   keypoints_2 = current frame)
//2. find the keypoints_1 that coincide with keypoints_tmp and, through them,
//   the corresponding keypoints_2
//3. solve for R, t with solvePnP from the keypoints_2 <-> points_tmp correspondences
//4. triangulate keypoints_1/keypoints_2 again with the new pose; the previous
//   frame's pose is now known, so the projection matrices differ from the first
//   triangulation (triangulation is overloaded for this: R, t is the previous
//   pose, R1, t1 the current one)
//5. update the index table preFrame2d3d and move on to the next frame
//process every remaining image in the folder (file_names was filled by glob above)
for (int i = 0; i < file_names.size(); i++) {
if (i < 2) {
continue;
}
int pos = file_names[i].find_last_of("\\");
string img_name(file_names[i].substr(pos + 1));
cout << img_name << std::endl;
//start processing this frame
preFrame2d3d.clear();
for (int i = 0; i < matches.size(); i++) {
cout << "得到keypoints_2为:" << keypoints_2[matches[i].trainIdx].pt << ",对应3D" << points[i] << endl;
Point3d p(matches[i].trainIdx, i, 1);
preFrame2d3d.push_back(p);
}
keypoints_tmp.clear();
points_tmp.clear();
keypoints_tmp = keypoints_2;
points_tmp = points;
//maintain the index table preFrame2d3d, a vector with the previous frame's
//keypoints (keypoints_tmp) and a vector with their depths (points_tmp)
//the per-frame state is now ready
//read the next frame
Mat img_3 = imread(file_names[i]);
//match the new frame against the previous one: keypoints_1 = previous frame, keypoints_2 = current frame
keypoints_1.clear();
keypoints_2.clear();
matches.clear();
find_feature_matches(img_2, img_3, keypoints_1, keypoints_2, matches);
//find the keypoints_1 that coincide with keypoints_tmp,
//then use the coincident ones to locate the corresponding keypoints_2
for (int i = 0; i < preFrame2d3d.size(); i++) {
int m = (int)preFrame2d3d[i].x;
int flag = 0; //number of candidate matches
double dist = 10000.0;//best distance so far
int index = 0;//index of the best candidate in keypoints_1
int k2index = 0;//index of the best candidate in keypoints_2
for (int j = 0; j < matches.size(); j++) {
if (distance1(keypoints_tmp[m], keypoints_1[matches[j].queryIdx]) < 2)
{
//treat as the same physical keypoint; keep the nearest candidate
if (distance1(keypoints_tmp[m], keypoints_1[matches[j].queryIdx]) < dist) {
dist = distance1(keypoints_tmp[m], keypoints_1[matches[j].queryIdx]);
index = matches[j].queryIdx;
k2index = matches[j].trainIdx;
}
flag++;
}
}
if (flag == 0) {
//no matching point was found
preFrame2d3d[i].z = 0;
}
else {
//record the match (keypoints_1 was re-detected on the same image as keypoints_tmp, so the two index spaces coincide)
if (index == preFrame2d3d[i].x)
preFrame2d3d[i].z = k2index;
else
preFrame2d3d[i].z = 0;
}
}
//preFrame2d3d: x = index into keypoints_tmp (the previous frame's keypoints from the last match),
//y = index into points_tmp (the previous 3D points), z = index into keypoints_2 (current frame)
for (int i = 0; i < preFrame2d3d.size(); i++) {
cout << "索引表" << preFrame2d3d[i] << endl;
}
//solve for R, t with solvePnP from the keypoints_2 <-> points_tmp correspondences
//1. PnP solving
Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
vector<Point3f> pts_3d;
vector<Point2f> pts_2d;
for (Point3d m : preFrame2d3d)
{
if (m.z != 0) {
double d = points_tmp[(int)m.y].z;//triangulated depth of the previous 3D point
if (d <= 0) // bad depth
continue;
Point2d p1 = pixel2cam(keypoints_tmp[m.x].pt, K);
pts_3d.push_back(Point3f(p1.x * d, p1.y * d, d));
pts_2d.push_back(keypoints_2[m.z].pt);
}
}
Mat r1, t1;
solvePnP(pts_3d, pts_2d, K, Mat(), r1, t1, false); // call OpenCV's PnP solver; EPNP, DLS and other methods can be chosen
Mat R1;
cv::Rodrigues(r1, R1); // r1 is a rotation vector; the Rodrigues formula converts it to a matrix
cout << "R1 is:" << R1 << endl;
cout << "t1 is:" << t1 << endl;
//triangulate keypoints_1, keypoints_2 again with the new pose to refresh points
//the feature points were already matched above, so we reuse them for this triangulation;
//this time the previous frame's pose is known, so the projection matrices differ from
//the first triangulation: triangulation is overloaded, R, t is the previous pose, R1, t1 the current one
points.clear();
triangulation(keypoints_1, keypoints_2, matches, R, t, R1, t1, points);
//roll the state forward: the current frame and pose become the previous ones
img_2 = img_3;
R = R1;
t = t1;
}
//-- verify the reprojection of the triangulated points against the feature points
//Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
//for (int i = 0; i < matches.size(); i++)
//{
// Point2d pt1_cam = pixel2cam(keypoints_1[matches[i].queryIdx].pt, K);
// Point2d pt1_cam_3d(
// points[i].x / points[i].z,
// points[i].y / points[i].z
// );
// cout << "point in the first camera frame: " << pt1_cam << endl;
// cout << "point projected from 3D " << pt1_cam_3d << ", d=" << points[i].z << endl;
// // the second image
// Point2f pt2_cam = pixel2cam(keypoints_2[matches[i].trainIdx].pt, K);
// Mat pt2_trans = R * (Mat_<double>(3, 1) << points[i].x, points[i].y, points[i].z) + t;
// pt2_trans /= pt2_trans.at<double>(2, 0);
// cout << "point in the second camera frame: " << pt2_cam << endl;
// cout << "point reprojected from second frame: " << pt2_trans.t() << endl;
// cout << endl;
//}
return 0;
}
void find_feature_matches(const Mat& img_1, const Mat& img_2,
std::vector<KeyPoint>& keypoints_1,
std::vector<KeyPoint>& keypoints_2,
std::vector< DMatch >& matches)
{
Ptr<Feature2D> f2d = SIFT::create(1000);
vector<KeyPoint> trainkeypoints, querykeypoints;
f2d->detect(img_1, querykeypoints);
f2d->detect(img_2, trainkeypoints);
Mat querydescriptions, traindescriptions;
f2d->compute(img_1, querykeypoints, querydescriptions);
f2d->compute(img_2, trainkeypoints, traindescriptions);//FLANN needs CV_32F descriptors
querydescriptions.convertTo(querydescriptions, CV_32F);
traindescriptions.convertTo(traindescriptions, CV_32F);
FlannBasedMatcher matcher;
vector<vector<DMatch>> matchePoints;
//matcher.match(imageDesc1, imageDesc2, matchePoints, Mat());
matcher.knnMatch(querydescriptions, traindescriptions, matchePoints, 2);
std::vector<DMatch> match1;
std::vector<DMatch> match2;
std::vector<DMatch> ransac_matchers;
double th = 0.0;
if (matchePoints.size() > 300) {
th = 0.5;
}
else
{
th = 0.9;
}
for (int i = 0; i < matchePoints.size(); i++)
{
match1.push_back(matchePoints[i][0]);
match2.push_back(matchePoints[i][1]);
if (matchePoints[i][0].distance < th * matchePoints[i][1].distance)//Lowe's ratio test
matches.push_back(matchePoints[i][0]);
}
//RANSAC verification
ransacmatch(querykeypoints, trainkeypoints, matches, ransac_matchers);
Mat img_matches1, img_matches2, img_matches3, img_matches4;
//drawMatches(img_1, querykeypoints, img_2, trainkeypoints, match1, img_matches1);
//drawMatches(img_1, querykeypoints, img_2, trainkeypoints, match2, img_matches2);
drawMatches(img_1, querykeypoints, img_2, trainkeypoints, matches, img_matches3);
drawMatches(img_1, querykeypoints, img_2, trainkeypoints, ransac_matchers, img_matches4);
//imshow("1", img_matches1);
//imshow("2", img_matches2);
imshow("now", img_matches3);
imshow("ransac_matchers", img_matches4);
keypoints_1 = querykeypoints;
keypoints_2 = trainkeypoints;
matches = ransac_matchers;
waitKey(0);
}
void pose_estimation_2d2d(
const std::vector<KeyPoint>& keypoints_1,
const std::vector<KeyPoint>& keypoints_2,
const std::vector< DMatch >& matches,
Mat& R, Mat& t)
{
// camera intrinsics
Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
//-- convert the matched points to vector<Point2f>
vector<Point2f> points1;
vector<Point2f> points2;
for (int i = 0; i < (int)matches.size(); i++)
{
points1.push_back(keypoints_1[matches[i].queryIdx].pt);
points2.push_back(keypoints_2[matches[i].trainIdx].pt);
}
//-- compute the fundamental matrix
Mat fundamental_matrix;
fundamental_matrix = findFundamentalMat(points1, points2, FM_RANSAC);
cout << "fundamental_matrix is " << endl << fundamental_matrix << endl;
//-- compute the essential matrix
Point2d principal_point(300, 300); //camera principal point
int focal_length = 600; //camera focal length
Mat essential_matrix;
essential_matrix = findEssentialMat(points1, points2, focal_length, principal_point);
cout << "essential_matrix is " << endl << essential_matrix << endl;
//-- compute the homography matrix
Mat homography_matrix;
homography_matrix = findHomography(points1, points2, RANSAC, 3);
cout << "homography_matrix is " << endl << homography_matrix << endl;
//-- recover rotation and translation from the essential matrix
recoverPose(essential_matrix, points1, points2, R, t, focal_length, principal_point);
cout << "R is " << endl << R << endl;
cout << "t is " << endl << t << endl;
}
void triangulation(
const vector< KeyPoint >& keypoint_1,
const vector< KeyPoint >& keypoint_2,
const std::vector< DMatch >& matches,
const Mat& R, const Mat& t,
vector< Point3d >& points)
{
Mat T1 = (Mat_<float>(3, 4) <<
1, 0, 0, 0,
0, 1, 0, 0,
0, 0, 1, 0);
Mat T2 = (Mat_<float>(3, 4) <<
R.at<double>(0, 0), R.at<double>(0, 1), R.at<double>(0, 2), t.at<double>(0, 0),
R.at<double>(1, 0), R.at<double>(1, 1), R.at<double>(1, 2), t.at<double>(1, 0),
R.at<double>(2, 0), R.at<double>(2, 1), R.at<double>(2, 2), t.at<double>(2, 0)
);
Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);
vector<Point2f> pts_1, pts_2;
for (DMatch m : matches)
{
// convert pixel coordinates to normalized camera coordinates
pts_1.push_back(pixel2cam(keypoint_1[m.queryIdx].pt, K));
pts_2.push_back(pixel2cam(keypoint_2[m.trainIdx].pt, K));
}
Mat pts_4d;
cv::triangulatePoints(T1, T2, pts_1, pts_2, pts_4d);
// convert to non-homogeneous coordinates
for (int i = 0; i < pts_4d.cols; i++)
{
Mat x = pts_4d.col(i);
x /= x.at<float>(3, 0); // normalize
Point3d p(
x.at<float>(0, 0),
x.at<float>(1, 0),
x.at<float>(2, 0)
);
points.push_back(p);
//cout << "p is" << p << endl;
//cout << "pts1 is" << pts_1[i] << endl;
//cout << "pts2 is" << pts_2[i] << endl;
}
}
void triangulation(
const vector< KeyPoint >& keypoint_1,
const vector< KeyPoint >& keypoint_2,
const std::vector< DMatch >& matches,
const Mat& R1, const Mat& t1,
const Mat& R2, const Mat& t2,
vector< Point3d >& points)
{
Mat T1 = (Mat_<float>(3, 4) <<
R1.at<double>(0, 0), R1.at<double>(0, 1), R1.at<double>(0, 2), t1.at<double>(0, 0),
R1.at<double>(1, 0), R1.at<double>(1, 1), R1.at<double>(1, 2), t1.at<double>(1, 0),
R1.at<double>(2, 0), R1.at<double>(2, 1), R1.at<double>(2, 2), t1.at<double>(2, 0)
);
Mat T2 = (Mat_<float>(3, 4) <<
R2.at<double>(0, 0), R2.at<double>(0, 1), R2.at<double>(0, 2), t2.at<double>(0, 0),
R2.at<double>(1, 0), R2.at<double>(1, 1), R2.at<double>(1, 2), t2.at<double>(1, 0),
R2.at<double>(2, 0), R2.at<double>(2, 1), R2.at<double>(2, 2), t2.at<double>(2, 0)
);
Mat K = (Mat_<double>(3, 3) << 600, 0, 300, 0, 600, 300, 0, 0, 1);//keep the intrinsics consistent with the rest of the code
vector<Point2f> pts_1, pts_2;
for (DMatch m : matches)
{
// convert pixel coordinates to normalized camera coordinates
pts_1.push_back(pixel2cam(keypoint_1[m.queryIdx].pt, K));
pts_2.push_back(pixel2cam(keypoint_2[m.trainIdx].pt, K));
}
Mat pts_4d;
cv::triangulatePoints(T1, T2, pts_1, pts_2, pts_4d);
// convert to non-homogeneous coordinates
for (int i = 0; i < pts_4d.cols; i++)
{
Mat x = pts_4d.col(i);
x /= x.at<float>(3, 0); // normalize
Point3d p(
x.at<float>(0, 0),
x.at<float>(1, 0),
x.at<float>(2, 0)
);
points.push_back(p);
//cout << "p is" << p << endl;
//cout << "pts1 is" << pts_1[i] << endl;
//cout << "pts2 is" << pts_2[i] << endl;
}
}
Point2f pixel2cam(const Point2d& p, const Mat& K)
{
return Point2f
(
(p.x - K.at<double>(0, 2)) / K.at<double>(0, 0),
(p.y - K.at<double>(1, 2)) / K.at<double>(1, 1)
);
}
3. Summary
A few points still deserve attention.
1. What, exactly, are the coordinates we feed to solvePnP?
The 2D input is simply the pixel coordinate.
The 3D input is built from the normalized coordinate: convert the pixel coordinate to the normalized camera plane and scale it by the depth, which is used as-is (the formula below makes this explicit).
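Concretely, for a pixel $(u, v)$ with triangulated depth $d$ and intrinsics $f_x, f_y, c_x, c_y$, the 3D point handed to solvePnP is

$$X = \frac{u - c_x}{f_x}\,d, \qquad Y = \frac{v - c_y}{f_y}\,d, \qquad Z = d,$$

which is exactly what pixel2cam followed by the multiplication by d computes in the loop above.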
2. What projection matrices do we pass to the triangulation?
There are two cases:
First, for the triangulation right after 2D-2D estimation, both projection matrices are 3x4: one is [1 0 0 0; 0 1 0 0; 0 0 1 0], the other is [R, t] assembled from the recovered R and t (note that t is a column vector).
Second, for the triangulation after 3D-2D (PnP), both projection matrices are 3x4 matrices of the form [R, t]. The difference is that the first is built from the previously solved R, t, while the second is built from the R, t just solved by solvePnP, as the relations below make explicit.
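In symbols, writing $x_1, x_2$ for the homogeneous normalized coordinates produced by pixel2cam (so K does not appear in the projection matrices) and $X$ for the homogeneous 3D point, the two cases triangulate $X$ from

$$x_1 \simeq \left[\, I \mid 0 \,\right] X, \qquad x_2 \simeq \left[\, R \mid t \,\right] X \quad \text{(after 2D-2D)}$$

$$x_1 \simeq \left[\, R_{\mathrm{prev}} \mid t_{\mathrm{prev}} \,\right] X, \qquad x_2 \simeq \left[\, R_{\mathrm{cur}} \mid t_{\mathrm{cur}} \,\right] X \quad \text{(after 3D-2D)}$$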
3. The program still has an obvious weakness: every new frame's feature points must be matched back against the previously obtained depth-carrying points. When few features are detected in the first place, too few matches may survive, sometimes not even enough to solve for the essential matrix. This is a design flaw, and in practice it shows up as soon as the images change a bit more between frames. One improvement is to drop descriptor matching altogether and track the feature points with LK optical flow instead, adding the notion of keyframes to reduce how often features are re-detected. That should work considerably better; I will add it when time permits. A rough sketch of the optical-flow idea follows.
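As a reference for that improvement, here is a minimal sketch of LK tracking feeding PnP. It assumes grayscale frames and the same K as above; track_and_pnp and its parameters are hypothetical names for illustration, not part of the code above.

#include <opencv2/opencv.hpp>
#include <vector>
using namespace cv;
using namespace std;

// Hypothetical helper: track the previous frame's points with LK optical flow
// instead of re-matching descriptors; a tracked point keeps its index, and
// therefore its triangulated depth, for free.
void track_and_pnp(const Mat& prev_gray, const Mat& cur_gray,
                   const vector<Point2f>& prev_pts,    // previous frame's 2D points
                   const vector<Point3f>& prev_pts_3d, // their 3D points (same order)
                   const Mat& K, Mat& R, Mat& t)
{
    vector<Point2f> cur_pts;
    vector<uchar> status;
    vector<float> err;
    calcOpticalFlowPyrLK(prev_gray, cur_gray, prev_pts, cur_pts, status, err);
    vector<Point3f> pts_3d;
    vector<Point2f> pts_2d;
    for (size_t i = 0; i < status.size(); ++i) {
        if (!status[i]) continue;         // track lost: drop the point
        pts_3d.push_back(prev_pts_3d[i]); // depth carried over by index
        pts_2d.push_back(cur_pts[i]);
    }
    Mat r;
    solvePnP(pts_3d, pts_2d, K, Mat(), r, t, false);
    Rodrigues(r, R); // rotation vector -> rotation matrix
}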