We continue with the tracking part from the previous post. Tracking brings together many computational algorithms from multiple-view geometry, so you will probably have to open the weighty *Multiple View Geometry in Computer Vision* to follow the author's source code. After the initialization described in the previous post, Tracking has to keep supplying poses to the mapping module. In visual SLAM in general, feature matching and pose estimation need a motion prior, and IMU preintegration is one such prior. Returning to the Track function, the author designed two tracking modes for the camera: the constant-velocity motion model and matching against a reference keyframe.
I. Continuous Localization
In a typical SLAM localization pipeline such as LeGO-LOAM, there are two localization stages, scan-to-scan and scan-to-map; things are similar here.
1. Frame-to-Frame Matching
The constant-velocity model applies when a velocity has already been computed at the previous frame: that velocity is simply carried forward to the next frame (how mVelocity is maintained is sketched after the function below).
bool Tracking::TrackWithMotionModel()
{
    ORBmatcher matcher(0.9,true);

    // Update last frame pose according to its reference keyframe
    // Create "visual odometry" points if in Localization Mode
    UpdateLastFrame();

    // Use the velocity as a prior: with images sampled at a constant rate it is the
    // transform from the frame before last to the last frame, extrapolated as straight-line motion
    mCurrentFrame.SetPose(mVelocity*mLastFrame.mTcw);

    fill(mCurrentFrame.mvpMapPoints.begin(),mCurrentFrame.mvpMapPoints.end(),static_cast<MapPoint*>(NULL));

    // Project points seen in previous frame
    int th;
    if(mSensor!=System::STEREO)
        th=15;
    else
        th=7;
    int nmatches = matcher.SearchByProjection(mCurrentFrame,mLastFrame,th,mSensor==System::MONOCULAR);

    // If few matches, uses a wider window search
    if(nmatches<20)
    {
        fill(mCurrentFrame.mvpMapPoints.begin(),mCurrentFrame.mvpMapPoints.end(),static_cast<MapPoint*>(NULL));
        nmatches = matcher.SearchByProjection(mCurrentFrame,mLastFrame,2*th,mSensor==System::MONOCULAR);
    }

    if(nmatches<20)
        return false;

    // Optimize frame pose with all matches
    Optimizer::PoseOptimization(&mCurrentFrame);

    // Discard outliers
    int nmatchesMap = 0;
    for(int i =0; i<mCurrentFrame.N; i++)
    {
        if(mCurrentFrame.mvpMapPoints[i])
        {
            if(mCurrentFrame.mvbOutlier[i])
            {
                MapPoint* pMP = mCurrentFrame.mvpMapPoints[i];

                mCurrentFrame.mvpMapPoints[i]=static_cast<MapPoint*>(NULL);
                mCurrentFrame.mvbOutlier[i]=false;
                pMP->mbTrackInView = false;
                pMP->mnLastFrameSeen = mCurrentFrame.mnId;
                nmatches--;
            }
            else if(mCurrentFrame.mvpMapPoints[i]->Observations()>0)
                nmatchesMap++;
        }
    }

    if(mbOnlyTracking)
    {
        mbVO = nmatchesMap<10;
        return nmatches>20;
    }

    return nmatchesMap>=10;
}
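Where does mVelocity come from? At the end of every successfully tracked frame, Track() caches the relative transform between the last two frames. The snippet below paraphrases that update from Tracking::Track() (member names as in ORB-SLAM2); treat it as a sketch rather than the verbatim source.

// Paraphrased from the end of Tracking::Track(): cache the last
// frame-to-frame motion as the constant-velocity prior.
cv::Mat LastTwc = cv::Mat::eye(4,4,CV_32F);   // T_wl, the inverse of the last frame's pose
mLastFrame.GetRotationInverse().copyTo(LastTwc.rowRange(0,3).colRange(0,3));
mLastFrame.GetCameraCenter().copyTo(LastTwc.rowRange(0,3).col(3));
mVelocity = mCurrentFrame.mTcw*LastTwc;       // T_cl = T_cw * T_wl

So SetPose(mVelocity*mLastFrame.mTcw) simply assumes this frame-to-frame motion stays constant for one more frame.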
PoseOptimization is the next refinement step. It re-optimizes a single frame's pose; strictly speaking, it adjusts the pose to best fit the observed feature points while judging whether the frame meets our requirements. The function will be used several times throughout this post.
int Optimizer::PoseOptimization(Frame *pFrame)
{
    // This function re-optimizes a single frame's pose
    g2o::SparseOptimizer optimizer;
    g2o::BlockSolver_6_3::LinearSolverType * linearSolver;

    linearSolver = new g2o::LinearSolverDense<g2o::BlockSolver_6_3::PoseMatrixType>();

    g2o::BlockSolver_6_3 * solver_ptr = new g2o::BlockSolver_6_3(linearSolver);

    // Define a Levenberg-Marquardt solver
    g2o::OptimizationAlgorithmLevenberg* solver = new g2o::OptimizationAlgorithmLevenberg(solver_ptr);
    optimizer.setAlgorithm(solver);

    int nInitialCorrespondences=0;

    // Set Frame vertex
    g2o::VertexSE3Expmap * vSE3 = new g2o::VertexSE3Expmap(); // each frame is one vertex
    vSE3->setEstimate(Converter::toSE3Quat(pFrame->mTcw));    // convert the current pose estimate into g2o's SE3 form
    vSE3->setId(0);         // the current frame is vertex 0
    vSE3->setFixed(false);  // we are optimizing this pose, so it is not fixed
    optimizer.addVertex(vSE3);

    // Set MapPoint vertices
    const int N = pFrame->N; // the frame's map points enter the optimization as measurements

    vector<g2o::EdgeSE3ProjectXYZOnlyPose*> vpEdgesMono; // the pixel projections of the map points observed in this frame serve as measurement constraints
    vector<size_t> vnIndexEdgeMono;
    vpEdgesMono.reserve(N);
    vnIndexEdgeMono.reserve(N);

    vector<g2o::EdgeStereoSE3ProjectXYZOnlyPose*> vpEdgesStereo;
    vector<size_t> vnIndexEdgeStereo;
    vpEdgesStereo.reserve(N);
    vnIndexEdgeStereo.reserve(N);

    // Huber kernel thresholds: square roots of the chi-square 95% quantiles
    // for 2-DOF (monocular) and 3-DOF (stereo) residuals
    const float deltaMono = sqrt(5.991);
    const float deltaStereo = sqrt(7.815);

    {
    unique_lock<mutex> lock(MapPoint::mGlobalMutex);

    for(int i=0; i<N; i++)
    {
        MapPoint* pMP = pFrame->mvpMapPoints[i];
        if(pMP)
        {
            // Monocular observation
            // Monocular and stereo are handled similarly; we focus on the monocular case first
            if(pFrame->mvuRight[i]<0)
            {
                nInitialCorrespondences++; // count the correspondences up front
                pFrame->mvbOutlier[i] = false;

                Eigen::Matrix<double,2,1> obs;
                const cv::KeyPoint &kpUn = pFrame->mvKeysUn[i];
                obs << kpUn.pt.x, kpUn.pt.y;

                // Use g2o's projection measurement model directly
                g2o::EdgeSE3ProjectXYZOnlyPose* e = new g2o::EdgeSE3ProjectXYZOnlyPose();

                e->setVertex(0, dynamic_cast<g2o::OptimizableGraph::Vertex*>(optimizer.vertex(0)));
                e->setMeasurement(obs); // the measurement is the map point's pixel location in this frame
                const float invSigma2 = pFrame->mvInvLevelSigma2[kpUn.octave];
                e->setInformation(Eigen::Matrix2d::Identity()*invSigma2);

                g2o::RobustKernelHuber* rk = new g2o::RobustKernelHuber;
                e->setRobustKernel(rk);
                rk->setDelta(deltaMono);

                e->fx = pFrame->fx;
                e->fy = pFrame->fy;
                e->cx = pFrame->cx;
                e->cy = pFrame->cy;
                cv::Mat Xw = pMP->GetWorldPos();
                e->Xw[0] = Xw.at<float>(0);
                e->Xw[1] = Xw.at<float>(1);
                e->Xw[2] = Xw.at<float>(2); // feed in the map point's xyz and the intrinsics; their projection forms the error against the measurement

                optimizer.addEdge(e); // add the measurement as an edge, completing the setup for this correspondence

                vpEdgesMono.push_back(e);
                vnIndexEdgeMono.push_back(i);
            }
            else  // Stereo observation
            {
                nInitialCorrespondences++;
                pFrame->mvbOutlier[i] = false;

                //SET EDGE
                Eigen::Matrix<double,3,1> obs;
                const cv::KeyPoint &kpUn = pFrame->mvKeysUn[i];
                const float &kp_ur = pFrame->mvuRight[i];
                obs << kpUn.pt.x, kpUn.pt.y, kp_ur;

                g2o::EdgeStereoSE3ProjectXYZOnlyPose* e = new g2o::EdgeStereoSE3ProjectXYZOnlyPose();

                e->setVertex(0, dynamic_cast<g2o::OptimizableGraph::Vertex*>(optimizer.vertex(0)));
                e->setMeasurement(obs);
                const float invSigma2 = pFrame->mvInvLevelSigma2[kpUn.octave];
                Eigen::Matrix3d Info = Eigen::Matrix3d::Identity()*invSigma2;
                e->setInformation(Info);

                g2o::RobustKernelHuber* rk = new g2o::RobustKernelHuber;
                e->setRobustKernel(rk);
                rk->setDelta(deltaStereo);

                e->fx = pFrame->fx;
                e->fy = pFrame->fy;
                e->cx = pFrame->cx;
                e->cy = pFrame->cy;
                e->bf = pFrame->mbf;
                cv::Mat Xw = pMP->GetWorldPos();
                e->Xw[0] = Xw.at<float>(0);
                e->Xw[1] = Xw.at<float>(1);
                e->Xw[2] = Xw.at<float>(2);

                optimizer.addEdge(e);

                vpEdgesStereo.push_back(e);
                vnIndexEdgeStereo.push_back(i);
            }
        }
    }
    }

    if(nInitialCorrespondences<3)
        return 0;

    // We perform 4 optimizations, after each optimization we classify observation as inlier/outlier
    // At the next optimization, outliers are not included, but at the end they can be classified as inliers again.
    const float chi2Mono[4]={5.991,5.991,5.991,5.991};
    const float chi2Stereo[4]={7.815,7.815,7.815,7.815};
    const int its[4]={10,10,10,10};

    int nBad=0;
    for(size_t it=0; it<4; it++)
    {
        // Run four successive optimizations, removing bad points (outliers) from the graph after each round
        vSE3->setEstimate(Converter::toSE3Quat(pFrame->mTcw));
        optimizer.initializeOptimization(0);
        optimizer.optimize(its[it]);

        nBad=0;
        for(size_t i=0, iend=vpEdgesMono.size(); i<iend; i++)
        {
            g2o::EdgeSE3ProjectXYZOnlyPose* e = vpEdgesMono[i];

            const size_t idx = vnIndexEdgeMono[i];

            if(pFrame->mvbOutlier[idx])
            {
                e->computeError();
            }

            const float chi2 = e->chi2();

            if(chi2>chi2Mono[it]) // flag edges whose error exceeds the threshold as outliers
            {
                pFrame->mvbOutlier[idx]=true;
                e->setLevel(1);
                nBad++;
            }
            else
            {
                pFrame->mvbOutlier[idx]=false;
                e->setLevel(0);
            }

            if(it==2)
                e->setRobustKernel(0);
        }

        for(size_t i=0, iend=vpEdgesStereo.size(); i<iend; i++)
        {
            g2o::EdgeStereoSE3ProjectXYZOnlyPose* e = vpEdgesStereo[i];

            const size_t idx = vnIndexEdgeStereo[i];

            if(pFrame->mvbOutlier[idx])
            {
                e->computeError();
            }

            const float chi2 = e->chi2();

            if(chi2>chi2Stereo[it])
            {
                pFrame->mvbOutlier[idx]=true;
                e->setLevel(1);
                nBad++;
            }
            else
            {
                e->setLevel(0);
                pFrame->mvbOutlier[idx]=false;
            }

            if(it==2)
                e->setRobustKernel(0);
        }

        if(optimizer.edges().size()<10) // stop once too many points have been discarded
            break;
    }

    // Recover optimized pose and return number of inliers
    g2o::VertexSE3Expmap* vSE3_recov = static_cast<g2o::VertexSE3Expmap*>(optimizer.vertex(0));
    g2o::SE3Quat SE3quat_recov = vSE3_recov->estimate();
    cv::Mat pose = Converter::toCvMat(SE3quat_recov);
    pFrame->SetPose(pose); // the optimized pose

    return nInitialCorrespondences-nBad;
}
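To make the edges above concrete: EdgeSE3ProjectXYZOnlyPose minimizes the reprojection error between the measured keypoint and the map point projected through the pose being optimized. The hypothetical helper below (not part of ORB-SLAM2) computes that residual for a single monocular observation:

#include <Eigen/Core>
#include <Eigen/Geometry>

// Hypothetical helper: the monocular residual e = z - h(x) that each
// EdgeSE3ProjectXYZOnlyPose contributes. Tcw: the pose being optimized,
// Xw: the (fixed) map point in world coordinates, obs: the measured pixel.
Eigen::Vector2d reprojError(const Eigen::Isometry3d& Tcw,
                            const Eigen::Vector3d& Xw,
                            const Eigen::Vector2d& obs,
                            double fx, double fy, double cx, double cy)
{
    const Eigen::Vector3d Xc = Tcw * Xw;          // world -> camera frame
    const double u = fx * Xc.x() / Xc.z() + cx;   // pinhole projection
    const double v = fy * Xc.y() / Xc.z() + cy;
    return obs - Eigen::Vector2d(u, v);           // residual fed to the Huber kernel
}

The information matrix scales this residual by invSigma2, so keypoints extracted at coarser pyramid levels (larger octave, hence larger sigma) count for less in the optimization.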
2. Frame-to-Map Matching
In the normal case we optimize once more against the recent local map. TrackLocalMap takes the pose from the first round of optimization, finds keyframes near it in the map, uses that pose to determine covisibility relations with the other keyframes, and matches the map points those frames might also see, so the optimization covers a wider scope (the covisibility counting inside UpdateLocalMap is sketched after the function below).
bool Tracking::TrackLocalMap()
{
    // We have an estimation of the camera pose and some map points tracked in the frame.
    // We retrieve the local map and try to find matches to points in the local map.

    UpdateLocalMap();

    SearchLocalPoints();

    // Optimize Pose
    Optimizer::PoseOptimization(&mCurrentFrame);
    mnMatchesInliers = 0;

    // Update MapPoints Statistics
    for(int i=0; i<mCurrentFrame.N; i++)
    {
        if(mCurrentFrame.mvpMapPoints[i])
        {
            if(!mCurrentFrame.mvbOutlier[i])
            {
                mCurrentFrame.mvpMapPoints[i]->IncreaseFound();
                if(!mbOnlyTracking)
                {
                    if(mCurrentFrame.mvpMapPoints[i]->Observations()>0)
                        mnMatchesInliers++;
                }
                else
                    mnMatchesInliers++;
            }
            else if(mSensor==System::STEREO)
                mCurrentFrame.mvpMapPoints[i] = static_cast<MapPoint*>(NULL);
        }
    }

    // Decide if the tracking was succesful
    // More restrictive if there was a relocalization recently
    if(mCurrentFrame.mnId<mnLastRelocFrameId+mMaxFrames && mnMatchesInliers<50)
        return false;

    if(mnMatchesInliers<30)
        return false;
    else
        return true;
}
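The covisibility counting at the heart of UpdateLocalMap (via UpdateLocalKeyFrames) can be sketched as follows, paraphrasing the ORB-SLAM2 source: each map point seen by the current frame votes for every keyframe that also observes it, and the best-voted keyframes plus their covisibility neighbors form the local map.

// Paraphrased from Tracking::UpdateLocalKeyFrames: vote counting over the
// keyframes that share observations with the current frame.
map<KeyFrame*,int> keyframeCounter;
for(int i=0; i<mCurrentFrame.N; i++)
{
    if(mCurrentFrame.mvpMapPoints[i])
    {
        MapPoint* pMP = mCurrentFrame.mvpMapPoints[i];
        if(!pMP->isBad())
        {
            const map<KeyFrame*,size_t> observations = pMP->GetObservations();
            for(map<KeyFrame*,size_t>::const_iterator it=observations.begin(), itend=observations.end(); it!=itend; it++)
                keyframeCounter[it->first]++; // one vote per shared map point
        }
    }
}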
II. Relocalization
Relocalization is triggered when mState becomes LOST. Here the pose is recovered mainly with PnP (EPnP): the current frame's bag-of-words vector is first matched against all keyframes to pick out suitable candidates, which are then optimized; as soon as the current frame is judged to match some keyframe, the traversal ends immediately and the pose jumps to the neighborhood of that keyframe.
bool Tracking::Relocalization()
{
    // Compute Bag of Words Vector
    mCurrentFrame.ComputeBoW(); // compute the bag-of-words representation

    // Relocalization is performed when tracking is lost
    // Track Lost: Query KeyFrame Database for keyframe candidates for relocalisation
    vector<KeyFrame*> vpCandidateKFs = mpKeyFrameDB->DetectRelocalizationCandidates(&mCurrentFrame); // traverse all keyframes and collect those scoring above the threshold for later use

    if(vpCandidateKFs.empty())
        return false;

    const int nKFs = vpCandidateKFs.size();

    // We perform first an ORB matching with each candidate
    // If enough matches are found we setup a PnP solver
    ORBmatcher matcher(0.75,true);

    vector<PnPsolver*> vpPnPsolvers;
    vpPnPsolvers.resize(nKFs);

    vector<vector<MapPoint*> > vvpMapPointMatches;
    vvpMapPointMatches.resize(nKFs);

    vector<bool> vbDiscarded;
    vbDiscarded.resize(nKFs); // be ready for both outcomes: discarding a candidate or continuing to compute with it

    int nCandidates=0;

    for(int i=0; i<nKFs; i++)
    {
        KeyFrame* pKF = vpCandidateKFs[i];
        if(pKF->isBad())
            vbDiscarded[i] = true;
        else
        {
            int nmatches = matcher.SearchByBoW(pKF,mCurrentFrame,vvpMapPointMatches[i]);
            if(nmatches<15)
            {
                vbDiscarded[i] = true;
                continue;
            }
            else
            {
                PnPsolver* pSolver = new PnPsolver(mCurrentFrame,vvpMapPointMatches[i]);
                pSolver->SetRansacParameters(0.99,10,300,4,0.5,5.991);
                vpPnPsolvers[i] = pSolver;
                nCandidates++;
            }
        }
    }

    // Alternatively perform some iterations of P4P RANSAC
    // Until we found a camera pose supported by enough inliers
    bool bMatch = false;
    ORBmatcher matcher2(0.9,true);

    while(nCandidates>0 && !bMatch)
    {
        for(int i=0; i<nKFs; i++)
        {
            if(vbDiscarded[i])
                continue;

            // Perform 5 Ransac Iterations
            vector<bool> vbInliers;
            int nInliers;
            bool bNoMore;

            PnPsolver* pSolver = vpPnPsolvers[i];
            // The pose is computed with EPnP; since I only know the brute-force method, I will skip the detailed computation for now...
            cv::Mat Tcw = pSolver->iterate(5,bNoMore,vbInliers,nInliers);

            // If Ransac reachs max. iterations discard keyframe
            if(bNoMore)
            {
                vbDiscarded[i]=true;
                nCandidates--;
            }

            // If a Camera Pose is computed, optimize
            if(!Tcw.empty())
            {
                Tcw.copyTo(mCurrentFrame.mTcw);

                set<MapPoint*> sFound; // a container for the map points that may fit, awaiting further optimization

                const int np = vbInliers.size();

                for(int j=0; j<np; j++)
                {
                    if(vbInliers[j])
                    {
                        mCurrentFrame.mvpMapPoints[j]=vvpMapPointMatches[i][j];
                        sFound.insert(vvpMapPointMatches[i][j]);
                    }
                    else
                        mCurrentFrame.mvpMapPoints[j]=NULL;
                }

                int nGood = Optimizer::PoseOptimization(&mCurrentFrame);

                if(nGood<10)
                    continue;

                for(int io =0; io<mCurrentFrame.N; io++)
                    if(mCurrentFrame.mvbOutlier[io])
                        mCurrentFrame.mvpMapPoints[io]=static_cast<MapPoint*>(NULL);

                // If few inliers, search by projection in a coarse window and optimize again
                // For candidates still worth salvaging, project repeatedly and re-search for matching feature points
                if(nGood<50)
                {
                    int nadditional = matcher2.SearchByProjection(mCurrentFrame,vpCandidateKFs[i],sFound,10,100);

                    if(nadditional+nGood>=50)
                    {
                        nGood = Optimizer::PoseOptimization(&mCurrentFrame);

                        // If many inliers but still not enough, search by projection again in a narrower window
                        // the camera has been already optimized with many points
                        if(nGood>30 && nGood<50)
                        {
                            sFound.clear();
                            for(int ip =0; ip<mCurrentFrame.N; ip++)
                                if(mCurrentFrame.mvpMapPoints[ip])
                                    sFound.insert(mCurrentFrame.mvpMapPoints[ip]);
                            nadditional = matcher2.SearchByProjection(mCurrentFrame,vpCandidateKFs[i],sFound,3,64);

                            // Final optimization
                            if(nGood+nadditional>=50)
                            {
                                nGood = Optimizer::PoseOptimization(&mCurrentFrame);

                                for(int io =0; io<mCurrentFrame.N; io++)
                                    if(mCurrentFrame.mvbOutlier[io])
                                        mCurrentFrame.mvpMapPoints[io]=NULL;
                            }
                        }
                    }
                }

                // If the pose is supported by enough inliers stop ransacs and continue
                if(nGood>=50)
                {
                    bMatch = true;
                    break;
                }
            }
        }
    }

    if(!bMatch)
    {
        return false;
    }
    else
    {
        mnLastRelocFrameId = mCurrentFrame.mnId;
        return true;
    }
}
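For readers who do not want to step through the EPnP math inside PnPsolver::iterate, the hypothetical helper below shows an equivalent EPnP-inside-RANSAC call using OpenCV's cv::solvePnPRansac. This is an illustration only: ORB-SLAM2 ships its own EPnP and RANSAC implementation, and the mapping of the parameter values to the SetRansacParameters call above is approximate.

#include <opencv2/calib3d.hpp>
#include <vector>

// Hypothetical helper illustrating the EPnP + RANSAC step with OpenCV.
// pts3d: matched map point positions; pts2d: the corresponding keypoints
// (assumed undistorted); K: the camera intrinsics matrix.
bool solvePoseEPnP(const std::vector<cv::Point3f>& pts3d,
                   const std::vector<cv::Point2f>& pts2d,
                   const cv::Mat& K,
                   cv::Mat& rvec, cv::Mat& tvec,
                   std::vector<int>& inliers)
{
    return cv::solvePnPRansac(pts3d, pts2d, K, cv::Mat(), rvec, tvec,
                              false,               // no initial pose guess
                              300,                 // max RANSAC iterations (cf. SetRansacParameters)
                              4.0f,                // reprojection error threshold in pixels
                              0.99,                // RANSAC confidence (cf. SetRansacParameters)
                              inliers,
                              cv::SOLVEPNP_EPNP);  // use EPnP as the minimal solver
}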
All in all, the Tracking part builds the camera pose estimate on top of feature matching, while also supplying raw map points and keyframes to the mapping and back-end modules; it is the backbone of the whole ORB-SLAM system. Once you are familiar with it, the remaining parts fall into place.