在这一章节中,针对orb-slam(version1)中关于orb特征的部分进行详细的分析。
在上一章节中,我们知道orb提取的主要过程是:1.FAST提取特征点;2.计算特征点方向;3.计算特征点描述符。
1.FAST提取特征点
orb-slam中是采用这样的方法提取N个FAST特征点:假设期望提取的orb特征点个数是一定的,设为N,首先设定阈值以提取大于N个的FAST特征点,然后根据特征点的响应值排序筛选保留前N个特征点。
如何确定金字塔每一层应该提取的特征点个数?
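orb-slam中是在 $L=8$ 个层上分别提取FAST特征,相邻层之间相差缩放系数 $1.2$。记相邻两层特征点个数之比为 $k$,如果期望提取的总的FAST特征为 $N$,那么分配到第 $i$ 层的特征点个数为 $N \cdot k^{i-1} \cdot \dfrac{1-k}{1-k^{L}}$。推导公式很简单,假设第一层分配的特征点个数为 $N \cdot p$,即 $p$ 表示第一层的特征点个数占所有特征点个数的比例,那么第二层至第 $L$ 层的特征点个数分别为 $N p k$,$N p k^{2}$,…,$N p k^{L-1}$,那么有 $$N p \left(1 + k + k^{2} + \dots + k^{L-1}\right) = N p \cdot \frac{1-k^{L}}{1-k} = N ,$$ 解得 $p = \dfrac{1-k}{1-k^{L}}$,于是第 $i$ 层的特征点个数为 $N p k^{i-1} = N \cdot k^{i-1} \cdot \dfrac{1-k}{1-k^{L}}$。(注意:orb-slam源码中 $k$ 实际取缩放系数的倒数,即 $k = 1/1.2$,因此层数越高、图像越小,分配到的特征点越少。)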
如何提取期望数量的FAST特征点?
orb-slam中为了在每一层上提取到期望数量的FAST特征,首先在每一层上提取过量的FAST特征,然后根据特征点的响应值筛选保留前若干个特征点。对于每一层,如果当前层需要提取的特征点数量为 n (筛选前),那么为了使得某些密集区域的特征点不过于密集,orb-slam将整个图像区域划分为若干个大小相同的cell,并在每个cell上按照FAST阈值提取FAST特征,此时各个cell上提取到的FAST特征总数通常是过量的。接下来,orb-slam采用算法对过量的特征点进行筛选过滤,基本原则是:检测数量不超过平均配额的cell保留其检测到的全部特征点,并把没有用完的配额重新分配给其余cell;检测数量过多的cell则按响应值优先删减特征点,只保留其配额内的部分。具体的实现过程建议查看源码,下面的源码中对具体过程有具体的说明。
主要源码:
void ORBextractor::ComputeKeyPoints(vector<vector >& allKeypoints)
{
allKeypoints.resize(nlevels);
float imageRatio = (float)mvImagePyramid[0].cols/mvImagePyramid[0].rows;
for (int level = 0; level < nlevels; ++level)/*for each scale*/
{
const int nDesiredFeatures = mnFeaturesPerLevel[level];/*the desired key points number on current scale*/
/*columns and rows number for current cell*/
const int levelCols = sqrt((float)nDesiredFeatures/(5*imageRatio));
const int levelRows = imageRatio*levelCols;
const int minBorderX = EDGE_THRESHOLD;
const int minBorderY = minBorderX;
const int maxBorderX = mvImagePyramid[level].cols-EDGE_THRESHOLD;
const int maxBorderY = mvImagePyramid[level].rows-EDGE_THRESHOLD;
const int W = maxBorderX - minBorderX;
const int H = maxBorderY - minBorderY;
const int cellW = ceil((float)W/levelCols);/*each cell width*/
const int cellH = ceil((float)H/levelRows);/*each cell height*/
const int nCells = levelRows*levelCols;/*total cell number*/
const int nfeaturesCell = ceil((float)nDesiredFeatures/nCells);/*averaged key points number in one cell*/
vector<vector<vector > > cellKeyPoints(levelRows, vector<vector >(levelCols));
vector<vector<int> > nToRetain(levelRows,vector<int>(levelCols));
vector<vector<int> > nTotal(levelRows,vector<int>(levelCols));
vector<vector<bool> > bNoMore(levelRows,vector<bool>(levelCols,false));
vector<int> iniXCol(levelCols);
vector<int> iniYRow(levelRows);
int nNoMore = 0;
int nToDistribute = 0;
float hY = cellH + 6;/*current cell's y range(add 3 pixels as border)*/
/*extract FAST key points on each cell image*/
int TotalKyPointsNum = 0;
for(int i=0; iconst float iniY = minBorderY + i*cellH - 3;/*initial y coordinate position for current cell*/
iniYRow[i] = iniY;
if(i == levelRows-1)
{
hY = maxBorderY+3-iniY;
if(hY<=0)
continue;
}
float hX = cellW + 6;/*current cell's x range(add 3 pixels as border)*/
for(int j=0; jfloat iniX;
if(i==0)
{
iniX = minBorderX + j*cellW - 3;/*initial x coordinate position for current cell*/
iniXCol[j] = iniX;
}
else
{
iniX = iniXCol[j];
}
if(j == levelCols-1)
{
hX = maxBorderX+3-iniX;
if(hX<=0)
continue;
}
/*get current image patch for cell (i, j)*/
Mat cellImage = mvImagePyramid[level].rowRange(iniY,iniY+hY).colRange(iniX,iniX+hX);
Mat cellMask;
if(!mvMaskPyramid[level].empty())
cellMask = cv::Mat(mvMaskPyramid[level],Rect(iniX,iniY,hX,hY));
cellKeyPoints[i][j].reserve(nfeaturesCell*5);
/*FAST feature extractor:cellImage is the cell patch image, fastTh is the threshold, all extracted key points are stored in cellKeyPoints variable*/
FAST(cellImage,cellKeyPoints[i][j],fastTh,true);
if(cellKeyPoints[i][j].size()<=3)/*if too less key points are detected, then improve the threshold in order to let more key points in*/
{
cellKeyPoints[i][j].clear();
FAST(cellImage,cellKeyPoints[i][j],7,true);
}
if( scoreType == ORB::HARRIS_SCORE )
{
// Compute the Harris cornerness
HarrisResponses(cellImage,cellKeyPoints[i][j], 7, HARRIS_K);
}
const int nKeys = cellKeyPoints[i][j].size();
nTotal[i][j] = nKeys;
TotalKyPointsNum += nKeys;
/*this is for the key points culling algorithm(below)*/
if(nKeys>nfeaturesCell)
{
nToRetain[i][j] = nfeaturesCell;
bNoMore[i][j] = false;
}
else
{
nToRetain[i][j] = nKeys;
nToDistribute += nfeaturesCell-nKeys;
bNoMore[i][j] = true;
nNoMore++;
}
}
}
// Retain by score
/*this is how to decide the retained key points number in each cell.
this code aims to this case: for some cell, this algorithm may extract much more FAST key points, but others may be less,
we hope to extract around 1000(set value) key points for input image, so when much more FAST key points are extracted,
we use the below algorithm to reject extra key points and decide the number of key points each cell should retain.
the variable "nToRetain" means the retained key points number in each cell.
*/
while(nToDistribute>0 && nNoMoreint nNewFeaturesCell = nfeaturesCell + ceil((float)nToDistribute/(nCells-nNoMore));
nToDistribute = 0;
for(int i=0; ifor(int j=0; jif(!bNoMore[i][j])
{
if(nTotal[i][j]>nNewFeaturesCell)
{
nToRetain[i][j] = nNewFeaturesCell;
bNoMore[i][j] = false;
}
else
{
nToRetain[i][j] = nTotal[i][j];
nToDistribute += nNewFeaturesCell-nTotal[i][j];
bNoMore[i][j] = true;
nNoMore++;
}
}
}
}
}
vector & keypoints = allKeypoints[level];/*reference to the keypoints vector on ith pyramid level*/
keypoints.reserve(nDesiredFeatures*2);
const int scaledPatchSize = PATCH_SIZE*mvScaleFactor[level];
// Retain by score and transform coordinates
/*retain the first nToRetain[i][j] key points and transform the key points position from cell patch image to original whole image*/
int afterKeyPointsNum = 0;
for(int i=0; ifor(int j=0; jvector &keysCell = cellKeyPoints[i][j];
KeyPointsFilter::retainBest(keysCell,nToRetain[i][j]);//retain the frist nToRetain[i][j] key points
if((int)keysCell.size()>nToRetain[i][j])
keysCell.resize(nToRetain[i][j]);
for(size_t k=0, kend=keysCell.size(); k/*the actual key points position on the original whole image*/
keysCell[k].pt.x+=iniXCol[j];
keysCell[k].pt.y+=iniYRow[i];
keysCell[k].octave=level;
keysCell[k].size = scaledPatchSize;
keypoints.push_back(keysCell[k]);
}
}
}
/*if still more features are retained, retain the top N(here set 1000) according to the response intensity*/
if((int)keypoints.size()>nDesiredFeatures)
{
KeyPointsFilter::retainBest(keypoints,nDesiredFeatures);/*retain the top N*/
keypoints.resize(nDesiredFeatures);
}
}
// compute key points' orientations
for (int level = 0; level < nlevels; ++level)
computeOrientation(mvImagePyramid[level], allKeypoints[level], umax);
}
2.计算特征点方向
orb特征计算每个特征点方向的方法已经在前一章节中进行了说明,这里直接给出源码并加以注释:
/**
 * ORB key point orientation: direction of the intensity centroid of the
 * circular patch centred on the key point.
 *
 * Accumulates the first-order image moments m_10 = sum(u * I) and
 * m_01 = sum(v * I) over the patch (u = column offset, v = row offset) and
 * returns fastAtan2(m_01, m_10).
 *
 * No bounds checking is done: pixels are read directly at offsets up to
 * HALF_PATCH_SIZE around pt, so the caller must make sure the whole patch
 * lies inside the image.
 *
 * @param image  single-channel 8-bit image (accessed as uchar)
 * @param pt     key point position; rounded to the nearest pixel
 * @param u_max  u_max[v] is the half-width of the patch on row offset v
 * @return       angle as returned by fastAtan2
 */
static float IC_Angle(const Mat& image, Point2f pt, const vector<int> & u_max)
{
    int m_01 = 0, m_10 = 0;

    /* pointer to the pixel at the key point; neighbours are addressed relative to it */
    const uchar* center = &image.at<uchar>(cvRound(pt.y), cvRound(pt.x));

    // Treat the center line differently, v=0 (v is the row offset, u is the column offset).
    // With v = 0 this row contributes to m_10 only.
    for (int u = -HALF_PATCH_SIZE; u <= HALF_PATCH_SIZE; ++u)
        m_10 += u * center[u]; /* u * I(0, u) */

    // Go line by line in the circular patch
    int step = (int)image.step1();
    for (int v = 1; v <= HALF_PATCH_SIZE; ++v)
    {
        // Proceed over the two lines at row offsets +v and -v together
        int v_sum = 0;
        int d = u_max[v];
        for (int u = -d; u <= d; ++u)
        {
            int val_plus = center[u + v*step], val_minus = center[u - v*step];
            // m_01: row +v contributes (+v)*I and row -v contributes (-v)*I,
            // so with the positive loop variable v the two pixels are subtracted.
            v_sum += (val_plus - val_minus);
            // m_10: both rows share the same signed u, so the two pixels are added.
            m_10 += u * (val_plus + val_minus);
        }
        m_01 += v * v_sum;
    }

    return fastAtan2((float)m_01, (float)m_10);
}
3.计算特征点描述符
orb-slam中的orb描述符的具体计算过程在上一章节已经介绍,这里针对其细节作进一步的描述。根据上一章节描述的取比较点对的方式取得256个比较点对,每一个比较点对包含两个点坐标,比较这两个坐标上对应像素的灰度值大小可以得到一个描述符位,所以最终得到的orb描述符是256bit,可以用256/8=32个字节(uchar类型)进行存储,所以每个特征点可以用一个32字节的空间来存储其描述符。
/**
 * Compute the 256-bit steered-BRIEF (ORB) descriptor of one key point.
 *
 * @param kpt      key point; its position (rounded to the nearest pixel) and
 *                 angle are used
 * @param img      single-channel 8-bit image (accessed as uchar)
 * @param pattern  comparison test positions; 32 * 16 = 512 points are read,
 *                 consecutive pairs form one intensity comparison
 * @param desc     output buffer; 32 bytes are written
 *
 * No bounds checking is done on the sampled pixels; the caller must make sure
 * the rotated pattern stays inside the image.
 */
static void computeOrbDescriptor(const KeyPoint& kpt,
                                 const Mat& img, const Point* pattern,
                                 uchar* desc)
{
    /* key point orientation; factorPI presumably converts degrees to radians */
    float angle = (float)kpt.angle*factorPI;
    float a = (float)cos(angle), b = (float)sin(angle);

    /* pointer to the pixel at the key point; samples are addressed relative to it */
    const uchar* center = &img.at<uchar>(cvRound(kpt.pt.y), cvRound(kpt.pt.x));
    const int step = (int)img.step;

    /* steered BRIEF: rotate the pattern point by the key point orientation
       (row offset = x*sin + y*cos, column offset = x*cos - y*sin) and read that pixel */
    #define GET_VALUE(idx) \
        center[cvRound(pattern[idx].x*b + pattern[idx].y*a)*step + \
               cvRound(pattern[idx].x*a - pattern[idx].y*b)]

    /* 32 descriptor bytes; each iteration consumes
       16 pattern points = 8 comparisons = 8 bits = 1 byte */
    for (int i = 0; i < 32; ++i, pattern += 16)
    {
        int t0, t1, val;
        t0 = GET_VALUE(0); t1 = GET_VALUE(1);
        val = t0 < t1;                 // bit 0
        t0 = GET_VALUE(2); t1 = GET_VALUE(3);
        val |= (t0 < t1) << 1;         // comparison result (0 or 1) shifted to bit 1 and OR-ed in
        t0 = GET_VALUE(4); t1 = GET_VALUE(5);
        val |= (t0 < t1) << 2;
        t0 = GET_VALUE(6); t1 = GET_VALUE(7);
        val |= (t0 < t1) << 3;
        t0 = GET_VALUE(8); t1 = GET_VALUE(9);
        val |= (t0 < t1) << 4;
        t0 = GET_VALUE(10); t1 = GET_VALUE(11);
        val |= (t0 < t1) << 5;
        t0 = GET_VALUE(12); t1 = GET_VALUE(13);
        val |= (t0 < t1) << 6;
        t0 = GET_VALUE(14); t1 = GET_VALUE(15);
        val |= (t0 < t1) << 7;

        desc[i] = (uchar)val;
    }

    #undef GET_VALUE // the macro is only meaningful inside this function
}