While working through the OpenCV source code of the LK optical-flow implementation I could not find much reference material, so I am posting my own understanding and reasoning here, both as a record and as something others can consult. If you read the code differently or have other ideas, feel free to comment and discuss~
The Lucas-Kanade (LK) method goes back to Lucas and Kanade; the pyramidal, feature-based tracking implementation used by OpenCV follows Jean-Yves Bouguet's description. It rests on three assumptions: brightness constancy, temporal persistence (small motion between frames), and spatial coherence. Under these assumptions, minimizing the matching error of a feature point between two frames reduces to an iterative scheme built from image derivatives and differences.
Before reading the source you should already know the mathematical derivation of the LK method; otherwise the code is hard to follow.
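As a quick refresher on the math the code implements (the standard LK derivation, as in Bouguet's notes): brightness constancy plus a first-order Taylor expansion turns the error minimization for each feature point into a 2x2 linear system,

$$ G\,d = b, \qquad G = \sum_{(x,y)\in W} \begin{bmatrix} I_x^2 & I_x I_y \\ I_x I_y & I_y^2 \end{bmatrix}, \qquad b = \sum_{(x,y)\in W} \delta I \begin{bmatrix} I_x \\ I_y \end{bmatrix}, $$

where $W$ is the window around the point, $I_x$ and $I_y$ are the spatial derivatives of the previous image, and $\delta I = I - J$ is the gray-level difference between the two frames inside the window. The variables A11, A12, A22, b1, b2 in the source code below are exactly these sums.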
// (earlier in the full sample, a VideoCapture `capture` is opened and a
// vector<Scalar> `colors` holding 100 random colors is filled in)
Mat old_frame, old_gray;
vector<Point2f> p0, p1;
// Take first frame and find corners in it
capture >> old_frame;
cvtColor(old_frame, old_gray, COLOR_BGR2GRAY);
//detect feature points in the first frame according to the parameter settings
goodFeaturesToTrack(old_gray, p0, 100, 0.3, 7, Mat(), 7, false, 0.04);
// Create a mask image for drawing purposes
Mat mask = Mat::zeros(old_frame.size(), old_frame.type());
while(true){
Mat frame, frame_gray;
capture >> frame;
if (frame.empty())
break;
cvtColor(frame, frame_gray, COLOR_BGR2GRAY);
// calculate optical flow
vector<uchar> status;
vector<float> err;
//termination criteria for the iterative search: 10 is the maximum iteration count, 0.03 the convergence epsilon
TermCriteria criteria = TermCriteria((TermCriteria::COUNT) + (TermCriteria::EPS), 10, 0.03);
//the pyramidal LK call: previous frame, next frame, points in the previous frame, tracked points out, per-point status and error, the local window size, etc.
calcOpticalFlowPyrLK(old_gray, frame_gray, p0, p1, status, err, Size(15,15), 2, criteria);
vector<Point2f> good_new;
for(uint i = 0; i < p0.size(); i++)
{
// Select good points
if(status[i] == 1) {//draw only if the point's status is 1 (successfully tracked)
good_new.push_back(p1[i]);
// draw the tracks
line(mask,p1[i], p0[i], colors[i], 2);
circle(frame, p1[i], 5, colors[i], -1);
}
}
Mat img;
add(frame, mask, img);//overlay the tracks drawn in mask onto frame
imshow("Frame", img);
int keyboard = waitKey(30);
if (keyboard == 'q' || keyboard == 27)
break;
// Now update the previous frame and previous points
old_gray = frame_gray.clone();
p0 = good_new;
}
The above is the official OpenCV sample, taken from: d:\opencv\opencv3.4.7\opencv3.4.7\samples\cpp\tutorial_code\video\optical_flow\optical_flow.cpp
The two central calls in the sample are goodFeaturesToTrack and calcOpticalFlowPyrLK: goodFeaturesToTrack finds the feature points, while calcOpticalFlowPyrLK is the body of the optical-flow method. I will start the code walkthrough from calcOpticalFlowPyrLK.
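For reference, here is the goodFeaturesToTrack call from the sample with every parameter annotated (parameter names follow the OpenCV documentation):

// goodFeaturesToTrack(image, corners, maxCorners, qualityLevel, minDistance,
//                     mask, blockSize, useHarrisDetector, k)
goodFeaturesToTrack(old_gray, p0,
                    100,   // maxCorners: keep at most 100 corners
                    0.3,   // qualityLevel: drop corners weaker than 30% of the strongest
                    7,     // minDistance: minimum pixel distance between corners
                    Mat(), // mask: empty, search the whole image
                    7,     // blockSize: neighborhood size for the corner measure
                    false, // useHarrisDetector: false selects Shi-Tomasi (min eigenvalue)
                    0.04); // k: Harris free parameter (ignored when useHarrisDetector is false)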
2. The calcOpticalFlowPyrLK function in detail
void cv::calcOpticalFlowPyrLK( InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status, OutputArray _err,
Size winSize, int maxLevel,
TermCriteria criteria,
int flags, double minEigThreshold )
{
//set up the sparse pyramidal LK object. winSize: the local window; maxLevel: number of pyramid levels;
//criteria: iteration termination criteria; flags; minEigThreshold: minimum-eigenvalue threshold,
//below which G is considered non-invertible and the feature point cannot be tracked
Ptr<SparsePyrLKOpticalFlow> optflow = cv::SparsePyrLKOpticalFlow::create(winSize,maxLevel,criteria,flags,minEigThreshold);
//given these settings, compute where each feature point is tracked to in the next frame
optflow->calc(_prevImg,_nextImg,_prevPts,_nextPts,_status,_err);
//_prevImg: previous frame, _nextImg: next frame
//_prevPts: feature points in _prevImg, _nextPts: where calc writes the tracked points
//_status: per-point status (could the point be found?), _err: a per-point similarity/error measure
}
The actual optical-flow computation lives one level down, in:
void SparsePyrLKOpticalFlowImpl::calc( InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status, OutputArray _err)
{
CV_INSTRUMENT_REGION();
CV_OCL_RUN(ocl::isOpenCLActivated() &&
(_prevImg.isUMat() || _nextImg.isUMat()) &&
ocl::Image2D::isFormatSupported(CV_32F, 1, false),
ocl_calcOpticalFlowPyrLK(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err))
// Disabled due to bad accuracy
CV_OVX_RUN(false,
openvx_pyrlk(_prevImg, _nextImg, _prevPts, _nextPts, _status, _err))
Mat prevPtsMat = _prevPts.getMat();
const int derivDepth = DataType<cv::detail::deriv_type>::depth;
CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
int level=0, i, npoints;
CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 );
//bail out early if there are no feature points
if( npoints == 0 )
{
_nextPts.release();
_status.release();
_err.release();
return;
}
//variable initialization: allocate the outputs
if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
_nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);
Mat nextPtsMat = _nextPts.getMat();
CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints );
const Point2f* prevPts = prevPtsMat.ptr<Point2f>();
Point2f* nextPts = nextPtsMat.ptr<Point2f>();
_status.create((int)npoints, 1, CV_8U, -1, true);
Mat statusMat = _status.getMat(), errMat;
CV_Assert( statusMat.isContinuous() );
uchar* status = statusMat.ptr();
float* err = 0;
//initialize every status flag to 1 (point assumed trackable)
for( i = 0; i < npoints; i++ )
status[i] = true;
//initialize the err output if the caller requested it
if( _err.needed() )
{
_err.create((int)npoints, 1, CV_32F, -1, true);
errMat = _err.getMat();
CV_Assert( errMat.isContinuous() );
err = errMat.ptr<float>();
}
Looking at the content, this first part covers two things: assertions that validate the input arguments, and initialization of the output variables. The code itself is straightforward to read.
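One detail worth singling out is checkVector, which both validates the layout of the point array and returns the point count. A minimal standalone illustration (my own example, not from the source):

// checkVector(2, CV_32F, true) returns N when the Mat can be viewed as a
// continuous 1xN or Nx1 array of 2-channel CV_32F elements, and -1 otherwise
std::vector<cv::Point2f> pts = { {1.f, 2.f}, {3.f, 4.f} };
cv::Mat m(pts);                            // Nx1, type CV_32FC2
int n  = m.checkVector(2, CV_32F, true);   // n == 2
cv::Mat bad(3, 3, CV_32FC1);
int n2 = bad.checkVector(2, CV_32F, true); // n2 == -1: not a list of 2-element points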
//prevPyr: pyramid of the previous frame; nextPyr: pyramid of the next frame
std::vector<Mat> prevPyr, nextPyr;
int levels1 = -1;
int lvlStep1 = 1;
int levels2 = -1;
int lvlStep2 = 1;
//if _prevImg is already a vector of pyramid images, it is handled here:
//check that the pyramid levels carry the required border padding, and clamp the pyramid level count
if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT)
{
_prevImg.getMatVector(prevPyr);
levels1 = int(prevPyr.size()) - 1;
CV_Assert(levels1 >= 0);
if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth)
{
lvlStep1 = 2;
levels1 /= 2;
}
// ensure that pyramid has required padding
if(levels1 > 0)
{
Size fullSize;
Point ofs;
prevPyr[lvlStep1].locateROI(fullSize, ofs);
CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
&& ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width
&& ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height);
}
if(levels1 < maxLevel)
maxLevel = levels1;
}
//_nextImg receives exactly the same treatment; see the code below for the details
if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT)
{
_nextImg.getMatVector(nextPyr);
levels2 = int(nextPyr.size()) - 1;
CV_Assert(levels2 >= 0);
if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth)
{
lvlStep2 = 2;
levels2 /= 2;
}
// ensure that pyramid has required padding
if(levels2 > 0)
{
Size fullSize;
Point ofs;
nextPyr[lvlStep2].locateROI(fullSize, ofs);
CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
&& ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width
&& ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height);
}
if(levels2 < maxLevel)
maxLevel = levels2;
}
//if the inputs are single frames, build image pyramids for _prevImg and _nextImg, downsampling level by level up to maxLevel
if (levels1 < 0)
maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false);
if (levels2 < 0)
maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false);
This part obtains the image pyramids of the two input frames. Note that every pyramid level is padded at its borders by winSize, which is what later lets the algorithm read window pixels that fall slightly outside the image.
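A practical consequence: when tracking across a video you can build each frame's pyramid once with buildOpticalFlowPyramid and pass the pyramids directly to calcOpticalFlowPyrLK, so the "next" pyramid of one iteration becomes the "previous" pyramid of the following one. A minimal sketch (the loop structure and the moreFrames() helper are mine):

std::vector<cv::Mat> prevPyr, nextPyr;
cv::Size winSize(15, 15);
int maxLevel = 2;
// withDerivatives = true also precomputes the Scharr derivative of every level
cv::buildOpticalFlowPyramid(old_gray, prevPyr, winSize, maxLevel, true);
while (moreFrames()) {                // moreFrames(): hypothetical frame source
    cv::buildOpticalFlowPyramid(frame_gray, nextPyr, winSize, maxLevel, true);
    cv::calcOpticalFlowPyrLK(prevPyr, nextPyr, p0, p1, status, err,
                             winSize, maxLevel, criteria);
    std::swap(prevPyr, nextPyr);      // reuse this frame's pyramid next iteration
    // ... update p0 from p1, refresh features, etc. ...
}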
//clamp the termination criteria: maxCount is restricted to [0, 100]
//and epsilon to [0.0, 10.0]
if( (criteria.type & TermCriteria::COUNT) == 0 )
criteria.maxCount = 30;
else
criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
if( (criteria.type & TermCriteria::EPS) == 0 )
criteria.epsilon = 0.01;
else
criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
criteria.epsilon *= criteria.epsilon;
This part normalizes the iteration count and the convergence threshold, the key parameters of the iterative stage. Note that epsilon is squared at the end, because the iteration loop later compares it against the squared norm of the displacement update (delta.ddot(delta) <= criteria.epsilon).
//derivIBuf: an important buffer; the derivative map of every pyramid level is written into it
Mat derivIBuf;
if(lvlStep1 == 1)
derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2));
//search for the flow level by level, from the coarsest level down
for( level = maxLevel; level >= 0; level-- )
{
//holds the derivative image of the current level
Mat derivI;
//lvlStep1 == 1 in the usual case (the pyramid carries no precomputed derivative planes)
if(lvlStep1 == 1)
{
//size of the image at the current level
Size imgSize = prevPyr[level * lvlStep1].size();
//define _derivI (sharing memory with derivIBuf), with height imgSize.height + winSize.height*2
//and width imgSize.width + winSize.width*2
Mat _derivI( imgSize.height + winSize.height*2,
imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.ptr() );
//derivI shares memory with _derivI: it starts at offset (winSize.width, winSize.height) inside _derivI,
//with width imgSize.width and height imgSize.height
derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
//compute the derivatives of the current level into derivI
calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
//pad derivI by winSize on every side (the padded view is _derivI); the border pixels are set to 0
copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
}
else
derivI = prevPyr[level * lvlStep1 + 1];
//the two pyramids must agree in size and type at every level
CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());
#ifdef HAVE_TEGRA_OPTIMIZATION
typedef tegra::LKTrackerInvoker LKTrackerInvoker;
#else
typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
#endif
//LKTrackerInvoker is the actual body of the method; its operator() carries out the concrete LK computation
parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
nextPyr[level * lvlStep2], prevPts, nextPts,
status, err,
winSize, criteria, level, maxLevel,
flags, (float)minEigThreshold));
}
This part processes the pyramid level by level: at each level the LK step is applied to compute the displacement of every feature point at that level.
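A word on calcSharrDeriv: for each level it computes the x and y derivatives of the previous image with the 3x3 Scharr kernel (weights 3, 10, 3) and stores them interleaved in a 2-channel CV_16S image. Conceptually, and ignoring the hand-optimized fixed-point code of the real implementation, it behaves like this sketch (my own stand-in, assuming an 8-bit single-channel input):

// A conceptual stand-in for calcSharrDeriv; the real function produces the
// same interleaved (Ix, Iy) layout but is vectorized and handles borders itself.
void sharrDerivSketch(const cv::Mat& src, cv::Mat& dst)
{
    cv::Mat dx, dy;
    cv::Scharr(src, dx, CV_16S, 1, 0); // kernel [-3 0 3; -10 0 10; -3 0 3]
    cv::Scharr(src, dy, CV_16S, 0, 1); // its transpose
    cv::Mat chans[] = { dx, dy };
    cv::merge(chans, 2, dst);          // dst: CV_16SC2, (Ix, Iy) per pixel
}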
So the function that truly carries out the LK method is void cv::detail::LKTrackerInvoker::operator()(const Range& range) const; inside this overloaded operator() we can see the concrete implementation of pyramidal LK in OpenCV.
3. The operator() function in detail
CV_INSTRUMENT_REGION();
//set up the initial references I, J, derivI; halfWin: half the window size
Point2f halfWin((winSize.width-1)*0.5f, (winSize.height-1)*0.5f);
const Mat& I = *prevImg;
const Mat& J = *nextImg;
const Mat& derivI = *prevDeriv;
int j, cn = I.channels(), cn2 = cn*2;
//following the typedefs, deriv_type is short; _buf holds three window-areas worth of elements per channel
cv::AutoBuffer<deriv_type> _buf(winSize.area()*(cn + cn2));
int derivDepth = DataType<deriv_type>::depth;
//IWinBuf occupies the first window-area of _buf
Mat IWinBuf(winSize, CV_MAKETYPE(derivDepth, cn), _buf.data());
//derivIWinBuf occupies the remaining two window-areas of _buf
Mat derivIWinBuf(winSize, CV_MAKETYPE(derivDepth, cn2), _buf.data() + winSize.area()*cn);
This first part sets up the variables needed by the computation that follows.
The second part is the core of the algorithm: computing G, testing G for invertibility, computing b, and computing delta. It is the most essential piece of the LK method.
//loop over the feature points; ptidx is the index of the current point
for( int ptidx = range.start; ptidx < range.end; ptidx++ )
{
//map the point's coordinates down to the current pyramid level
Point2f prevPt = prevPts[ptidx]*(float)(1./(1 << level));
Point2f nextPt;
//at the top (coarsest) level, initialize nextPt from prevPt (or from the user-supplied initial flow)
if( level == maxLevel )
{
if( flags & OPTFLOW_USE_INITIAL_FLOW )
nextPt = nextPts[ptidx]*(float)(1./(1 << level));
else
nextPt = prevPt;
}
//below the top level, multiply the coordinates found at the coarser level by 2 to map them one level down
else
nextPt = nextPts[ptidx]*2.f;
nextPts[ptidx] = nextPt;
The loop processes the feature points of image I one by one; the first step is to obtain the coordinates of the point to process, mapped to the current level by the level scale factor.
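In formulas: with $L$ the current level and $p$ a point in the full-resolution image, the code uses

$$ p^{(L)} = \frac{p}{2^{L}}, \qquad g^{(L-1)} = 2\left(g^{(L)} + d^{(L)}\right), $$

where $g^{(L)}$ is the guess for the tracked position at level $L$ and $d^{(L)}$ the displacement refined there; since nextPts[ptidx] already holds $g^{(L)} + d^{(L)}$ when the next level starts, only the multiplication by 2 appears in the code.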
//split the floating-point prevPt into integer and fractional parts
Point2i iprevPt, inextPt;
//shift the point to the top-left corner of its window: G is accumulated over a winSize window
//centered on prevPt, so the window starts at prevPt - halfWin
prevPt -= halfWin;
iprevPt.x = cvFloor(prevPt.x);
iprevPt.y = cvFloor(prevPt.y);
//reject points whose window falls outside the (padded) derivative image
if( iprevPt.x < -winSize.width || iprevPt.x >= derivI.cols ||
iprevPt.y < -winSize.height || iprevPt.y >= derivI.rows )
{
//if the point is out of bounds at the bottom level (the original image), mark it lost: status = 0, err = 0
if( level == 0 )
{
if( status )
status[ptidx] = false;
if( err )
err[ptidx] = 0;
}
continue;
}
//compute the four bilinear interpolation weights from the fractional parts
float a = prevPt.x - iprevPt.x;
float b = prevPt.y - iprevPt.y;
const int W_BITS = 14, W_BITS1 = 14;
const float FLT_SCALE = 1.f/(1 << 20);
//for speed, OpenCV scales the floating-point weights by 2^W_BITS and works in integer arithmetic
int iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
int iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
int iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
int iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
//elemSize1: bytes per element channel
//step: bytes per image row
int dstep = (int)(derivI.step/derivI.elemSize1());
int stepI = (int)(I.step/I.elemSize1());
int stepJ = (int)(J.step/J.elemSize1());
acctype iA11 = 0, iA12 = 0, iA22 = 0;
float A11, A12, A22;
//accumulate the three entries of the matrix G: sum(Ix*Ix), sum(Ix*Iy), sum(Iy*Iy)
// extract the patch from the first image, compute covariation matrix of derivatives
int x, y;
for( y = 0; y < winSize.height; y++ )
{
const uchar* src = I.ptr() + (y + iprevPt.y)*stepI + iprevPt.x*cn;
const deriv_type* dsrc = derivI.ptr<deriv_type>() + (y + iprevPt.y)*dstep + iprevPt.x*cn2;
deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);
x = 0;
for( ; x < winSize.width*cn; x++, dsrc += 2, dIptr += 2 )
{
int ival = CV_DESCALE(src[x]*iw00 + src[x+cn]*iw01 +
src[x+stepI]*iw10 + src[x+stepI+cn]*iw11, W_BITS1-5);
int ixval = CV_DESCALE(dsrc[0]*iw00 + dsrc[cn2]*iw01 +
dsrc[dstep]*iw10 + dsrc[dstep+cn2]*iw11, W_BITS1);
int iyval = CV_DESCALE(dsrc[1]*iw00 + dsrc[cn2+1]*iw01 + dsrc[dstep+1]*iw10 +
dsrc[dstep+cn2+1]*iw11, W_BITS1);
Iptr[x] = (short)ival;//bilinearly interpolated gray value
dIptr[0] = (short)ixval;//Ix
dIptr[1] = (short)iyval;//Iy
iA11 += (itemtype)(ixval*ixval);
iA12 += (itemtype)(ixval*iyval);
iA22 += (itemtype)(iyval*iyval);
}
}
This part computes Ix, Iy, and the three entries of G. The gray value as well as Ix and Iy at each subpixel window position are obtained by bilinear interpolation from the four neighboring pixels.
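To see through the fixed-point arithmetic: with a and b the fractional parts of the coordinates, the iw** values are the ordinary bilinear weights scaled by 2^W_BITS. A floating-point sketch of the same interpolation (my own illustration; it assumes an 8-bit single-channel image and in-bounds coordinates):

// floating-point version of the fixed-point bilinear interpolation above
static float bilinear(const cv::Mat& img, float x, float y)
{
    int ix = cvFloor(x), iy = cvFloor(y);
    float a = x - ix, b = y - iy;      // fractional parts
    const uchar* p0 = img.ptr<uchar>(iy);
    const uchar* p1 = img.ptr<uchar>(iy + 1);
    return p0[ix]     * (1.f - a) * (1.f - b)   // weight iw00
         + p0[ix + 1] * a         * (1.f - b)   // weight iw01
         + p1[ix]     * (1.f - a) * b           // weight iw10
         + p1[ix + 1] * a         * b;          // weight iw11
}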
//rescale the three entries of G back to floating point
A11 = iA11*FLT_SCALE;
A12 = iA12*FLT_SCALE;
A22 = iA22*FLT_SCALE;
//D is the determinant of G
float D = A11*A22 - A12*A12;
//minEig is the smaller eigenvalue of G, normalized by the window area
float minEig = (A22 + A11 - std::sqrt((A11-A22)*(A11-A22) +
4.f*A12*A12))/(2*winSize.width*winSize.height);
if( err && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) != 0 )
err[ptidx] = (float)minEig;
//if the determinant is (near) zero G is not invertible, and if the minimum eigenvalue falls below
//the threshold the point is poorly conditioned; in both cases the point cannot be tracked
if( minEig < minEigThreshold || D < FLT_EPSILON )
{
if( level == 0 && status )
status[ptidx] = false;
continue;
}
D = 1.f/D;
This part checks whether the point's matrix G is invertible and well conditioned: if so, the point can be tracked; otherwise its status is set to 0 and the point counts as lost.
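For reference, the matrix and the eigenvalue test are

$$ G = \begin{bmatrix} A_{11} & A_{12} \\ A_{12} & A_{22} \end{bmatrix}, \qquad \lambda_{\min} = \frac{A_{11} + A_{22} - \sqrt{(A_{11}-A_{22})^2 + 4A_{12}^2}}{2}, $$

and the code divides $\lambda_{\min}$ by the window area (the denominator 2*winSize.width*winSize.height combines the /2 of the formula with the area), so minEigThreshold acts as a per-pixel threshold that does not depend on the window size.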
//shift nextPt to the top-left corner of its window as well
nextPt -= halfWin;
Point2f prevDelta;
//the iterative refinement loop for this feature point
for( j = 0; j < criteria.maxCount; j++ )
{
//take the integer part of the point's position in J and test it against the image bounds
inextPt.x = cvFloor(nextPt.x);
inextPt.y = cvFloor(nextPt.y);
if( inextPt.x < -winSize.width || inextPt.x >= J.cols ||
inextPt.y < -winSize.height || inextPt.y >= J.rows )
{
if( level == 0 && status )
status[ptidx] = false;
break;
}
//compute the four bilinear interpolation weights for the position in J
a = nextPt.x - inextPt.x;
b = nextPt.y - inextPt.y;
iw00 = cvRound((1.f - a)*(1.f - b)*(1 << W_BITS));
iw01 = cvRound(a*(1.f - b)*(1 << W_BITS));
iw10 = cvRound((1.f - a)*b*(1 << W_BITS));
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
acctype ib1 = 0, ib2 = 0;
float b1, b2;
for( y = 0; y < winSize.height; y++ )
{
const uchar* Jptr = J.ptr() + (y + inextPt.y)*stepJ + inextPt.x*cn;
const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
const deriv_type* dIptr = derivIWinBuf.ptr<deriv_type>(y);
x = 0;
for( ; x < winSize.width*cn; x++, dIptr += 2 )
{
//the per-pixel difference between the J window and the stored I window
int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
W_BITS1-5) - Iptr[x];
//accumulate the vector b
ib1 += (itemtype)(diff*dIptr[0]);
ib2 += (itemtype)(diff*dIptr[1]);
}
}
b1 = ib1*FLT_SCALE;
b2 = ib2*FLT_SCALE;
//delta = G^(-1) * b: the displacement update for this iteration
Point2f delta( (float)((A12*b2 - A22*b1) * D),
(float)((A12*b1 - A11*b2) * D));//solve for the velocity v
//delta = -delta;
//shift the point in J by delta
nextPt += delta;
//store the updated position (window corner + halfWin gives the window center)
nextPts[ptidx] = nextPt + halfWin;
//if delta is small enough, the iteration has converged; accept it and stop
if( delta.ddot(delta) <= criteria.epsilon )
break;
//if the last two updates nearly cancel each other, the point is oscillating around
//the optimum; back off half a step and stop
if( j > 0 && std::abs(delta.x + prevDelta.x) < 0.01 &&
std::abs(delta.y + prevDelta.y) < 0.01 )
{
nextPts[ptidx] -= delta*0.5f;
break;
}
prevDelta = delta;
}
This part iterates to the optimal delta, minimizing the matching error of the feature point between images I and J. Once the loop finishes, the optical-flow computation for this point is complete.
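The closed-form 2x2 solve in the loop corresponds to

$$ \delta = G^{-1} b = \frac{1}{D} \begin{bmatrix} A_{22} & -A_{12} \\ -A_{12} & A_{11} \end{bmatrix} \begin{bmatrix} b_1 \\ b_2 \end{bmatrix}, \qquad D = A_{11}A_{22} - A_{12}^2, $$

with one sign subtlety: the code accumulates diff = J - I, while the derivation defines $\delta I = I - J$, so the components are written as (A12*b2 - A22*b1)*D and (A12*b1 - A11*b2)*D (with D already inverted to 1/det at this point). That is $-G^{-1}b$ for the code's b, i.e. exactly $G^{-1}b$ for the derivation's b, which also explains the commented-out line delta = -delta.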
CV_Assert(status != NULL);
if( status[ptidx] && err && level == 0 && (flags & OPTFLOW_LK_GET_MIN_EIGENVALS) == 0 )
{
Point2f nextPoint = nextPts[ptidx] - halfWin;
Point inextPoint;
inextPoint.x = cvFloor(nextPoint.x);
inextPoint.y = cvFloor(nextPoint.y);
if( inextPoint.x < -winSize.width || inextPoint.x >= J.cols ||
inextPoint.y < -winSize.height || inextPoint.y >= J.rows )
{
if( status )
status[ptidx] = false;
continue;
}
float aa = nextPoint.x - inextPoint.x;
float bb = nextPoint.y - inextPoint.y;
iw00 = cvRound((1.f - aa)*(1.f - bb)*(1 << W_BITS));
iw01 = cvRound(aa*(1.f - bb)*(1 << W_BITS));
iw10 = cvRound((1.f - aa)*bb*(1 << W_BITS));
iw11 = (1 << W_BITS) - iw00 - iw01 - iw10;
float errval = 0.f;
for( y = 0; y < winSize.height; y++ )
{
const uchar* Jptr = J.ptr() + (y + inextPoint.y)*stepJ + inextPoint.x*cn;
const deriv_type* Iptr = IWinBuf.ptr<deriv_type>(y);
for( x = 0; x < winSize.width*cn; x++ )
{
//accumulate the absolute gray-level difference between the I and J windows at the final position
int diff = CV_DESCALE(Jptr[x]*iw00 + Jptr[x+cn]*iw01 +
Jptr[x+stepJ]*iw10 + Jptr[x+stepJ+cn]*iw11,
W_BITS1-5) - Iptr[x];
errval += std::abs((float)diff);
}
}
//the final err value: a measure of how dissimilar the two patches are, to be thresholded according to your own needs
err[ptidx] = errval * 1.f/(32*winSize.width*cn*winSize.height);//normalize: /32 undoes the 5-bit fixed-point scale, the rest averages over the window
}
}
This third part produces the error measure between the matched patches in I and J; how to use or threshold this value is left to our own experience.
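In user code this typically becomes a second filter next to status; a small sketch (the threshold value 10.0f is an arbitrary example to tune for your data):

// keep a point only if it was tracked AND its patch difference is small;
// err is roughly the mean absolute gray-level difference of the two patches
std::vector<cv::Point2f> filtered;
for (size_t i = 0; i < p1.size(); i++)
    if (status[i] && err[i] < 10.0f)   // 10.0f: hypothetical threshold
        filtered.push_back(p1[i]);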
That completes the source-code walkthrough of OpenCV's LK optical flow. Following it requires the mathematical derivation of the optical-flow method, its overall flow chart, and a bit of linear algebra, but the explanation of the code itself is essentially complete. If you see things differently or have other opinions, you are welcome to discuss in the comments~