The optical flow method is one of the most important techniques for analyzing motion in image sequences. Optical flow refers to the apparent motion velocity in a time-varying image: as objects move, the brightness patterns they project onto the image move with them, and it is this apparent motion of brightness patterns that we measure.
The driver typically maps the camera to a device file, so frames are acquired with the familiar file operations (a read sketch follows the list below):
- fopen
- fseek
- fread
- fclose
- fwrite
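As a minimal sketch of this pattern (heavily simplified: the device path, frame size, and the assumption that the driver supports plain read()-style access are all illustrative; real V4L2 capture normally negotiates the format via ioctl first):

```cpp
#include <cstdio>
#include <vector>

int main()
{
    const int width = 640, height = 480;                   // assumed frame geometry
    std::vector<unsigned char> frame(width * height);      // one 8-bit grayscale frame

    FILE* fp = fopen("/dev/video0", "rb");                 // camera mapped to a file
    if (!fp)
        return -1;

    fseek(fp, 0, SEEK_SET);                                // seek to the stream start
    size_t got = fread(frame.data(), 1, frame.size(), fp); // read one frame's worth
    fclose(fp);

    // fwrite() can dump the raw frame for offline inspection:
    // FILE* out = fopen("frame.raw", "wb");
    // fwrite(frame.data(), 1, got, out);
    // fclose(out);
    return got == frame.size() ? 0 : 1;
}
```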
Corners are one kind of image feature used in computer vision, with wide application in motion detection, image matching, visual tracking, 3D reconstruction, and other areas.
Corners can be defined in several ways; here we treat a corner as the intersection of two edges. Common detectors include the Harris operator and the Shi-Tomasi operator.
The Shi-Tomasi operator was proposed in the 1994 paper "Good Features to Track"; the corresponding OpenCV function is named goodFeaturesToTrack. Let's look at how this function is implemented.
`goodFeaturesToTrack()`
void goodFeaturesToTrack( InputArray image,
                          OutputArray corners,
                          int maxCorners,
                          double qualityLevel,
                          double minDistance,
                          InputArray mask=noArray(),
                          int blockSize=3,
                          bool useHarrisDetector=false,
                          double k=0.04 )
Parameters:
image – input 8-bit or floating-point 32-bit, single-channel image.
corners – output vector of detected corners.
maxCorners – maximum number of corners to return; if more corners are found, the strongest of them are returned.
qualityLevel – parameter characterizing the minimal accepted quality (minimal eigenvalue) of image corners. For example, if the best corner has the quality measure 1500 and qualityLevel=0.01, then all corners with a quality measure less than 15 are rejected.
minDistance – minimum possible Euclidean distance between the returned corners.
mask – optional region of interest. If the mask is not empty (it must have the type CV_8UC1 and the same size as image), it specifies the region in which the corners are detected.
blockSize – size of an average block for computing a derivative covariation matrix over each pixel neighborhood. See cornerEigenValsAndVecs().
useHarrisDetector – parameter indicating whether to use a Harris detector (see cornerHarris()) or cornerMinEigenVal().
k – free parameter of the Harris detector.
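As a quick usage sketch (the file name and parameter values are illustrative): detecting up to 100 Shi-Tomasi corners on a grayscale image.

```cpp
#include <opencv2/opencv.hpp>
#include <vector>

int main()
{
    cv::Mat gray = cv::imread("frame.png", cv::IMREAD_GRAYSCALE); // any 8-bit image
    if (gray.empty())
        return -1;

    std::vector<cv::Point2f> corners;
    cv::goodFeaturesToTrack(gray, corners,
                            100,    // maxCorners
                            0.01,   // qualityLevel
                            10.0);  // minDistance in pixels
    // corners now holds the strongest features, ordered by response.
    return 0;
}
```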
The key point is that in this project goodFeaturesToTrack is used to initialize the feature points for optical-flow tracking. Its implementation is as follows:
void cv::goodFeaturesToTrack( InputArray _image,
                              OutputArray _corners,
                              int maxCorners,
                              double qualityLevel,
                              double minDistance,
                              InputArray _mask,
                              int blockSize,
                              bool useHarrisDetector,
                              double harrisK )
{
Mat image = _image.getMat(), mask = _mask.getMat();
CV_Assert( qualityLevel > 0 && minDistance >= 0 && maxCorners >= 0 );
CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == image.size()) );
Mat eig, tmp;
// cornerMinEigenVal is used by default
if( useHarrisDetector )
cornerHarris( image, eig, blockSize, 3, harrisK );
else
cornerMinEigenVal( image, eig, blockSize, 3 );
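// keep only responses above qualityLevel * maxVal, then isolate local maxima
// by comparing each pixel against the dilated response map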
double maxVal = 0;
minMaxLoc( eig, 0, &maxVal, 0, 0, mask );
threshold( eig, eig, maxVal*qualityLevel, 0, THRESH_TOZERO );
dilate( eig, tmp, Mat());
Size imgsize = image.size();
vector<const float*> tmpCorners;
// collect list of pointers to features - put them into temporary image
for( int y = 1; y < imgsize.height - 1; y++ )
{
const float* eig_data = (const float*)eig.ptr(y);
const float* tmp_data = (const float*)tmp.ptr(y);
const uchar* mask_data = mask.data ? mask.ptr(y) : 0;
for( int x = 1; x < imgsize.width - 1; x++ )
{
float val = eig_data[x];
if( val != 0 && val == tmp_data[x] && (!mask_data || mask_data[x]) )
tmpCorners.push_back(eig_data + x);
}
}
sort( tmpCorners, greaterThanPtr<float>() );
vector<Point2f> corners;
size_t i, j, total = tmpCorners.size(), ncorners = 0;
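// if minDistance >= 1, do grid-based non-maximum suppression so that
// accepted corners stay at least minDistance apart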
if(minDistance >= 1)
{
// Partition the image into larger grids
int w = image.cols;
int h = image.rows;
const int cell_size = cvRound(minDistance);
const int grid_width = (w + cell_size - 1) / cell_size;
const int grid_height = (h + cell_size - 1) / cell_size;
std::vector<std::vector<Point2f> > grid(grid_width*grid_height);
minDistance *= minDistance;
for( i = 0; i < total; i++ )
{
int ofs = (int)((const uchar*)tmpCorners[i] - eig.data);
int y = (int)(ofs / eig.step);
int x = (int)((ofs - y*eig.step)/sizeof(float));
bool good = true;
int x_cell = x / cell_size;
int y_cell = y / cell_size;
int x1 = x_cell - 1;
int y1 = y_cell - 1;
int x2 = x_cell + 1;
int y2 = y_cell + 1;
// boundary check
x1 = std::max(0, x1);
y1 = std::max(0, y1);
x2 = std::min(grid_width-1, x2);
y2 = std::min(grid_height-1, y2);
for( int yy = y1; yy <= y2; yy++ )
{
for( int xx = x1; xx <= x2; xx++ )
{
vector<Point2f> &m = grid[yy*grid_width + xx];
if( m.size() )
{
for(j = 0; j < m.size(); j++)
{
float dx = x - m[j].x;
float dy = y - m[j].y;
if( dx*dx + dy*dy < minDistance )
{
good = false;
goto break_out;
}
}
}
}
}
break_out:
if(good)
{
// printf("%d: %d %d -> %d %d, %d, %d -- %d %d %d %d, %d %d, c=%d\n",
// i,x, y, x_cell, y_cell, (int)minDistance, cell_size,x1,y1,x2,y2, grid_width,grid_height,c);
grid[y_cell*grid_width + x_cell].push_back(Point2f((float)x, (float)y));
corners.push_back(Point2f((float)x, (float)y));
++ncorners;
if( maxCorners > 0 && (int)ncorners == maxCorners )
break;
}
}
}
else
{
for( i = 0; i < total; i++ )
{
int ofs = (int)((const uchar*)tmpCorners[i] - eig.data);
int y = (int)(ofs / eig.step);
int x = (int)((ofs - y*eig.step)/sizeof(float));
corners.push_back(Point2f((float)x, (float)y));
++ncorners;
if( maxCorners > 0 && (int)ncorners == maxCorners )
break;
}
}
Mat(corners).convertTo(_corners, _corners.fixedType() ? _corners.type() : CV_32F);
/*
for( i = 0; i < total; i++ )
{
int ofs = (int)((const uchar*)tmpCorners[i] - eig.data);
int y = (int)(ofs / eig.step);
int x = (int)((ofs - y*eig.step)/sizeof(float));
if( minDistance > 0 )
{
for( j = 0; j < ncorners; j++ )
{
float dx = x - corners[j].x;
float dy = y - corners[j].y;
if( dx*dx + dy*dy < minDistance )
break;
}
if( j < ncorners )
continue;
}
corners.push_back(Point2f((float)x, (float)y));
++ncorners;
if( maxCorners > 0 && (int)ncorners == maxCorners )
break;
}
*/
}
Among the intermediate functions, the key one is cornerEigenValsVecs; first, the wrapper that calls it:
// op_type values used below
enum { MINEIGENVAL=0, HARRIS=1, EIGENVALSVECS=2 };
// minimum-eigenvalue wrapper around the autocorrelation computation
void cv::cornerMinEigenVal( InputArray _src,
OutputArray _dst,
int blockSize,
int ksize,
int borderType )
{
Mat src = _src.getMat();
_dst.create( src.size(), CV_32F );
Mat dst = _dst.getMat();
cornerEigenValsVecs( src, dst, blockSize, ksize, MINEIGENVAL, 0, borderType );
}
cornerEigenValsVecs then computes the autocorrelation matrix of the image derivatives:
static void cornerEigenValsVecs( const Mat& src,
                                 Mat& eigenv,
                                 int block_size,
                                 int aperture_size,
                                 int op_type,
                                 double k=0.,
                                 int borderType=BORDER_DEFAULT )
{
#ifdef HAVE_TEGRA_OPTIMIZATION
if (tegra::cornerEigenValsVecs(src, eigenv, block_size, aperture_size, op_type, k, borderType))
return;
#endif
int depth = src.depth();
double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
if( aperture_size < 0 )
scale *= 2.;
if( depth == CV_8U )
scale *= 255.;
scale = 1./scale;
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_32FC1 );
Mat Dx, Dy;
// aperture size greater than zero: use Sobel, otherwise Scharr
if( aperture_size > 0 )
{
Sobel( src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType );
Sobel( src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType );
}
else
{
Scharr( src, Dx, CV_32F, 1, 0, scale, 0, borderType );
Scharr( src, Dy, CV_32F, 0, 1, scale, 0, borderType );
}
Size size = src.size();
Mat cov( size, CV_32FC3 );
int i, j;
for( i = 0; i < size.height; i++ )
{
float* cov_data = (float*)(cov.data + i*cov.step);
const float* dxdata = (const float*)(Dx.data + i*Dx.step);
const float* dydata = (const float*)(Dy.data + i*Dy.step);
for( j = 0; j < size.width; j++ )
{
float dx = dxdata[j];
float dy = dydata[j];
cov_data[j*3] = dx*dx;
cov_data[j*3+1] = dx*dy;
cov_data[j*3+2] = dy*dy;
}
}
// box-filter the covariance products over the blockSize neighborhood
boxFilter(cov, cov, cov.depth(), Size(block_size, block_size),
Point(-1,-1), false, borderType );
if( op_type == MINEIGENVAL )
calcMinEigenVal( cov, eigenv );
else if( op_type == HARRIS )
calcHarris( cov, eigenv, k );
else if( op_type == EIGENVALSVECS )
calcEigenValsVecs( cov, eigenv );
}
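For reference, the per-pixel products (dx², dx·dy, dy²) that this function box-filters form the structure tensor, which calcMinEigenVal and calcHarris then reduce to a scalar corner response. In standard notation:

$$
M=\sum_{(x,y)\in W}\begin{pmatrix}I_x^2 & I_xI_y\\ I_xI_y & I_y^2\end{pmatrix},\qquad
R_{\text{Shi-Tomasi}}=\min(\lambda_1,\lambda_2),\qquad
R_{\text{Harris}}=\det M-k\,(\operatorname{tr}M)^2
$$

where $I_x$, $I_y$ are the Sobel/Scharr derivatives, $W$ is the blockSize window, and $\lambda_1,\lambda_2$ are the eigenvalues of $M$.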
With the parameters set as specified, feature extraction is now complete; the result can be verified with imshow.
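A minimal verification sketch (window name and drawing parameters are arbitrary): draw each detected corner and display the result.

```cpp
// Assumes gray and corners from the goodFeaturesToTrack sketch above.
cv::Mat vis;
cv::cvtColor(gray, vis, cv::COLOR_GRAY2BGR);           // color canvas for drawing
for (const cv::Point2f& p : corners)
    cv::circle(vis, p, 3, cv::Scalar(0, 255, 0), -1); // filled dot per corner
cv::imshow("corners", vis);
cv::waitKey(0);
```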
Sparse feature optical flow is computed with the iterative Lucas-Kanade method with pyramids; the corresponding OpenCV function is calcOpticalFlowPyrLK. Feature points are located in prevImg and the object is tracked into nextImg; once the second frame arrives, both arguments are available and tracking can proceed.
The function implements the pyramidal Lucas-Kanade optical flow proposed in Jean-Yves Bouguet's paper "Pyramidal Implementation of the Lucas Kanade Feature Tracker: Description of the algorithm".
The algorithm rests on three assumptions:
- Brightness constancy
- Temporal persistence, i.e. the motion between frames is "small"
- Spatial coherence
An effective tracking algorithm is then built on these three assumptions.
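Before reading the code, it helps to see the standard derivation these assumptions lead to (a textbook sketch, not taken verbatim from the paper). Brightness constancy plus small motion gives the optical-flow constraint, and spatial coherence turns it into a least-squares system over the window $W$:

$$
I(x+u,\,y+v,\,t+1)=I(x,y,t)\;\Rightarrow\;I_xu+I_yv+I_t\approx 0
$$

$$
\begin{pmatrix}u\\ v\end{pmatrix}
=\left(\sum_{W}\begin{pmatrix}I_x^2 & I_xI_y\\ I_xI_y & I_y^2\end{pmatrix}\right)^{-1}
\left(-\sum_{W}\begin{pmatrix}I_xI_t\\ I_yI_t\end{pmatrix}\right)
$$

The 2×2 matrix is the same structure tensor used for corner detection, which is exactly why Shi-Tomasi corners are the points where this system is well conditioned; the pyramid handles motions too large for the small-motion assumption at full resolution.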
The concrete implementation is as follows:
//! computes sparse optical flow using multi-scale Lucas-Kanade algorithm
CV_EXPORTS_W void calcOpticalFlowPyrLK( InputArray prevImg,
                                        InputArray nextImg,
                                        InputArray prevPts,
                                        CV_OUT InputOutputArray nextPts,
                                        OutputArray status,
                                        OutputArray err,
                                        Size winSize=Size(21,21),
                                        int maxLevel=3,
                                        TermCriteria criteria=TermCriteria(TermCriteria::COUNT+TermCriteria::EPS, 30, 0.01),
                                        int flags=0,
                                        double minEigThreshold=1e-4 );
Parameters:
prevImg – first 8-bit input image or pyramid constructed by buildOpticalFlowPyramid().
nextImg – second input image or pyramid of the same size and the same type as prevImg.
prevPts – vector of 2D points for which the flow needs to be found.
nextPts – output vector of 2D points containing the calculated new positions of the input features in the second image.
status – output status vector (of unsigned chars); each element is set to 1 if the flow for the corresponding feature has been found, otherwise it is set to 0.
err – output vector of errors; each element is set to an error measure for the corresponding feature.
winSize – size of the search window at each pyramid level.
maxLevel – 0-based maximal pyramid level number; if set to 0, pyramids are not used (single level); if set to 1, two levels are used, and so on.
criteria – parameter specifying the termination criteria of the iterative search algorithm.
flags – operation flags:
OPTFLOW_USE_INITIAL_FLOW uses initial estimations stored in nextPts;
OPTFLOW_LK_GET_MIN_EIGENVALS uses minimum eigenvalues as an error measure.
minEigThreshold – the algorithm calculates the minimum eigenvalue of a 2x2 normal matrix of the optical-flow equations, divided by the number of pixels in a window; if this value is less than minEigThreshold, the corresponding feature is filtered out and its flow is not processed.
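Before walking through the source, a minimal usage sketch (frame names are illustrative, continuing the corner-detection example): track the corners found in prevGray into nextGray.

```cpp
// Assumes prevGray and nextGray are consecutive CV_8UC1 frames and
// prevPts holds the corners from goodFeaturesToTrack.
std::vector<cv::Point2f> nextPts;
std::vector<uchar> status;
std::vector<float> err;
cv::calcOpticalFlowPyrLK(prevGray, nextGray, prevPts, nextPts, status, err,
                         cv::Size(21, 21),  // search window per pyramid level
                         3);                // use pyramid levels 0..3
// status[i] == 1 means feature i was tracked successfully to nextPts[i].
```

The implementation of calcOpticalFlowPyrLK: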
void cv::calcOpticalFlowPyrLK( InputArray _prevImg,
InputArray _nextImg,
InputArray _prevPts,
InputOutputArray _nextPts,
OutputArray _status,
OutputArray _err,
Size winSize,
int maxLevel,
TermCriteria criteria,
int flags,
double minEigThreshold )
{
Mat prevPtsMat = _prevPts.getMat();
const int derivDepth = DataType<cv::detail::deriv_type>::depth;
CV_Assert( maxLevel >= 0 && winSize.width > 2 && winSize.height > 2 );
int level=0, i, npoints;
CV_Assert( (npoints = prevPtsMat.checkVector(2, CV_32F, true)) >= 0 );
if( npoints == 0 )
{
_nextPts.release();
_status.release();
_err.release();
return;
}
if( !(flags & OPTFLOW_USE_INITIAL_FLOW) )
_nextPts.create(prevPtsMat.size(), prevPtsMat.type(), -1, true);
Mat nextPtsMat = _nextPts.getMat();
CV_Assert( nextPtsMat.checkVector(2, CV_32F, true) == npoints );
const Point2f* prevPts = (const Point2f*)prevPtsMat.data;
Point2f* nextPts = (Point2f*)nextPtsMat.data;
_status.create((int)npoints, 1, CV_8U, -1, true);
Mat statusMat = _status.getMat(), errMat;
CV_Assert( statusMat.isContinuous() );
uchar* status = statusMat.data;
float* err = 0;
for( i = 0; i < npoints; i++ )
status[i] = true;
if( _err.needed() )
{
_err.create((int)npoints, 1, CV_32F, -1, true);
errMat = _err.getMat();
CV_Assert( errMat.isContinuous() );
err = (float*)errMat.data;
}
vector<Mat> prevPyr, nextPyr;
int levels1 = -1;
int lvlStep1 = 1;
int levels2 = -1;
int lvlStep2 = 1;
if(_prevImg.kind() == _InputArray::STD_VECTOR_MAT)
{
_prevImg.getMatVector(prevPyr);
levels1 = int(prevPyr.size()) - 1;
CV_Assert(levels1 >= 0);
if (levels1 % 2 == 1 && prevPyr[0].channels() * 2 == prevPyr[1].channels() && prevPyr[1].depth() == derivDepth)
{
lvlStep1 = 2;
levels1 /= 2;
}
// ensure that pyramid has required padding
if(levels1 > 0)
{
Size fullSize;
Point ofs;
prevPyr[lvlStep1].locateROI(fullSize, ofs);
CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
&& ofs.x + prevPyr[lvlStep1].cols + winSize.width <= fullSize.width
&& ofs.y + prevPyr[lvlStep1].rows + winSize.height <= fullSize.height);
}
if(levels1 < maxLevel)
maxLevel = levels1;
}
if(_nextImg.kind() == _InputArray::STD_VECTOR_MAT)
{
_nextImg.getMatVector(nextPyr);
levels2 = int(nextPyr.size()) - 1;
CV_Assert(levels2 >= 0);
if (levels2 % 2 == 1 && nextPyr[0].channels() * 2 == nextPyr[1].channels() && nextPyr[1].depth() == derivDepth)
{
lvlStep2 = 2;
levels2 /= 2;
}
// ensure that pyramid has required padding
if(levels2 > 0)
{
Size fullSize;
Point ofs;
nextPyr[lvlStep2].locateROI(fullSize, ofs);
CV_Assert(ofs.x >= winSize.width && ofs.y >= winSize.height
&& ofs.x + nextPyr[lvlStep2].cols + winSize.width <= fullSize.width
&& ofs.y + nextPyr[lvlStep2].rows + winSize.height <= fullSize.height);
}
if(levels2 < maxLevel)
maxLevel = levels2;
}
if (levels1 < 0)
maxLevel = buildOpticalFlowPyramid(_prevImg, prevPyr, winSize, maxLevel, false);
if (levels2 < 0)
maxLevel = buildOpticalFlowPyramid(_nextImg, nextPyr, winSize, maxLevel, false);
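// clamp the termination criteria to sane ranges; epsilon is squared once here
// so the per-point loop can compare squared displacements directly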
if( (criteria.type & TermCriteria::COUNT) == 0 )
criteria.maxCount = 30;
else
criteria.maxCount = std::min(std::max(criteria.maxCount, 0), 100);
if( (criteria.type & TermCriteria::EPS) == 0 )
criteria.epsilon = 0.01;
else
criteria.epsilon = std::min(std::max(criteria.epsilon, 0.), 10.);
criteria.epsilon *= criteria.epsilon;
// dI/dx ~ Ix, dI/dy ~ Iy
Mat derivIBuf;
if(lvlStep1 == 1)
derivIBuf.create(prevPyr[0].rows + winSize.height*2, prevPyr[0].cols + winSize.width*2, CV_MAKETYPE(derivDepth, prevPyr[0].channels() * 2));
for( level = maxLevel; level >= 0; level-- )
{
Mat derivI;
if(lvlStep1 == 1)
{
Size imgSize = prevPyr[level * lvlStep1].size();
Mat _derivI( imgSize.height + winSize.height*2,
imgSize.width + winSize.width*2, derivIBuf.type(), derivIBuf.data );
derivI = _derivI(Rect(winSize.width, winSize.height, imgSize.width, imgSize.height));
calcSharrDeriv(prevPyr[level * lvlStep1], derivI);
copyMakeBorder(derivI, _derivI, winSize.height, winSize.height, winSize.width, winSize.width, BORDER_CONSTANT|BORDER_ISOLATED);
}
else
derivI = prevPyr[level * lvlStep1 + 1];
CV_Assert(prevPyr[level * lvlStep1].size() == nextPyr[level * lvlStep2].size());
CV_Assert(prevPyr[level * lvlStep1].type() == nextPyr[level * lvlStep2].type());
#ifdef HAVE_TEGRA_OPTIMIZATION
typedef tegra::LKTrackerInvoker LKTrackerInvoker;
#else
typedef cv::detail::LKTrackerInvoker LKTrackerInvoker;
#endif
parallel_for_(Range(0, npoints), LKTrackerInvoker(prevPyr[level * lvlStep1], derivI,
nextPyr[level * lvlStep2], prevPts, nextPts,
status, err,
winSize, criteria, level, maxLevel,
flags, (float)minEigThreshold));
}
}
//! constructs a pyramid which can be used as input for calcOpticalFlowPyrLK
CV_EXPORTS_W int buildOpticalFlowPyramid(InputArray img,
OutputArrayOfArrays pyramid,
Size winSize,
int maxLevel,
bool withDerivatives = true,
int pyrBorder = BORDER_REFLECT_101,
int derivBorder = BORDER_CONSTANT,
bool tryReuseInputImage = true);
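As a usage sketch before the implementation (variable names are illustrative, continuing the earlier snippets): building each frame's pyramid explicitly lets the same pyramid be reused as prevImg on the next iteration instead of being rebuilt inside calcOpticalFlowPyrLK.

```cpp
// Assumes prevGray/nextGray (CV_8UC1) and prevPts/nextPts/status/err
// from the earlier tracking sketch.
std::vector<cv::Mat> prevPyr, nextPyr;
cv::buildOpticalFlowPyramid(prevGray, prevPyr, cv::Size(21, 21), 3); // levels 0..3 plus derivatives
cv::buildOpticalFlowPyramid(nextGray, nextPyr, cv::Size(21, 21), 3);
cv::calcOpticalFlowPyrLK(prevPyr, nextPyr, prevPts, nextPts, status, err,
                         cv::Size(21, 21), 3);
// Next frame: std::swap(prevPyr, nextPyr) and rebuild only nextPyr.
```

The implementation: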
int cv::buildOpticalFlowPyramid(InputArray _img, OutputArrayOfArrays pyramid, Size winSize, int maxLevel, bool withDerivatives,
int pyrBorder, int derivBorder, bool tryReuseInputImage)
{
Mat img = _img.getMat();
CV_Assert(img.depth() == CV_8U && winSize.width > 2 && winSize.height > 2 );
int pyrstep = withDerivatives ? 2 : 1;
pyramid.create(1, (maxLevel + 1) * pyrstep, 0 /*type*/, -1, true, 0);
int derivType = CV_MAKETYPE(DataType<cv::detail::deriv_type>::depth, img.channels() * 2);
//level 0
bool lvl0IsSet = false;
if(tryReuseInputImage && img.isSubmatrix() && (pyrBorder & BORDER_ISOLATED) == 0)
{
Size wholeSize;
Point ofs;
img.locateROI(wholeSize, ofs);
if (ofs.x >= winSize.width && ofs.y >= winSize.height
&& ofs.x + img.cols + winSize.width <= wholeSize.width
&& ofs.y + img.rows + winSize.height <= wholeSize.height)
{
pyramid.getMatRef(0) = img;
lvl0IsSet = true;
}
}
if(!lvl0IsSet)
{
Mat& temp = pyramid.getMatRef(0);
if(!temp.empty())
temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
if(temp.type() != img.type() || temp.cols != winSize.width*2 + img.cols || temp.rows != winSize.height * 2 + img.rows)
temp.create(img.rows + winSize.height*2, img.cols + winSize.width*2, img.type());
if(pyrBorder == BORDER_TRANSPARENT)
img.copyTo(temp(Rect(winSize.width, winSize.height, img.cols, img.rows)));
else
copyMakeBorder(img, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder);
temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
}
Size sz = img.size();
Mat prevLevel = pyramid.getMatRef(0);
Mat thisLevel = prevLevel;
for(int level = 0; level <= maxLevel; ++level)
{
if (level != 0)
{
Mat& temp = pyramid.getMatRef(level * pyrstep);
if(!temp.empty())
temp.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
if(temp.type() != img.type() || temp.cols != winSize.width*2 + sz.width || temp.rows != winSize.height * 2 + sz.height)
temp.create(sz.height + winSize.height*2, sz.width + winSize.width*2, img.type());
thisLevel = temp(Rect(winSize.width, winSize.height, sz.width, sz.height));
pyrDown(prevLevel, thisLevel, sz);
if(pyrBorder != BORDER_TRANSPARENT)
copyMakeBorder(thisLevel, temp, winSize.height, winSize.height, winSize.width, winSize.width, pyrBorder|BORDER_ISOLATED);
temp.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
}
if(withDerivatives)
{
Mat& deriv = pyramid.getMatRef(level * pyrstep + 1);
if(!deriv.empty())
deriv.adjustROI(winSize.height, winSize.height, winSize.width, winSize.width);
if(deriv.type() != derivType || deriv.cols != winSize.width*2 + sz.width || deriv.rows != winSize.height * 2 + sz.height)
deriv.create(sz.height + winSize.height*2, sz.width + winSize.width*2, derivType);
Mat derivI = deriv(Rect(winSize.width, winSize.height, sz.width, sz.height));
calcSharrDeriv(thisLevel, derivI);
if(derivBorder != BORDER_TRANSPARENT)
copyMakeBorder(derivI, deriv, winSize.height, winSize.height, winSize.width, winSize.width, derivBorder|BORDER_ISOLATED);
deriv.adjustROI(-winSize.height, -winSize.height, -winSize.width, -winSize.width);
}
sz = Size((sz.width+1)/2, (sz.height+1)/2);
if( sz.width <= winSize.width || sz.height <= winSize.height )
{
pyramid.create(1, (level + 1) * pyrstep, 0 /*type*/, -1, true, 0);//check this
return level;
}
prevLevel = thisLevel;
}
return maxLevel;
}
The function calcOpticalFlowPyrLK implements a sparse iterative version of the Lucas-Kanade optical flow in pyramids. It computes the coordinates of the feature points on the current video frame from the feature-point coordinates given for the previous frame (the function finds the coordinates with sub-pixel accuracy).
prev_pts = cur_pts;
That is, after each frame the tracked current points are fed back as the previous points for the next iteration. The tracking flow is as illustrated above and will not be walked through step by step; the end-to-end sketch below ties the pieces together.
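A minimal end-to-end sketch of the pipeline, assuming frames come from cv::VideoCapture rather than a raw device file (all names are illustrative):

```cpp
#include <opencv2/opencv.hpp>
#include <vector>

int main()
{
    cv::VideoCapture cap(0);                 // camera 0; any video source works
    if (!cap.isOpened())
        return -1;

    cv::Mat frame, gray, prevGray;
    std::vector<cv::Point2f> prev_pts, cur_pts;

    while (cap.read(frame))
    {
        cv::cvtColor(frame, gray, cv::COLOR_BGR2GRAY);
        if (prev_pts.empty())
        {
            // (Re)initialize the features with Shi-Tomasi corners.
            cv::goodFeaturesToTrack(gray, prev_pts, 100, 0.01, 10.0);
        }
        else
        {
            std::vector<uchar> status;
            std::vector<float> err;
            cv::calcOpticalFlowPyrLK(prevGray, gray, prev_pts, cur_pts, status, err);

            // Keep only the successfully tracked points...
            std::vector<cv::Point2f> tracked;
            for (size_t i = 0; i < cur_pts.size(); i++)
                if (status[i])
                    tracked.push_back(cur_pts[i]);

            prev_pts = tracked;              // ...and feed them to the next iteration
        }
        gray.copyTo(prevGray);
    }
    return 0;
}
```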
The following visualization functions output the positions of the moving points in the image (see the snippet after this list):
- circle
- imwrite
- imshow
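Inside the tracking loop above, the visualization might look like this (window and file names are arbitrary):

```cpp
// Draw each tracked point on the frame, then show and save it.
for (const cv::Point2f& p : cur_pts)
    cv::circle(frame, p, 3, cv::Scalar(0, 0, 255), -1);
cv::imshow("tracking", frame);
cv::imwrite("tracking.png", frame);
cv::waitKey(1);
```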
This completes the implementation of optical-flow tracking based on OpenCV. Along the way we saw how to read image data from a file, how to call the library functions OpenCV provides, and, more importantly, how those library functions are implemented internally, which strengthens our theoretical foundation.
Finally, with the whole pipeline in place, the optical-flow work is brought to a successful close.