opencv 4.5.1
opencv contrib 4.5.1
Scale Invariant Feature Transform,尺度不变特征变换匹配算法,是由David G. Lowe在1999年(《Object Recognition from Local Scale-Invariant Features》)提出的高效区域检测算法,在2004年(《Distinctive Image Features from Scale-Invariant Keypoints》)得以完善。SIFT特征对旋转、尺度缩放、亮度变化等保持不变性,是非常稳定的局部特征,现在应用很广泛。
SIFT算法实现的步骤:
- 【1】关键点检测
- 【2】关键点描述
- 【3】关键点匹配
- 【4】消除错误匹配点
步骤:
原始图像(目标图像)–>[特征点检测+特征点描述]–> 目标特征点集合–>特征点匹配–>特征点矫正
源码:opencv_features2d-> src-> sift.dispatch.cpp
// Concrete implementation class deriving from the public SIFT interface.
class SIFT_Impl : public SIFT
{
public:
// Constructor: initializes the SIFT tuning parameters (number of features
// to retain, layers per octave, contrast/edge rejection thresholds, base
// sigma, and the descriptor element type).
explicit SIFT_Impl( int nfeatures = 0, int nOctaveLayers = 3,
double contrastThreshold = 0.04, double edgeThreshold = 10,
double sigma = 1.6, int descriptorType = CV_32F );
//! returns the descriptor size in floats (128)
int descriptorSize() const CV_OVERRIDE;
//! returns the descriptor type
int descriptorType() const CV_OVERRIDE;
//! returns the default norm type
int defaultNorm() const CV_OVERRIDE;
//! finds the keypoints and computes descriptors for them using SIFT algorithm.
//! Optionally it can compute descriptors for the user-provided keypoints
void detectAndCompute(InputArray img, InputArray mask,
std::vector<KeyPoint>& keypoints,
OutputArray descriptors,
bool useProvidedKeypoints = false) CV_OVERRIDE;
// Builds the Gaussian pyramid: nOctaves octaves of progressively blurred images.
void buildGaussianPyramid( const Mat& base, std::vector<Mat>& pyr, int nOctaves ) const;
// Builds the Difference-of-Gaussians (DoG) pyramid from the Gaussian pyramid.
void buildDoGPyramid( const std::vector<Mat>& pyr, std::vector<Mat>& dogpyr ) const;
// Finds extrema in DoG scale space and records them as keypoints.
void findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const std::vector<Mat>& dog_pyr,
std::vector<KeyPoint>& keypoints ) const;
protected:
CV_PROP_RW int nfeatures;            // max keypoints to retain (0 = unlimited)
CV_PROP_RW int nOctaveLayers;        // layers per octave (default 3)
CV_PROP_RW double contrastThreshold; // low-contrast rejection threshold
CV_PROP_RW double edgeThreshold;     // edge-response rejection threshold
CV_PROP_RW double sigma;             // base Gaussian sigma (default 1.6)
CV_PROP_RW int descriptor_type;      // descriptor element type (default CV_32F)
};
void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask,
std::vector<KeyPoint>& keypoints,
OutputArray _descriptors,
bool useProvidedKeypoints)
参数:
_image : 图像
_mask : 掩膜
keypoints :特征向量矩阵
_descriptors :特征点描述的输出向量(如果不需要输出,需要传cv::noArray())。
useProvidedKeypoints: 是否使用调用者提供的特征点。false,则先检测特征点再计算描述;true,跳过检测步骤,只对已提供的特征点计算图像特征描述。
void buildGaussianPyramid( const Mat& base, std::vector<Mat>& pyr, int nOctaves ) const;
void buildDoGPyramid( const std::vector<Mat>& pyr, std::vector<Mat>& dogpyr )
void findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const std::vector<Mat>& dog_pyr, std::vector<KeyPoint>& keypoints ) const;
// Detect keypoints and/or compute SIFT descriptors.
//
// _image       : input image (must have 8-bit depth).
// _mask        : optional mask (must be CV_8UC1 if non-empty).
// keypoints    : in/out keypoint vector.
// _descriptors : output descriptor matrix (pass cv::noArray() if not needed).
// useProvidedKeypoints : if true, detection is SKIPPED and descriptors are
//                        computed for the caller-supplied keypoints; if false,
//                        keypoints are detected first.
//
// Fixes vs. the previous revision: removed a stray extra closing brace that
// unbalanced the function, and corrected the inverted description of
// useProvidedKeypoints.
void SIFT_Impl::detectAndCompute(InputArray _image, InputArray _mask,
std::vector<KeyPoint>& keypoints,
OutputArray _descriptors,
bool useProvidedKeypoints)
{
CV_TRACE_FUNCTION();
int firstOctave = -1, actualNOctaves = 0, actualNLayers = 0;
Mat image = _image.getMat(), mask = _mask.getMat();
// Validate the input image.
if( image.empty() || image.depth() != CV_8U )
CV_Error( Error::StsBadArg, "image is empty or has incorrect depth (!=CV_8U)" );
// Validate the optional mask.
if( !mask.empty() && mask.type() != CV_8UC1 )
CV_Error( Error::StsBadArg, "mask has incorrect type (!=CV_8UC1)" );
// When keypoints are supplied by the caller, recover the octave range they
// span so only the needed portion of the pyramid is built.
if( useProvidedKeypoints )
{
firstOctave = 0;
int maxOctave = INT_MIN;
for( size_t i = 0; i < keypoints.size(); i++ )
{
int octave, layer;
float scale;
unpackOctave(keypoints[i], octave, layer, scale);
firstOctave = std::min(firstOctave, octave);
maxOctave = std::max(maxOctave, octave);
actualNLayers = std::max(actualNLayers, layer-2);
}
firstOctave = std::min(firstOctave, 0);
CV_Assert( firstOctave >= -1 && actualNLayers <= nOctaveLayers );
actualNOctaves = maxOctave - firstOctave + 1;
}
// [1] Prepare the base image (grayscale conversion, optional 2x upscale
// when firstOctave < 0, and initial Gaussian smoothing).
Mat base = createInitialImage(image, firstOctave < 0, (float)sigma);
std::vector<Mat> gpyr;
int nOctaves = actualNOctaves > 0 ? actualNOctaves : cvRound(std::log( (double)std::min( base.cols, base.rows ) ) / std::log(2.) - 2) - firstOctave;
// [2] Build the Gaussian pyramid.
//double t, tf = getTickFrequency();
//t = (double)getTickCount();
buildGaussianPyramid(base, gpyr, nOctaves);
//t = (double)getTickCount() - t;
//printf("pyramid construction time: %g\n", t*1000./tf);
if( !useProvidedKeypoints )
{
std::vector<Mat> dogpyr;
// [3] Build the Difference-of-Gaussians pyramid.
buildDoGPyramid(gpyr, dogpyr);
//t = (double)getTickCount();
// [4] Locate extrema in the DoG pyramid, rejecting low-contrast and
// edge-like responses; keypoint orientation is also assigned here.
findScaleSpaceExtrema(gpyr, dogpyr, keypoints);
// [5] Remove duplicated keypoints, then keep only the strongest ones.
KeyPointsFilter::removeDuplicatedSorted( keypoints );
if( nfeatures > 0 )
KeyPointsFilter::retainBest(keypoints, nfeatures);
//t = (double)getTickCount() - t;
//printf("keypoint detection time: %g\n", t*1000./tf);
// If the pyramid started below octave 0 (doubled base image), map the
// keypoints back into the original image coordinate system.
if( firstOctave < 0 )
for( size_t i = 0; i < keypoints.size(); i++ )
{
KeyPoint& kpt = keypoints[i];
float scale = 1.f/(float)(1 << -firstOctave);
kpt.octave = (kpt.octave & ~255) | ((kpt.octave + firstOctave) & 255);
kpt.pt *= scale;
kpt.size *= scale;
}
if( !mask.empty() )
KeyPointsFilter::runByPixelsMask( keypoints, mask );
}
else
{
// filter keypoints by mask
//KeyPointsFilter::runByPixelsMask( keypoints, mask );
}
// Compute descriptors only when the caller asked for them.
if( _descriptors.needed() )
{
//t = (double)getTickCount();
int dsize = descriptorSize();
_descriptors.create((int)keypoints.size(), dsize, descriptor_type);
Mat descriptors = _descriptors.getMat();
calcDescriptors(gpyr, keypoints, descriptors, nOctaveLayers, firstOctave);
//t = (double)getTickCount() - t;
//printf("descriptor extraction time: %g\n", t*1000./tf);
}
}
// [1] Prepare the base image for the pyramid: reduce to grayscale, convert
// to the working pixel type, optionally double the image size, and apply
// the initial Gaussian smoothing so the base has blur level `sigma`.
static Mat createInitialImage( const Mat& img, bool doubleImageSize, float sigma )
{
CV_TRACE_FUNCTION();
Mat gray, grayFloat;
// Color input is first collapsed to a single gray channel.
if( img.channels() == 3 || img.channels() == 4 )
{
cvtColor(img, gray, COLOR_BGR2GRAY);
gray.convertTo(grayFloat, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
}
else
img.convertTo(grayFloat, DataType<sift_wt>::type, SIFT_FIXPT_SCALE, 0);
if( doubleImageSize )
{
// Upscale by 2x; the blur already present in the input counts double
// (SIFT_INIT_SIGMA * 2) at the enlarged scale, so only the difference
// is applied here.
float sigmaDelta = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA * 4, 0.01f) );
Mat doubled;
#if DoG_TYPE_SHORT
resize(grayFloat, doubled, Size(grayFloat.cols*2, grayFloat.rows*2), 0, 0, INTER_LINEAR_EXACT);
#else
// Enlarge with bilinear interpolation.
resize(grayFloat, doubled, Size(grayFloat.cols*2, grayFloat.rows*2), 0, 0, INTER_LINEAR);
#endif
Mat smoothed;
GaussianBlur(doubled, smoothed, Size(), sigmaDelta, sigmaDelta);
return smoothed;
}
// No upscaling: just bring the smoothing level up from SIFT_INIT_SIGMA
// to `sigma` and return.
float sigmaDelta = sqrtf( std::max(sigma * sigma - SIFT_INIT_SIGMA * SIFT_INIT_SIGMA, 0.01f) );
Mat smoothed;
GaussianBlur(grayFloat, smoothed, Size(), sigmaDelta, sigmaDelta);
return smoothed;
}
//[2] 构建金字塔(金字塔层数和组数相等)
//构建高斯金字塔,结果保存在vectorpyr中
void SIFT_Impl::buildGaussianPyramid( const Mat& base, std::vector<Mat>& pyr, int nOctaves ) const
{
CV_TRACE_FUNCTION();
std::vector<double> sig(nOctaveLayers + 3);
//定义高斯金子塔的总层数
pyr.resize(nOctaves*(nOctaveLayers + 3));
// precompute Gaussian sigmas using the following formula:
// \sigma_{total}^2 = \sigma_{i}^2 + \sigma_{i-1}^2
sig[0] = sigma;
double k = std::pow( 2., 1. / nOctaveLayers );
for( int i = 1; i < nOctaveLayers + 3; i++ )
{
//前一层得sigma值
double sig_prev = std::pow(k, (double)(i-1))*sigma;
//当前层sigma值
double sig_total = sig_prev*k;
//高斯函数得标准差
sig[i] = std::sqrt(sig_total*sig_total - sig_prev*sig_prev);
}
//遍历高斯金字塔得所有层建立高斯金字塔
for( int o = 0; o < nOctaves; o++ )
{
for( int i = 0; i < nOctaveLayers + 3; i++ )
{
Mat& dst = pyr[o*(nOctaveLayers + 3) + i];
if( o == 0 && i == 0 )
dst = base;
// base of new octave is halved image from end of previous octave
else if( i == 0 )
{
const Mat& src = pyr[(o-1)*(nOctaveLayers + 3) + nOctaveLayers];
resize(src, dst, Size(src.cols/2, src.rows/2),
0, 0, INTER_NEAREST);
}
else
{
//其他情况提取当前层得前一层作高斯平滑处理
const Mat& src = pyr[o*(nOctaveLayers + 3) + i-1];
GaussianBlur(src, dst, Size(), sig[i], sig[i]);
}
}
}
}
//[3] 构建高斯差分金字塔
//创建一个 buildDoGPyramidComputer类,用来实现DOG金字塔得计算;构建DOG金字塔,直接两幅不同得高斯模糊图像的相减值
void SIFT_Impl::buildDoGPyramid( const std::vector<Mat>& gpyr, std::vector<Mat>& dogpyr ) const
{
CV_TRACE_FUNCTION();
int nOctaves = (int)gpyr.size()/(nOctaveLayers + 3);
dogpyr.resize( nOctaves*(nOctaveLayers + 2) );
parallel_for_(Range(0, nOctaves * (nOctaveLayers + 2)), buildDoGPyramidComputer(nOctaveLayers, gpyr, dogpyr));
}
// Detects features at extrema in DoG scale space. Bad features are discarded
// based on contrast and ratio of principal curvatures.
// [4] The per-row scan of each DoG image is parallelized; each worker thread
// accumulates keypoints into thread-local storage, gathered at the end.
void SIFT_Impl::findScaleSpaceExtrema( const std::vector<Mat>& gauss_pyr, const std::vector<Mat>& dog_pyr,
std::vector<KeyPoint>& keypoints ) const
{
CV_TRACE_FUNCTION();
const int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3);
// Pixel-value threshold for candidate extrema, scaled to the image range.
const int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE);
keypoints.clear();
// Per-thread keypoint accumulators — avoids locking inside parallel_for_.
TLSDataAccumulator<std::vector<KeyPoint> > tls_kpts_struct;
// Only the inner layers 1..nOctaveLayers of each octave can hold extrema
// (each needs a layer above and below for the 3x3x3 comparison).
for( int o = 0; o < nOctaves; o++ )
for( int i = 1; i <= nOctaveLayers; i++ )
{
const int idx = o*(nOctaveLayers+2)+i;
const Mat& img = dog_pyr[idx]; // current DoG image in the pyramid
const int step = (int)img.step1();
const int rows = img.rows, cols = img.cols;
// Scan rows inside the image border in parallel.
parallel_for_(Range(SIFT_IMG_BORDER, rows-SIFT_IMG_BORDER),
findScaleSpaceExtremaComputer(
o, i, threshold, idx, step, cols,
nOctaveLayers,
contrastThreshold,
edgeThreshold,
sigma,
gauss_pyr, dog_pyr, tls_kpts_struct));
}
// Merge the per-thread keypoint vectors into the output.
std::vector<std::vector<KeyPoint>*> kpt_vecs;
tls_kpts_struct.gather(kpt_vecs);
for (size_t i = 0; i < kpt_vecs.size(); ++i) {
keypoints.insert(keypoints.end(), kpt_vecs[i]->begin(), kpt_vecs[i]->end());
}
}
参考:
【OpenCV】SIFT原理与源码分析
Opencv2.4.9源码分析——Stitching(一)