由前一步《DoG尺度空间构造》,我们得到了DoG高斯差分金字塔:
如上图的金字塔,高斯尺度空间金字塔中每组有五层不同尺度图像,相邻两层相减得到四层DoG结果。关键点搜索就在这四层DoG图像上寻找局部极值点。
寻找DoG极值点时,每一个像素点和它所有的相邻点比较,当其大于(或小于)它的图像域和尺度域的所有相邻点时,即为极值点。如下图所示,比较的范围是个3×3的立方体:中间的检测点和它同尺度的8个相邻点,以及和上下相邻尺度对应的9×2个点——共26个点比较,以确保在尺度空间和二维图像空间都检测到极值点。
在一组中,搜索从每组的第二层开始,以第二层为当前层,第一层和第三层分别作为立方体的的上下层;搜索完成后再以第三层为当前层做同样的搜索。所以每层的点搜索两次。通常我们将组Octaves索引以-1开始,则在比较时牺牲了-1组的第0层和第N组的最高层
高斯金字塔,DoG图像及极值计算的相互关系如上图所示。
// Detects features at extrema in DoG scale space. Bad features are discarded // based on contrast and ratio of principal curvatures. // 在DoG尺度空间寻特征点(极值点) void SIFT::findScaleSpaceExtrema( const vector<Mat>& gauss_pyr, const vector<Mat>& dog_pyr, vector<KeyPoint>& keypoints ) const { int nOctaves = (int)gauss_pyr.size()/(nOctaveLayers + 3); // The contrast threshold used to filter out weak features in semi-uniform // (low-contrast) regions. The larger the threshold, the less features are produced by the detector. // 过滤掉弱特征的阈值 contrastThreshold默认为0.04 int threshold = cvFloor(0.5 * contrastThreshold / nOctaveLayers * 255 * SIFT_FIXPT_SCALE); const int n = SIFT_ORI_HIST_BINS; //36 float hist[n]; KeyPoint kpt; keypoints.clear(); for( int o = 0; o < nOctaves; o++ ) for( int i = 1; i <= nOctaveLayers; i++ ) { int idx = o*(nOctaveLayers+2)+i; const Mat& img = dog_pyr[idx]; const Mat& prev = dog_pyr[idx-1]; const Mat& next = dog_pyr[idx+1]; int step = (int)img.step1(); int rows = img.rows, cols = img.cols; for( int r = SIFT_IMG_BORDER; r < rows-SIFT_IMG_BORDER; r++) { const short* currptr = img.ptr<short>(r); const short* prevptr = prev.ptr<short>(r); const short* nextptr = next.ptr<short>(r); for( int c = SIFT_IMG_BORDER; c < cols-SIFT_IMG_BORDER; c++) { int val = currptr[c]; // find local extrema with pixel accuracy // 寻找局部极值点,DoG中每个点与其所在的立方体周围的26个点比较 // if (val比所有都大 或者 val比所有都小) if( std::abs(val) > threshold && ((val > 0 && val >= currptr[c-1] && val >= currptr[c+1] && val >= currptr[c-step-1] && val >= currptr[c-step] && val >= currptr[c-step+1] && val >= currptr[c+step-1] && val >= currptr[c+step] && val >= currptr[c+step+1] && val >= nextptr[c] && val >= nextptr[c-1] && val >= nextptr[c+1] && val >= nextptr[c-step-1] && val >= nextptr[c-step] && val >= nextptr[c-step+1] && val >= nextptr[c+step-1] && val >= nextptr[c+step] && val >= nextptr[c+step+1] && val >= prevptr[c] && val >= prevptr[c-1] && val >= prevptr[c+1] && val >= prevptr[c-step-1] && val >= prevptr[c-step] && val >= prevptr[c-step+1] && val >= prevptr[c+step-1] && val >= prevptr[c+step] && val >= prevptr[c+step+1]) || (val < 0 && val <= currptr[c-1] && val <= currptr[c+1] && val <= currptr[c-step-1] && val <= currptr[c-step] && val <= currptr[c-step+1] && val <= currptr[c+step-1] && val <= currptr[c+step] && val <= currptr[c+step+1] && val <= nextptr[c] && val <= nextptr[c-1] && val <= nextptr[c+1] && val <= nextptr[c-step-1] && val <= nextptr[c-step] && val <= nextptr[c-step+1] && val <= nextptr[c+step-1] && val <= nextptr[c+step] && val <= nextptr[c+step+1] && val <= prevptr[c] && val <= prevptr[c-1] && val <= prevptr[c+1] && val <= prevptr[c-step-1] && val <= prevptr[c-step] && val <= prevptr[c-step+1] && val <= prevptr[c+step-1] && val <= prevptr[c+step] && val <= prevptr[c+step+1]))) { int r1 = r, c1 = c, layer = i; // 关键点精确定位 if( !adjustLocalExtrema(dog_pyr, kpt, o, layer, r1, c1, nOctaveLayers, (float)contrastThreshold, (float)edgeThreshold, (float)sigma) ) continue; float scl_octv = kpt.size*0.5f/(1 << o); // 计算梯度直方图 float omax = calcOrientationHist( gauss_pyr[o*(nOctaveLayers+3) + layer], Point(c1, r1), cvRound(SIFT_ORI_RADIUS * scl_octv), SIFT_ORI_SIG_FCTR * scl_octv, hist, n); float mag_thr = (float)(omax * SIFT_ORI_PEAK_RATIO); for( int j = 0; j < n; j++ ) { int l = j > 0 ? j - 1 : n - 1; int r2 = j < n-1 ? j + 1 : 0; if( hist[j] > hist[l] && hist[j] > hist[r2] && hist[j] >= mag_thr ) { float bin = j + 0.5f * (hist[l]-hist[r2]) / (hist[l] - 2*hist[j] + hist[r2]); bin = bin < 0 ? n + bin : bin >= n ? bin - n : bin; kpt.angle = (float)((360.f/n) * bin); keypoints.push_back(kpt); } } } } } } }
至此,SIFT第二步就完成了。参见《SIFT原理与源码分析》