Harris兴趣点检测器是一种产生平移旋转不变结果的算法。当噪声和离散化的影响可以忽略时,Harris检测器作用于经过旋转或平移的图像,其输出是一组相应旋转或平移后的点集。然而,如果图像被缩放或经过射影变换,则Harris检测器的输出也会剧烈变化。最大稳定极值区域(Maximally Stable Extremal Regions,MSERs)是一种图像结构,不仅是在平移和旋转后,即便是经历相似变换和仿射变换,它仍可被重复检测出来。
MSER检测过程:考虑一幅灰度图 I 的所有可能阈值,比如通用的范围 S = [0, 1, 2, ..., 255]。低于阈值的像素称为黑色像素,高于阈值的则称为白色像素。如果把阈值化图像 I_t 依次排成一段影像,其中第 t 帧对应于阈值 t,那么首帧为一幅白色图像。随后,对应于局部强度极小值的黑色点开始出现并增长。在某一点处,对应于两个局部极小值的区域将合并。最后一帧图像将为全黑。在很多图像中,我们可以观察到,某些区域在很大的阈值范围内,其局部二值化结果都很稳定。概括起来,MSER检测算法为:
算法:遍历极值区域 输入:图像 I 输出:嵌套的极值区域列表 1. 对按强度排序的所有像素: 1)在图像中放置像素 2)更新连通分量结构 3)更新影响到的连通分量的面积 2. 对所有连通区域: 连通分量面积的变化率局部极小值即为稳定的阈值
该过程产生了一个数据结构,其中存储了每个连通分量的面积(作为阈值的函数)。两个分量的合并可视为较小分量的消亡,然后小分量的所有像素全部加入大的分量。最后,面积函数变化率取局部极小值时的强度级别,即被选作阈值。在输出中,每个MSER都由一个局部强度极小值和一个阈值来表示。此算法的结构和一种高效的分水岭算法是一样的。然而,两种算法的输出结构不同。在分水岭计算中,着重点在于区域合并以及分水岭盆接触时的阈值。这种阈值非常不稳定,在一次合并后,区域的面积会突然变化;而在MSER检测中,要找的是这样一组阈值,使得分水岭盆的面积有效地保持不变。
参考代码贴出相关部分的MSER.H头文件和MSER.CPP文件。如下代码所示(仅作参考):
#ifndef _MSER_H_
#define _MSER_H_

#include <cstddef>
#include <vector>

#include <stdint.h>

// ......... (part of the original listing was omitted here)

// Detector of Maximally Stable Extremal Regions in an 8-bit greyscale image.
//
// An instance stores the detection parameters and a reusable pool of Region
// objects; calling the instance on an image appends the detected regions to a
// caller-supplied vector.
class MSER
{
public:
    // A Maximally Stable Extremal Region.
    struct Region
    {
        int level_;          // Grey level the region was created with (defaults to 256).
        int pixel_;          // Linear index (y * width + x) of the pixel the region was created at.
        int area_;           // Region area -- presumably the pixel count; maintained by accumulate()/merge().
        double moments_[5];  // Accumulated moments -- presumably x, y, x^2, xy, y^2; TODO confirm in accumulate().
        double variation_;   // Variation measure filled in during detection (definition not shown here).

        // Creates a region for grey level `level` rooted at linear pixel index `pixel`.
        // The default level of 256 lies above every value an 8-bit image can hold.
        Region(int level = 256, int pixel = 0);

    private:
        bool stable_;      // Stability flag (set by code not shown in this listing).
        Region * parent_;  // Links between regions; they point into MSER's
        Region * child_;   // region pool and are rebased by MSER::doublePool()
        Region * next_;    // whenever the pool is reallocated.

        // Adds pixel (x, y) to the region.
        void accumulate(int x, int y);
        // Merges `child` into this region.
        void merge(Region * child);
        // Runs the detection on the finished region tree and appends results to `regions`.
        void detect(int delta, int minArea, int maxArea, double maxVariation,
                    double minDiversity, std::vector<Region> & regions);
        void process(int delta, int minArea, int maxArea, double maxVariation);
        bool check(double variation, int area) const;
        void save(double minDiversity, std::vector<Region> & regions);

        friend class MSER;
    };

    // Stores the detection parameters. The constructor asserts:
    //   delta > 0, 0 <= minArea < maxArea <= 1, maxVariation > 0, 0 <= minDiversity < 1.
    // minArea and maxArea are fractions of the image area (they are multiplied
    // by width * height before detection). `eight` selects 8-connected instead
    // of 4-connected pixel neighbourhoods.
    MSER(int delta = 2, double minArea = 0.0001, double maxArea = 0.5,
         double maxVariation = 0.5, double minDiversity = 0.33, bool eight = false);

    // Detects the regions of a width x height image stored row by row with one
    // byte per pixel in `bits`, and appends them to `regions`.
    void operator()(const uint8_t * bits, int width, int height,
                    std::vector<Region> & regions);

    // ......... (part of the original listing was omitted here)

private:
    // Merges the components on top of `regionStack` until the stack is
    // consistent with a new current grey level `newPixelGreyLevel` at `pixel`.
    void processStack(int newPixelGreyLevel, int pixel, std::vector<Region *> & regionStack);

    // Doubles the size of the region pool, rebasing every pointer into it
    // (including those in `regionStack`); returns the byte offset between the
    // new and the old storage.
    std::ptrdiff_t doublePool(std::vector<Region *> & regionStack);

    // Parameters
    int delta_;
    double minArea_;
    double maxArea_;
    double maxVariation_;
    double minDiversity_;
    bool eight_;

    // Memory pool of regions for faster allocation
    std::vector<Region> pool_;
    std::size_t poolIndex_;

    // ......... (part of the original listing was omitted here)
};

#endif //_MSER_H_
#include "mser.h"

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <limits>
#include <new>
#include <vector>

// Stores the detector parameters and pre-allocates a pool of 256 regions.
// The initializers are listed in member declaration order, which is the order
// in which they actually run.
MSER::MSER(int delta, double minArea, double maxArea, double maxVariation,
           double minDiversity, bool eight)
    : delta_(delta), minArea_(minArea), maxArea_(maxArea), maxVariation_(maxVariation),
      minDiversity_(minDiversity), eight_(eight), pool_(256), poolIndex_(0)
{
    // Parameter check
    assert(delta > 0);
    assert(minArea >= 0.0);
    assert(maxArea <= 1.0);
    assert(minArea < maxArea);
    assert(maxVariation > 0.0);
    assert(minDiversity >= 0.0);
    assert(minDiversity < 1.0);
}

// Detects the regions of a width x height image and appends them to `regions`.
//
// `bits` holds one byte per pixel, row by row (pixel index = y * width + x).
// The image is flooded starting from pixel 0: the walk always descends to a
// darker unvisited neighbour first, parks brighter neighbours in per-grey-level
// stacks, and merges finished components on `regionStack`. When no parked
// pixel is left, detection runs on the last component and the pool is reset.
// An empty image or null buffer leaves `regions` untouched.
void MSER::operator()(const uint8_t * bits, int width, int height,
                      std::vector<Region> & regions)
{
    // Nothing to explore; this also protects the bits[0] read below.
    if (!bits || width <= 0 || height <= 0)
        return;

    // 1. Visited mask, one stack of pending boundary pixels per grey level,
    // and the component stack. `priority` is the lowest grey level that has a
    // pending boundary pixel (256 = none). A dummy component with level 256
    // (the Region default) sits at the bottom of the stack.
    std::vector<bool> accessible(width * height);
    std::vector<int> boundaryPixels[256];
    int priority = 256;
    std::vector<Region *> regionStack;

    regionStack.push_back(new (&pool_[poolIndex_++]) Region);

    // 2. Start from pixel 0 with its first edge.
    int curPixel = 0;
    int curEdge = 0;
    int curLevel = bits[0];

    accessible[0] = true;

    // 3. Push an empty component for the current level onto the stack.
step_3:
    regionStack.push_back(new (&pool_[poolIndex_++]) Region(curLevel, curPixel));

    if (poolIndex_ == pool_.size())
        doublePool(regionStack);

    for (;;) {
        const int x = curPixel % width;
        const int y = curPixel / width;
        const int edgeCount = eight_ ? 8 : 4;

        // 4. Explore the remaining edges of the current pixel.
        for (; curEdge < edgeCount; ++curEdge) {
            // Stays equal to curPixel when the neighbour lies outside the image.
            int neighborPixel = curPixel;

            if (eight_) {
                switch (curEdge) {
                    case 0: if (x < width - 1) neighborPixel = curPixel + 1; break;
                    case 1: if ((x < width - 1) && (y > 0)) neighborPixel = curPixel - width + 1; break;
                    case 2: if (y > 0) neighborPixel = curPixel - width; break;
                    case 3: if ((x > 0) && (y > 0)) neighborPixel = curPixel - width - 1; break;
                    case 4: if (x > 0) neighborPixel = curPixel - 1; break;
                    case 5: if ((x > 0) && (y < height - 1)) neighborPixel = curPixel + width - 1; break;
                    case 6: if (y < height - 1) neighborPixel = curPixel + width; break;
                    default: if ((x < width - 1) && (y < height - 1)) neighborPixel = curPixel + width + 1; break;
                }
            }
            else {
                switch (curEdge) {
                    case 0: if (x < width - 1) neighborPixel = curPixel + 1; break;
                    case 1: if (y < height - 1) neighborPixel = curPixel + width; break;
                    case 2: if (x > 0) neighborPixel = curPixel - 1; break;
                    default: if (y > 0) neighborPixel = curPixel - width; break;
                }
            }

            if (neighborPixel != curPixel && !accessible[neighborPixel]) {
                const int neighborLevel = bits[neighborPixel];

                accessible[neighborPixel] = true;

                if (neighborLevel >= curLevel) {
                    // Not darker: park it for later. The low 4 bits hold the
                    // edge to resume from (0 here), the rest the pixel index.
                    boundaryPixels[neighborLevel].push_back(neighborPixel << 4);

                    if (neighborLevel < priority)
                        priority = neighborLevel;
                }
                else {
                    // Darker: park the current pixel together with its next
                    // edge, then descend into the neighbour with a fresh
                    // component.
                    boundaryPixels[curLevel].push_back((curPixel << 4) | (curEdge + 1));

                    if (curLevel < priority)
                        priority = curLevel;

                    curPixel = neighborPixel;
                    curEdge = 0;
                    curLevel = neighborLevel;

                    goto step_3;
                }
            }
        }

        // 5. All edges explored: add the pixel to the component on top of the stack.
        regionStack.back()->accumulate(x, y);

        // 6. No pending boundary pixel left: the whole image has been
        // processed, so run the detection and release the pool for the next call.
        if (priority == 256) {
            regionStack.back()->detect(delta_, static_cast<int>(minArea_ * width * height),
                                       static_cast<int>(maxArea_ * width * height),
                                       maxVariation_, minDiversity_, regions);
            poolIndex_ = 0;
            return;
        }

        // Otherwise resume with the pending pixel of lowest grey level.
        curPixel = boundaryPixels[priority].back() >> 4;
        curEdge = boundaryPixels[priority].back() & 15;

        boundaryPixels[priority].pop_back();

        // The bound is tested first so that boundaryPixels[256] is never read.
        while ((priority < 256) && boundaryPixels[priority].empty())
            ++priority;

        // 7. The grey level changed: merge the finished components.
        const int newPixelGreyLevel = bits[curPixel];

        if (newPixelGreyLevel != curLevel) {
            curLevel = newPixelGreyLevel;
            processStack(newPixelGreyLevel, curPixel, regionStack);
        }
    }
}

// Pops the component on top of `regionStack` and merges it into the one below,
// repeating while `newPixelGreyLevel` is above the level of the new top. If
// `newPixelGreyLevel` is below the level of the component underneath, a new
// component for that level (rooted at `pixel`) is pushed instead and the
// popped component is merged into it.
void MSER::processStack(int newPixelGreyLevel, int pixel, std::vector<Region *> & regionStack)
{
    do {
        Region * top = regionStack.back();

        regionStack.pop_back();

        if (newPixelGreyLevel < regionStack.back()->level_) {
            regionStack.push_back(new (&pool_[poolIndex_++]) Region(newPixelGreyLevel, pixel));

            // `top` is no longer on the stack, so doublePool() cannot rebase
            // it; apply the returned offset by hand.
            if (poolIndex_ == pool_.size())
                top = reinterpret_cast<Region *>(reinterpret_cast<char *>(top) +
                                                 doublePool(regionStack));

            regionStack.back()->merge(top);

            return;
        }

        regionStack.back()->merge(top);
    }
    while (newPixelGreyLevel > regionStack.back()->level_);
}

// Replaces the region pool by one twice as large. The existing regions are
// copied over, and every pointer into the old storage (the parent_/child_/next_
// links and the entries of `regionStack`) is shifted by the distance between
// the two buffers. Returns that distance in bytes so that callers can rebase
// pointers they hold themselves.
std::ptrdiff_t MSER::doublePool(std::vector<Region *> & regionStack)
{
    assert(!pool_.empty()); // Cannot double the size otherwise

    std::vector<Region> newPool(pool_.size() * 2);

    std::copy(pool_.begin(), pool_.end(), newPool.begin());

    const std::ptrdiff_t offset = reinterpret_cast<char *>(&newPool[0]) -
                                  reinterpret_cast<char *>(&pool_[0]);

    // Only the first pool_.size() entries were copied and can hold links.
    for (std::size_t i = 0; i < pool_.size(); ++i) {
        if (newPool[i].parent_)
            newPool[i].parent_ = reinterpret_cast<Region *>(
                reinterpret_cast<char *>(newPool[i].parent_) + offset);

        if (newPool[i].child_)
            newPool[i].child_ = reinterpret_cast<Region *>(
                reinterpret_cast<char *>(newPool[i].child_) + offset);

        if (newPool[i].next_)
            newPool[i].next_ = reinterpret_cast<Region *>(
                reinterpret_cast<char *>(newPool[i].next_) + offset);
    }

    for (std::size_t i = 0; i < regionStack.size(); ++i)
        regionStack[i] = reinterpret_cast<Region *>(
            reinterpret_cast<char *>(regionStack[i]) + offset);

    pool_.swap(newPool);

    return offset;
}

// ......... (rest of the file was omitted in the original listing)
import numpy as np
import cv2
import video

# Demo: run the MSER detector on every frame of a video source and draw the
# convex hull of each detected region.
if __name__ == '__main__':
    import sys

    # The first command-line argument selects the video source; without one,
    # source 0 is used (presumably the default camera -- see video.create_capture).
    try:
        video_src = sys.argv[1]
    except IndexError:
        video_src = 0

    cam = video.create_capture(video_src)
    mser = cv2.MSER()

    while True:
        ret, img = cam.read()
        # Stop when no frame could be read (end of stream or capture failure)
        # instead of passing an invalid image on to cvtColor.
        if not ret:
            break

        gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        vis = img.copy()

        regions = mser.detect(gray, None)
        # Each region is reshaped to the (N, 1, 2) point layout before taking its hull.
        hulls = [cv2.convexHull(p.reshape(-1, 1, 2)) for p in regions]
        cv2.polylines(vis, hulls, 1, (0, 255, 0))

        cv2.imshow('img', vis)
        # Key code 27 (Esc) quits.
        if (0xFF & cv2.waitKey(5)) == 27:
            break

    cv2.destroyAllWindows()
对输出的视频图像进行测试分析可得:MSER检测也与阈值化有关,每个极值区域都是阈值化图像的一个连通分量。然而,并不需要全局或"最优"的阈值:所有的阈值都会被测试,连通分量的稳定性都会经过评估。最终,分水岭是输入图像的划分,而如果图像的某些部分存在多个稳定的阈值,MSER就可能是嵌套的。
[1] J. Matas, O. Chum, M. Urban, and T. Pajdla."Robust wide baseline stereo from maximally stable extremal regions." Proc. of British Machine Vision Conference, pages 384-396, 2002.
[2] Forssen, P-E. and Lowe, D.G."Shape Descriptors for Maximally Stable Extremal Regions" ICCV, 2007.
[3] Donoser, M. and Bischof, H."Efficient Maximally Stable Extremal Region (MSER) Tracking ",CVPR, 2006.
关于Image Engineering & Computer Vision的更多讨论与交流,敬请关注本博和新浪微博songzi_tea.