之前看到过C版本的,感觉写的很长,没有仔细看,但是C++版本的写的还是很不错的。我仔细看了一下,并对内容进行了仔细的注释,如果有人没有看懂,欢迎留言讨论。
先看一眼头文件,在background_segm.hpp中
class CV_EXPORTS_W BackgroundSubtractorMOG : public BackgroundSubtractor { public: //! the default constructor CV_WRAP BackgroundSubtractorMOG(); //! the full constructor that takes the length of the history, the number of gaussian mixtures, the background ratio parameter and the noise strength CV_WRAP BackgroundSubtractorMOG(int history, int nmixtures, double backgroundRatio, double noiseSigma=0); //! the destructor virtual ~BackgroundSubtractorMOG(); //! the update operator virtual void operator()(InputArray image, OutputArray fgmask, double learningRate=0); //! re-initiaization method virtual void initialize(Size frameSize, int frameType); virtual AlgorithmInfo* info() const; protected: Size frameSize; int frameType; Mat bgmodel; int nframes; int history;//利用历史帧数计算学习速率,不是主要参数 int nmixtures;//高斯模型的个数 double varThreshold;//方差门限 double backgroundRatio;//背景门限 double noiseSigma;//噪声方差 };
static const int defaultNMixtures = 5;//默认混合模型个数 static const int defaultHistory = 200;//默认历史帧数 static const double defaultBackgroundRatio = 0.7;//默认背景门限 static const double defaultVarThreshold = 2.5*2.5;//默认方差门限 static const double defaultNoiseSigma = 30*0.5;//默认噪声方差 static const double defaultInitialWeight = 0.05;//默认初始权值 //不带参数的构造函数,使用默认值 BackgroundSubtractorMOG::BackgroundSubtractorMOG() { frameSize = Size(0,0); frameType = 0; nframes = 0; nmixtures = defaultNMixtures; history = defaultHistory; varThreshold = defaultVarThreshold; backgroundRatio = defaultBackgroundRatio; noiseSigma = defaultNoiseSigma; } //带参数的构造函数,使用参数传进来的值 BackgroundSubtractorMOG::BackgroundSubtractorMOG(int _history, int _nmixtures, double _backgroundRatio, double _noiseSigma) { frameSize = Size(0,0); frameType = 0; nframes = 0; nmixtures = min(_nmixtures > 0 ? _nmixtures : defaultNMixtures, 8);//不能超过8个,否则就用默认的 history = _history > 0 ? _history : defaultHistory;//不能小于0,否则就用默认的 varThreshold = defaultVarThreshold;//门限使用默认的 backgroundRatio = min(_backgroundRatio > 0 ? _backgroundRatio : 0.95, 1.);//背景门限必须大于0,小于1,否则使用0.95 noiseSigma = _noiseSigma <= 0 ? defaultNoiseSigma : _noiseSigma;//噪声门限大于0 } BackgroundSubtractorMOG::~BackgroundSubtractorMOG() { } void BackgroundSubtractorMOG::initialize(Size _frameSize, int _frameType) { frameSize = _frameSize; frameType = _frameType; nframes = 0; int nchannels = CV_MAT_CN(frameType); CV_Assert( CV_MAT_DEPTH(frameType) == CV_8U ); // for each gaussian mixture of each pixel bg model we store ... // the mixture sort key (w/sum_of_variances), the mixture weight (w), // the mean (nchannels values) and // the diagonal covariance matrix (another nchannels values) bgmodel.create( 1, frameSize.height*frameSize.width*nmixtures*(2 + 2*nchannels), CV_32F );//初始化一个1行*XX列的矩阵 //XX是这样计算的:图像的行*列*混合模型的个数*(1(优先级) + 1(权值) + 2(均值 + 方差) * 通道数) bgmodel = Scalar::all(0);//清零 } //设为模版,就是考虑到了彩色图像与灰度图像两种情况 template<typename VT> struct MixData { float sortKey; float weight; VT mean; VT var; }; static void process8uC1( const Mat& image, Mat& fgmask, double learningRate, Mat& bgmodel, int nmixtures, double backgroundRatio, double varThreshold, double noiseSigma ) { int x, y, k, k1, rows = image.rows, cols = image.cols; float alpha = (float)learningRate, T = (float)backgroundRatio, vT = (float)varThreshold;//学习速率、背景门限、方差门限 int K = nmixtures;//混合模型个数 MixData<float>* mptr = (MixData<float>*)bgmodel.data; const float w0 = (float)defaultInitialWeight;//初始权值 const float sk0 = (float)(w0/(defaultNoiseSigma*2));//初始优先级 const float var0 = (float)(defaultNoiseSigma*defaultNoiseSigma*4);//初始方差 const float minVar = (float)(noiseSigma*noiseSigma);//最小方差 for( y = 0; y < rows; y++ ) { const uchar* src = image.ptr<uchar>(y); uchar* dst = fgmask.ptr<uchar>(y); if( alpha > 0 )//如果学习速率为0,则退化为背景相减 { for( x = 0; x < cols; x++, mptr += K ) { float wsum = 0; float pix = src[x];//每个像素 int kHit = -1, kForeground = -1;//是否属于模型,是否属于前景 for( k = 0; k < K; k++ )//每个高斯模型 { float w = mptr[k].weight;//当前模型的权值 wsum += w;//权值累加 if( w < FLT_EPSILON ) break; float mu = mptr[k].mean;//当前模型的均值 float var = mptr[k].var;//当前模型的方差 float diff = pix - mu;//当前像素与模型均值之差 float d2 = diff*diff;//平方 if( d2 < vT*var )//是否小于方门限,即是否属于该模型 { wsum -= w;//如果匹配,则把它减去,因为之后会更新它 float dw = alpha*(1.f - w); mptr[k].weight = w + dw;//增加权值 //注意源文章中涉及概率的部分多进行了简化,将概率变为1 mptr[k].mean = mu + alpha*diff;//修正均值 var = max(var + alpha*(d2 - var), minVar);//开始时方差清零0,所以这里使用噪声方差作为默认方差,否则使用上一次方差 mptr[k].var = var;//修正方差 mptr[k].sortKey = w/sqrt(var);//重新计算优先级,貌似这里不对,应该使用更新后的mptr[k].weight而不是w for( k1 = k-1; k1 >= 0; k1-- )//从匹配的第k个模型开始向前比较,如果更新后的单高斯模型优先级超过他前面的那个,则交换顺序 { if( mptr[k1].sortKey >= mptr[k1+1].sortKey )//如果优先级没有发生改变,则停止比较 break; std::swap( mptr[k1], mptr[k1+1] );//交换它们的顺序,始终保证优先级最大的在前面 } kHit = k1+1;//记录属于哪个模型 break; } } if( kHit < 0 ) // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one //当前像素不属于任何一个模型 { //初始化一个新模型 kHit = k = min(k, K-1);//有两种情况,当最开始的初始化时,k并不是等于K-1的 wsum += w0 - mptr[k].weight;//从权值总和中减去原来的那个模型,并加上默认权值 mptr[k].weight = w0;//初始化权值 mptr[k].mean = pix;//初始化均值 mptr[k].var = var0; //初始化方差 mptr[k].sortKey = sk0;//初始化权值 } else for( ; k < K; k++ ) wsum += mptr[k].weight;//求出剩下的总权值 float wscale = 1.f/wsum;//归一化 wsum = 0; for( k = 0; k < K; k++ ) { wsum += mptr[k].weight *= wscale; mptr[k].sortKey *= wscale;//计算归一化权值 if( wsum > T && kForeground < 0 ) kForeground = k+1;//第几个模型之后就判为前景了 } dst[x] = (uchar)(-(kHit >= kForeground));//判决:(ucahr)(-true) = 255;(uchar)(-(false)) = 0; } } else//如果学习速率小于等于0,则没有背景更新过程,其他过程类似 { for( x = 0; x < cols; x++, mptr += K ) { float pix = src[x]; int kHit = -1, kForeground = -1; for( k = 0; k < K; k++ ) { if( mptr[k].weight < FLT_EPSILON ) break; float mu = mptr[k].mean; float var = mptr[k].var; float diff = pix - mu; float d2 = diff*diff; if( d2 < vT*var ) { kHit = k; break; } } if( kHit >= 0 ) { float wsum = 0; for( k = 0; k < K; k++ ) { wsum += mptr[k].weight; if( wsum > T ) { kForeground = k+1; break; } } } dst[x] = (uchar)(kHit < 0 || kHit >= kForeground ? 255 : 0); } } } } static void process8uC3( const Mat& image, Mat& fgmask, double learningRate, Mat& bgmodel, int nmixtures, double backgroundRatio, double varThreshold, double noiseSigma ) { int x, y, k, k1, rows = image.rows, cols = image.cols; float alpha = (float)learningRate, T = (float)backgroundRatio, vT = (float)varThreshold; int K = nmixtures; const float w0 = (float)defaultInitialWeight; const float sk0 = (float)(w0/(defaultNoiseSigma*2*sqrt(3.))); const float var0 = (float)(defaultNoiseSigma*defaultNoiseSigma*4); const float minVar = (float)(noiseSigma*noiseSigma); MixData<Vec3f>* mptr = (MixData<Vec3f>*)bgmodel.data; for( y = 0; y < rows; y++ ) { const uchar* src = image.ptr<uchar>(y); uchar* dst = fgmask.ptr<uchar>(y); if( alpha > 0 ) { for( x = 0; x < cols; x++, mptr += K ) { float wsum = 0; Vec3f pix(src[x*3], src[x*3+1], src[x*3+2]); int kHit = -1, kForeground = -1; for( k = 0; k < K; k++ ) { float w = mptr[k].weight; wsum += w; if( w < FLT_EPSILON ) break; Vec3f mu = mptr[k].mean; Vec3f var = mptr[k].var; Vec3f diff = pix - mu; float d2 = diff.dot(diff); if( d2 < vT*(var[0] + var[1] + var[2]) ) { wsum -= w; float dw = alpha*(1.f - w); mptr[k].weight = w + dw; mptr[k].mean = mu + alpha*diff; var = Vec3f(max(var[0] + alpha*(diff[0]*diff[0] - var[0]), minVar), max(var[1] + alpha*(diff[1]*diff[1] - var[1]), minVar), max(var[2] + alpha*(diff[2]*diff[2] - var[2]), minVar)); mptr[k].var = var; mptr[k].sortKey = w/sqrt(var[0] + var[1] + var[2]); for( k1 = k-1; k1 >= 0; k1-- ) { if( mptr[k1].sortKey >= mptr[k1+1].sortKey ) break; std::swap( mptr[k1], mptr[k1+1] ); } kHit = k1+1; break; } } if( kHit < 0 ) // no appropriate gaussian mixture found at all, remove the weakest mixture and create a new one { kHit = k = min(k, K-1); wsum += w0 - mptr[k].weight; mptr[k].weight = w0; mptr[k].mean = pix; mptr[k].var = Vec3f(var0, var0, var0); mptr[k].sortKey = sk0; } else for( ; k < K; k++ ) wsum += mptr[k].weight; float wscale = 1.f/wsum; wsum = 0; for( k = 0; k < K; k++ ) { wsum += mptr[k].weight *= wscale; mptr[k].sortKey *= wscale; if( wsum > T && kForeground < 0 ) kForeground = k+1; } dst[x] = (uchar)(-(kHit >= kForeground)); } } else { for( x = 0; x < cols; x++, mptr += K ) { Vec3f pix(src[x*3], src[x*3+1], src[x*3+2]); int kHit = -1, kForeground = -1; for( k = 0; k < K; k++ ) { if( mptr[k].weight < FLT_EPSILON ) break; Vec3f mu = mptr[k].mean; Vec3f var = mptr[k].var; Vec3f diff = pix - mu; float d2 = diff.dot(diff); if( d2 < vT*(var[0] + var[1] + var[2]) ) { kHit = k; break; } } if( kHit >= 0 ) { float wsum = 0; for( k = 0; k < K; k++ ) { wsum += mptr[k].weight; if( wsum > T ) { kForeground = k+1; break; } } } dst[x] = (uchar)(kHit < 0 || kHit >= kForeground ? 255 : 0); } } } } void BackgroundSubtractorMOG::operator()(InputArray _image, OutputArray _fgmask, double learningRate) { Mat image = _image.getMat(); bool needToInitialize = nframes == 0 || learningRate >= 1 || image.size() != frameSize || image.type() != frameType;//是否需要初始化 if( needToInitialize ) initialize(image.size(), image.type());//初始化 CV_Assert( image.depth() == CV_8U ); _fgmask.create( image.size(), CV_8U ); Mat fgmask = _fgmask.getMat(); ++nframes; learningRate = learningRate >= 0 && nframes > 1 ? learningRate : 1./min( nframes, history ); CV_Assert(learningRate >= 0); if( image.type() == CV_8UC1 )//处理灰度图像 process8uC1( image, fgmask, learningRate, bgmodel, nmixtures, backgroundRatio, varThreshold, noiseSigma ); else if( image.type() == CV_8UC3 )//处理彩色图像 process8uC3( image, fgmask, learningRate, bgmodel, nmixtures, backgroundRatio, varThreshold, noiseSigma ); else CV_Error( CV_StsUnsupportedFormat, "Only 1- and 3-channel 8-bit images are supported in BackgroundSubtractorMOG" ); } }
其中有几点需要注意:
1.在高斯混合模型中需要使用概率更新参数的地方,程序中都简化成为了1处理,否则计算一个正态分布的概率还是挺花时间的。(程序作者在注释中也指出了他不是完全按照论文写成的,而是做了一些小的修改)。但是除了将概率换成1,其他地方还是严格按照公式的,大家可以仔细推导一下,就会看出其中的差异。
2.作者原文中是如果没有一个高斯模型与该像素点匹配,则去掉一个一个概率最小的,而用当前像素初始化的分布来替代他,而在这里变成了去掉优先级最小的。
3.程序中为了避免多次做循环,把一些步骤拆开做了,比如归一化权值需要先求出总权值,调整权值后的排序之类的,计算背景模型个数等等。减少了遍历的次数。其中的巧妙之处也不得不佩服作者的良苦用心。
3.就是似乎更新优先级的计算有点小问题,也可能是我理解不对。
4.在初始化时,可以使用多种方式,大家一看程序就明白了。
最后附上一个小的示例程序,教你如何使用高斯混合模型:
int main() { VideoCapture capture("D:/videos/shadow/use3.MPG"); if( !capture.isOpened() ) { cout<<"读取视频失败"<<endl; return -1; } //获取整个帧数 long totalFrameNumber = capture.get(CV_CAP_PROP_FRAME_COUNT); cout<<"整个视频共"<<totalFrameNumber<<"帧"<<endl; //设置开始帧() long frameToStart = 200; capture.set( CV_CAP_PROP_POS_FRAMES,frameToStart); cout<<"从第"<<frameToStart<<"帧开始读"<<endl; //设置结束帧 int frameToStop = 650; if(frameToStop < frameToStart) { cout<<"结束帧小于开始帧,程序错误,即将退出!"<<endl; return -1; } else { cout<<"结束帧为:第"<<frameToStop<<"帧"<<endl; } double rate = capture.get(CV_CAP_PROP_FPS); int delay = 1000/rate; Mat frame; //前景图片 Mat foreground; //使用默认参数调用混合高斯模型 BackgroundSubtractorMOG mog; bool stop(false); //currentFrame是在循环体中控制读取到指定的帧后循环结束的变量 long currentFrame = frameToStart; while( !stop ) { if( !capture.read(frame) ) { cout<<"从视频中读取图像失败或者读完整个视频"<<endl; return -2; } cvtColor(frame,frame,CV_RGB2GRAY); imshow("输入视频",frame); //参数为:输入图像、输出图像、学习速率 mog(frame,foreground,0.01); imshow("前景",foreground); //按ESC键退出,按其他键会停止在当前帧 int c = waitKey(delay); if ( (char)c == 27 || currentFrame >= frameToStop) { stop = true; } if ( c >= 0) { waitKey(0); } currentFrame++; } waitKey(0); }