



// This is an example of a CUDA program.

#include "opencv2/core/core.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/highgui/highgui.hpp"
#include ""
#include "Vibe_M.h"
using namespace std;
using namespace cv;
using namespace cv::gpu;

// Background-subtraction algorithms selectable with the --method option.
// The enumerator names are the ones main() maps the option strings onto.
enum Method
{
	FGD_STAT,
	MOG,
	MOG2,
	VIBE,
	GMG
};

// Entry point of the background-subtraction demo: parses the command line,
// sets up a capture source and a Vibe_M model, and shows the input image next
// to the foreground mask.
// NOTE(review): this function has lost its braces and several statements (each
// gap is flagged below); it does not compile as written.
int main(int argc, const char** argv)
	// Option table; each entry presumably reads
	// "{ short name | long name | default | help text }" -- confirm against the
	// CommandLineParser version in use.
	// NOTE(review): the default of "camera" is spelled "flase"; "false" was
	// presumably intended.
	cv::CommandLineParser cmd(argc, argv,
		"{ c | camera | flase       | use camera }"
		"{ f | file   | 768x576.avi | input video file }"
		"{ m | method | vibe         | method (fgd, mog, mog2, vibe, gmg) }"
		"{ h | help   | false       | print help message }");

	// --help: print the usage banner and exit with status 0.
	// NOTE(review): cmd.get is called without an explicit result type here and
	// below -- confirm whether cmd.get<bool>/cmd.get<string> was intended.
	// NOTE(review): "Avaible" in the banner is presumably "Available".
	if (cmd.get("help"))
		cout << "Usage : bgfg_segm [options]" << endl;
		cout << "Avaible options:" << endl;
		return 0;
	bool useCamera = cmd.get("camera");
	string file = cmd.get("file");
	string method = cmd.get("method");
	// Only the five method names listed in the option table are accepted.
	if (method != "fgd" && method != "mog" && method != "mog2" && method != "vibe" && method != "gmg")
		cerr << "Incorrect method" << endl;
		return -1;
	// Map the method name to its enumerator; any name that is not
	// fgd/mog/mog2/vibe ends up as GMG.
	Method m = method == "fgd" ? FGD_STAT : method == "mog" ? MOG : method == "mog2" ? MOG2 : method == "vibe" ? VIBE : GMG;

	VideoCapture cap;
	// NOTE(review): this `if` is terminated by ';' so it guards nothing, and no
	// cap.open(...) call is visible -- as written the capture is never opened.
	if (useCamera);
	if (!cap.isOpened())
		cerr << "can not open camera or video file" << endl;
		return -1;
	// Read the first image into `origin`.
	// NOTE(review): `frame` is never assigned in the visible code, yet it is
	// what d_frame is constructed from and what imshow displays -- the
	// origin -> frame step appears to be missing.
	Mat origin, frame;
	cap >> origin;
	GpuMat d_frame(frame);
	Vibe_M vibe;
	GpuMat d_fgmask;

	// Host-side images; only fgmask is used in the visible code.
	Mat fgmask;
	Mat fgimg;
	Mat bgimg;

	// NOTE(review): the VIBE case has no visible body (the model set-up
	// presumably belonged here -- confirm).
	switch (m)
	case VIBE:


	namedWindow("image", WINDOW_NORMAL);
	namedWindow("foreground mask", WINDOW_NORMAL);

		// Per-frame processing.
		// NOTE(review): the enclosing loop statement is not visible.
		cap >> origin;
		// NOTE(review): nothing follows this check (presumably the loop exit
		// on end of stream).
		if (origin.empty())



		// update the model
		switch (m)
		case VIBE:
			vibe(d_frame, d_fgmask);

		// NOTE(review): d_frame is never refreshed from the new image and
		// d_fgmask is never copied into fgmask in the visible code.
		imshow("image", frame);
		imshow("foreground mask", fgmask);
		// Wait up to 30 ms for a key press; 27 is the Esc key code.
		int key = waitKey(30);
		// NOTE(review): neither branch has a visible body.
		if (key == 27)
		else if(key == ' ')


#ifndef _VIBE_M_H_
#define _VIBE_M_H_
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include "opencv2/gpu/gpu.hpp"
#include "opencv2/imgproc/imgproc.hpp"
#include "opencv2/objdetect/objdetect.hpp"
#include "opencv2/features2d/features2d.hpp"
using namespace std;
using namespace cv;
using namespace cv::gpu;

// ViBe background-subtraction model that keeps its state in GPU matrices.
// The tuning parameters are plain public members; they are sent to the device
// by initialize(), so changes made afterwards take effect on the next
// (re-)initialization.
class  Vibe_M
{
public:
	//! the default constructor
	explicit Vibe_M(unsigned long rngSeed = 1234567);
	//! re-initialization method
	void initialize(const GpuMat& firstFrame, Stream& stream = Stream::Null());
	//! the update operator
	void operator()(const GpuMat& frame, GpuMat& fgmask, Stream& stream = Stream::Null());
	//! releases all inner buffers
	void release();

	int nbSamples;         // number of samples per pixel
	int reqMatches;        // #_min
	int radius;            // R
	int subsamplingFactor; // amount of random subsampling

private:
	Size frameSize_;        // size of the frames the model was built for
	unsigned long rngSeed_; // seed of the host RNG used to create randStates_
	GpuMat randStates_;     // one random-generator state per pixel
	GpuMat samples_;        // nbSamples planes of frame height, stacked vertically
};


#include "Vibe_M.h"
#include "opencv2/gpu/stream_accessor.hpp"

namespace cv { namespace gpu { namespace device
	namespace vibe_m
		void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor);

		void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream);

		void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz randStates, cudaStream_t stream);

	// Default model parameters copied into each Vibe_M by its constructor.
	static const int defaultNbSamples         = 20;  // samples kept per pixel
	static const int defaultReqMatches        = 2;   // matches required for background
	static const int defaultRadius            = 20;  // matching radius
	static const int defaultSubsamplingFactor = 16;  // random subsampling amount

// Builds a model with the default parameters and an empty frame size.
// Nothing is allocated on the device here; that happens in initialize().
//
// rngSeed: seed stored for the host RNG that initialize() uses to fill the
//          per-pixel random states.
Vibe_M::Vibe_M(unsigned long rngSeed) :
	nbSamples(defaultNbSamples),
	reqMatches(defaultReqMatches),
	radius(defaultRadius),
	subsamplingFactor(defaultSubsamplingFactor),
	frameSize_(0, 0),
	rngSeed_(rngSeed)
{
}

void Vibe_M::initialize(const GpuMat& firstFrame, Stream& s)
	using namespace cv::gpu::device::vibe_m;

	CV_Assert(firstFrame.type() == CV_8UC1 || firstFrame.type() == CV_8UC3 || firstFrame.type() == CV_8UC4);

	cudaStream_t stream = cv::gpu::StreamAccessor::getStream(s);

	loadConstants(nbSamples, reqMatches, radius, subsamplingFactor);

	frameSize_ = firstFrame.size();

	if (randStates_.size() != frameSize_)
		cv::RNG rng(rngSeed_);
		cv::Mat h_randStates(frameSize_, CV_8UC4);
		rng.fill(h_randStates, cv::RNG::UNIFORM, 0, 255);

	int ch = firstFrame.channels();
	int sample_ch = ch == 1 ? 1 : 4;

	samples_.create(nbSamples * frameSize_.height, frameSize_.width, CV_8UC(sample_ch));

	init_gpu(firstFrame, ch, samples_, randStates_, stream);

void Vibe_M::operator()(const GpuMat& frame, GpuMat& fgmask, Stream& s)
	using namespace cv::gpu::device::vibe_m;

	CV_Assert(frame.depth() == CV_8U);

	int ch = frame.channels();
	int sample_ch = ch == 1 ? 1 : 4;

	if (frame.size() != frameSize_ || sample_ch != samples_.channels())

	fgmask.create(frameSize_, CV_8UC1);

	update_gpu(frame, ch, fgmask, samples_, randStates_, cv::gpu::StreamAccessor::getStream(s));

void Vibe_M::release()
	frameSize_ = Size(0, 0);



#include "opencv2/gpu/device/common.hpp"
#include "opencv2/gpu/device/vec_math.hpp"

namespace cv { namespace gpu { namespace device
{
	namespace vibe_m
	{
		// Model parameters; written by loadConstants() and read by the kernels.
		__constant__ int c_nbSamples;          // samples stored per pixel
		__constant__ int c_reqMatches;         // matches needed to classify a pixel as background
		__constant__ int c_radius;             // a sample matches when its distance is below this value
		__constant__ int c_subsamplingFactor;  // a model update is performed when rand % factor == 0

		// Copies the four model parameters into the constants above.
		void loadConstants(int nbSamples, int reqMatches, int radius, int subsamplingFactor)
		{
			cudaSafeCall( cudaMemcpyToSymbol(c_nbSamples, &nbSamples, sizeof(int)) );
			cudaSafeCall( cudaMemcpyToSymbol(c_reqMatches, &reqMatches, sizeof(int)) );
			cudaSafeCall( cudaMemcpyToSymbol(c_radius, &radius, sizeof(int)) );
			cudaSafeCall( cudaMemcpyToSymbol(c_subsamplingFactor, &subsamplingFactor, sizeof(int)) );
		}

		// Advances the generator state in place and returns the new state.
		__device__ __forceinline__ uint nextRand(uint& state)
		{
			// CV_RNG_COEFF is not redeclared here: per the original author's
			// note it is already defined (noted value: 4164903690U).
			state = state * CV_RNG_COEFF + (state >> 16);
			return state;
		}

		// Offsets of the eight neighbours; the ninth entry (0, 0) is the pixel itself.
		__constant__ int c_xoff[9] = {-1,  0,  1, -1, 1, -1, 0, 1, 0};
		__constant__ int c_yoff[9] = {-1, -1, -1,  0, 0,  1, 1, 1, 0};

		// Picks one of the first `count` offsets at random and applies it to (x, y).
		// count = 8 selects among the neighbours only, count = 9 may also return
		// (x, y) itself. The result is not clamped to the image.
		__device__ __forceinline__ int2 chooseRandomNeighbor(int x, int y, uint& randState, int count = 8)
		{
			int idx = nextRand(randState) % count;

			return make_int2(x + c_xoff[idx], y + c_yoff[idx]);
		}

		// Converts an input pixel to the sample storage type:
		// uchar -> uchar, uchar3 -> uchar4 (w = 0), uchar4 -> uchar4.
		__device__ __forceinline__ uchar cvt(uchar val)
		{
			return val;
		}
		__device__ __forceinline__ uchar4 cvt(const uchar3& val)
		{
			return make_uchar4(val.x, val.y, val.z, 0);
		}
		__device__ __forceinline__ uchar4 cvt(const uchar4& val)
		{
			return val;
		}

		// Fills every sample slot of each pixel with the value of a randomly
		// chosen pixel from its 3x3 neighbourhood (the pixel itself included),
		// clamped to the image.
		//
		// Launch layout: 2-D grid of 2-D blocks covering the frame, one thread
		// per pixel; surplus threads leave at the bounds check. No shared memory.
		// samples holds c_nbSamples planes of frame.rows rows stacked vertically.
		template <typename SrcT, typename SampleT>
		__global__ void init(const PtrStepSz<SrcT> frame, PtrStep<SampleT> samples, PtrStep<uint> randStates)
		{
			const int x = blockIdx.x * blockDim.x + threadIdx.x;
			const int y = blockIdx.y * blockDim.y + threadIdx.y;

			if (x >= frame.cols || y >= frame.rows)
				return;

			uint localState = randStates(y, x);

			for (int k = 0; k < c_nbSamples; ++k)
			{
				int2 np = chooseRandomNeighbor(x, y, localState, 9);

				np.x = ::max(0, ::min(np.x, frame.cols - 1));
				np.y = ::max(0, ::min(np.y, frame.rows - 1));

				SrcT pix = frame(np.y, np.x);

				samples(k * frame.rows + y, x) = cvt(pix);
			}

			// Persist the advanced generator state for the next launch.
			randStates(y, x) = localState;
		}

		// Launches init<SrcT, SampleT> with 32x8 blocks on `stream`; waits for
		// completion when the default stream (0) is used.
		template <typename SrcT, typename SampleT>
		void init_caller(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
		{
			dim3 block(32, 8);
			dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

			cudaSafeCall( cudaFuncSetCacheConfig(init<SrcT, SampleT>, cudaFuncCachePreferL1) );

			init<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, (PtrStepSz<SampleT>) samples, randStates);
			cudaSafeCall( cudaGetLastError() );

			if (stream == 0)
				cudaSafeCall( cudaDeviceSynchronize() );
		}

		// Dispatches model initialization on the channel count `cn`
		// (1, 3 or 4). Any other value is reported as an invalid-value error
		// instead of calling through a null or out-of-range table entry.
		void init_gpu(PtrStepSzb frame, int cn, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
		{
			typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
			static const func_t funcs[] =
			{
				0, init_caller<uchar, uchar>, 0, init_caller<uchar3, uchar4>, init_caller<uchar4, uchar4>
			};
			const int nbFuncs = sizeof(funcs) / sizeof(funcs[0]);

			if (cn < 0 || cn >= nbFuncs || funcs[cn] == 0)
			{
				cudaSafeCall( cudaErrorInvalidValue );
				return;
			}

			funcs[cn](frame, samples, randStates, stream);
		}

		// Distance between an input pixel and a stored sample: absolute
		// difference for grey values, mean absolute difference of x/y/z for
		// colour values (the w component is ignored).
		__device__ __forceinline__ int calcDist(uchar a, uchar b)
		{
			return ::abs(a - b);
		}
		__device__ __forceinline__ int calcDist(const uchar3& a, const uchar4& b)
		{
			return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3;
		}
		__device__ __forceinline__ int calcDist(const uchar4& a, const uchar4& b)
		{
			return (::abs(a.x - b.x) + ::abs(a.y - b.y) + ::abs(a.z - b.z)) / 3;
		}

		// Classifies each pixel and conditionally updates the model.
		//
		// A pixel is background when at least c_reqMatches of its samples lie
		// closer than c_radius; fgmask receives 255 for foreground and 0 for
		// background. A background pixel then, each with chance
		// 1 / c_subsamplingFactor, (a) overwrites one random sample of its own
		// model and (b) overwrites one random sample of a random 8-neighbour
		// (clamped to the image) with the current value.
		//
		// Launch layout: 2-D grid of 2-D blocks covering the frame, one thread
		// per pixel; surplus threads leave at the bounds check. No shared memory.
		// NOTE: step (b) writes into another pixel's samples without any
		// synchronization with the thread that owns that pixel.
		template <typename SrcT, typename SampleT>
		__global__ void update(const PtrStepSz<SrcT> frame, PtrStepb fgmask, PtrStep<SampleT> samples, PtrStep<uint> randStates)
		{
			const int x = blockIdx.x * blockDim.x + threadIdx.x;
			const int y = blockIdx.y * blockDim.y + threadIdx.y;

			if (x >= frame.cols || y >= frame.rows)
				return;

			uint localState = randStates(y, x);

			SrcT imgPix = frame(y, x);

			// comparison with the model; stops as soon as enough matches are found

			int count = 0;
			for (int k = 0; (count < c_reqMatches) && (k < c_nbSamples); ++k)
			{
				SampleT samplePix = samples(k * frame.rows + y, x);

				int distance = calcDist(imgPix, samplePix);

				if (distance < c_radius)
					++count;
			}

			// pixel classification according to reqMatches:
			// count < c_reqMatches -> foreground (255), otherwise background (0)

			fgmask(y, x) = (uchar) (-(count < c_reqMatches));

			if (count >= c_reqMatches)
			{
				// the pixel belongs to the background

				// gets a random number between 0 and subsamplingFactor-1
				int randomNumber = nextRand(localState) % c_subsamplingFactor;

				// update of the current pixel model
				if (randomNumber == 0)
				{
					// random subsampling

					int k = nextRand(localState) % c_nbSamples;

					samples(k * frame.rows + y, x) = cvt(imgPix);
				}

				// update of a neighboring pixel model
				randomNumber = nextRand(localState) % c_subsamplingFactor;

				if (randomNumber == 0)
				{
					// random subsampling

					// chooses a neighboring pixel randomly
					int2 np = chooseRandomNeighbor(x, y, localState);

					np.x = ::max(0, ::min(np.x, frame.cols - 1));
					np.y = ::max(0, ::min(np.y, frame.rows - 1));

					// chooses the value to be replaced randomly
					int k = nextRand(localState) % c_nbSamples;
					samples(k * frame.rows + np.y, np.x) = cvt(imgPix);
				}
			}

			// Persist the advanced generator state for the next launch.
			randStates(y, x) = localState;
		}

		// Launches update<SrcT, SampleT> with 32x8 blocks on `stream`; waits for
		// completion when the default stream (0) is used.
		template <typename SrcT, typename SampleT>
		void update_caller(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
		{
			dim3 block(32, 8);
			dim3 grid(divUp(frame.cols, block.x), divUp(frame.rows, block.y));

			cudaSafeCall( cudaFuncSetCacheConfig(update<SrcT, SampleT>, cudaFuncCachePreferL1) );

			update<SrcT, SampleT><<<grid, block, 0, stream>>>((PtrStepSz<SrcT>) frame, fgmask, (PtrStepSz<SampleT>) samples, randStates);
			cudaSafeCall( cudaGetLastError() );

			if (stream == 0)
				cudaSafeCall( cudaDeviceSynchronize() );
		}

		// Dispatches the model update on the channel count `cn` (1, 3 or 4).
		// Any other value is reported as an invalid-value error instead of
		// calling through a null or out-of-range table entry.
		void update_gpu(PtrStepSzb frame, int cn, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream)
		{
			typedef void (*func_t)(PtrStepSzb frame, PtrStepSzb fgmask, PtrStepSzb samples, PtrStepSz<uint> randStates, cudaStream_t stream);
			static const func_t funcs[] =
			{
				0, update_caller<uchar, uchar>, 0, update_caller<uchar3, uchar4>, update_caller<uchar4, uchar4>
			};
			const int nbFuncs = sizeof(funcs) / sizeof(funcs[0]);

			if (cn < 0 || cn >= nbFuncs || funcs[cn] == 0)
			{
				cudaSafeCall( cudaErrorInvalidValue );
				return;
			}

			funcs[cn](frame, fgmask, samples, randStates, stream);
		}
	}
}}}
