The implementation is as follows:
// This file is part of the OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
// [1] David S. Bolme et al. "Visual Object Tracking using Adaptive Correlation Filters"
// http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf
//
//
// credits:
// Kun-Hsin Chen: for initial c++ code
// Cracki: for the idea of only converting the used patch to gray
//
#include "opencv2/tracking.hpp"
namespace cv {
namespace tracking {
struct DummyModel : TrackerModel
{
    virtual void modelUpdateImpl() {}
    virtual void modelEstimationImpl( const std::vector<Mat>& ) {}
};

const double eps = 0.00001;       // for normalization
const double rate = 0.2;          // learning rate
const double psrThreshold = 5.7;  // no detection, if PSR is smaller than this
struct MosseImpl : TrackerMOSSE
{
protected:
    Point2d center;  // center of the bounding box
    Size size;       // size of the bounding box
    Mat hanWin;
    Mat G;           // goal
    Mat H, A, B;     // state

    // Element-wise division of complex numbers in src1 and src2
    Mat divDFTs( const Mat &src1, const Mat &src2 ) const
    {
        Mat c1[2], c2[2], a1, a2, s1, s2, denom, re, im;

        // split into re and im per src
        cv::split(src1, c1);
        cv::split(src2, c2);

        // (Re2*Re2 + Im2*Im2) = denom
        // denom is the same for both channels
        cv::multiply(c2[0], c2[0], s1);
        cv::multiply(c2[1], c2[1], s2);
        cv::add(s1, s2, denom);

        // (Re1*Re2 + Im1*Im2)/(Re2*Re2 + Im2*Im2) = Re
        cv::multiply(c1[0], c2[0], a1);
        cv::multiply(c1[1], c2[1], a2);
        cv::divide(a1 + a2, denom, re, 1.0);

        // -(Im1*Re2 + Re1*Im2)/(Re2*Re2 + Im2*Im2) = Im
        // (the -1.0 scale factor applies the sign flip)
        cv::multiply(c1[1], c2[0], a1);
        cv::multiply(c1[0], c2[1], a2);
        cv::divide(a1 + a2, denom, im, -1.0);

        // merge Re and Im back into a complex matrix
        Mat dst, chn[] = {re, im};
        cv::merge(chn, 2, dst);
        return dst;
    }
    void preProcess( Mat &window ) const
    {
        window.convertTo(window, CV_32F);
        log(window + 1.0f, window);

        // normalize
        Scalar mean, StdDev;
        meanStdDev(window, mean, StdDev);
        window = (window - mean[0]) / (StdDev[0] + eps);

        // Gaussian weighting
        window = window.mul(hanWin);
    }
    double correlate( const Mat &image_sub, Point &delta_xy ) const  // compute the relative displacement
    {
        Mat IMAGE_SUB, RESPONSE, response;

        // filter in dft space
        dft(image_sub, IMAGE_SUB, DFT_COMPLEX_OUTPUT);
        mulSpectrums(IMAGE_SUB, H, RESPONSE, 0, true);
        idft(RESPONSE, response, DFT_SCALE|DFT_REAL_OUTPUT);

        // update center position
        double maxVal; Point maxLoc;
        minMaxLoc(response, 0, &maxVal, 0, &maxLoc);
        delta_xy.x = maxLoc.x - int(response.size().width/2);
        delta_xy.y = maxLoc.y - int(response.size().height/2);

        // normalize response
        Scalar mean, std;
        meanStdDev(response, mean, std);
        return (maxVal - mean[0]) / (std[0] + eps); // PSR
    }
    Mat randWarp( const Mat& a ) const
    {
        cv::RNG rng(8031965);

        // random rotation
        double C = 0.1;
        double ang = rng.uniform(-C, C);
        double c = cos(ang), s = sin(ang);

        // affine warp matrix
        Mat_<float> W(2, 3);
        W << c + rng.uniform(-C, C), -s + rng.uniform(-C, C), 0,
             s + rng.uniform(-C, C),  c + rng.uniform(-C, C), 0;

        // translation chosen so the patch center maps to itself
        Mat_<float> center_warp(2, 1);
        center_warp << a.cols/2, a.rows/2;
        W.col(2) = center_warp - (W.colRange(0, 2))*center_warp;

        Mat warped;
        warpAffine(a, warped, W, a.size(), BORDER_REFLECT);
        return warped;
    }
    virtual bool initImpl( const Mat& image, const Rect2d& boundingBox )
    {
        model = makePtr<DummyModel>();

        Mat img;
        if (image.channels() == 1)
            img = image;
        else
            cvtColor(image, img, COLOR_BGR2GRAY);

        int w = getOptimalDFTSize(int(boundingBox.width));
        int h = getOptimalDFTSize(int(boundingBox.height));

        // get the center position
        int x1 = int(floor((2*boundingBox.x + boundingBox.width - w)/2));
        int y1 = int(floor((2*boundingBox.y + boundingBox.height - h)/2));
        center.x = x1 + w/2;
        center.y = y1 + h/2;
        size.width = w;
        size.height = h;

        Mat window;
        getRectSubPix(img, size, center, window);
        createHanningWindow(hanWin, size, CV_32F);

        // goal
        Mat g = Mat::zeros(size, CV_32F);
        g.at<float>(h/2, w/2) = 1;
        GaussianBlur(g, g, Size(-1, -1), 2.0);
        double maxVal;
        minMaxLoc(g, 0, &maxVal);
        g = g / maxVal;
        dft(g, G, DFT_COMPLEX_OUTPUT);

        // initial A, B and H
        A = Mat::zeros(G.size(), G.type());
        B = Mat::zeros(G.size(), G.type());
        for (int i = 0; i < 8; i++)
        {
            Mat window_warp = randWarp(window);
            preProcess(window_warp);

            Mat WINDOW_WARP, A_i, B_i;
            dft(window_warp, WINDOW_WARP, DFT_COMPLEX_OUTPUT);
            mulSpectrums(G, WINDOW_WARP, A_i, 0, true);
            mulSpectrums(WINDOW_WARP, WINDOW_WARP, B_i, 0, true);
            A += A_i;
            B += B_i;
        }
        H = divDFTs(A, B);
        return true;
    }
    virtual bool updateImpl( const Mat& image, Rect2d& boundingBox )
    {
        if (H.empty()) // not initialized
            return false;

        Mat image_sub;
        getRectSubPix(image, size, center, image_sub);
        if (image_sub.channels() != 1)
            cvtColor(image_sub, image_sub, COLOR_BGR2GRAY);
        preProcess(image_sub);

        Point delta_xy;
        double PSR = correlate(image_sub, delta_xy);
        if (PSR < psrThreshold)
            return false;

        // update location
        center.x += delta_xy.x;
        center.y += delta_xy.y;

        Mat img_sub_new;
        getRectSubPix(image, size, center, img_sub_new);
        if (img_sub_new.channels() != 1)
            cvtColor(img_sub_new, img_sub_new, COLOR_BGR2GRAY);
        preProcess(img_sub_new);

        // new state for A and B
        Mat F, A_new, B_new;
        dft(img_sub_new, F, DFT_COMPLEX_OUTPUT);
        mulSpectrums(G, F, A_new, 0, true);
        mulSpectrums(F, F, B_new, 0, true);

        // update A, B, and H
        A = A*(1-rate) + A_new*rate;
        B = B*(1-rate) + B_new*rate;
        H = divDFTs(A, B);

        // return tracked rect
        double x = center.x, y = center.y;
        int w = size.width, h = size.height;
        boundingBox = Rect2d(Point2d(x - 0.5*w, y - 0.5*h), Point2d(x + 0.5*w, y + 0.5*h));
        return true;
    }
public:

    MosseImpl() { isInit = 0; }

    // dummy implementation
    virtual void read( const FileNode& ) {}
    virtual void write( FileStorage& ) const {}

}; // MosseImpl

} // tracking

Ptr<TrackerMOSSE> TrackerMOSSE::create()
{
    return makePtr<tracking::MosseImpl>();
}

} // cv
The divDFTs function computes the element-wise division of two complex Mats, i.e.:
$$\operatorname{Re}\!\left(\frac{src1}{src2}\right)=\frac{src1.re\cdot src2.re + src1.im\cdot src2.im}{src2.re^2 + src2.im^2},\qquad \operatorname{Im}\!\left(\frac{src1}{src2}\right)=\frac{src1.im\cdot src2.re - src1.re\cdot src2.im}{src2.re^2 + src2.im^2}$$
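This is the standard complex-division identity, obtained by multiplying numerator and denominator by the conjugate of src2:
$$\frac{a+bi}{c+di}=\frac{(a+bi)(c-di)}{(c+di)(c-di)}=\frac{(ac+bd)+(bc-ad)\,i}{c^2+d^2}.$$
Note that the code passes a scale factor of -1.0 when computing the imaginary part, so its sign convention differs from the textbook formula; this convention is consistent with how H is later consumed by mulSpectrums with the conjugate flag set.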
preProcess performs the image preprocessing. The MOSSE paper explains: "One issue with the FFT convolution algorithm is that the image and the filter are mapped to the topological structure of a torus. In other words, it connects the left edge of the image to the right edge, and the top to the bottom. During convolution, the images rotate through the toroidal space instead of translating as they would in the spatial domain. Artificially connecting the boundaries of the image introduces an artifact which affects the correlation output.
This effect is reduced by following the preprocessing steps outlined in [3]. First, the pixel values are transformed using a log function which helps with low contrast lighting situations. The pixel values are normalized to have a mean value of 0.0 and a norm of 1.0. Finally, the image is multiplied by a cosine window which gradually reduces the pixel values near the edge to zero. This also has the benefit that it puts more emphasis near the center of the target."
In other words: the raw pixel values are first transformed with a log function, which helps in low-contrast lighting conditions. The pixel values are then normalized to a mean of 0.0 and a norm of 1.0. Finally, the image is multiplied by a cosine window that gradually drives pixel values near the edges to zero, which also has the benefit of emphasizing the center of the target.
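As a summary, here is a minimal standalone sketch of the same recipe (the helper name preprocessPatch is mine, not OpenCV's; patch is assumed to be an 8-bit grayscale image):

#include <opencv2/core.hpp>
#include <opencv2/imgproc.hpp>

// Sketch of the MOSSE preprocessing: log transform, zero-mean
// normalization, then a cosine (Hanning) window to taper the edges.
static cv::Mat preprocessPatch(const cv::Mat& patch)
{
    cv::Mat win;
    patch.convertTo(win, CV_32F);
    cv::log(win + 1.0f, win);                    // compress dynamic range
    cv::Scalar mean, stddev;
    cv::meanStdDev(win, mean, stddev);
    win = (win - mean[0]) / (stddev[0] + 1e-5);  // zero mean, ~unit variance
    cv::Mat hann;
    cv::createHanningWindow(hann, win.size(), CV_32F);
    return win.mul(hann);                        // edges fade to zero
}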
correlate convolves the current frame's patch with the filter template H obtained from the previous frame (the filtering is done in the Fourier domain), takes the inverse DFT of the response, and finds its peak. The peak location gives the updated target position, and the function returns the peak-to-sidelobe ratio (PSR); if the PSR is small, the target is considered not detected.
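Following the paper, the PSR is defined as
$$\mathrm{PSR}=\frac{g_{\max}-\mu_{sl}}{\sigma_{sl}},$$
where $g_{\max}$ is the correlation peak and $\mu_{sl}$, $\sigma_{sl}$ are the mean and standard deviation of the sidelobe region. The paper excludes an 11×11 window around the peak from the sidelobe statistics; this implementation approximates that by using the mean and standard deviation of the entire response map.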
randWarp applies a small random affine transform to the image patch; it is used during initialization to generate perturbed training samples, as made explicit below.
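Concretely, the warp matrix built in randWarp is
$$W=\begin{bmatrix}\cos\theta+\epsilon_1 & -\sin\theta+\epsilon_2 & t_x\\ \sin\theta+\epsilon_3 & \cos\theta+\epsilon_4 & t_y\end{bmatrix},\qquad \theta,\epsilon_1,\dots,\epsilon_4\sim U(-0.1,\,0.1),$$
with the translation $(t_x,t_y)$ chosen so that the patch center maps to itself.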
initImpl initializes A, B, and H. It first converts the image to grayscale and extracts a patch centered on the bounding box. The desired response G is built by setting the center pixel of the patch to 1, applying a Gaussian blur, normalizing by the maximum, and taking the DFT. A and B are then initialized by accumulating eight randomly warped samples with the formulas below, so during initialization the learning rate η is effectively 1.
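These are the running-average filter equations from the paper [1]: with $F_i$ the DFT of the preprocessed input patch and $G_i$ the desired Gaussian response,
$$A_i=\eta\,G_i\odot F_i^{*}+(1-\eta)\,A_{i-1},\qquad B_i=\eta\,F_i\odot F_i^{*}+(1-\eta)\,B_{i-1},\qquad H_i^{*}=\frac{A_i}{B_i},$$
where $\odot$ denotes element-wise multiplication and $^{*}$ the complex conjugate. In the initialization loop the eight warped samples are simply summed into A and B ($\eta=1$); during tracking the code uses $\eta=$ rate $=0.2$.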
updateImpl first finds the response peak and takes it as the new center, then updates A, B, and H with the same formulas, now with learning rate η = 0.2.
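For completeness, here is a minimal usage sketch of this tracker through the OpenCV contrib API (assuming the OpenCV 3.x-era interface shown above, where update takes a Rect2d&; the video source and window names are placeholders):

#include <opencv2/core.hpp>
#include <opencv2/highgui.hpp>
#include <opencv2/videoio.hpp>
#include <opencv2/tracking.hpp>

int main()
{
    cv::VideoCapture cap(0);                     // any video source
    cv::Mat frame;
    if (!cap.read(frame)) return 1;

    // let the user pick the initial target box
    cv::Rect2d box = cv::selectROI("init", frame);

    cv::Ptr<cv::TrackerMOSSE> tracker = cv::TrackerMOSSE::create();
    tracker->init(frame, box);

    while (cap.read(frame))
    {
        // update() returns false when PSR < psrThreshold (target lost)
        if (tracker->update(frame, box))
            cv::rectangle(frame, box, cv::Scalar(0, 255, 0), 2);
        cv::imshow("track", frame);
        if (cv::waitKey(1) == 27) break;         // Esc to quit
    }
    return 0;
}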
[1] David S. Bolme et al. "Visual Object Tracking using Adaptive Correlation Filters", CVPR 2010. http://www.cs.colostate.edu/~draper/papers/bolme_cvpr10.pdf