Correlation Filter in Visual Tracking

     涉及两篇论文:Visual Object Tracking using Adaptive Correlation Filters 和Fast Visual Tracking via Dense Spatio-Temporal Context Learning

     可参考这位博主笔记:http://www.cnblogs.com/hanhuili/p/4266990.html

     第一篇我说下自己的理解:训练时的输出都认为是高斯形状,因为这种形状符合PSR。

                              训练得到模板后开始跟踪,并根据每一帧的输出按照新的规则继续更新模板,进行跟踪。

      第二篇主要用到了上下文的信息,通过背景信息来确定目标的位置。可参考这篇博文:http://blog.csdn.net/zouxy09/article/details/16889905,博主还将其用C++实现了,很有启发性。

        STCTracker.h

Correlation Filter in Visual Tracking
// Fast object tracking algorithm  

// Author : zouxy  

// Date   : 2013-11-21  

// HomePage : http://blog.csdn.net/zouxy09  

// Email  : [email protected]  

// Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning  

// HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/  

// Email: [email protected]   

#pragma once  

  

#include <opencv2/opencv.hpp>  

  

using namespace cv;  

using namespace std;  

  

/// Tracker based on spatio-temporal context (STC) learning.
///
/// Usage: call init() once with the first frame and the object box, then call
/// tracking() for every following frame.
class STCTracker
{
public:
    STCTracker();
    ~STCTracker();

    /// Sets the hyper-parameters, allocates the models for the context region
    /// around @p box and learns the model from @p frame.
    void init(const Mat frame, const Rect box);

    /// Finds the object in @p frame, moves @p trackBox onto it and refreshes
    /// the model with the new frame.
    void tracking(const Mat frame, Rect &trackBox);

private:
    // ---- internal helpers -------------------------------------------------

    /// Fills hammingWin (already allocated) with a 2-D Hamming window.
    void createHammingWin();

    /// Element-wise complex multiplication (flag == 0) or division (flag != 0)
    /// of two 2-channel matrices.
    void complexOperation(const Mat src1, const Mat src2, Mat &dst, int flag = 0);

    /// Recomputes cxtPriorPro and cxtPosteriorPro for the current center.
    void getCxtPriorPosteriorModel(const Mat image);

    /// Learns the spatial context model from @p image and blends it into STCModel.
    void learnSTCModel(const Mat image);

    // ---- hyper-parameters (assigned in init) ------------------------------
    double sigma;           ///< scale of the Gaussian weight used for the context prior
    double alpha;           ///< scale parameter of the posterior (confidence) function
    double beta;            ///< shape (exponent) parameter of the posterior function
    double rho;             ///< learning rate used when blending STModel into STCModel

    // ---- tracking state ---------------------------------------------------
    Point center;           ///< current object position in frame coordinates
    Rect cxtRegion;         ///< context region in frame coordinates

    Mat cxtPriorPro;        ///< context prior probability
    Mat cxtPosteriorPro;    ///< posterior probability (confidence map model)
    Mat STModel;            ///< spatial context model learned from the latest frame
    Mat STCModel;           ///< spatio-temporal context model accumulated over frames
    Mat hammingWin;         ///< Hamming window applied to the context patch
};
View Code

       STCTracker.cpp

Correlation Filter in Visual Tracking
// Fast object tracking algorithm  

// Author : zouxy  

// Date   : 2013-11-21  

// HomePage : http://blog.csdn.net/zouxy09  

// Email  : [email protected]  

// Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning  

// HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/  

// Email: [email protected]   

  

#include "STCTracker.h"  

  

// Creates a tracker that still has to be set up with init().
// The scalar hyper-parameters are zeroed so that they never hold
// indeterminate values; init() assigns the real values.
STCTracker::STCTracker()
    : sigma(0.0), alpha(0.0), beta(0.0), rho(0.0)
{
}

  

// No explicit clean-up: the class only declares scalar, Point, Rect and Mat
// members, and this destructor performs no work of its own.
STCTracker::~STCTracker() {}

  

/************ Create a Hamming window ********************/  

void STCTracker::createHammingWin()  

{  

    for (int i = 0; i < hammingWin.rows; i++)  

    {  

        for (int j = 0; j < hammingWin.cols; j++)  

        {  

            hammingWin.at<double>(i, j) = (0.54 - 0.46 * cos( 2 * CV_PI * i / hammingWin.rows ))   

                                        * (0.54 - 0.46 * cos( 2 * CV_PI * j / hammingWin.cols ));  

        }  

    }  

}  

  

/************ Define two complex-value operation *****************/  

void STCTracker::complexOperation(const Mat src1, const Mat src2, Mat &dst, int flag)  

{  

    CV_Assert(src1.size == src2.size);  

    CV_Assert(src1.channels() == 2);  

  

    Mat A_Real, A_Imag, B_Real, B_Imag, R_Real, R_Imag;  

    vector<Mat> planes;  

    split(src1, planes);  

    planes[0].copyTo(A_Real);  

    planes[1].copyTo(A_Imag);  

      

    split(src2, planes);  

    planes[0].copyTo(B_Real);  

    planes[1].copyTo(B_Imag);  

      

    dst.create(src1.rows, src1.cols, CV_64FC2);  

    split(dst, planes);  

    R_Real = planes[0];  

    R_Imag = planes[1];  

      

    for (int i = 0; i < A_Real.rows; i++)  

    {  

        for (int j = 0; j < A_Real.cols; j++)  

        {  

            double a = A_Real.at<double>(i, j);  

            double b = A_Imag.at<double>(i, j);  

            double c = B_Real.at<double>(i, j);  

            double d = B_Imag.at<double>(i, j);  

  

            if (flag)  

            {  

                // division: (a+bj) / (c+dj)  

                R_Real.at<double>(i, j) = (a * c + b * d) / (c * c + d * d + 0.000001);  

                R_Imag.at<double>(i, j) = (b * c - a * d) / (c * c + d * d + 0.000001);  

            }  

            else  

            {  

                // multiplication: (a+bj) * (c+dj)  

                R_Real.at<double>(i, j) = a * c - b * d;  

                R_Imag.at<double>(i, j) = b * c + a * d;  

            }  

        }  

    }  

    merge(planes, dst);  

}  

  

/************ Get context prior and posterior probability ***********/  

void STCTracker::getCxtPriorPosteriorModel(const Mat image)  

{  

    CV_Assert(image.size == cxtPriorPro.size);  

  

    double sum_prior(0), sum_post(0);  

    for (int i = 0; i < cxtRegion.height; i++)  

    {  

        for (int j = 0; j < cxtRegion.width; j++)  

        {  

            double x = j + cxtRegion.x;  

            double y = i + cxtRegion.y;  

            double dist = sqrt((center.x - x) * (center.x - x) + (center.y - y) * (center.y - y));  

  

            // equation (5) in the paper  

            cxtPriorPro.at<double>(i, j) = exp(- dist * dist / (2 * sigma * sigma));  

            sum_prior += cxtPriorPro.at<double>(i, j);  

  

            // equation (6) in the paper  

            cxtPosteriorPro.at<double>(i, j) = exp(- pow(dist / sqrt(alpha), beta));  

            sum_post += cxtPosteriorPro.at<double>(i, j);  

        }  

    }  

    cxtPriorPro.convertTo(cxtPriorPro, -1, 1.0/sum_prior);  

    cxtPriorPro = cxtPriorPro.mul(image);  

    cxtPosteriorPro.convertTo(cxtPosteriorPro, -1, 1.0/sum_post);  

}  

  

/************ Learn Spatio-Temporal Context Model ***********/  

void STCTracker::learnSTCModel(const Mat image)  

{  

    // step 1: Get context prior and posterior probability  

    getCxtPriorPosteriorModel(image);  

      

    // step 2-1: Execute 2D DFT for prior probability  

    Mat priorFourier;  

    Mat planes1[] = {cxtPriorPro, Mat::zeros(cxtPriorPro.size(), CV_64F)};  

    merge(planes1, 2, priorFourier);  

    dft(priorFourier, priorFourier);  

  

    // step 2-2: Execute 2D DFT for posterior probability  

    Mat postFourier;  

    Mat planes2[] = {cxtPosteriorPro, Mat::zeros(cxtPosteriorPro.size(), CV_64F)};  

    merge(planes2, 2, postFourier);  

    dft(postFourier, postFourier);  

  

    // step 3: Calculate the division  

    Mat conditionalFourier;  

    complexOperation(postFourier, priorFourier, conditionalFourier, 1);  

  

    // step 4: Execute 2D inverse DFT for conditional probability and we obtain STModel  

    dft(conditionalFourier, STModel, DFT_INVERSE | DFT_REAL_OUTPUT | DFT_SCALE);  

  

    // step 5: Use the learned spatial context model to update spatio-temporal context model  

    addWeighted(STCModel, 1.0 - rho, STModel, rho, 0.0, STCModel);  

}  

  

/************ Initialize the hyper parameters and models ***********/  

void STCTracker::init(const Mat frame, const Rect box)  

{  

    // initial some parameters  

    alpha = 2.25;  

    beta = 1;  

    rho = 0.075;  

    sigma = 0.5 * (box.width + box.height);  

  

    // the object position  

    center.x = box.x + 0.5 * box.width;  

    center.y = box.y + 0.5 * box.height;  

  

    // the context region  

    cxtRegion.width = 2 * box.width;  

    cxtRegion.height = 2 * box.height;  

    cxtRegion.x = center.x - cxtRegion.width * 0.5;  

    cxtRegion.y = center.y - cxtRegion.height * 0.5;  

    cxtRegion &= Rect(0, 0, frame.cols, frame.rows);  

  

    // the prior, posterior and conditional probability and spatio-temporal context model  

    cxtPriorPro = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  

    cxtPosteriorPro = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  

    STModel = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  

    STCModel = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  

  

    // create a Hamming window  

    hammingWin = Mat::zeros(cxtRegion.height, cxtRegion.width, CV_64FC1);  

    createHammingWin();  

  

    Mat gray;  

    cvtColor(frame, gray, CV_RGB2GRAY);  

  

    // normalized by subtracting the average intensity of that region  

    Scalar average = mean(gray(cxtRegion));  

    Mat context;  

    gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[0]);  

  

    // multiplies a Hamming window to reduce the frequency effect of image boundary  

    context = context.mul(hammingWin);  

  

    // learn Spatio-Temporal context model from first frame  

    learnSTCModel(context);  

}  

  

/******** STCTracker: calculate the confidence map and find the max position *******/  

void STCTracker::tracking(const Mat frame, Rect &trackBox)  

{  

    Mat gray;  

    cvtColor(frame, gray, CV_RGB2GRAY);  

  

    // normalized by subtracting the average intensity of that region  

    Scalar average = mean(gray(cxtRegion));  

    Mat context;  

    gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[0]);  

  

    // multiplies a Hamming window to reduce the frequency effect of image boundary  

    context = context.mul(hammingWin);  

  

    // step 1: Get context prior probability  

    getCxtPriorPosteriorModel(context);  

  

    // step 2-1: Execute 2D DFT for prior probability  

    Mat priorFourier;  

    Mat planes1[] = {cxtPriorPro, Mat::zeros(cxtPriorPro.size(), CV_64F)};  

    merge(planes1, 2, priorFourier);  

    dft(priorFourier, priorFourier);  

  

    // step 2-2: Execute 2D DFT for conditional probability  

    Mat STCModelFourier;  

    Mat planes2[] = {STCModel, Mat::zeros(STCModel.size(), CV_64F)};  

    merge(planes2, 2, STCModelFourier);  

    dft(STCModelFourier, STCModelFourier);  

  

    // step 3: Calculate the multiplication  

    Mat postFourier;  

    complexOperation(STCModelFourier, priorFourier, postFourier, 0);  

  

    // step 4: Execute 2D inverse DFT for posterior probability namely confidence map  

    Mat confidenceMap;  

    dft(postFourier, confidenceMap, DFT_INVERSE | DFT_REAL_OUTPUT| DFT_SCALE);  

  

    // step 5: Find the max position  

    Point point;  

    minMaxLoc(confidenceMap, 0, 0, 0, &point);  

  

    // step 6-1: update center, trackBox and context region  

    center.x = cxtRegion.x + point.x;  

    center.y = cxtRegion.y + point.y;  

    trackBox.x = center.x - 0.5 * trackBox.width;  

    trackBox.y = center.y - 0.5 * trackBox.height;  

    trackBox &= Rect(0, 0, frame.cols, frame.rows);  

  

    cxtRegion.x = center.x - cxtRegion.width * 0.5;  

    cxtRegion.y = center.y - cxtRegion.height * 0.5;  

    cxtRegion &= Rect(0, 0, frame.cols, frame.rows);  

  

    // step 7: learn Spatio-Temporal context model from this frame for tracking next frame  

    average = mean(gray(cxtRegion));  

    gray(cxtRegion).convertTo(context, CV_64FC1, 1.0, - average[0]);  

    context = context.mul(hammingWin);  

    learnSTCModel(context);  

}  
View Code

     runTracker.cpp

    

Correlation Filter in Visual Tracking
// Fast object tracking algorithm  

// Author : zouxy  

// Date   : 2013-11-21  

// HomePage : http://blog.csdn.net/zouxy09  

// Email  : [email protected]  

// Reference: Kaihua Zhang, et al. Fast Tracking via Spatio-Temporal Context Learning  

// HomePage : http://www4.comp.polyu.edu.hk/~cskhzhang/  

// Email: [email protected]   

  

#include "STCTracker.h"  

  

// Global variables shared between mouseHandler() and main()

// Bounding box drawn with the mouse; later passed to the tracker by main().
Rect box;  

// True between left-button-down and left-button-up, i.e. while a drag is in progress.
bool drawing_box = false;  

// Set by mouseHandler() on left-button-up; main() waits for it before starting the tracker.
bool gotBB = false;  

  

// Mouse callback used to draw the initial bounding box.
//
// Left-button-down starts a new box at (x, y), mouse moves while dragging
// update its size, and left-button-up finishes it. On release the box is
// normalised so that width and height are non-negative. gotBB is only set
// when the finished box has a positive area: a click without a drag used to
// hand a 0x0 box to the tracker. `flags` and `param` are unused.
void mouseHandler(int event, int x, int y, int flags, void *param){
  switch( event ){
  case CV_EVENT_MOUSEMOVE:
    if (drawing_box){
        box.width = x-box.x;
        box.height = y-box.y;
    }
    break;
  case CV_EVENT_LBUTTONDOWN:
    drawing_box = true;
    box = Rect( x, y, 0, 0 );
    break;
  case CV_EVENT_LBUTTONUP:
    drawing_box = false;
    // The drag may have gone up/left of the start point: flip the box so
    // that (x, y) is its top-left corner.
    if( box.width < 0 ){
        box.x += box.width;
        box.width *= -1;
    }
    if( box.height < 0 ){
        box.y += box.height;
        box.height *= -1;
    }
    // Reject degenerate selections so the user can simply draw again.
    if( box.width > 0 && box.height > 0 ){
        gotBB = true;
    }
    break;
  }
}

  

// Demo: opens "handwave.wmv", lets the user draw a bounding box on the first
// frame, then tracks that box through the rest of the video.
//
// Exit codes: 0 after ESC during tracking, 1 after ESC during box selection,
// -1 if the video cannot be opened, a frame cannot be read, or the stream ends.
int main(int argc, char * argv[])
{
    VideoCapture capture;
    capture.open("handwave.wmv");
    bool fromfile = true;

    if (!capture.isOpened())
    {
        cout << "capture device failed to open!" << endl;
        return -1;
    }

    //Register mouse callback to draw the bounding box
    cvNamedWindow("Tracker", CV_WINDOW_AUTOSIZE);
    cvSetMouseCallback("Tracker", mouseHandler, NULL );

    Mat frame;
    capture >> frame;
    // An opened capture can still fail to decode; imshow() must not be given
    // an empty image.
    if (frame.empty())
    {
        cout << "failed to read the first frame!" << endl;
        return -1;
    }

    // Show the (first) frame until the user has drawn a bounding box.
    while(!gotBB)
    {
        if (!fromfile)
        {
            capture >> frame;
            if (frame.empty())
                return -1;
        }

        imshow("Tracker", frame);
        if (cvWaitKey(20) == 27)
            return 1;
    }

    //Remove callback
    cvSetMouseCallback("Tracker", NULL, NULL );

    STCTracker stcTracker;
    stcTracker.init(frame, box);

    int frameCount = 0;
    while (1)
    {
        capture >> frame;
        // End of stream or read failure; exit code kept as in the original.
        if (frame.empty())
            return -1;
        double t = (double)cvGetTickCount();
        frameCount++;

        // tracking
        stcTracker.tracking(frame, box);

        // show the result
        stringstream buf;
        buf << frameCount;
        string num = buf.str();
        putText(frame, num, Point(20, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 0, 255), 3);
        rectangle(frame, box, Scalar(0, 0, 255), 3);
        imshow("Tracker", frame);

        // elapsed ticks -> milliseconds
        t = (double)cvGetTickCount() - t;
        cout << "cost time: " << t / ((double)cvGetTickFrequency()*1000.) << endl;

        if ( cvWaitKey(1) == 27 )
            break;
    }

    return 0;
}
View Code

              这篇论文的源码作者已经给出,非常简洁,有空再研究。

               文中还将生成模型和判别模型进行了对比。生成模型一般就是学习一个外表模型来表示目标,然后寻找最匹配的图像区域。

               判别模型把跟踪作为一个分类问题,评估目标和背景的决策边界。

 

   这篇文章Real-time part-based visual tracking via adaptive correlation filter

   是15年CVPR上的一篇文章,可参考这位博主的翻译,翻译的还不错:http://blog.csdn.net/tjdatamining/article/details/45577045

   主要思想是将目标分成多个部分进行跟踪,给不同的部分分配不同的权值。每个跟踪器都是KCF,文章的重点在于如何联合各个部分的置信map,如何给各个部分分配不同的权值。

   还有就是提出了评估相邻帧平滑程度的SCCM,也是个创新点。这篇文章刚开始看的时候真是云里雾里,多看几遍就能大致明白了。

  

  这篇论文:High-Speed Tracking with Kernelized Correlation Filters也是2015年的一篇论文,用到了很多没有学过的知识,可参考:http://blog.csdn.net/carrierlxksuper/article/details/46461245。

  看完别人的笔记仍然不能理解,知识链条中缺了很多环节,准备先补作者在12年的一篇论文:Exploiting the Circulant Structure of Tracking-by-Detection with Kernels。


  其中提到了ridge regression(岭回归),可参考:http://www.cnblogs.com/zhangchaoyang/articles/2802806.html

  

  

你可能感兴趣的:(filter)