Struck跟踪算法介绍及代码解读(三)

博客:http://blog.csdn.net/qianxin_dh


      
        本人水平有限,代码理解难免有不正确的地方,希望大家能够见谅。
  
main.cpp
/* 
* Struck: Structured Output Tracking with Kernels
* 
* Code to accompany the paper:
*   Struck: Structured Output Tracking with Kernels
*   Sam Hare, Amir Saffari, Philip H. S. Torr
*   International Conference on Computer Vision (ICCV), 2011
* 
* Copyright (C) 2011 Sam Hare, Oxford Brookes University, Oxford, UK
* 
* This file is part of Struck.
* 
* Struck is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
* 
* Struck is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
* GNU General Public License for more details.
* 
* You should have received a copy of the GNU General Public License
* along with Struck.  If not, see <http://www.gnu.org/licenses/>.
* 
*/

#include "Tracker.h"
#include "Config.h"

#include <iostream>
#include <fstream>

#include <opencv/cv.h>
#include <opencv/highgui.h>

#include "vot.hpp"

using namespace std;
using namespace cv;

static const int kLiveBoxWidth = 80;       
static const int kLiveBoxHeight = 80;      

void rectangle(Mat& rMat, const FloatRect& rRect, const Scalar& rColour)
{
	IntRect r(rRect);
	rectangle(rMat, Point(r.XMin(), r.YMin()), Point(r.XMax(), r.YMax()), rColour);
}

int main(int argc, char* argv[])
{
	// 读取文件对程序参数进行初始化
	string configPath = "config.txt";
	if (argc > 1)
	{
		configPath = argv[1];
	}
	Config conf(configPath);       //Config类主要读取config.txt中的参数

	if (conf.features.size() == 0)
	{
		cout << "error: no features specified in config" << endl;
		return EXIT_FAILURE;
	}

	Tracker tracker(conf);   

	//Check if --challenge was passed as an argument
	bool challengeMode = false;
	for (int i = 1; i < argc; i++) {
		if (strcmp("--challenge", argv[i]) == 0) {        //判断是否有挑战模式(vot挑战)
			challengeMode = true;
		}
	}

	if (challengeMode) {    //VOT(Visual object tracking)挑战,它提供了一个公共平台,目标是比较各种跟踪算法再短期跟踪内的性能,讨论视觉跟踪领域的发展。
		//load region, images and prepare for output
		Mat frameOrig;
		Mat frame;
		VOT vot_io("region.txt", "images.txt", "output.txt");
		vot_io.getNextImage(frameOrig);
		resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
		cv::Rect initPos = vot_io.getInitRectangle();
		vot_io.outputBoundingBox(initPos);
		float scaleW = (float)conf.frameWidth/frameOrig.cols;
		float scaleH = (float)conf.frameHeight/frameOrig.rows;

		FloatRect initBB_vot = FloatRect(initPos.x*scaleW, initPos.y*scaleH, initPos.width*scaleW, initPos.height*scaleH);
		tracker.Initialise(frame, initBB_vot);

		while (vot_io.getNextImage(frameOrig) == 1){
			resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));

			tracker.Track(frame);
			const FloatRect& bb = tracker.GetBB();
			float x = bb.XMin()/scaleW;
			float y = bb.YMin()/scaleH;
			float w = bb.Width()/scaleW;
			float h = bb.Height()/scaleH;

			cv::Rect output = cv::Rect(x,y,w,h);

			vot_io.outputBoundingBox(output);
		}

		return 0;
	}

	
	ofstream outFile;
	if (conf.resultsPath != "")
	{
		outFile.open(conf.resultsPath.c_str(), ios::out);    //将程序写入resultpath
		if (!outFile)
		{
			cout << "error: could not open results file: " << conf.resultsPath << endl;
			return EXIT_FAILURE;
		}
	}

	// if no sequence specified then use the camera
	bool useCamera = (conf.sequenceName == "");

	VideoCapture cap;

	int startFrame = -1;
	int endFrame = -1;
	FloatRect initBB;
	string imgFormat;
	float scaleW = 1.f;
	float scaleH = 1.f;

	if (useCamera)
	{
		if (!cap.open(0))
		{
			cout << "error: could not start camera capture" << endl;
			return EXIT_FAILURE;
		}
		startFrame = 0;
		endFrame = INT_MAX;        /* maximum (signed) int value */
		Mat tmp;
		cap >> tmp;
		scaleW = (float)conf.frameWidth/tmp.cols;
		scaleH = (float)conf.frameHeight/tmp.rows;

		initBB = IntRect(conf.frameWidth/2-kLiveBoxWidth/2, conf.frameHeight/2-kLiveBoxHeight/2, kLiveBoxWidth, kLiveBoxHeight);
		cout << "press 'i' to initialise tracker" << endl;
	}
	else
	{
		// parse frames file
		string framesFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_frames.txt";  //girl_frames.txt的文件路径,该文件放在girl文件夹里,内容为0,501。
		ifstream framesFile(framesFilePath.c_str(), ios::in);   
		if (!framesFile)
		{
			cout << "error: could not open sequence frames file: " << framesFilePath << endl;
			return EXIT_FAILURE;
		}
	
		string framesLine;
		getline(framesFile, framesLine);
		sscanf(framesLine.c_str(), "%d,%d", &startFrame, &endFrame);   //startFrame=0;endFrame=501;

		if (framesFile.fail() || startFrame == -1 || endFrame == -1)   
		{
			cout << "error: could not parse sequence frames file" << endl;
			return EXIT_FAILURE;
		}

		imgFormat = conf.sequenceBasePath+"/"+conf.sequenceName+"/imgs/img%05d.png";

		// read first frame to get size
		char imgPath[256];
		sprintf(imgPath, imgFormat.c_str(), startFrame);  //sprintf把格式化的数据写入某个字符串缓冲区(imgPath);
		Mat tmp = cv::imread(imgPath, 0);
		scaleW = (float)conf.frameWidth/tmp.cols;   //=1;
		scaleH = (float)conf.frameHeight/tmp.rows; //=1;

		// read init box from ground truth file
		string gtFilePath = conf.sequenceBasePath+"/"+conf.sequenceName+"/"+conf.sequenceName+"_gt.txt";  //读取girl_gt.txt文件
		ifstream gtFile(gtFilePath.c_str(), ios::in);
		if (!gtFile)
		{
			cout << "error: could not open sequence gt file: " << gtFilePath << endl;
			return EXIT_FAILURE;
		}

	
		string gtLine;
		getline(gtFile, gtLine);
		float xmin = -1.f;
		float ymin = -1.f;
		float width = -1.f;
		float height = -1.f;
		sscanf(gtLine.c_str(), "%f,%f,%f,%f", &xmin, &ymin, &width, &height);  //128,46,104,127

		if (gtFile.fail() || xmin < 0.f || ymin < 0.f || width < 0.f || height < 0.f)
		{
			cout << "error: could not parse sequence gt file" << endl;
			return EXIT_FAILURE;
		}
		initBB = FloatRect(xmin*scaleW, ymin*scaleH, width*scaleW, height*scaleH);
	}

	if (!conf.quietMode)
	{
		namedWindow("result");
	}

	Mat result(conf.frameHeight, conf.frameWidth, CV_8UC3);
	bool paused = false;
	bool doInitialise = false;
	srand(conf.seed);

	for (int frameInd = startFrame; frameInd <= endFrame; ++frameInd)    //逐帧处理
	{
		Mat frame;
		if (useCamera)   //若使用摄像头
		{
			Mat frameOrig;
			cap >> frameOrig;
			resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));
			flip(frame, frame, 1);
			frame.copyTo(result);
			if (doInitialise)
			{
				if (tracker.IsInitialised())
				{
					tracker.Reset();
				}
				else
				{
					tracker.Initialise(frame, initBB);
				}
				doInitialise = false;
			}
			else if (!tracker.IsInitialised())
			{
				rectangle(result, initBB, CV_RGB(255, 255, 255));
			}
		}
		else    //若读取图片序列
		{			
			char imgPath[256];
			sprintf(imgPath, imgFormat.c_str(), frameInd);
			Mat frameOrig = cv::imread(imgPath, 0);
			if (frameOrig.empty())
			{
				cout << "error: could not read frame: " << imgPath << endl;
				return EXIT_FAILURE;
			}

			resize(frameOrig, frame, Size(conf.frameWidth, conf.frameHeight));  //将读取的每帧图像统一为320*240;
			cvtColor(frame, result, CV_GRAY2RGB);
                        
                                                    if (frameInd == startFrame)
			{
				tracker.Initialise(frame, initBB);                 //对第一帧进行初始化
			}
		}

		if (tracker.IsInitialised()) 
		{
			tracker.Track(frame);                     //开始跟踪

			if (!conf.quietMode && conf.debugMode)
			{
				tracker.Debug();  //用于显示样本图像
			}

			rectangle(result, tracker.GetBB(), CV_RGB(0, 255, 0));

			if (outFile)
			{
				const FloatRect& bb = tracker.GetBB();
				outFile << bb.XMin()/scaleW << "," << bb.YMin()/scaleH << "," << bb.Width()/scaleW << "," << bb.Height()/scaleH << endl;
			}   //输出跟踪结果坐标
		}

		if (!conf.quietMode)
		{
			imshow("result", result);   //显示跟踪画面
			int key = waitKey(paused ? 0 : 1);

			if (key != -1)
			{
				if (key == 27 || key == 113) // esc q
				{
					break;
				}
				else if (key == 112) // p
				{
					paused = !paused;
				}
				else if (key == 105 && useCamera)
				{
					doInitialise = true;
				}
			}
			if (conf.debugMode && frameInd == endFrame)
			{
				cout << "\n\nend of sequence, press any key to exit" << endl;
				waitKey();
			}
		}
	}

	if (outFile.is_open())
	{
		outFile.close();
	}

	return EXIT_SUCCESS;
}




Tracker.cpp
#include "Tracker.h"
#include "Config.h"
#include "ImageRep.h"
#include "Sampler.h"
#include "Sample.h"
#include "GraphUtils/GraphUtils.h"

#include "HaarFeatures.h"
#include "RawFeatures.h"
#include "HistogramFeatures.h"
#include "MultiFeatures.h"

#include "Kernels.h"

#include "LaRank.h"

#include <opencv/cv.h>
#include <opencv/highgui.h>

#include <Eigen/Core>

#include <vector>
#include <algorithm>

using namespace cv;
using namespace std;
using namespace Eigen;

Tracker::Tracker(const Config& conf) :      //构造函数,对参数进行初始化
	m_config(conf),
	m_initialised(false),
	m_pLearner(0),
	m_debugImage(2*conf.searchRadius+1, 2*conf.searchRadius+1, CV_32FC1),
	m_needsIntegralImage(false)
{
	Reset();
}

Tracker::~Tracker()
{
	delete m_pLearner;
	for (int i = 0; i < (int)m_features.size(); ++i)
	{
		delete m_features[i];
		delete m_kernels[i];
	}
}

void Tracker::Reset()               //因为初始化为haar特征核高斯核函数,所以m_needsIntegralImage = true,m_needsIntegralHist = false;
{
	m_initialised = false;
	m_debugImage.setTo(0);
	if (m_pLearner) delete m_pLearner;
	for (int i = 0; i < (int)m_features.size(); ++i)
	{
		delete m_features[i];
		delete m_kernels[i];
	}
	m_features.clear();
	m_kernels.clear();
	
	m_needsIntegralImage = false;
	m_needsIntegralHist = false;
	
	int numFeatures = m_config.features.size();
	vector<int> featureCounts;
	for (int i = 0; i < numFeatures; ++i)
	{
		switch (m_config.features[i].feature)
		{
		case Config::kFeatureTypeHaar:
			m_features.push_back(new HaarFeatures(m_config));
			m_needsIntegralImage = true;
			break;			
		case Config::kFeatureTypeRaw:
			m_features.push_back(new RawFeatures(m_config));
			break;
		case Config::kFeatureTypeHistogram:
			m_features.push_back(new HistogramFeatures(m_config));
			m_needsIntegralHist = true;
			break;
		}
		featureCounts.push_back(m_features.back()->GetCount());
		
		switch (m_config.features[i].kernel)
		{
		case Config::kKernelTypeLinear:
			m_kernels.push_back(new LinearKernel());
			break;
		case Config::kKernelTypeGaussian:
			m_kernels.push_back(new GaussianKernel(m_config.features[i].params[0]));
			break;
		case Config::kKernelTypeIntersection:
			m_kernels.push_back(new IntersectionKernel());
			break;
		case Config::kKernelTypeChi2:
			m_kernels.push_back(new Chi2Kernel());
			break;
		}
	}
	
	if (numFeatures > 1)
	{
		MultiFeatures* f = new MultiFeatures(m_features);
		m_features.push_back(f);
		
		MultiKernel* k = new MultiKernel(m_kernels, featureCounts);
		m_kernels.push_back(k);		
	}
	
	m_pLearner = new LaRank(m_config, *m_features.back(), *m_kernels.back());
}
	

void Tracker::Initialise(const cv::Mat& frame, FloatRect bb)
{
	m_bb = IntRect(bb);//将目标框坐标转为int型
    //该类主要实现了积分图计算
	ImageRep image(frame, m_needsIntegralImage, m_needsIntegralHist);  //后两个参数分别为true,false
	for (int i = 0; i < 1; ++i)
	{
		UpdateLearner(image);// 更新预测函数
	}
	m_initialised = true;
}

void Tracker::Track(const cv::Mat& frame)
{
	assert(m_initialised);
	
	ImageRep image(frame, m_needsIntegralImage, m_needsIntegralHist);   //获得当前帧的积分图
	
	vector<FloatRect> rects = Sampler::PixelSamples(m_bb, m_config.searchRadius);  //抽样
	
	vector<FloatRect> keptRects;
	keptRects.reserve(rects.size());
	for (int i = 0; i < (int)rects.size(); ++i)
	{
		if (!rects[i].IsInside(image.GetRect())) continue;
		keptRects.push_back(rects[i]);        //将超出图像范围的框舍弃,剩余的保留在keptRects中
	}
	
	MultiSample sample(image, keptRects);     //多样本类,主要包括样本框以及ImageRep image
	
	vector<double> scores;
	m_pLearner->Eval(sample, scores);     //scores里存放的是论文中公式(10)后半部分
	
	double bestScore = -DBL_MAX;
	int bestInd = -1;
	for (int i = 0; i < (int)keptRects.size(); ++i)
	{		
		if (scores[i] > bestScore)
		{
			bestScore = scores[i];
			bestInd = i;              //找到bestScore
		}
	}
	
	UpdateDebugImage(keptRects, m_bb, scores);//更新debug图像,用于显示
	
	if (bestInd != -1)
	{
		m_bb = keptRects[bestInd];
		UpdateLearner(image);
#if VERBOSE		
		cout << "track score: " << bestScore << endl;
#endif
	}
}

void Tracker::UpdateDebugImage(const vector<FloatRect>& samples, const FloatRect& centre, const vector<double>& scores)
{
	double mn = VectorXd::Map(&scores[0], scores.size()).minCoeff();   //Map:将现存的结构映射到Eigen的数据结构里,进行计算
	double mx = VectorXd::Map(&scores[0], scores.size()).maxCoeff();   //R.minCoeff()=min(R(:)), R.maxCoeff()=max(R(:));
	m_debugImage.setTo(0);     //置为全黑色
	for (int i = 0; i < (int)samples.size(); ++i)
	{
		int x = (int)(samples[i].XMin() - centre.XMin());
		int y = (int)(samples[i].YMin() - centre.YMin());
        m_debugImage.at<float>(m_config.searchRadius+y,m_config.searchRadius+x)=(float)((scores[i]-mn)/(mx-mn));//scores得分越大的框,会在m_debugImage上具有越大的值,即该点越亮(类似于置信图)
	}
}

void Tracker::Debug()
{
	imshow("tracker", m_debugImage);   //显示m_debugImage图像
	m_pLearner->Debug();
}

void Tracker::UpdateLearner(const ImageRep& image)     //更新预测函数
{
	// note these return the centre sample at index 0
	vector<FloatRect> rects = Sampler::RadialSamples(m_bb, 2*m_config.searchRadius, 5, 16);//5*16=80,加上一个原始rect,共包含81个rect
	//vector<FloatRect> rects = Sampler::PixelSamples(m_bb, 2*m_config.searchRadius, true);
	
	vector<FloatRect> keptRects;
	keptRects.push_back(rects[0]); // 原始目标框
	for (int i = 1; i < (int)rects.size(); ++i)
	{
		if (!rects[i].IsInside(image.GetRect())) continue;   //判断生成的样本框是否超出图像范围,超出的舍弃
		keptRects.push_back(rects[i]);
	}
		
#if VERBOSE		
	cout << keptRects.size() << " samples" << endl;
#endif
		
	MultiSample sample(image, keptRects);      //多样本类对象sample,包含ImageRep& image,以及保留下来样本框
	m_pLearner->Update(sample, 0);       //更新,在LaRank类下实现
}

LaRank.h

#ifndef LARANK_H
#define LARANK_H

#include "Rect.h"
#include "Sample.h"

#include <vector>
#include <Eigen/Core>

#include <opencv/cv.h>

class Config;
class Features;
class Kernel;

class LaRank   //文献《Solving multiclass support vector machine with LaRank》,该类实现了struck算法的主要步骤
{
public:
	LaRank(const Config& conf, const Features& features, const Kernel& kernel);  //初始化参数,特征值,核
	~LaRank();
	
	virtual void Eval(const MultiSample& x, std::vector<double>& results);
	virtual void Update(const MultiSample& x, int y);
	
	virtual void Debug();

private:

	struct SupportPattern
	{
		std::vector<Eigen::VectorXd> x;   //特征值
		std::vector<FloatRect> yv;        //变化关系
		std::vector<cv::Mat> images;      //图像片
		int y;                            //索引值
		int refCount;                    //统计sp的个数?
	};

	struct SupportVector
	{
		SupportPattern* x;
		int y;
		double b;                //beta
		double g;                 //gradient
		cv::Mat image;
	};
	
	const Config& m_config;
	const Features& m_features;
	const Kernel& m_kernel;
	
	std::vector<SupportPattern*> m_sps;
	std::vector<SupportVector*> m_svs;

	cv::Mat m_debugImage;
	
	double m_C;
	Eigen::MatrixXd m_K;

	inline double Loss(const FloatRect& y1, const FloatRect& y2) const         //损失函数
	{
		// overlap loss
		return 1.0-y1.Overlap(y2);
		// squared distance loss
		//double dx = y1.XMin()-y2.XMin();
		//double dy = y1.YMin()-y2.YMin();
		//return dx*dx+dy*dy;
	}
	
	double ComputeDual() const;

	void SMOStep(int ipos, int ineg);
	std::pair<int, double> MinGradient(int ind);
	void ProcessNew(int ind);
	void Reprocess();
	void ProcessOld();
	void Optimize();

	int AddSupportVector(SupportPattern* x, int y, double g);
	void RemoveSupportVector(int ind);
	void RemoveSupportVectors(int ind1, int ind2);
	void SwapSupportVectors(int ind1, int ind2);
	
	void BudgetMaintenance();
	void BudgetMaintenanceRemove();

	double Evaluate(const Eigen::VectorXd& x, const FloatRect& y) const;
	void UpdateDebugImage();
};

#endif

LaRank.cpp

#include "LaRank.h"

#include "Config.h"
#include "Features.h"
#include "Kernels.h"
#include "Sample.h"
#include "Rect.h"
#include "GraphUtils/GraphUtils.h"

#include <Eigen/Array>

#include <opencv/highgui.h>
static const int kTileSize = 30;
using namespace cv;

using namespace std;
using namespace Eigen;

static const int kMaxSVs = 2000; // TODO (only used when no budget)


LaRank::LaRank(const Config& conf, const Features& features, const Kernel& kernel) :
	m_config(conf),
	m_features(features),
	m_kernel(kernel),
	m_C(conf.svmC)
{
	int N = conf.svmBudgetSize > 0 ? conf.svmBudgetSize+2 : kMaxSVs;     //N=100+2,特征向量的个数不能超过这个阈值
	m_K = MatrixXd::Zero(N, N);            //m_K表示核矩阵,102*102
	m_debugImage = Mat(800, 600, CV_8UC3);
}

LaRank::~LaRank()
{
}

double LaRank::Evaluate(const Eigen::VectorXd& x, const FloatRect& y) const  //论文中公式10后半部分计算,即F
{
	double f = 0.0;
	for (int i = 0; i < (int)m_svs.size(); ++i)
	{
		const SupportVector& sv = *m_svs[i];
		f += sv.b*m_kernel.Eval(x, sv.x->x[sv.y]);       //beta*高斯核
	}
	return f;
}

void LaRank::Eval(const MultiSample& sample, std::vector<double>& results)
{
	const FloatRect& centre(sample.GetRects()[0]);       //原始目标框
	vector<VectorXd> fvs;
	const_cast<Features&>(m_features).Eval(sample, fvs);     //fvs存放haar特征值
	results.resize(fvs.size());
	for (int i = 0; i < (int)fvs.size(); ++i)
	{
		// express y in coord frame of centre sample
		FloatRect y(sample.GetRects()[i]);
		y.Translate(-centre.XMin(), -centre.YMin());     //将每个框的横纵坐标分别减去原始目标框的横纵坐标
		results[i] = Evaluate(fvs[i], y);         //计算每个框的F函数,结果保存在results中。
	}
}

void LaRank::Update(const MultiSample& sample, int y)
{
	// add new support pattern
	SupportPattern* sp = new SupportPattern;        //定义一个sp
	const vector<FloatRect>& rects = sample.GetRects();      //获得所有的样本框
	FloatRect centre = rects[y];                     //原始目标框
	for (int i = 0; i < (int)rects.size(); ++i)
	{
		// express r in coord frame of centre sample
		FloatRect r = rects[i];
		r.Translate(-centre.XMin(), -centre.YMin());   //这就表示帧间目标位置变化关系
		sp->yv.push_back(r);
		if (!m_config.quietMode && m_config.debugMode)
		{
			// store a thumbnail for each sample
			Mat im(kTileSize, kTileSize, CV_8UC1);
			IntRect rect = rects[i];
			cv::Rect roi(rect.XMin(), rect.YMin(), rect.Width(), rect.Height());  //感兴趣的区域是那些抽取的样本区域
			cv::resize(sample.GetImage().GetImage(0)(roi), im, im.size());       //0表示通道数,将感兴趣区域统一为30*30,并保存在sp里的images
			sp->images.push_back(im);
		}
	}
	// evaluate features for each sample
	sp->x.resize(rects.size());    //有多少个感兴趣的框,就有多少个特征值向量。
	const_cast<Features&>(m_features).Eval(sample, sp->x);    //将每个样本框计算得到的haar特征存入sp->x,这里关于haar特征的代码不再列出,我将代码提取出来单独写出一篇博客《http://blog.csdn.net/qianxin_dh/article/details/39268113》
	sp->y = y;
	sp->refCount = 0;
	m_sps.push_back(sp);   //存储sp

	ProcessNew((int)m_sps.size()-1);  //执行该步骤,添加支持向量,并对beta值进行调整
	BudgetMaintenance();       //保证支持向量没有超出限定阈值
	
	for (int i = 0; i < 10; ++i)
	{
		Reprocess();           //包括processold:增加新的sv;optimize:在现有的sv基础上调整beta值
		BudgetMaintenance();
	}
}

void LaRank::BudgetMaintenance()
{
	if (m_config.svmBudgetSize > 0)
	{
		while ((int)m_svs.size() > m_config.svmBudgetSize)
		{
			BudgetMaintenanceRemove();  //支持向量的个数超出阈值后,找到对于F函数影响最小的负sv,并移除。
		}
	}
}

void LaRank::Reprocess()
{
	ProcessOld();       //每个processold步骤伴随着10个optimize步骤。
	for (int i = 0; i < 10; ++i)
	{
		Optimize();
	}
}

double LaRank::ComputeDual() const
{
	double d = 0.0;
	for (int i = 0; i < (int)m_svs.size(); ++i)
	{
		const SupportVector* sv = m_svs[i];
		d -= sv->b*Loss(sv->x->yv[sv->y], sv->x->yv[sv->x->y]);
		for (int j = 0; j < (int)m_svs.size(); ++j)
		{
			d -= 0.5*sv->b*m_svs[j]->b*m_K(i,j);
		}
	}
	return d;
}

void LaRank::SMOStep(int ipos, int ineg)
{
	if (ipos == ineg) return;

	SupportVector* svp = m_svs[ipos];    //定义一个正支持向量
	SupportVector* svn = m_svs[ineg];    //定义一个负支持向量
	assert(svp->x == svn->x);
	SupportPattern* sp = svp->x;    //定义一个支持模式sp,将正支持向量的支持模式赋予sp

#if VERBOSE
	cout << "SMO: gpos:" << svp->g << " gneg:" << svn->g << endl;
#endif	
	if ((svp->g - svn->g) < 1e-5)
	{
#if VERBOSE
		cout << "SMO: skipping" << endl;
#endif		
	}
	else
	{   //论文中的Algorithm步骤
		double kii = m_K(ipos, ipos) + m_K(ineg, ineg) - 2*m_K(ipos, ineg);
		double lu = (svp->g-svn->g)/kii;
		// no need to clamp against 0 since we'd have skipped in that case
		double l = min(lu, m_C*(int)(svp->y == sp->y) - svp->b);

		svp->b += l;
		svn->b -= l;

		// update gradients
		for (int i = 0; i < (int)m_svs.size(); ++i)
		{
			SupportVector* svi = m_svs[i];
			svi->g -= l*(m_K(i, ipos) - m_K(i, ineg));
		}
#if VERBOSE
		cout << "SMO: " << ipos << "," << ineg << " -- " << svp->b << "," << svn->b << " (" << l << ")" << endl;
#endif		
	}
	
	// check if we should remove either sv now
	
	if (fabs(svp->b) < 1e-8)         //beta为0,该向量被移除
	{
		RemoveSupportVector(ipos);
		if (ineg == (int)m_svs.size())
		{
			// ineg and ipos will have been swapped during sv removal
			ineg = ipos;
		}
	}

	if (fabs(svn->b) < 1e-8)  //beta=0,该向量被移除
	{
		RemoveSupportVector(ineg);
	}
}

pair<int, double> LaRank::MinGradient(int ind)
{
	const SupportPattern* sp = m_sps[ind];
	pair<int, double> minGrad(-1, DBL_MAX);
	for (int i = 0; i < (int)sp->yv.size(); ++i)
	{
		double grad = -Loss(sp->yv[i], sp->yv[sp->y]) - Evaluate(sp->x[i], sp->yv[i]);//通过公式10找到最小梯度对应的样本框
		if (grad < minGrad.second)
		{
			minGrad.first = i;
			minGrad.second = grad;
		}
	}
	return minGrad;
}

void LaRank::ProcessNew(int ind)  //可以添加新的支持向量,增加的正负支持向量(sv)具有相同的支持模式(sp)
{
	// gradient is -f(x,y) since loss=0
	int ip = AddSupportVector(m_sps[ind], m_sps[ind]->y, -Evaluate(m_sps[ind]->x[m_sps[ind]->y],m_sps[ind]->yv[m_sps[ind]->y]));  //处理当前新样本,将上一帧目标位置作为正向量加入

	pair<int, double> minGrad = MinGradient(ind);  //int,double分别是具有最小梯度的样本框存放的位置,最小梯度的数值
	int in = AddSupportVector(m_sps[ind], minGrad.first, minGrad.second);    //将当前具有最小梯度的样本作为负向量加入

	SMOStep(ip, in);   //Algorithm 1,更新beta和gradient值
}

void LaRank::ProcessOld()  //可以添加新的支持向量
{
	if (m_sps.size() == 0) return;

	// choose pattern to process
	int ind = rand() % m_sps.size();   //随机选取sp

	// find existing sv with largest grad and nonzero beta
	int ip = -1;
	double maxGrad = -DBL_MAX;
	for (int i = 0; i < (int)m_svs.size(); ++i)
	{
		if (m_svs[i]->x != m_sps[ind]) continue;

		const SupportVector* svi = m_svs[i];
		if (svi->g > maxGrad && svi->b < m_C*(int)(svi->y == m_sps[ind]->y))   //找出符合该条件的,作为y+,后一个条件保证了y+是从现存的sv中找出,因此不会增加新的向量
		{
			ip = i;
			maxGrad = svi->g;
		}
	}
	assert(ip != -1);
	if (ip == -1) return;

	// find potentially new sv with smallest grad
	pair<int, double> minGrad = MinGradient(ind);
	int in = -1;
	for (int i = 0; i < (int)m_svs.size(); ++i)
	{
		if (m_svs[i]->x != m_sps[ind]) continue;              //找出满足该条件的,作为y-

		if (m_svs[i]->y == minGrad.first)
		{
			in = i;
			break;
		}
	}
	if (in == -1)
	{
		// add new sv
		in = AddSupportVector(m_sps[ind], minGrad.first, minGrad.second);  //将该样本作为负sv加入
	}

	SMOStep(ip, in);    //更新beta和gradient的值
}

void LaRank::Optimize()    //
{
	if (m_sps.size() == 0) return;
	
	// choose pattern to optimize
	int ind = rand() % m_sps.size();   //随机处理现存的sp

	int ip = -1;
	int in = -1;
	double maxGrad = -DBL_MAX;
	double minGrad = DBL_MAX;
	for (int i = 0; i < (int)m_svs.size(); ++i)
	{
		if (m_svs[i]->x != m_sps[ind]) continue;

		const SupportVector* svi = m_svs[i];
        if(svi->g>maxGrad&&svi->b<m_C*(int)(svi->y==m_sps->[y]))   //将满足该条件的作为y+
		{
			ip = i;
			maxGrad = svi->g;
		}
		if (svi->g < minGrad)                       //将满足该条件的作为y-
		{
			in = i;
			minGrad = svi->g;
		}
	}
	assert(ip != -1 && in != -1);
	if (ip == -1 || in == -1)
	{
		// this shouldn't happen
		cout << "!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!" << endl;
		return;
	}

	SMOStep(ip, in);         //更新beta和gradient
}

int LaRank::AddSupportVector(SupportPattern* x, int y, double g)
{
	SupportVector* sv = new SupportVector;
	sv->b = 0.0;        //beta初始化为0
	sv->x = x;
	sv->y = y;
	sv->g = g;

	int ind = (int)m_svs.size();
	m_svs.push_back(sv);
	x->refCount++;

#if VERBOSE
	cout << "Adding SV: " << ind << endl;
#endif

	// update kernel matrix
	for (int i = 0; i < ind; ++i)    //计算核矩阵
	{
		m_K(i,ind) = m_kernel.Eval(m_svs[i]->x->x[m_svs[i]->y], x->x[y]);
		m_K(ind,i) = m_K(i,ind);
	}
	m_K(ind,ind) = m_kernel.Eval(x->x[y]);

	return ind;
}

void LaRank::SwapSupportVectors(int ind1, int ind2)
{
	SupportVector* tmp = m_svs[ind1];
	m_svs[ind1] = m_svs[ind2];
	m_svs[ind2] = tmp;
	
	VectorXd row1 = m_K.row(ind1);
	m_K.row(ind1) = m_K.row(ind2);
	m_K.row(ind2) = row1;
	
	VectorXd col1 = m_K.col(ind1);
	m_K.col(ind1) = m_K.col(ind2);
	m_K.col(ind2) = col1;
}

void LaRank::RemoveSupportVector(int ind)
{
#if VERBOSE
	cout << "Removing SV: " << ind << endl;
#endif	

	m_svs[ind]->x->refCount--;
	if (m_svs[ind]->x->refCount == 0)
	{
		// also remove the support pattern
		for (int i = 0; i < (int)m_sps.size(); ++i)
		{
			if (m_sps[i] == m_svs[ind]->x)
			{
				delete m_sps[i];
				m_sps.erase(m_sps.begin()+i);
				break;
			}
		}
	}

	// make sure the support vector is at the back, this
	// lets us keep the kernel matrix cached and valid
	if (ind < (int)m_svs.size()-1)
	{
		SwapSupportVectors(ind, (int)m_svs.size()-1);
		ind = (int)m_svs.size()-1;
	}
	delete m_svs[ind];
	m_svs.pop_back();
}

void LaRank::BudgetMaintenanceRemove()
{
	// find negative sv with smallest effect on discriminant function if removed
	double minVal = DBL_MAX;
	int in = -1;
	int ip = -1;
	for (int i = 0; i < (int)m_svs.size(); ++i)
	{
		if (m_svs[i]->b < 0.0)           //找到负sv
		{
			// find corresponding positive sv
			int j = -1;
			for (int k = 0; k < (int)m_svs.size(); ++k)
			{
				if (m_svs[k]->b > 0.0 && m_svs[k]->x == m_svs[i]->x)   //找到同一支持模式下的正sv
				{
					j = k;
					break;
				}
			}
			double val = m_svs[i]->b*m_svs[i]->b*(m_K(i,i) + m_K(j,j) - 2.0*m_K(i,j));
			if (val < minVal)         //找到对F影响最小的sv
			{
				minVal = val;
				in = i;
				ip = j;
			}
		}
	}

	// adjust weight of positive sv to compensate for removal of negative
	m_svs[ip]->b += m_svs[in]->b;    //将负sv移除,其相应的beta值需补偿到正sv上。

	// remove negative sv
	RemoveSupportVector(in);
	if (ip == (int)m_svs.size())
	{
		// ip and in will have been swapped during support vector removal
		ip = in;
	}
	
	if (m_svs[ip]->b < 1e-8)     //beta值为0,移除该向量
	{
		// also remove positive sv
		RemoveSupportVector(ip);
	}

	// update gradients
	// TODO: this could be made cheaper by just adjusting incrementally rather than recomputing
	for (int i = 0; i < (int)m_svs.size(); ++i)
	{
		SupportVector& svi = *m_svs[i];
		svi.g = -Loss(svi.x->yv[svi.y],svi.x->yv[svi.x->y]) - Evaluate(svi.x->x[svi.y], svi.x->yv[svi.y]);
	}	
}

void LaRank::Debug()
{
	cout << m_sps.size() << "/" << m_svs.size() << " support patterns/vectors" << endl;
	UpdateDebugImage();
	imshow("learner", m_debugImage);
}

void LaRank::UpdateDebugImage()    //该函数主要用于样本显示,与算法关系不大,这里不做分析了
{
	m_debugImage.setTo(0);
	
	int n = (int)m_svs.size();
	
	if (n == 0) return;
	
	const int kCanvasSize = 600;
	int gridSize = (int)sqrtf((float)(n-1)) + 1;
	int tileSize = (int)((float)kCanvasSize/gridSize);
	
	if (tileSize < 5)
	{
		cout << "too many support vectors to display" << endl;
		return;
	}
	
	Mat temp(tileSize, tileSize, CV_8UC1);
	int x = 0;
	int y = 0;
	int ind = 0;
	float vals[kMaxSVs];
	memset(vals, 0, sizeof(float)*n);
	int drawOrder[kMaxSVs];
	
	for (int set = 0; set < 2; ++set)
	{
		for (int i = 0; i < n; ++i)
		{
			if (((set == 0) ? 1 : -1)*m_svs[i]->b < 0.0) continue;
			
			drawOrder[ind] = i;
			vals[ind] = (float)m_svs[i]->b;
			++ind;
			
			Mat I = m_debugImage(cv::Rect(x, y, tileSize, tileSize));
			resize(m_svs[i]->x->images[m_svs[i]->y], temp, temp.size());
			cvtColor(temp, I, CV_GRAY2RGB);
			double w = 1.0;
			rectangle(I, Point(0, 0), Point(tileSize-1, tileSize-1), (m_svs[i]->b > 0.0) ? CV_RGB(0, (uchar)(255*w), 0) : CV_RGB((uchar)(255*w), 0, 0), 3);
			x += tileSize;
			if ((x+tileSize) > kCanvasSize)
			{
				y += tileSize;
				x = 0;
			}
		}
	}
	
	const int kKernelPixelSize = 2;
	int kernelSize = kKernelPixelSize*n;
	
	double kmin = m_K.minCoeff();
	double kmax = m_K.maxCoeff();
	
	if (kernelSize < m_debugImage.cols && kernelSize < m_debugImage.rows)
	{
		Mat K = m_debugImage(cv::Rect(m_debugImage.cols-kernelSize, m_debugImage.rows-kernelSize, kernelSize, kernelSize));
		for (int i = 0; i < n; ++i)
		{
			for (int j = 0; j < n; ++j)
			{
				Mat Kij = K(cv::Rect(j*kKernelPixelSize, i*kKernelPixelSize, kKernelPixelSize, kKernelPixelSize));
				uchar v = (uchar)(255*(m_K(drawOrder[i], drawOrder[j])-kmin)/(kmax-kmin));
				Kij.setTo(Scalar(v, v, v));
			}
		}
	}
	else
	{
		kernelSize = 0;
	}
	
	
	Mat I = m_debugImage(cv::Rect(0, m_debugImage.rows - 200, m_debugImage.cols-kernelSize, 200));
	I.setTo(Scalar(255,255,255));
	IplImage II = I;
	setGraphColor(0);
	drawFloatGraph(vals, n, &II, 0.f, 0.f, I.cols, I.rows);
}




你可能感兴趣的:(源代码,图像处理,Struck)