简介
结合梯度特征HOG及颜色特征的实时跟踪算法,速度可以达到80FPS。
摘要
近些年来,基于相关滤波的跟踪算法得到了很大的改进,已经能够达到很好的跟踪效果:14年的KCF,15年的SRDCF、HCF等算法都已经达到了很高的精度,像HCF已经在OTB-50上达到了0.89的效果。但美中不足的是,使用深度学习特征之后速度相对较慢,不能满足实时的要求。算法作者经过研究发现,以往算法的模型学习很依赖于跟踪目标的空间信息,对发生形变的目标跟踪效果不佳;而使用颜色特征对目标进行学习,能够很好地处理跟踪目标发生形变和运动模糊的问题,但当光照条件变化时,颜色特征就表现得相对无力,这时使用HOG特征能够对光照变化下的目标进行很好的跟踪。所以作者想到,在同一个回归框架中使用一个融合互补因子,就能够同时处理这些因素并达到比较快的速度,而且跟踪效果也比大部分现有的跟踪算法要好。
实现
这里感谢贡献C++代码的大神 xuduo35
源码地址:https://github.com/xuduo35/STAPLE
//main.cpp
#include "staple_tracker.hpp"
// NOTE(review): the original header names were lost when this listing was
// pasted into HTML (angle brackets stripped). Reconstructed from what the
// code below actually uses; confirm against the upstream repository.
#include <opencv2/opencv.hpp>
#include <algorithm>
#include <fstream>
#include <iostream>
#include <sstream>
#include <string>
#include <vector>

// Converts a VOT 4-point polygon into an axis-aligned bounding box.
cv::Rect_<float> getAxisAlignedBB(std::vector<cv::Point2f> polygon);
// Parses groundtruth.txt (one polygon per line) into axis-aligned boxes.
std::vector<cv::Rect_<float>> getgroundtruth(std::string txt_file);
int main(int argc, char * argv[])
{
std::string sequence = "/sequence";
if (argc >= 2) {
sequence = std::string("/vot2015/") + argv[1];
}
std::string video_base_path = "..";
std::string pattern_jpg = video_base_path + sequence + "/*.jpg";
std::string txt_base_path = video_base_path + sequence + "/groundtruth.txt";
std::vector image_files;
cv::glob(pattern_jpg, image_files);
if (image_files.size() == 0)
return -1;
std::vector> groundtruth_rect;
groundtruth_rect = getgroundtruth(txt_base_path);
//for (size_t i = 0; i < groundtruth_rect.size(); ++i)
// std::cout << i+1 << '\t' < location = groundtruth_rect[0];
cv::Mat image;
std::vector> result_rects;
int64 tic, toc;
double time = 0;
bool show_visualization = true;
for (unsigned int frame = 0; frame < image_files.size(); ++frame) {
image = cv::imread(image_files[frame]);
tic = cv::getTickCount();
if (frame == 0){
staple.tracker_staple_initialize(image, location);
staple.tracker_staple_train(image, true);
} else {
location = staple.tracker_staple_update(image);
staple.tracker_staple_train(image, false);
}
toc = cv::getTickCount() - tic;
time += toc;
result_rects.push_back(location);
if (show_visualization) {
cv::putText(image, std::to_string(frame + 1), cv::Point(20, 40), 6, 1,
cv::Scalar(0, 255, 255), 2);
cv::rectangle(image, groundtruth_rect[frame], cv::Scalar(0, 255, 0), 2);
cv::rectangle(image, location, cv::Scalar(0, 128, 255), 2);
cv::imshow("STAPLE", image);
char key = cv::waitKey(10);
if (key == 27 || key == 'q' || key == 'Q')
break;
}
}
time = time / double(cv::getTickFrequency());
double fps = double(result_rects.size()) / time;
std::cout << "fps:" << fps << std::endl;
cv::destroyAllWindows();
return 0;
}
// Converts a rotated 4-point VOT polygon into an axis-aligned bounding box
// with approximately the same area as the rotated rectangle (the VOT toolkit
// convention). The returned rectangle is 0-indexed.
// @param polygon  four corner points, in order (adjacent corners adjacent).
// @return axis-aligned box centred on the polygon centroid.
cv::Rect_<float> getAxisAlignedBB(std::vector<cv::Point2f> polygon)
{
    // Centroid of the four corners.
    double cx = double(polygon[0].x + polygon[1].x + polygon[2].x + polygon[3].x) / 4.;
    double cy = double(polygon[0].y + polygon[1].y + polygon[2].y + polygon[3].y) / 4.;
    // Axis-aligned extent of the polygon.
    double x1 = std::min(std::min(std::min(polygon[0].x, polygon[1].x), polygon[2].x), polygon[3].x);
    double x2 = std::max(std::max(std::max(polygon[0].x, polygon[1].x), polygon[2].x), polygon[3].x);
    double y1 = std::min(std::min(std::min(polygon[0].y, polygon[1].y), polygon[2].y), polygon[3].y);
    double y2 = std::max(std::max(std::max(polygon[0].y, polygon[1].y), polygon[2].y), polygon[3].y);
    // A1 = area of the rotated rectangle (product of two adjacent side
    // lengths); A2 = area of its axis-aligned hull. s shrinks the hull so
    // the result keeps the rotated rectangle's area.
    double A1 = cv::norm(polygon[1] - polygon[2]) * cv::norm(polygon[2] - polygon[3]);
    double A2 = (x2 - x1) * (y2 - y1);
    double s = sqrt(A1 / A2);
    double w = s * (x2 - x1) + 1;
    double h = s * (y2 - y1) + 1;
    // The extra -1 converts from MATLAB 1-indexed coordinates to 0-indexed.
    cv::Rect_<float> rect(cx - 1 - w / 2.0, cy - 1 - h / 2.0, w, h);
    return rect;
}
std::vector> getgroundtruth(std::string txt_file)
{
std::vector> rects;
std::ifstream gt;
gt.open(txt_file.c_str());
if (!gt.is_open())
std::cout << "Ground truth file " << txt_file
<< " can not be read" << std::endl;
std::string line;
float x1, y1, x2, y2, x3, y3, x4, y4;
while (getline(gt, line)) {
std::replace(line.begin(), line.end(), ',', ' ');
std::stringstream ss;
ss.str(line);
ss >> x1 >> y1 >> x2 >> y2 >> x3 >> y3 >> x4 >> y4;
std::vectorpolygon;
polygon.push_back(cv::Point2f(x1, y1));
polygon.push_back(cv::Point2f(x2, y2));
polygon.push_back(cv::Point2f(x3, y3));
polygon.push_back(cv::Point2f(x4, y4));
rects.push_back(getAxisAlignedBB(polygon)); //0-index
}
gt.close();
return rects;
}
//staple_tracker.hpp
#ifndef STAPLE_TRACKER_HPP
#define STAPLE_TRACKER_HPP
#include
#include
#include
#include
#include
#include
#include
#include
#include
///
/// \brief The staple_cfg struct
///
struct staple_cfg
{
    bool grayscale_sequence = false;    // suppose that sequence is colour
    int hog_cell_size = 4;
    int fixed_area = 150*150;           // standard area to which we resize the target
    int n_bins = 2*2*2*2*2;             // number of bins for the color histograms (bg and fg models)
    double learning_rate_pwp = 0.04;    // bg and fg color models learning rate
    const char * feature_type = "fhog"; // "fhog" or "gray"
    double inner_padding = 0.2;         // defines inner area used to sample colors from the foreground
    double output_sigma_factor = 1/16.0; // standard deviation for the desired translation filter output
    double lambda = 1e-3;               // regularization weight
    double learning_rate_cf = 0.01;     // HOG model learning rate
    double merge_factor = 0.3;          // fixed interpolation factor - how to linearly combine the two responses
    const char * merge_method = "const_factor";
    bool den_per_channel = false;

    // scale related
    bool scale_adaptation = true;
    int hog_scale_cell_size = 4;        // Default DSST=4
    double learning_rate_scale = 0.025;
    double scale_sigma_factor = 1/4.0;
    int num_scales = 33;
    double scale_model_factor = 1.0;
    double scale_step = 1.02;
    double scale_model_max_area = 32*16;

    // debugging stuff
    int visualization = 0;              // show output bbox on frame
    int visualization_dbg = 0;          // show also per-pixel scores, desired response and filter output

    cv::Point_<float> init_pos;         // initial target centre
    cv::Size target_sz;                 // initial target size
};
///
/// \brief The STAPLE_TRACKER class
///
class STAPLE_TRACKER
{
public:
    STAPLE_TRACKER()
    {
        cfg = default_parameters_staple(cfg);
        frameno = 0;
    }
    ~STAPLE_TRACKER(){}

    // Resizes im to newsz using the named interpolation method ("auto", etc.).
    void mexResize(const cv::Mat &im, cv::Mat &output, cv::Size newsz, const char *method);
    // Updates (or, when first==true, initializes) the correlation filter and
    // histogram models from the current frame.
    void tracker_staple_train(const cv::Mat &im, bool first);
    // Sets up all internal state from the first frame and its ground-truth box.
    void tracker_staple_initialize(const cv::Mat &im, cv::Rect_<float> region);
    // Estimates the target's bounding box in the given frame.
    cv::Rect tracker_staple_update(const cv::Mat &im);

protected:
    staple_cfg default_parameters_staple(staple_cfg cfg);
    void initializeAllAreas(const cv::Mat &im);
    void getSubwindow(const cv::Mat &im, cv::Point_<float> centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output);
    void getSubwindowFloor(const cv::Mat &im, cv::Point_<float> centerCoor, cv::Size model_sz, cv::Size scaled_sz, cv::Mat &output);
    // Updates the bg/fg color histogram models (new_model==true rebuilds them).
    void updateHistModel(bool new_model, cv::Mat &patch, double learning_rate_pwp=0.0);
    void CalculateHann(cv::Size sz, cv::Mat &output);
    void gaussianResponse(cv::Size rect_size, double sigma, cv::Mat &output);
    void getFeatureMap(cv::Mat &im_patch, const char *feature_type, cv::MatND &output);
    void cropFilterResponse(const cv::Mat &response_cf, cv::Size response_size, cv::Mat& output);
    void getColourMap(const cv::Mat &patch, cv::Mat& output);
    void getCenterLikelihood(const cv::Mat &object_likelihood, cv::Size m, cv::Mat& center_likelihood);
    // Linearly combines the correlation-filter and per-pixel responses.
    void mergeResponses(const cv::Mat &response_cf, const cv::Mat &response_pwp, cv::Mat &response);
    void getScaleSubwindow(const cv::Mat &im, cv::Point_<float> centerCoor, cv::Mat &output);

private:
    staple_cfg cfg;                 // tracker parameters
    cv::Point_<float> pos;          // current target centre
    cv::Size target_sz;             // current target size

    cv::Size bg_area;               // background (search) area
    cv::Size fg_area;               // foreground (object) area
    double area_resize_factor;
    cv::Size cf_response_size;
    cv::Size norm_bg_area;
    cv::Size norm_target_sz;
    cv::Size norm_delta_area;
    cv::Size norm_pwp_search_area;
    cv::Mat im_patch_pwp;

    cv::MatND bg_hist;              // background color histogram
    cv::MatND fg_hist;              // foreground color histogram

    cv::Mat hann_window;
    cv::Mat yf;                     // desired translation filter output (freq. domain)
    std::vector<cv::Mat> hf_den;    // translation filter denominator, per channel
    std::vector<cv::Mat> hf_num;    // translation filter numerator, per channel

    cv::Rect rect_position;

    // DSST-style scale estimation state.
    float scale_factor;
    cv::Mat scale_window;
    cv::Mat scale_factors;
    cv::Size scale_model_sz;
    float min_scale_factor;
    float max_scale_factor;
    cv::Size base_target_sz;
    cv::Mat ysf;
    cv::Mat sf_den;
    cv::Mat sf_num;

    int frameno = 0;                // number of frames processed so far
};
#endif
代码运行结果:
实测该代码可用,效果还不错。文章只粘贴了部分代码,有需要的可以去Github上下载;或者在我的博客资源里下载