KCF代码阅读笔记(matlab版)

1 主函数run_tracker.m

提取HOG特征用的函数和CPP文件作者已经给出并编译
什么也不输入时,默认为使用HOG特征和高斯核函数(还可以选择使用灰度特征和线性核)
要把base_path改成自己的benchmark文件夹所在路径(原始代码第43行),比如:
base_path = 'F:\track\Benchmark\';

主函数中给出的相关参数:
padding 目标搜索范围系数,决定了每帧进行检测的范围
lambda 正则化参数λ
output_sigma_factor 空间带宽:与目标大小成比例
interp_factor 时间更新系数
sigma 高斯核函数的带宽/方差
poly_a/poly_b 多项式核函数的加性项/指数
cell_size 求HOG特征时cell的尺寸

%  High-Speed Tracking with Kernelized Correlation Filters
%
%  Joao F. Henriques, 2014
%  http://www.isr.uc.pt/~henriques/
%
%------------------------------------------------------------------------------------------------------------
%  核化/对偶相关滤波器(KCF/DCF)的主接口函数。这个函数负责设置参数、加载视频信息和计算精度。实际的跟踪代码请参见tracker函数。
%
%  Main interface for Kernelized/Dual Correlation Filters (KCF/DCF).  
%  This function takes care of setting up parameters, loading video
%  information and computing precisions. For the actual tracking code,
%  check out the TRACKER function. 
%  
%--------------------------------------------------------------------------------------------------------------
%  不带任何参数时,会要求您选择一个视频,使用基于HOG特征的高斯核KCF进行跟踪,并在交互式图形窗口中显示结果。
%  按下“Esc”可以提前停止跟踪器。可以使用底部的滚动条浏览视频。
%
%  RUN_TRACKER
%    Without any parameters, will ask you to choose a video, track using
%    the Gaussian KCF on HOG, and show the results in an interactive
%    figure. Press 'Esc' to stop the tracker early. You can navigate the
%    video using the scrollbar at the bottom.
%
%--------------------------------------------------------------------------------------------------------------
%  允许您通过名称选择一个视频。'all'将运行所有视频,并显示平均统计数据。'choose'将以交互方式选择一个视频。
%
%  RUN_TRACKER VIDEO
%    Allows you to select a VIDEO by its name. 'all' will run all videos
%    and show average statistics. 'choose' will select one interactively.
%
%---------------------------------------------------------------------------------------------------------------
%  选择一个核函数。'gaussian'(高斯核)/'polynomial'(多项式核)运行KCF,'linear'(线性核)运行DCF。
%
%  RUN_TRACKER VIDEO KERNEL
%    Choose a KERNEL. 'gaussian'/'polynomial' to run KCF, 'linear' for DCF.
%
%---------------------------------------------------------------------------------------------------------------
%  选择一个特征类型,要么是'hog',要么是'gray'(原始像素)。
%
%  RUN_TRACKER VIDEO KERNEL FEATURE
%    Choose a FEATURE type, either 'hog' or 'gray' (raw pixels).
%
%---------------------------------------------------------------------------------------------------------------
%  决定是否显示可滚动图形和精度图。
%
%  RUN_TRACKER(VIDEO, KERNEL, FEATURE, SHOW_VISUALIZATION, SHOW_PLOTS)
%    Decide whether to show the scrollable figure, and the precision plot.
%
%---------------------------------------------------------------------------------------------------------------
%  有用的组合:
%
%  Useful combinations:
%  >> run_tracker choose gaussian hog  %Kernelized Correlation Filter (KCF)
%  >> run_tracker choose linear hog    %Dual Correlation Filter (DCF)
%  >> run_tracker choose gaussian gray %Single-channel KCF (ECCV'12 paper)
%  >> run_tracker choose linear gray   %MOSSE filter (single channel)
%


function [precision, fps] = run_tracker(video, kernel_type, feature_type, show_visualization, show_plots)
%RUN_TRACKER  Set up parameters, load a video and run the KCF/DCF tracker.
%
%   VIDEO is a sequence name, or one of the special values 'choose'
%   (pick interactively), 'all' (run every sequence under base_path and
%   print averages) or 'benchmark' (exchange data with the benchmark code
%   through base-workspace variables).
%   KERNEL_TYPE is 'gaussian', 'polynomial' or 'linear'.
%   FEATURE_TYPE is 'hog' or 'gray'.
%   SHOW_VISUALIZATION / SHOW_PLOTS are forwarded to the tracker and to
%   the precision plot respectively.
%
%   PRECISION is the precision at a 20 pixel threshold and FPS the frames
%   per second (averages over all sequences when VIDEO is 'all').

	%path to the videos (you'll be able to choose one with the GUI).
	base_path = './data/Benchmark/';

	%default settings: each argument that was not supplied gets a default
	if nargin < 1, video = 'choose'; end
	if nargin < 2, kernel_type = 'gaussian'; end
	if nargin < 3, feature_type = 'hog'; end
	if nargin < 4, show_visualization = ~strcmp(video, 'all'); end
	if nargin < 5, show_plots = ~strcmp(video, 'all'); end

	%parameters according to the paper. at this point we can override
	%parameters based on the chosen kernel or feature type
	kernel.type = kernel_type;
	
	features.gray = false;
	features.hog = false;
	
	padding = 1.5;  %extra area surrounding the target (search range factor)
	lambda = 1e-4;  %regularization
	output_sigma_factor = 0.1;  %spatial bandwidth (proportional to target)
	
	switch feature_type
	case 'gray',
		interp_factor = 0.075;  %linear interpolation factor for adaptation

		kernel.sigma = 0.2;  %gaussian kernel bandwidth
		
		kernel.poly_a = 1;  %polynomial kernel additive term
		kernel.poly_b = 7;  %polynomial kernel exponent
	
		features.gray = true;
		cell_size = 1;  %feature cell size; 1 for the gray (raw pixel) feature

		
	case 'hog',
		interp_factor = 0.02;
		
		kernel.sigma = 0.5;
		
		kernel.poly_a = 1;
		kernel.poly_b = 9;
		
		features.hog = true;
		features.hog_orientations = 9;
		cell_size = 4;  %cell size used when computing HOG features
		
	otherwise
		error('Unknown feature.')
	end


	%reject kernel names other than the three supported ones
	assert(any(strcmp(kernel_type, {'linear', 'polynomial', 'gaussian'})), 'Unknown kernel.')


	switch video
	case 'choose',
		%ask the user for the video, then call self with that video name.
		video = choose_video(base_path);
		if ~isempty(video),
			[precision, fps] = run_tracker(video, kernel_type, ...
				feature_type, show_visualization, show_plots);
			
			if nargout == 0,  %don't output precision as an argument
				clear precision
			end
		end
		
		
	case 'all',
		%all videos, call self with each video name.
		
		%only keep valid directory names
		dirs = dir(base_path);
		videos = {dirs.name};
		videos(strcmp('.', videos) | strcmp('..', videos) | ...
			strcmp('anno', videos) | ~[dirs.isdir]) = [];
		
		%the 'Jogging' sequence has 2 targets, create one entry for each.
		%we could make this more general if multiple targets per video
		%becomes a common occurence.
		videos(strcmpi('Jogging', videos)) = [];
		videos(end+1:end+2) = {'Jogging.1', 'Jogging.2'};
		
		all_precisions = zeros(numel(videos),1);  %to compute averages
		all_fps = zeros(numel(videos),1);
		
		%NOTE(review): 'matlabpool' is believed to be absent from newer
		%MATLAB releases; where it is missing this check selects the
		%serial loop. Confirm before relying on the parallel branch.
		if ~exist('matlabpool', 'file'),
			%no parallel toolbox, use a simple 'for' to iterate
			for k = 1:numel(videos),
				[all_precisions(k), all_fps(k)] = run_tracker(videos{k}, ...
					kernel_type, feature_type, show_visualization, show_plots);
			end
		else
			%evaluate trackers for all videos in parallel
			if matlabpool('size') == 0,
				matlabpool open;
			end
			parfor k = 1:numel(videos),
				[all_precisions(k), all_fps(k)] = run_tracker(videos{k}, ...
					kernel_type, feature_type, show_visualization, show_plots);
			end
		end
		
		%compute average precision at 20px, and FPS
		mean_precision = mean(all_precisions);
		fps = mean(all_fps);
		fprintf('\nAverage precision (20px):% 1.3f, Average FPS:% 4.2f\n\n', mean_precision, fps)
		if nargout > 0,
			precision = mean_precision;
		end
		
		
	case 'benchmark',
		%running in benchmark mode - this is meant to interface easily
		%with the benchmark's code.
		
		%get information (image file names, initial position, etc) from
		%the benchmark's workspace variables
		seq = evalin('base', 'subS');
		target_sz = seq.init_rect(1,[4,3]);
		pos = seq.init_rect(1,[2,1]) + floor(target_sz/2);
		img_files = seq.s_frames;
		video_path = [];
		
		%call tracker function with all the relevant parameters
		positions = tracker(video_path, img_files, pos, target_sz, ...
			padding, kernel, lambda, output_sigma_factor, interp_factor, ...
			cell_size, features, false);
		
		%return results to benchmark, in a workspace variable
		rects = [positions(:,2) - target_sz(2)/2, positions(:,1) - target_sz(1)/2];
		rects(:,3) = target_sz(2);
		rects(:,4) = target_sz(1);
		res.type = 'rect';
		res.res = rects;
		assignin('base', 'res', res);
		
		
	otherwise
		%we were given the name of a single video to process.
	
		%get image file names, initial state, and ground truth for evaluation
		[img_files, pos, target_sz, ground_truth, video_path] = load_video_info(base_path, video);
		
		
		%call tracker function with all the relevant parameters
		[positions, time] = tracker(video_path, img_files, pos, target_sz, ...
			padding, kernel, lambda, output_sigma_factor, interp_factor, ...
			cell_size, features, show_visualization);
		
		
		%calculate and show precision plot, as well as frames-per-second
		precisions = precision_plot(positions, ground_truth, video, show_plots);
		fps = numel(img_files) / time;

		fprintf('%12s - Precision (20px):% 1.3f, FPS:% 4.2f\n', video, precisions(20), fps)

		if nargout > 0,
			%return precisions at a 20 pixels threshold
			precision = precisions(20);
		end

	end
end

2 选择序列choose_video.m

输入base_path即benchmark序列库的路径,输出你选择的序列的名称(如’boy’,’Car4’等)
利用listdlg函数生成了一个列表GUI,方便进行选择

function video_name = choose_video(base_path)
%CHOOSE_VIDEO
%   Shows a single-selection list dialog with every sub-folder found in
%   BASE_PATH and returns the name of the one the user picked. Returns []
%   when BASE_PATH has no sub-folders or the dialog yields no selection.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/

	%make the path uniform: forward slashes on Windows (ispc), and a
	%trailing slash in every case
	if ispc()
		base_path = strrep(base_path, '\', '/');
	end
	if base_path(end) ~= '/'
		base_path = [base_path '/'];
	end
	
	%collect the entries of the folder and flag the ones that are real
	%sub-folders, i.e. directories other than '.' and '..'
	entries = dir(base_path);
	entry_names = {entries.name};
	is_video = false(1, numel(entry_names));
	for i = 1:numel(entry_names)
		candidate = entry_names{i};
		is_video(i) = isdir([base_path candidate]) && ...
			~any(strcmp(candidate, {'.', '..'}));
	end
	names = entry_names(is_video);
	
	%default result covers both "no sub-folders" and "no selection made"
	video_name = [];
	if isempty(names)
		return
	end
	
	%list dialog titled 'Choose video', restricted to a single selection
	choice = listdlg('ListString',names, 'Name','Choose video', 'SelectionMode','single');
	if ~isempty(choice)
		video_name = names{choice};
	end
	
end

3 提取序列中目标的参数load_video_info.m

  • 输入base_path和通过choose_video选择得到的序列名video_name,得到所选序列的存储路径。
video_path = [base_path video '/'];
  • 并且打开benchmark中预留的groundtruth_rect.txt文件,将序列中目标的参数存储到ground_truth中。
filename = [video_path 'groundtruth_rect' suffix '.txt'];
f = fopen(filename);
  • ground_truth是一个矩阵,行数为序列的帧数。开始时列数为4,第三列和第四列为目标跟踪框的宽和高,第一列和第二列为目标跟踪框左上角的列坐标和行坐标(这样我猜大概是因为imcrop函数中行和列的位置是反过来的吧)。跟踪框左上角坐标再加上其二分之一的边长后由4列变为2列,表示目标的中心位置轨迹。
    将第一帧的参数作为初始化时的参数target_sz和pos(有4个序列作者应该是出于跟踪精度的需要修改了参数,暂且不表)。
target_sz = [ground_truth(1,4), ground_truth(1,3)];%目标大小(高,宽)
pos = [ground_truth(1,2), ground_truth(1,1)] + floor(target_sz/2);%中心点坐标
ground_truth = ground_truth(:,[2,1]) + ground_truth(:,[4,3]) / 2;%4列变为2
  • img_files按顺序存储了所有的序列图片的文件名,如’0001.jpg’(图片不必设置为0001.jpg,0002.jpg…0030.jpg这种格式,即使是从一个完整序列中截取的片段,也可以顺利读取,比如0031.jpg,0033.jpg…0080.jpg),长度为图片数。
    如果要加入自己采集的序列,最好将名称都改成“xxxx.jpg”的格式,并且要放在一个叫img的子文件夹下,比如这个路径:“F:\track\Benchmark\IRA\img\0001.jpg”。
video_path = [video_path 'img/'];%图片应放在video_path路径下的img子文件夹中
img_files = dir([video_path '*.jpg']);
  • 可以自己仿照benchmark里的groundtruth_rect.txt进行标注,
    比如“F:\track\Benchmark\IRA\groundtruth_rect.txt”。不标注的话就需要对代码做一些修改,使其可以顺利读入无标注的序列,并通过imcrop的方式初始化跟踪窗口。

详细代码如下:

function [img_files, pos, target_sz, ground_truth, video_path] = load_video_info(base_path, video)
%LOAD_VIDEO_INFO
%   Loads all the relevant information for the video in the given path:
%   the list of image files (cell array of strings), initial position
%   (1x2), target size (1x2), the ground truth information for precision
%   calculations (Nx2, for N frames), and the path where the images are
%   located. The ordering of coordinates and sizes is always [y, x].
%
%   GROUND_TRUTH is returned empty when the annotation file holds a single
%   row (initial position only). An error is raised when the annotation
%   file cannot be opened, contains no rows, or no images are found.
%
%   Joao F. Henriques, 2014
%   http://www.isr.uc.pt/~henriques/


	%see if there's a suffix, specifying one of multiple targets, for
	%example the dot and number in 'Jogging.1' or 'Jogging.2'.
	if numel(video) >= 2 && video(end-1) == '.' && ~isnan(str2double(video(end))),
		suffix = video(end-1:end);  %remember the suffix
		video = video(1:end-2);  %remove it from the video name
	else
		suffix = '';
	end

	%full path to the video's files
	if base_path(end) ~= '/' && base_path(end) ~= '\',
		base_path(end+1) = '/';
	end
	video_path = [base_path video '/'];

	%try to load ground truth from text file (Benchmark's format)
	filename = [video_path 'groundtruth_rect' suffix '.txt'];
	f = fopen(filename);
	assert(f ~= -1, ['No initial position or ground truth to load ("' filename '").'])
	
	%make sure the file is closed even if parsing below throws
	file_closer = onCleanup(@() fclose(f));
	
	%the format is [x, y, width, height]
	try
		ground_truth = textscan(f, '%f,%f,%f,%f', 'ReturnOnError',false);  
	catch  %#ok, try different format (no commas)
		frewind(f);
		ground_truth = textscan(f, '%f %f %f %f');  
	end
	ground_truth = cat(2, ground_truth{:});
	clear file_closer  %done reading; destroying the guard runs fclose(f)
	
	%an annotation file without any parsed row would otherwise fail below
	%with an unhelpful indexing error
	assert(size(ground_truth,1) >= 1, ['Ground truth file is empty ("' filename '").'])
	
	%set initial position and size
	target_sz = [ground_truth(1,4), ground_truth(1,3)];
	pos = [ground_truth(1,2), ground_truth(1,1)] + floor(target_sz/2);
	
	if size(ground_truth,1) == 1,
		%we have ground truth for the first frame only (initial position)
		ground_truth = [];
	else
		%store positions instead of boxes
		ground_truth = ground_truth(:,[2,1]) + ground_truth(:,[4,3]) / 2;
	end
	
	
	%from now on, work in the subfolder where all the images are
	video_path = [video_path 'img/'];
	
	%for these sequences, we must limit ourselves to a range of frames.
	%for all others, we just load all png/jpg files in the folder.
	frames = {'David', 300, 770;
			  'Football1', 1, 74;
			  'Freeman3', 1, 460;
			  'Freeman4', 1, 283};
	
	idx = find(strcmpi(video, frames(:,1)));
	
	if isempty(idx),
		%general case, just list all images
		img_files = dir([video_path '*.png']);
		if isempty(img_files),
			img_files = dir([video_path '*.jpg']);
			assert(~isempty(img_files), 'No image files to load.')
		end
		img_files = sort({img_files.name});
	else
		%list specified frames. try png first, then jpg.
		if exist(sprintf('%s%04i.png', video_path, frames{idx,2}), 'file'),
			img_files = num2str((frames{idx,2} : frames{idx,3})', '%04i.png');
			
		elseif exist(sprintf('%s%04i.jpg', video_path, frames{idx,2}), 'file'),
			img_files = num2str((frames{idx,2} : frames{idx,3})', '%04i.jpg');
			
		else
			error('No image files to load.')
		end
		
		img_files = cellstr(img_files);
	end
	
end


你可能感兴趣的:(目标检测与跟踪,KCF,matlab)