from __future__ import absolute_import
import os
import glob
import numpy as np
from siamfc import TrackerSiamFC
if __name__ == '__main__':
    # Folder of a single OTB sequence; replace with your own path.
    seq_dir = os.path.expanduser('D:\\OTB\\Crossing\\')

    # Sort the frame paths so they reach the tracker in sequence order.
    # os.path.join keeps the paths valid whether or not seq_dir ends with a
    # separator.
    img_files = sorted(glob.glob(os.path.join(seq_dir, 'img', '*.jpg')))

    # This annotation file is comma separated; without an explicit delimiter
    # np.loadtxt fails with "ValueError: could not convert string to float".
    anno = np.loadtxt(
        os.path.join(seq_dir, 'groundtruth_rect.txt'), delimiter=',')

    net_path = 'pretrained/siamfc_alexnet_e50.pth'
    tracker = TrackerSiamFC(net_path=net_path)

    # Only the first annotated box is handed to the tracker; the remaining
    # boxes are produced by the tracker itself.
    tracker.track(img_files, anno[0], visualize=True)
我多加了一点代码:`, delimiter=','`,不加这个会报这样的错:`ValueError: could not convert string to float`
现在就来看一下类TrackerSiamFC下的track方法。这个函数的作用就是传入video sequence和first frame中的ground truth bbox,然后通过模型,得到后续帧的目标位置,可以看到主要有两个函数实现:init和update,这也是继承Tracker需要重写的两个方法:
# Walkthrough note: `update` receives each subsequent frame and returns the
# target box coordinates produced by the SiamFC network; `track` then draws
# those boxes for display, which works as a small demo.
def track(self, img_files, box, visualize=False):
    """Track one target through an ordered image sequence.

    Args:
        img_files: Ordered frame paths; each is loaded with
            ``ops.read_image``.
        box: Box of the target in the first frame. It is stored unchanged as
            ``boxes[0]`` and passed to ``self.init``.
        visualize: When true, every frame is shown together with its box via
            ``ops.show_image``.

    Returns:
        A ``(boxes, times)`` tuple: ``boxes`` has shape ``(frame_num, 4)``
        with one box per frame, ``times`` has shape ``(frame_num,)`` with the
        seconds spent in ``init``/``update`` for each frame.
    """
    frame_num = len(img_files)
    boxes = np.zeros((frame_num, 4))
    times = np.zeros(frame_num)
    if frame_num == 0:
        # Nothing to track; avoid indexing into an empty array below.
        return boxes, times
    boxes[0] = box

    for f, img_file in enumerate(img_files):
        img = ops.read_image(img_file)

        begin = time.time()
        if f == 0:
            # The first frame only initialises the tracker; its box is the
            # one supplied by the caller.
            self.init(img, box)
        else:
            # Every later frame is localised by the network. Without this
            # `else`, frame 0 would be tracked too and boxes[0] overwritten.
            boxes[f, :] = self.update(img)
        times[f] = time.time() - begin

        if visualize:
            ops.show_image(img, boxes[f, :])

    return boxes, times
@torch.no_grad()
def init(self, img, box):
    """Initialise the tracker state from the first frame.

    Stores the target centre/size, the normalised Hanning window, the search
    scale factors, the exemplar/search crop sizes, the mean image colour and
    the exemplar feature map (``self.kernel``) on ``self``.

    Args:
        img: First frame as an array whose first two axes are averaged to get
            the padding colour (assumed H x W x C -- TODO confirm).
        box: Target box indexed as ``[x, y, w, h]`` with 1-indexed left-top
            corner (order follows the conversion below).
    """
    # set to evaluation mode (the statement itself was missing under this
    # comment in the original listing)
    self.net.eval()

    # convert box to 0-indexed and center based [y, x, h, w]
    box = np.array([
        box[1] - 1 + (box[3] - 1) / 2,
        box[0] - 1 + (box[2] - 1) / 2,
        box[3], box[2]], dtype=np.float32)
    self.center, self.target_sz = box[:2], box[2:]

    # create hanning window, normalised so that it sums to 1
    # (author's note: upscale_sz is 272 = 16 * 17 with the default config)
    self.upscale_sz = self.cfg.response_up * self.cfg.response_sz
    self.hann_window = np.outer(
        np.hanning(self.upscale_sz),
        np.hanning(self.upscale_sz))
    self.hann_window /= self.hann_window.sum()

    # search scale factors: scale_step raised to exponents spread evenly over
    # [-(scale_num // 2), scale_num // 2]. The parentheses matter, because
    # `-scale_num // 2` would floor towards minus infinity. For
    # scale_num == 3 the exponents are [-1, 0, 1].
    self.scale_factors = self.cfg.scale_step ** np.linspace(
        -(self.cfg.scale_num // 2),
        self.cfg.scale_num // 2, self.cfg.scale_num)

    # exemplar and search sizes
    context = self.cfg.context * np.sum(self.target_sz)
    self.z_sz = np.sqrt(np.prod(self.target_sz + context))
    self.x_sz = self.z_sz * \
        self.cfg.instance_sz / self.cfg.exemplar_sz

    # exemplar image; pixels outside the frame are filled with the mean colour
    self.avg_color = np.mean(img, axis=(0, 1))
    # NOTE(review): the trailing arguments of this call were truncated in the
    # original listing; out_size/border_value are reconstructed to match the
    # call in `update` and the author's [127, 127, 3] note -- confirm against
    # ops.crop_and_resize.
    z = ops.crop_and_resize(
        img, self.center, self.z_sz,
        out_size=self.cfg.exemplar_sz,
        border_value=self.avg_color)

    # exemplar features [H,W,C]->[C,H,W], with a leading batch axis
    z = torch.from_numpy(z).to(
        self.device).permute(2, 0, 1).unsqueeze(0).float()
    # author's note: kernel is torch.Size([1, 256, 6, 6]) with the default net
    self.kernel = self.net.backbone(z)
We found that upsampling the score map using bicubic interpolation, from 17 × 17 to 272 × 272, results in more accurate localization since the original map is relatively coarse.
也就是 17×16=272。
Online, ... and a cosine window is added to the score map to penalize large displacements
We found that updating (the feature representation of) the exemplar online through simple strategies, such as linear interpolation, does not gain much performance and thus we keep it fixed
@torch.no_grad()
def update(self, img):
    """Locate the target in a new frame and update the tracker state.

    Crops one search patch per scale factor, correlates their features with
    ``self.kernel``, picks the best scale and peak, then moves
    ``self.center`` and rescales ``self.target_sz``, ``self.z_sz`` and
    ``self.x_sz`` in place.

    Args:
        img: The new frame, passed unchanged to ``ops.crop_and_resize``.

    Returns:
        numpy array of four values: the 1-indexed, left-top based box built
        from ``self.center`` and ``self.target_sz`` (the inverse of the
        conversion done in ``init``).
    """
    # Runs under torch.no_grad(): `.numpy()` below raises on tensors that
    # require grad, and no gradients are needed for tracking.

    # set to evaluation mode (the statement itself was missing under this
    # comment in the original listing)
    self.net.eval()

    # search images, one crop per scale factor
    # NOTE(review): out_size was missing from the original listing; it is
    # reconstructed from the author's [3, 255, 255, 3] shape note -- confirm
    # against ops.crop_and_resize.
    x = [ops.crop_and_resize(
        img, self.center, self.x_sz * f,
        out_size=self.cfg.instance_sz,
        border_value=self.avg_color) for f in self.scale_factors]
    x = np.stack(x, axis=0)  # author's note: [3, 255, 255, 3]
    x = torch.from_numpy(x).to(
        self.device).permute(0, 3, 1, 2).float()

    # responses
    x = self.net.backbone(x)  # author's note: [3, 256, 22, 22]
    responses = self.net.head(self.kernel, x)  # author's note: [3, 1, 17, 17]
    responses = responses.squeeze(1).cpu().numpy()  # author's note: [3, 17, 17]

    # upsample responses and penalize scale changes
    # (the interpolation argument was truncated in the original listing;
    # bicubic follows the paper quoted in the walkthrough)
    responses = np.stack([cv2.resize(
        u, (self.upscale_sz, self.upscale_sz),
        interpolation=cv2.INTER_CUBIC)
        for u in responses])  # author's note: [3, 272, 272]
    # every scale except the middle one is damped
    responses[:self.cfg.scale_num // 2] *= self.cfg.scale_penalty
    responses[self.cfg.scale_num // 2 + 1:] *= self.cfg.scale_penalty

    # peak scale: index of the response map holding the global maximum
    scale_id = np.argmax(np.amax(responses, axis=(1, 2)))

    # peak location: normalise the chosen map, then blend in the Hanning
    # window so that large displacements are penalised
    response = responses[scale_id]
    response -= response.min()
    response /= response.sum() + 1e-16
    response = (1 - self.cfg.window_influence) * response + \
        self.cfg.window_influence * self.hann_window
    loc = np.unravel_index(response.argmax(), response.shape)

    # locate target center: disp stands for displacement, converted from
    # upsampled-response pixels to search-patch pixels to image pixels
    disp_in_response = np.array(loc) - (self.upscale_sz - 1) / 2
    disp_in_instance = disp_in_response * \
        self.cfg.total_stride / self.cfg.response_up
    disp_in_image = disp_in_instance * self.x_sz * \
        self.scale_factors[scale_id] / self.cfg.instance_sz
    self.center += disp_in_image

    # update target size: linear interpolation between 1.0 and the chosen
    # scale factor damps scale changes
    scale = (1 - self.cfg.scale_lr) * 1.0 + \
        self.cfg.scale_lr * self.scale_factors[scale_id]
    self.target_sz *= scale
    self.z_sz *= scale
    self.x_sz *= scale

    # return 1-indexed and left-top based bounding box
    box = np.array([
        self.center[1] + 1 - (self.target_sz[1] - 1) / 2,
        self.center[0] + 1 - (self.target_sz[0] - 1) / 2,
        self.target_sz[1], self.target_sz[0]])

    return box
we only search for the object within a region of approximately four times its previous size
就是对尺度进行惩罚,我是这样理解的,因为中间的尺度肯定是接近于1,其他两边的尺度不是缩一点就是放大一点,所以给以惩罚,如论文中说:Any change in scale is penalized
找到一张response上峰值点(peak location)【关于这个函数可以看这里】39行
修正center,然后update target size,因为论文有一句:update the scale by linear interpolation with a factor of 0.35 to provide damping
| success OPE | |
| --- | --- |
| 我的 | 0.466/0.520 |
| 代码提供者的 | 0.589 |
| siamfc论文中的 | 0.612 |
| success OPE | |
| --- | --- |
| 我的 | 0.469/0.529 |
| 代码提供者的 | 0.578 |
| siamfc论文中的 | 0.582 |
注意:siamfc论文中的没有OTB2015的success OPE,我摘抄自SiamRPN论文,不过可以去官方地址有matlab结果文件,有机会用official toolkit评估一下,再来放个结果
上面的结果更新了一下,原因之前的OTB数据集没整理好,导致实际评估的序列数少了。我看过OTB benchmark官方评测代码python版本,里面评测和画图的方法和GOT-10k里面的ExperimentOTB是一样的,可以放心使用。
2020/05/20 情人节更新一下结果,这个是SiamFC官方project的结果:我使用的是
results_SiamFC-3s_OTB-100.zip,然后用OTB official MATLAB toolkit的代码tracker_benchmark_v1.0.zip
trackers = trackersSiamfc;
(这个结果文件是TRE的,但是TRE的第一次就是OPE)
import os
# Rename tracker result files so that the sequence part of each file name uses
# exactly the same spelling/case as the corresponding annotation file.
tracker_name = 'siamfc3s'
# Number of trailing characters taken by the '_<tracker_name>' suffix.
location = len(tracker_name) + 1

# replace to your own path
anno_seqs_path = 'D:\\tracker_benchmark_v1.0\\anno'
anno_seqs = os.listdir(anno_seqs_path)
assert len(anno_seqs) == 100, 'otb must have 100 seqs!'
# drop the last four characters (the file extension) of every annotation name
anno_seqs = [anno_seq[:-4] for anno_seq in anno_seqs]
lower_anno_seqs = [i.lower() for i in anno_seqs]
# lower-case name -> annotation spelling; setdefault keeps the first match,
# like list.index would
anno_by_lower = {}
for anno_seq in anno_seqs:
    anno_by_lower.setdefault(anno_seq.lower(), anno_seq)
# print(anno_seqs)

# replace to your own path
res_seqs_path = 'D:\\tracker_benchmark_v1.0\\results\\results_TRE_OTB100'
res_seqs = os.listdir(res_seqs_path)
assert len(res_seqs) == len(anno_seqs), \
    'otb result must have equal length with anno'
# remove .mat
res_seqs = [res_seq[:-4] for res_seq in res_seqs]
# strip the '_<tracker_name>' suffix and lower-case for comparison
lower_res_no_tracker_name = [seq[:-location].lower() for seq in res_seqs]
# print(lower_res_no_tracker_name)

# different naming methods: result sequences with no matching annotation.
# The original listing never filled this list, which made the check below
# pass unconditionally.
diff = [res_seq for res_seq in lower_res_no_tracker_name
        if res_seq not in anno_by_lower]
print('different naming methods:', diff)
assert not diff, 'before rename, should rename diff name seqs!'

# rename res file name
for res_seq, lower_name in zip(res_seqs, lower_res_no_tracker_name):
    old_name = os.path.join(res_seqs_path, res_seq + '.mat')
    new_name = os.path.join(
        res_seqs_path, anno_by_lower[lower_name] + '_' + tracker_name + '.mat')
    os.rename(old_name, new_name)