ByteTrack是一个通用的、基于检测的多目标跟踪方法,能够适用于各种框架。本文将对其算法原理和代码做进一步的学习。
与DeepSORT原理类似,但在目标跟踪时仅使用卡尔曼滤波来预测目标框,然后利用匈牙利算法来匹配检测框与轨迹。ByteTrack采用多次匹配的方法:首先将得分较高的目标框与历史轨迹相匹配,然后将得分较低的目标框与第一次没有匹配上的轨迹进行匹配,用于处理目标被遮挡的情形。相对于DeepSORT,ByteTrack直接去掉了ReID模型,更加方便移动端的部署。
由于代码里面存在多种轨迹,因此,首先需要对轨迹进行分类,避免在代码阅读时出现混淆的情形。
轨迹的状态可分为4种:
def imageflow_demo(predictor:Predictor, vis_folder, current_time, args):
    """Excerpt of the video demo loop: run detection on a frame, then update the tracker.

    Only the detection/tracking calls are shown. The code that binds
    ``ret_val``, ``frame``, ``timer``, ``tracker`` and ``exp`` (and whatever
    ends the loop) is elided from this excerpt.
    """
    ...
    while True:
        if ret_val:
            # Object detection step.
            # outputs: [xxx, 7]
            outputs, img_info = predictor.inference(frame, timer)
            # Object tracking step.
            if outputs[0] is not None:
                online_targets = tracker.update(outputs[0], [img_info['height'], img_info['width']], exp.test_size) # -> yolox/tracker/byte_tracker.py
class STrack(BaseTrack):
    """One track: a bounding box, its detection score and its Kalman-filter state.

    This is an excerpt of the class. It uses ``self.state``, ``self.next_id()``,
    ``self.tlwh`` and ``self.tlwh_to_xyah()``, none of which are defined in the
    code shown here (presumably they come from ``BaseTrack`` or from elided
    members -- confirm against the full source).
    """

    # A single filter instance shared by every track; used by ``multi_predict``.
    shared_kalman = KalmanFilter()

    def __init__(self, tlwh, score):
        """Create a new, not-yet-activated track from one detection.

        Args:
            tlwh: bounding box of the detection (by its name: top-left x,
                top-left y, width, height); stored as a float array.
            score: detection confidence attached to the box.
        """
        # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` yields
        # the same float64 dtype.
        self._tlwh = np.asarray(tlwh, dtype=float)
        self.kalman_filter = None
        # Kalman mean and covariance; populated by ``activate``.
        self.mean, self.covariance = None, None
        # A fresh track starts out inactive.
        self.is_activated = False
        self.score = score
        # Number of consecutive successful updates.
        self.tracklet_len = 0

    def predict(self):
        """Advance this track's Kalman state by one prediction step."""
        mean_state = self.mean.copy()
        if self.state != TrackState.Tracked:
            # Index 7 is ``vh`` in the (x, y, a, h, vx, vy, va, vh) layout:
            # zero it for tracks that are not currently tracked.
            mean_state[7] = 0
        # Store the predicted mean and covariance.
        self.mean, self.covariance = self.kalman_filter.predict(mean_state, self.covariance)

    @staticmethod
    def multi_predict(stracks):
        """Run the prediction step for several tracks in one batched call.

        Args:
            stracks: tracks whose ``mean`` / ``covariance`` are updated in
                place. An empty sequence is a no-op.
        """
        if len(stracks) == 0:
            return
        multi_mean = np.asarray([st.mean.copy() for st in stracks])
        multi_covariance = np.asarray([st.covariance for st in stracks])
        for i, st in enumerate(stracks):
            if st.state != TrackState.Tracked:
                # Same rule as ``predict``: zero component 7 for tracks that
                # are not currently tracked.
                multi_mean[i][7] = 0
        multi_mean, multi_covariance = STrack.shared_kalman.multi_predict(multi_mean, multi_covariance)
        for st, mean, cov in zip(stracks, multi_mean, multi_covariance):
            st.mean = mean
            st.covariance = cov

    def activate(self, kalman_filter, frame_id):
        """Start a new tracklet.

        Args:
            kalman_filter: filter instance this track will use from now on.
            frame_id: accepted for interface compatibility; not used in the
                code shown in this excerpt.
        """
        self.kalman_filter = kalman_filter
        # Assign a fresh track id.
        self.track_id = self.next_id()
        # Initialise the Kalman state from the stored box.
        self.mean, self.covariance = self.kalman_filter.initiate(self.tlwh_to_xyah(self._tlwh))
        self.tracklet_len = 0
        self.state = TrackState.Tracked

    def re_activate(self, new_track, frame_id, new_id=False):
        """Bring a previously lost track back using a newly matched detection.

        Args:
            new_track: the matched detection; its ``tlwh`` feeds the Kalman update.
            frame_id: not used in the code shown in this excerpt.
            new_id: not used in the code shown in this excerpt.
        """
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_track.tlwh)
        )
        # The consecutive-update counter restarts on re-activation.
        self.tracklet_len = 0
        self.state = TrackState.Tracked
        self.is_activated = True

    def update(self, new_track, frame_id):
        """Update a matched track with the detection it was associated with.

        Args:
            new_track: the matched detection; its ``tlwh`` feeds the Kalman update.
            frame_id: not used in the code shown in this excerpt.
        """
        self.tracklet_len += 1
        new_tlwh = new_track.tlwh
        # Correct the predicted state with the observed box.
        self.mean, self.covariance = self.kalman_filter.update(
            self.mean, self.covariance, self.tlwh_to_xyah(new_tlwh))
        self.state = TrackState.Tracked
        self.is_activated = True
class BYTETracker(object):
    """Multi-object tracker that associates detections with tracks in several passes.

    High-score detections are matched first against tracked + lost tracks;
    low-score detections are then matched against the tracked tracks that are
    still unmatched; finally the leftover high-score detections are matched
    against unconfirmed tracks or start new tracks.
    """

    def __init__(self, args, frame_rate=30):
        # Body elided in this excerpt. ``update`` reads self.args,
        # self.tracked_stracks, self.lost_stracks, self.removed_stracks,
        # self.frame_id, self.det_thresh, self.kalman_filter and
        # self.max_time_lost, so they have to be set up here.
        ...

    def update(self, output_results, img_info, img_size):
        """Associate one frame's detections with the existing tracks.

        Args:
            output_results: detection results for the current frame, one row
                per box.
            img_info: not used in the code shown in this excerpt.
            img_size: not used in the code shown in this excerpt.

        Returns:
            The tracks in ``self.tracked_stracks`` whose ``is_activated`` flag
            is set after this frame has been processed.
        """
        activated_stracks = []  # tracks confirmed / created in this frame
        refind_stracks = []     # lost tracks matched again in this frame
        lost_stracks = []       # tracks that found no detection in this frame
        removed_stracks = []    # tracks to drop in this frame

        if output_results.shape[1] == 5:
            # Elided in this excerpt; this branch has to bind ``scores`` and
            # ``bboxes`` for the code below.
            ...
        else:
            output_results = output_results.cpu().numpy()
            # Columns: x1, y1, x2, y2, objectness, label_score, label
            scores = output_results[:, 4] * output_results[:, 5]  # objectness * class score
            bboxes = output_results[:, :4]  # x1, y1, x2, y2

        # High-score boxes: score above the tracking threshold.
        remain_inds = scores > self.args.track_thresh
        # Low-score boxes: 0.1 < score < tracking threshold. They are used in
        # the second association (e.g. partially occluded targets).
        inds_low = scores > 0.1
        inds_high = scores < self.args.track_thresh
        inds_second = np.logical_and(inds_low, inds_high)
        dets_second = bboxes[inds_second]
        dets = bboxes[remain_inds]
        scores_keep = scores[remain_inds]
        scores_second = scores[inds_second]

        if len(dets) > 0:
            # Wrap every high-score box in an STrack.
            detections = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for
                          (tlbr, s) in zip(dets, scores_keep)]
        else:
            # The excerpt elided this branch; the matching code below needs
            # ``detections`` to be bound, so use an empty list.
            detections = []

        # Split the currently tracked list into unconfirmed tracks
        # (is_activated is False) and confirmed ones.
        unconfirmed = []
        tracked_stracks = []
        for track in self.tracked_stracks:
            if not track.is_activated:
                unconfirmed.append(track)
            else:
                tracked_stracks.append(track)

        # ---- First association: high-score detections ----
        # Candidate pool = confirmed tracks + lost tracks.
        strack_pool = joint_stracks(tracked_stracks, self.lost_stracks)
        # Predict every pooled track's mean / covariance for the current frame.
        STrack.multi_predict(strack_pool)
        # Cost matrix: rows are pooled tracks, columns are high-score detections.
        dists = matching.iou_distance(strack_pool, detections)
        # matches: (track index, detection index) pairs;
        # u_track: pool indices left unmatched;
        # u_detection: detection indices left unmatched.
        matches, u_track, u_detection = matching.linear_assignment(dists, thresh=self.args.match_thresh)
        for itracked, idet in matches:
            track = strack_pool[itracked]
            det = detections[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
            else:
                # The track was not in the Tracked state: re-activate it.
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # ---- Second association: low-score detections ----
        if len(dets_second) > 0:
            # Wrap every low-score box in an STrack.
            detections_second = [STrack(STrack.tlbr_to_tlwh(tlbr), s) for
                                 (tlbr, s) in zip(dets_second, scores_second)]
        else:
            # Elided in the excerpt; bind an empty list for the code below.
            detections_second = []
        # Tracks still unmatched after the first pass that are in the Tracked state.
        r_tracked_stracks = [strack_pool[i] for i in u_track if strack_pool[i].state == TrackState.Tracked]
        dists = matching.iou_distance(r_tracked_stracks, detections_second)
        matches, u_track, _u_detection_second = matching.linear_assignment(dists, thresh=0.5)
        for itracked, idet in matches:
            track = r_tracked_stracks[itracked]
            det = detections_second[idet]
            if track.state == TrackState.Tracked:
                track.update(det, self.frame_id)
                activated_stracks.append(track)
            else:
                track.re_activate(det, self.frame_id, new_id=False)
                refind_stracks.append(track)

        # Tracks unmatched in both passes become lost and are retried on later frames.
        for it in u_track:
            # Look the track up by index: reusing the ``track`` variable left
            # over from the loop above would mark the wrong track.
            track = r_tracked_stracks[it]
            if not track.state == TrackState.Lost:
                track.mark_lost()
                lost_stracks.append(track)

        # ---- Unconfirmed tracks vs. leftover high-score detections ----
        detections = [detections[i] for i in u_detection]
        dists = matching.iou_distance(unconfirmed, detections)
        matches, u_unconfirmed, u_detection = matching.linear_assignment(dists, thresh=0.7)
        for itracked, idet in matches:
            # An unconfirmed track matched again: update it (this sets is_activated).
            unconfirmed[itracked].update(detections[idet], self.frame_id)
            activated_stracks.append(unconfirmed[itracked])
        for it in u_unconfirmed:
            # Unconfirmed tracks without a match are removed.
            track = unconfirmed[it]
            track.mark_removed()
            removed_stracks.append(track)

        # ---- Start new tracks from detections that matched nothing ----
        for inew in u_detection:
            track = detections[inew]
            if track.score < self.det_thresh:
                continue
            track.activate(self.kalman_filter, self.frame_id)
            activated_stracks.append(track)

        # ---- Drop tracks that have been lost for too long ----
        for track in self.lost_stracks:
            if self.frame_id - track.end_frame > self.max_time_lost:
                track.mark_removed()
                removed_stracks.append(track)

        # ---- Bookkeeping ----
        # Keep only tracks still in the Tracked state, then merge in this
        # frame's confirmed / new tracks and the re-found ones.
        self.tracked_stracks = [t for t in self.tracked_stracks if t.state == TrackState.Tracked]
        self.tracked_stracks = joint_stracks(self.tracked_stracks, activated_stracks)
        self.tracked_stracks = joint_stracks(self.tracked_stracks, refind_stracks)
        # Lost list: drop tracks that are tracked again, add the newly lost
        # ones, then drop tracks already in ``self.removed_stracks``.
        self.lost_stracks = sub_stracks(self.lost_stracks, self.tracked_stracks)
        self.lost_stracks.extend(lost_stracks)
        self.lost_stracks = sub_stracks(self.lost_stracks, self.removed_stracks)
        self.removed_stracks.extend(removed_stracks)
        # Resolve tracks that appear in both the tracked and the lost list.
        self.tracked_stracks, self.lost_stracks = remove_duplicate_stracks(self.tracked_stracks, self.lost_stracks)
        # Report only the activated tracks for this frame.
        output_stracks = [track for track in self.tracked_stracks if track.is_activated]
        return output_stracks
def iou_distance(atracks, btracks):
    """
    Compute cost based on IoU
    :type atracks: list[STrack]
    :type btracks: list[STrack]

    :rtype cost_matrix np.ndarray

    Either argument may also already be a sequence of ``np.ndarray`` boxes, in
    which case both arguments are used as-is instead of reading ``.tlbr``.
    """
    if (len(atracks) > 0 and isinstance(atracks[0], np.ndarray)) or (len(btracks) > 0 and isinstance(btracks[0], np.ndarray)):
        # Raw boxes were passed in directly.
        atlbrs = atracks
        btlbrs = btracks
    else:
        # Collect each track's ``tlbr`` box.
        atlbrs = [track.tlbr for track in atracks]
        btlbrs = [track.tlbr for track in btracks]
    # Pairwise IoU between the two box sets.
    _ious = ious(atlbrs, btlbrs)
    # Turn overlap into a cost: a higher IoU means a lower cost.
    cost_matrix = 1 - _ious
    return cost_matrix
def ious(atlbrs, btlbrs):
    """Return the pairwise IoU matrix between two sets of boxes.

    Args:
        atlbrs: sequence of boxes (rows of the result).
        btlbrs: sequence of boxes (columns of the result).

    Returns:
        Array of shape ``(len(atlbrs), len(btlbrs))``. When either input is
        empty, an all-zero float array of that shape is returned and
        ``bbox_ious`` is not called.
    """
    # ``np.float`` was removed in NumPy 1.24; the builtin ``float`` yields the
    # same float64 dtype.
    overlaps = np.zeros((len(atlbrs), len(btlbrs)), dtype=float)
    if overlaps.size == 0:
        # Nothing to compare; avoid handing empty arrays to ``bbox_ious``.
        return overlaps
    # ``bbox_ious`` is defined outside this excerpt.
    overlaps = bbox_ious(
        np.ascontiguousarray(atlbrs, dtype=float),
        np.ascontiguousarray(btlbrs, dtype=float)
    )
    return overlaps
def linear_assignment(cost_matrix, thresh):
    """Solve the assignment between rows and columns of a cost matrix.

    Args:
        cost_matrix: 2-D array; in the callers shown in this file the rows are
            existing tracks and the columns are current-frame detections.
        thresh: passed to ``lap.lapjv`` as ``cost_limit``.

    Returns:
        ``(matches, unmatched_a, unmatched_b)``: ``matches`` is a list of
        ``[row, column]`` pairs; ``unmatched_a`` / ``unmatched_b`` are index
        arrays of the rows / columns that received no assignment.
    """
    if cost_matrix.size == 0:
        # No rows or no columns: nothing can match, everything is unmatched.
        return [], np.arange(cost_matrix.shape[0]), np.arange(cost_matrix.shape[1])
    # x[i] is the column assigned to row i and y[j] the row assigned to
    # column j; negative entries mean "unassigned". The returned total cost is
    # not used.
    _cost, x, y = lap.lapjv(cost_matrix, extend_cost=True, cost_limit=thresh)
    # Row ix matched column mx.
    matches = [[ix, mx] for ix, mx in enumerate(x) if mx >= 0]
    # Rows without a matching column.
    unmatched_a = np.where(x < 0)[0]
    # Columns without a matching row.
    unmatched_b = np.where(y < 0)[0]
    return matches, unmatched_a, unmatched_b
class KalmanFilter:
    """Kalman filter matrices for box tracking (excerpt: constructor only).

    The state has eight components: x, y, a, h, vx, vy, va, vh. The first
    four, (x, y, a, h), are the part selected by ``_update_mat``; the last
    four are their velocities.
    """

    def __init__(self):
        """Build the 8x8 motion matrix and the 4x8 projection matrix."""
        ndim, dt = 4, 1.
        # Constant-velocity transition: identity plus ``dt`` coupling each of
        # the first four components to its velocity.
        self._motion_mat = np.eye(2 * ndim, 2 * ndim)
        for i in range(ndim):
            self._motion_mat[i, ndim + i] = dt
        # Projects the 8-dimensional state onto its first four components.
        self._update_mat = np.eye(ndim, 2 * ndim)
        # Noise weights for position and velocity; where they are applied is
        # outside this excerpt.
        self._std_weight_position = 1. / 20
        self._std_weight_velocity = 1. / 160
```
### 4、总结
从官方自带的结果可以看出,其效果还是不错的。最近ByteTrackV2也已经出来了,并且还支持3D框的跟踪,包括BEV视角下的目标跟踪,等代码出来后,再一起学习吧!