1. os.path.join(video_dir, "img/*.jpg"):连接两个或更多的路径名组件,并在需要时自动插入路径分隔符。
# Sequence directory; os.path.join copes with or without the trailing slash.
video_dir = '../Car1/'
# Glob pattern (relative to the sequence directory) that matches every JPEG frame.
frame_pattern = "img/*.jpg"
b = os.path.join(video_dir, frame_pattern)
如果 video_dir = '../Car1'(末尾不带斜杠),os.path.join 也会自动补上路径分隔符:
# Expand the frame pattern into the list of matching image paths and show it.
frame_glob = os.path.join(video_dir, "img/*.jpg")
test = glob.glob(frame_glob)
print('test: ', test)
test: ['../Car1/img\\0001.jpg', '../Car1/img\\0002.jpg', '../Car1/img\\0003.jpg',.....]
# Show how the frame index is recovered from a file path:
# take the last path component, then split it on the dot.
path = 'D:/honey/0001.jpg'
file_name = os.path.basename(path)
print('basepath: ', file_name)
print('split: ', file_name.split('.'))
basepath: 0001.jpg
split: ['0001', 'jpg']
import glob
import os
import pandas as pd
import argparse
import numpy as np
import cv2
import time
import sys
from fire import Fire
from tqdm import tqdm
from siamfc import SiamFCTracker
def main(video_dir, gpu_id, model_path):
    """Track the target through an image sequence and display each frame.

    The first frame initialises the tracker from the first ground-truth box;
    every later frame is passed to ``tracker.update``. The tracked box is
    drawn in green and the ground-truth box in (RGB) red.

    Args:
        video_dir: Sequence directory containing ``img/*.jpg`` frames whose
            file stems are integers, plus ``groundtruth_rect.txt`` with one
            ``x, y, width, height`` row per frame.
        gpu_id: Device index forwarded to ``SiamFCTracker``.
        model_path: Weights path forwarded to ``SiamFCTracker``.

    NOTE(review): the source of this function had lost its indentation and a
    few lines (closing arguments of three calls, the ``else`` branch, the
    GUI event pump). Those spots are marked "reconstructed" below.
    """
    # Sort numerically on the file stem so that 10.jpg follows 9.jpg.
    # os.path.basename() returns the final path component.
    filenames = sorted(glob.glob(os.path.join(video_dir, "img/*.jpg")),
                       key=lambda x: int(os.path.basename(x).split('.')[0]))
    frames = [cv2.cvtColor(cv2.imread(filename), cv2.COLOR_BGR2RGB) for filename in filenames]
    # The separator is a regular expression (tab, comma or space), which only
    # the python parser engine supports; naming it avoids a ParserWarning.
    # reconstructed: the closing argument of this call was missing.
    gt_bboxes = pd.read_csv(os.path.join(video_dir, "groundtruth_rect.txt"), sep='\t|,| ',
                            header=None, names=['xmin', 'ymin', 'width', 'height'],
                            engine='python')
    # Strip trailing slashes first; otherwise '../Car1/' yields an empty title.
    title = video_dir.rstrip('/').split('/')[-1]

    # starting tracking
    tracker = SiamFCTracker(model_path, gpu_id)
    for idx, frame in enumerate(frames):
        if idx == 0:
            bbox = gt_bboxes.iloc[0].values
            tracker.init(frame, bbox)
            # Convert the one-based (x, y, w, h) box to zero-based corners.
            bbox = (bbox[0]-1, bbox[1]-1,
                    bbox[0]+bbox[2]-1, bbox[1]+bbox[3]-1)
        else:
            # reconstructed: without this branch the first-frame box computed
            # above would be overwritten immediately.
            bbox = tracker.update(frame)
        # Tracker result: (xmin, ymin, xmax, ymax).
        # reconstructed: the call was unclosed; any thickness argument that
        # followed the colour is unknown, so OpenCV's default is used.
        frame = cv2.rectangle(frame,
                              (int(bbox[0]), int(bbox[1])),
                              (int(bbox[2]), int(bbox[3])),
                              (0, 255, 0))
        # Draw the coordinates from groundtruth_rect.
        gt_bbox = gt_bboxes.iloc[idx].values
        gt_bbox = (gt_bbox[0], gt_bbox[1],
                   gt_bbox[0]+gt_bbox[2], gt_bbox[1]+gt_bbox[3])
        frame = cv2.rectangle(frame,
                              (int(gt_bbox[0]-1), int(gt_bbox[1]-1)),  # 0-index
                              (int(gt_bbox[2]-1), int(gt_bbox[3]-1)),
                              (255, 0, 0))
        # Frames were converted to RGB on load; imshow expects BGR.
        if len(frame.shape) == 3:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
        frame = cv2.putText(frame, str(idx), (5, 20), cv2.FONT_HERSHEY_COMPLEX_SMALL, 1, (0, 255, 0), 1)
        cv2.imshow(title, frame)
        # reconstructed: imshow only paints once the GUI event loop runs.
        # NOTE(review): the 30 ms delay is an assumption — confirm.
        cv2.waitKey(30)
if __name__ == "__main__":
    # Hard-coded demo configuration.
    model_path = '../models/siamfc_pretrained.pth'  # model weights path
    gpu_id = 0  # gpu id
    video_dir = '../Car1/'  # video (image sequence) path
    main(video_dir=video_dir, gpu_id=gpu_id, model_path=model_path)
import numpy as np
# Demonstrate how np.newaxis inserts a length-1 axis into a 1-D array.
x = np.array([1, 2, 3, 4])
print('x: ', x)
print('x.shape: ', x.shape)
# New leading axis: shape (4,) becomes (1, 4), i.e. a single row.
x1 = x[np.newaxis, :]
print('x1: ', x1)
# The label used to read 'x.shape: ' although the value printed is x1.shape.
print('x1.shape: ', x1.shape)
# New trailing axis: shape (4,) becomes (4, 1), i.e. a single column.
x2 = x[:, np.newaxis]
print('x2: ', x2)
print('x2.shape: ', x2.shape)
x: [1 2 3 4]
x.shape: (4,)
x1: [[1 2 3 4]]
x.shape: (1, 4)
x2: [[1]
 [2]
 [3]
 [4]]
x2.shape: (4, 1)
import numpy as np
import cv2
import torch
import torch.nn.functional as F
import time
import warnings
import torchvision.transforms as transforms
from torch.autograd import Variable
from .alexnet import SiameseAlexNet
from .config import config
from .custom_transforms import ToTensor
from .utils import get_exemplar_image, get_pyramid_instance_image, get_instance_image
# Cap PyTorch's CPU thread count at one; otherwise pytorch will take all cpus.
torch.set_num_threads(1)
class SiamFCTracker:
    """SiamFC single-object tracker wrapping a ``SiameseAlexNet`` model.

    Usage: construct once, call ``init`` with the first frame and its
    bounding box, then call ``update`` for each following frame.

    NOTE(review): the source of this class had lost its indentation and
    several lines. Lines marked "reconstructed" below are best-effort
    repairs and should be confirmed against the upstream implementation.
    """

    def __init__(self, model_path, gpu_id):
        """Create the network on the given GPU and set up preprocessing.

        Args:
            model_path: Path to the pretrained model weights.
            gpu_id: CUDA device index used for every forward pass.
        """
        self.gpu_id = gpu_id
        with torch.cuda.device(gpu_id):
            self.model = SiameseAlexNet(gpu_id, train=False)
            # reconstructed: model_path was otherwise never used.
            # NOTE(review): assumes the file stores a plain state dict — confirm.
            self.model.load_state_dict(torch.load(model_path))
            self.model = self.model.cuda()
            # Test mode: BatchNormalization and Dropout are not enabled
            # (the counterpart of nn.Module.train()).
            # reconstructed from the comment that survived in the source.
            self.model.eval()
        # reconstructed: the Compose list was left unclosed.
        # NOTE(review): presumably just the project's ToTensor — confirm.
        self.transforms = transforms.Compose([
            ToTensor()
        ])

    def _cosine_window(self, size):
        """Get the cosine window.

        Args:
            size: ``(rows, cols)`` of the window; each entry is cast to int.

        Returns:
            A float32 array of shape ``(rows, cols)``: the outer product of
            two Hann windows, normalised so that its entries sum to one.
        """
        cos_window = np.outer(np.hanning(int(size[0])), np.hanning(int(size[1])))
        cos_window = cos_window.astype(np.float32)
        cos_window /= np.sum(cos_window)
        return cos_window

    def init(self, frame, bbox):
        """Initialize the SiamFC tracker on the first frame.

        Args:
            frame: an RGB image
            bbox: one-based bounding box [x, y, width, height]
        """
        self.bbox = (bbox[0]-1, bbox[1]-1, bbox[0]-1+bbox[2], bbox[1]-1+bbox[3])  # zero based
        self.pos = np.array([bbox[0]-1+(bbox[2]-1)/2, bbox[1]-1+(bbox[3]-1)/2])  # center x, center y, zero based
        self.target_sz = np.array([bbox[2], bbox[3]])  # width, height

        # Per-channel mean, later used as the padding colour. Corresponds to
        # the MATLAB code
        # avgChans = gather([mean(mean(im(:,:,1))) mean(mean(im(:,:,2))) mean(mean(im(:,:,3)))]);
        self.img_mean = tuple(map(int, frame.mean(axis=(0, 1))))
        print('img_mean: ', self.img_mean)  # e.g. img_mean:  (140, 140, 140)

        # i.e. get_exemplar_image(frame, box, 127, 0.5, self.img_mean)
        exemplar_img, scale_z, s_z = get_exemplar_image(frame, self.bbox,
                                                        config.exemplar_size, config.context_amount, self.img_mean)

        # get exemplar feature
        exemplar_img = self.transforms(exemplar_img)[None, :, :, :]
        with torch.cuda.device(self.gpu_id):
            exemplar_img_var = Variable(exemplar_img.cuda())
            # The return value is discarded here.
            # NOTE(review): presumably the model caches the exemplar feature
            # internally for later update() calls — confirm in SiameseAlexNet.
            self.model((exemplar_img_var, None))

        # Every scale except the middle one is penalised.
        # Per the original notes: num_scale = 3 and scale_penalty = 0.9745,
        # giving penalty = (0.9745, 1, 0.9745).
        self.penalty = np.ones((config.num_scale)) * config.scale_penalty
        self.penalty[config.num_scale//2] = 1

        # create cosine window
        # Per the original notes: response_up_stride = 16 (upsampling stride)
        # and response_sz = 17, so the interpolated response is 272 wide.
        self.interp_response_sz = config.response_up_stride * config.response_sz
        self.cosine_window = self._cosine_window((self.interp_response_sz, self.interp_response_sz))

        # create scales
        # Per the original notes: three scales with scale_step = 1.0375,
        # i.e. 1/1.0375, 1, 1.0375.
        # reconstructed: the upper bound of arange was missing; ceil(n/2)
        # yields exactly num_scale exponents, matching len(self.penalty).
        self.scales = config.scale_step ** np.arange(np.ceil(config.num_scale/2)-config.num_scale,
                                                     np.ceil(config.num_scale/2))

        # Search-region side length in frame pixels.
        # Per the original notes: instance_size = 255, exemplar_size = 127.
        self.s_x = s_z + (config.instance_size-config.exemplar_size) / scale_z

        # arbitrary scale saturation
        self.min_s_x = 0.2 * self.s_x
        self.max_s_x = 5 * self.s_x

    def update(self, frame):
        """Track the object based on the previous frame.

        Args:
            frame: an RGB image

        Returns:
            bbox: tuple of 1-based bounding box (xmin, ymin, xmax, ymax)
        """
        size_x_scales = self.s_x * self.scales
        pyramid = get_pyramid_instance_image(frame, self.pos, config.instance_size, size_x_scales, self.img_mean)
        instance_imgs = torch.cat([self.transforms(x)[None, :, :, :] for x in pyramid], dim=0)
        with torch.cuda.device(self.gpu_id):
            instance_imgs_var = Variable(instance_imgs.cuda())
            response_maps = self.model((None, instance_imgs_var))
            response_maps = response_maps.data.cpu().numpy().squeeze()
            # The interpolation flag must be passed by keyword: the third
            # positional parameter of cv2.resize is ``dst``.
            response_maps_up = [cv2.resize(x, (self.interp_response_sz, self.interp_response_sz),
                                           interpolation=cv2.INTER_CUBIC)
                                for x in response_maps]
        # penalty scale change
        max_score = np.array([x.max() for x in response_maps_up]) * self.penalty
        # Index of the scale with the highest penalised peak.
        scale_idx = max_score.argmax()
        response_map = response_maps_up[scale_idx]
        response_map -= response_map.min()
        total = response_map.sum()
        # A constant response sums to zero after the shift; skip the division
        # so the map stays finite and the cosine window decides the peak.
        if total > 0:
            response_map /= total
        # Per the original notes: window_influence = 0.176.
        response_map = (1 - config.window_influence) * response_map + \
            config.window_influence * self.cosine_window
        max_r, max_c = np.unravel_index(response_map.argmax(), response_map.shape)
        # displacement in interpolation response
        disp_response_interp = np.array([max_c, max_r]) - (self.interp_response_sz-1) / 2.
        # displacement in input
        disp_response_input = disp_response_interp * config.total_stride / config.response_up_stride
        # displacement in frame
        scale = self.scales[scale_idx]
        disp_response_frame = disp_response_input * (self.s_x * scale) / config.instance_size
        # absolute position
        self.pos += disp_response_frame
        # Blend the current size with the winning scale.
        # Per the original notes: scale_lr = 0.59 (scale learning rate).
        self.s_x *= ((1 - config.scale_lr) + config.scale_lr * scale)
        self.s_x = max(self.min_s_x, min(self.max_s_x, self.s_x))
        self.target_sz = ((1 - config.scale_lr) + config.scale_lr * scale) * self.target_sz
        bbox = (self.pos[0] - self.target_sz[0]/2 + 1,  # xmin convert to 1-based
                self.pos[1] - self.target_sz[1]/2 + 1,  # ymin
                self.pos[0] + self.target_sz[0]/2 + 1,  # xmax
                self.pos[1] + self.target_sz[1]/2 + 1)  # ymax
        return bbox
import numpy as np
import cv2
def get_center(x):
    """Return ``(x - 1) / 2`` as a float."""
    return (x - 1.) / 2.


def xyxy2cxcywh(bbox):
    """Convert a corner box to centre/size form.

    Args:
        bbox: ``(xmin, ymin, xmax, ymax)``.

    Returns:
        ``(cx, cy, w, h)`` where ``cx = get_center(xmin + xmax)``,
        ``cy = get_center(ymin + ymax)``, ``w = xmax - xmin`` and
        ``h = ymax - ymin``.
    """
    # The height term was missing after a dangling line continuation,
    # although callers unpack four values.
    return get_center(bbox[0]+bbox[2]), \
        get_center(bbox[1]+bbox[3]), \
        (bbox[2]-bbox[0]), \
        (bbox[3]-bbox[1])
def crop_and_pad(img, cx, cy, model_sz, original_sz, img_mean=None):
    """Crop a square window centred on ``(cx, cy)``, padding past the image edge.

    Args:
        img: Image array, either ``H x W x C`` or ``H x W``.
        cx: Window centre, x coordinate in pixels.
        cy: Window centre, y coordinate in pixels.
        model_sz: Side length of the returned patch; the crop is resized to
            ``(model_sz, model_sz)`` when it differs from ``original_sz``.
        original_sz: Side length of the window in the source image.
        img_mean: Constant border value used for padding. When omitted, the
            per-channel mean of ``img`` is used.

    Returns:
        The cropped patch, padded and resized as described above.
    """
    half = original_sz // 2
    xmin = cx - half
    xmax = cx + half
    ymin = cy - half
    ymax = cy + half
    # Only the first two dimensions are needed, so 2-D images work as well.
    im_h, im_w = img.shape[:2]

    # Amount by which the window sticks out on each side of the image.
    left = right = top = bottom = 0
    if xmin < 0:
        left = int(abs(xmin))
    if xmax > im_w:
        right = int(xmax - im_w)
    if ymin < 0:
        top = int(abs(ymin))
    if ymax > im_h:
        bottom = int(ymax - im_h)

    # Clamp the window to the image before slicing.
    xmin = int(max(0, xmin))
    xmax = int(min(im_w, xmax))
    ymin = int(max(0, ymin))
    ymax = int(min(im_h, ymax))
    im_patch = img[ymin:ymax, xmin:xmax]

    if left != 0 or right != 0 or top != 0 or bottom != 0:
        if img_mean is None:
            # atleast_1d turns the scalar mean of a 2-D image into a sequence.
            img_mean = tuple(map(int, np.atleast_1d(img.mean(axis=(0, 1)))))
        im_patch = cv2.copyMakeBorder(im_patch, top, bottom, left, right,
                                      cv2.BORDER_CONSTANT, value=img_mean)
    if model_sz != original_sz:
        # Resize to the model input size (127x127 per the original note).
        im_patch = cv2.resize(im_patch, (model_sz, model_sz))
    return im_patch
# Called as get_exemplar_image(frame, box, 127, 0.5, self.img_mean):
# crops the image patch used to initialise the model.
def get_exemplar_image(img, bbox, size_z, context_amount, img_mean=None):
    """Crop the exemplar patch around ``bbox`` with added context.

    Args:
        img: Source image.
        bbox: Box in the form accepted by ``xyxy2cxcywh``.
        size_z: Side length of the returned patch.
        context_amount: Fraction of ``w + h`` added to each box side.
        img_mean: Padding value forwarded to ``crop_and_pad``.

    Returns:
        ``(exemplar_img, scale_z, s_z)``: the patch, the ratio
        ``size_z / s_z``, and the side length ``s_z`` of the window that
        was cropped from ``img``.
    """
    center_x, center_y, box_w, box_h = xyxy2cxcywh(bbox)
    padded_w = box_w + context_amount * (box_w+box_h)
    padded_h = box_h + context_amount * (box_w+box_h)
    # Side of the square whose area equals the padded box.
    s_z = np.sqrt(padded_w * padded_h)
    # Scale = size_z / s_z (127 / s_z per the original note).
    scale_z = size_z / s_z
    patch = crop_and_pad(img, center_x, center_y, size_z, s_z, img_mean)
    return patch, scale_z, s_z
def get_instance_image(img, bbox, size_z, size_x, context_amount, img_mean=None):
    """Crop the search (instance) patch around ``bbox``.

    Args:
        img: Source image.
        bbox: Box in the form accepted by ``xyxy2cxcywh``.
        size_z: Exemplar side length.
        size_x: Side length of the returned instance patch.
        context_amount: Fraction of ``w + h`` added to each box side.
        img_mean: Padding value forwarded to ``crop_and_pad``.

    Returns:
        ``(instance_img, scale_x, s_x)``: the patch, the ratio
        ``size_x / s_x``, and the side length ``s_x`` of the window that
        was cropped from ``img``.
    """
    center_x, center_y, box_w, box_h = xyxy2cxcywh(bbox)
    padded_w = box_w + context_amount * (box_w+box_h)
    padded_h = box_h + context_amount * (box_w+box_h)
    exemplar_side = np.sqrt(padded_w * padded_h)
    exemplar_scale = size_z / exemplar_side
    # Extra margin on each side of the exemplar window, converted from
    # model pixels to image pixels.
    margin_model = (size_x - size_z) / 2
    margin_image = margin_model / exemplar_scale
    s_x = exemplar_side + 2 * margin_image
    scale_x = size_x / s_x
    patch = crop_and_pad(img, center_x, center_y, size_x, s_x, img_mean)
    return patch, scale_x, s_x
def get_pyramid_instance_image(img, center, size_x, size_x_scales, img_mean=None):
    """Crop one instance patch per scale around the same centre.

    Args:
        img: Source image.
        center: ``(cx, cy)`` of the crop windows.
        size_x: Side length every patch is brought to by ``crop_and_pad``.
        size_x_scales: Iterable of window side lengths, one per patch.
        img_mean: Padding value; defaults to the per-channel mean of ``img``.

    Returns:
        A list of patches, in the order of ``size_x_scales``.
    """
    # Resolve the padding value once so every level shares it.
    fill = img_mean
    if fill is None:
        fill = tuple(int(channel) for channel in img.mean(axis=(0, 1)))
    pyramid = []
    for window_side in size_x_scales:
        pyramid.append(crop_and_pad(img, center[0], center[1], size_x, window_side, fill))
    return pyramid