GitHub link: https://github.com/blainetse/mytoolkitsvot-toolkit-python
Official toolkit: https://github.com/votchallenge/toolkit
- Reference 1: https://blog.csdn.net/laizi_laizi/article/details/122492396 (a well-written walkthrough; the same author also has a post collecting download links for almost every object-tracking dataset)
- Reference 2: https://www.cnblogs.com/kingsonfu/p/11683494.html
Checking the number of files in a given directory under Linux
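This is handy for verifying that a sequence finished downloading. A minimal Python sketch (the path is a placeholder; a shell one-liner such as `ls -1 | wc -l` does much the same):

import os

def count_files(directory):
    # count regular files only, ignoring sub-directories
    return sum(
        1 for entry in os.listdir(directory)
        if os.path.isfile(os.path.join(directory, entry))
    )

print(count_files("./VOT2022_LT/sequence_a/color"))  # placeholder path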
Tip: without a proxy, the download speed will be very slow!
The source code is as follows:
import os
import requests
import zipfile
import pandas as pd
from tqdm import tqdm
from urllib.parse import urlparse, urljoin
def get_data(url):
"""Step1: 获取数据集下载链接
Args:
url: 数据集对应的 `json` 文件下载链接,可以直接在浏览器中输入该链接,查看其内容
"""
base_url = url.rsplit("/", 1)[0] + "/"
try:
meta = requests.get(url).json()
except requests.exceptions.RequestException as e:
raise Exception("Unable to read JSON file {}".format(e))
# global sequences_url, annos_url, fnames
sequences_url, annos_url, fnames = [], [], []
for sequence in meta["sequences"]:
# get data name
fnames.append(sequence["name"])
# get groundtruth zip file
url = sequence["annotations"]["url"]
if bool(urlparse(url).netloc):
anno_url = url
else:
anno_url = urljoin(base_url, url)
        # get the image (frame) zip file
        url = sequence["channels"]["color"]["url"]
        if bool(urlparse(url).netloc):
            sequence_url = url
        else:
            sequence_url = urljoin(base_url, url)
annos_url.append(anno_url)
sequences_url.append(sequence_url)
return sequences_url, annos_url, fnames
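# For reference, `description.json` has roughly this shape (illustrative, and
# abbreviated to just the fields `get_data` reads):
# {
#     "sequences": [
#         {
#             "name": "...",
#             "annotations": {"url": "..."},
#             "channels": {"color": {"url": "..."}}
#         }
#     ]
# }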
def write2csv(csvfile, fnames, urls):
"""Step2: 将下载链接保存到 csv 文件中
Args:
csvfile: str 将下载链接等信息保存到 `csv` 文件,对应文件名 `{version}_sequences.csv` 以及 `{version}_anno.csv`
fnames: [list: str] 对应着每一个 `sequence` 的名称,也可以理解为 `sequences`
urls: [list: str] 对应着每一个 `sequence/frame` 的下载链接
"""
# 1.创建一个 DataFrame 作为一行写入,以键值对——字典的形式存储
df = pd.DataFrame({"filename": fnames, "urls": urls, "state": [False] * 50})
# 2.将 DataFrame 存储为 csv 文件,index 表示是否显示行名称(可以是数字,也可以是自定义的字符串)default=True
df.to_csv(csvfile, index=0)
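# The csv written above ends up looking like this (values are illustrative):
#     filename,urls,state
#     sequence_a,https://data.votchallenge.net/.../sequence_a.zip,False
#     sequence_b,https://data.votchallenge.net/.../sequence_b.zip,False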
def run_writer(fnames, sequences_url, annos_url):
"""执行 `write2csv`,将数据集信息写入到文件中"""
if not os.path.exists(csvfile["sequences"]):
write2csv(csvfile["sequences"], fnames, sequences_url)
if not os.path.isfile(csvfile["annos"]):
write2csv(csvfile["annos"], fnames, annos_url)
def download(url, folder_path, fname, use_proxy=False):
"""Step3: 下载数据
Args:
url: 下载链接
folder_path: 下载的数据集文件保存路径
fname: 每一个 `sequence` 对应的名字
use_proxy: 是否使用代理
"""
# 屏蔽warning信息
requests.packages.urllib3.disable_warnings()
# 构建自己的代理 IP 池
proxies = {
# 这里修改为自己的代理端口号,可在代理软件中进行查看更改,clash 默认是7890
"http": "http://127.0.0.1:7890",
"https": "http://127.0.0.1:7890",
}
headers = {
"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
}
    if not os.path.exists(folder_path):
        os.makedirs(folder_path)  # a separate folder for each sequence
    fname = os.path.join(folder_path, f"{fname}.zip")
    ############# resumable download ##################
    # the first request is only used to learn the total file size
    response = requests.get(
        url, stream=True, verify=False, proxies=proxies if use_proxy else None
    )
    total_size = int(response.headers["Content-Length"])
    if os.path.isfile(fname):
        temp_size = os.path.getsize(fname)  # bytes already downloaded locally
        if temp_size == total_size:
            print(
                f"{fname.split('/')[-2] + '.zip'} exists and has already been fully downloaded!"
            )
            return
    else:
        temp_size = 0
    # report how much has been downloaded so far
    print(
        f"{fname.split('/')[-2] + '.zip'} downloaded: {temp_size/(1024*1024):.2f}MB || Total size: {total_size/(1024*1024):.2f}MB || Remaining fraction: {1 - temp_size/total_size:.2f}"
    )
    # the key step: an open-ended Range header asks the server to resume
    # from the bytes we already have
    headers = {
        "Range": f"bytes={temp_size}-",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.75 Safari/537.36",
    }
    # re-request the url, now with the Range header attached
    response = requests.get(
        url,
        stream=True,
        verify=False,
        headers=headers,
        proxies=proxies if use_proxy else None,
    )
############################################
with tqdm.wrapattr(
        open(fname, "ab"),  # append mode, so resumed chunks extend the partial file
"write",
miniters=1,
desc=fname.split("/")[-2] + ".zip",
total=int(response.headers.get("content-length", 0)),
) as fout:
for chunk in response.iter_content(chunk_size=4096):
if chunk:
temp_size += len(chunk)
fout.write(chunk)
fout.flush()
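# Example call (the url and folder are placeholders; the real call sites are in
# download_annos / download_sequences below):
# download(
#     "https://data.votchallenge.net/.../sequence_a.zip",  # placeholder url
#     "./VOT2022_LT/sequence_a",                           # placeholder folder
#     "color",
#     use_proxy=True,
# )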
def unzip_file(zip_src: str, dst_dir: str):
"""Step4: 解压下载的数据包
Args:
zip_src: 源压缩包路径
dst_dir: 指定解压的目录
Returns: bool
True: 表示解压成功,同时也包含该数据已经下载的信息
False: 表示解压失败,数据集没有成功下载
"""
try:
with zipfile.ZipFile(file=zip_src) as zip_file:
# Loop over each file
# print(f"Start unzip {zip_src.split('/')[-2]}")
for file in zip_file.namelist():
# Extract each file to another directory
# If you want to extract to current working directory, don't specify path
zip_file.extract(member=file, path=dst_dir)
print(f"{zip_src.split('/')[-2].upper()} has been unziped fully!")
os.remove(zip_src)
return True
    except (zipfile.BadZipFile, OSError):
        print(f"{zip_src} is missing or is not a valid zip file!")
        return False
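# Once both archives of a sequence have been unzipped, its folder should look
# roughly like this (inferred from the call sites below; the name is a placeholder):
# {root}/sequence_a/
#     groundtruth.txt    (from groundtruth.zip)
#     color/             (frames from color.zip, e.g. 00000001.jpg)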
def download_annos(use_proxy=True):
"""Step5: 下载每一个 `sequence` 对应的 `annotation` 或者说 `groundtruth.txt`
Args:
use_proxy: 是否使用代理
"""
annos = pd.read_csv(csvfile["annos"], header=0, index_col=0, usecols=[0, 1, 2])
for fname, (url, state) in annos.iterrows():
folder_path = os.path.join(os.getcwd(), root, fname)
        # skip anything already marked as finished
        if state:
            print(
                f"{fname} has already been fully downloaded and unzipped! Moving on to the next one."
            )
        else:
            ## download
            download(url, folder_path, "groundtruth", use_proxy)
            ## unzip
            zip_src = folder_path + "/groundtruth.zip"
            dest_dir = os.path.join(os.getcwd(), root, fname)
            state = unzip_file(zip_src, dest_dir)
            ## write the new state back to the csv file
            annos.loc[fname, "state"] = state
            annos.to_csv(csvfile["annos"], encoding="utf-8")
def download_sequences(use_proxy=True):
"""Step5: 下载 `video sequences`
Args:
use_proxy: 是否使用代理
"""
sequences = pd.read_csv(
csvfile["sequences"], header=0, index_col=0, usecols=[0, 1, 2]
)
for fname, (url, state) in sequences.iterrows():
folder_path = os.path.join(os.getcwd(), root, fname)
        # skip anything already marked as finished
        if state:
            print(
                f"{fname} has already been fully downloaded and unzipped! Moving on to the next one."
            )
        else:
            ## download
            download(url, folder_path, "color", use_proxy)
            ## unzip
            zip_src = folder_path + "/color.zip"
            dest_dir = os.path.join(os.getcwd(), root, fname, "color")
            state = unzip_file(zip_src, dest_dir)
            ## write the new state back to the csv file
            sequences.loc[fname, "state"] = state
            sequences.to_csv(csvfile["sequences"], encoding="utf-8")
def write2list(root, fnames):
"""Step5: 下载 `list`,存放在当前数据据目录下,后期读取数据集的时候会用到"""
if not os.path.exists(root):
os.makedirs(root)
flist = os.path.join(root, "list.txt")
with open(flist, "w") as fl:
fl.writelines([fname + "\n" for fname in fnames])
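# list.txt simply holds one sequence name per line, e.g. (names are illustrative):
# sequence_a
# sequence_b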
def write2squence(root, sequence):
"""Step5: 下载 `sequence`,存放在每一个 `sequence` 目录下,保存当前序列的相关信息"""
for fname in fnames:
## 添加 sequence 文件
fsequence = os.path.join(root, fname, "sequence")
# print(fsequence)
if not os.path.exists(fsequence):
fsequence = open(fsequence, encoding="utf-8", mode="w")
fsequence.writelines(sequence)
fsequence.writelines(f"name={fname}\r\n")
fsequence.flush()
fsequence.close()
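# Each resulting `sequence` file then reads (the name value is illustrative):
# channels.color=color/%08d.jpg
# format=default
# fps=30
# name=sequence_a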
if __name__ == "__main__":
VOT_DATASETS = {
"vot2013": "http://data.votchallenge.net/vot2013/dataset/description.json",
"vot2014": "http://data.votchallenge.net/vot2014/dataset/description.json",
"vot2015": "http://data.votchallenge.net/vot2015/dataset/description.json",
"vot-tir2015": "http://www.cvl.isy.liu.se/research/datasets/ltir/version1.0/ltir_v1_0_8bit.zip",
"vot2016": "http://data.votchallenge.net/vot2016/main/description.json",
"vot-tir2016": "http://data.votchallenge.net/vot2016/vot-tir2016.zip",
"vot2017": "http://data.votchallenge.net/vot2017/main/description.json",
"vot-st2018": "http://data.votchallenge.net/vot2018/main/description.json",
"vot-lt2018": "http://data.votchallenge.net/vot2018/longterm/description.json",
"vot-st2019": "http://data.votchallenge.net/vot2019/main/description.json",
"vot-lt2019": "http://data.votchallenge.net/vot2019/longterm/description.json",
"vot-rgbd2019": "http://data.votchallenge.net/vot2019/rgbd/description.json",
"vot-rgbt2019": "http://data.votchallenge.net/vot2019/rgbtir/meta/description.json",
"vot-st2020": "https://data.votchallenge.net/vot2020/shortterm/description.json",
"vot-rgbt2020": "http://data.votchallenge.net/vot2020/rgbtir/meta/description.json",
"vot-st2021": "https://data.votchallenge.net/vot2021/shortterm/description.json",
"test": "http://data.votchallenge.net/toolkit/test.zip",
"segmentation": "http://box.vicos.si/tracking/vot20_test_dataset.zip",
"vot2022/rgbd": "https://data.votchallenge.net/vot2022/rgbd/description.json",
"vot2022/depth": "https://data.votchallenge.net/vot2022/depth/description.json",
"vot2022/stb": "https://data.votchallenge.net/vot2022/stb/description.json",
"vot2022/sts": "https://data.votchallenge.net/vot2022/sts/description.json",
"vot2022/lt": "https://data.votchallenge.net/vot2022/lt/description.json",
}
    ## 0. initialise the run
    # stack = "vot2022/lt"
    # version = "VOT2022_LT"
    stack = input("Dataset to download (e.g. vot2022/lt): ")  # note: must match a key of `VOT_DATASETS`
    version = input("Name to save the dataset under (e.g. VOT2022_LT): ")  # any readable name will do, e.g. `votlt2022`
    base_root = input("Directory to save the dataset in (e.g. /data): ")
    root = os.path.join(base_root, version)
    sequence = ["channels.color=color/%08d.jpg\r\n", "format=default\r\n", "fps=30\r\n"]
    csvfile = {
        "sequences": version + "_sequences.csv",
        "annos": version + "_annos.csv",
    }
    ## 1. collect the download links
    sequences_url, annos_url, fnames = get_data(VOT_DATASETS[stack])
    # print the links if you want to inspect them:
    # for fname, url in zip(fnames, sequences_url):
    #     print(f"{fname}: {url}")
    ## 2. save the download links to csv files
    run_writer(fnames, sequences_url, annos_url)
    ## 3. download the data
    download_annos(use_proxy=True)  # 3.1 download groundtruth.txt
    print("Done, groundtruth.txt has been downloaded!")
    download_sequences(use_proxy=True)  # 3.2 download the sequences
    print("Done, color.zip has been downloaded!")
    write2list(root, fnames)  # 3.3 write list.txt
    print("Done, list.txt has been written!")
    write2squence(root, sequence)  # 3.4 write the per-sequence metadata files
    print("Done, the sequence files have been written!")