代码主要实现了将VisDrone2019-MOT数据集转换成COCO格式的功能。
注意点:
# coding:utf-8
import os
import cv2
import json
import argparse
from PIL import Image
# File extensions treated as video frames; anything else in a sequence
# directory (e.g. OS metadata files) is ignored.
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png')


def _visdrone_categories():
    """Return the COCO ``categories`` list for the 12 VisDrone class ids."""
    names = [
        "ignored regions", "pedestrian", "people", "bicycle", "car", "van",
        "truck", "tricycle", "awning-tricycle", "bus", "motor", "others",
    ]
    return [{"id": idx, "name": name} for idx, name in enumerate(names)]


def _list_image_files(video_dir):
    """Return the image file names in *video_dir*, sorted by name."""
    return sorted(
        name for name in os.listdir(video_dir)
        if name.lower().endswith(_IMAGE_EXTENSIONS)
    )


def _parse_annotation_lines(lines, image_id_offset, first_ann_id):
    """Turn the lines of one per-video label file into COCO annotation dicts.

    Each non-blank line is comma separated; the columns read here are
    0: frame index, 1: track id, 2-5: bbox x, y, w, h, 6: score, 7: class id.
    ``image_id_offset`` is added to the frame index to obtain the global
    image id. Annotation ids are numbered consecutively from ``first_ann_id``.
    """
    annotations = []
    ann_id = first_ann_id
    for line in lines:
        line = line.strip()
        if not line:
            # Tolerate blank / trailing empty lines instead of crashing.
            continue
        fields = line.split(',')
        x, y, w, h = (float(value) for value in fields[2:6])
        annotations.append({
            'id': ann_id,
            'category_id': int(fields[7]),
            'image_id': int(fields[0]) + image_id_offset,
            'track_id': int(fields[1]),
            'conf': float(fields[6]),
            'area': w * h,
            'bbox': [x, y, w, h],
        })
        ann_id += 1
    return annotations


def VisdroneMOT2coco(root_path="VisDrone2019-MOT-val", out_path=None):
    """Convert a VisDrone2019-MOT split into a COCO-style tracking JSON file.

    The split directory must contain ``sequences/<video>/<frame image>`` and
    ``annotations/<video>.txt``. The written JSON has four top-level lists:
    ``categories``, ``videos``, ``images`` and ``annotations``.

    Args:
        root_path: Directory of the dataset split to convert.
        out_path: Destination JSON file. Defaults to ``result.json`` inside
            ``root_path``.

    Raises:
        FileNotFoundError: If ``root_path`` does not exist, or a video has no
            matching annotation file.
        ValueError: If a frame image cannot be decoded.
    """
    print("Loading data from ", root_path)
    if not os.path.exists(root_path):
        # Explicit check: an ``assert`` would be stripped under ``python -O``.
        raise FileNotFoundError(
            "dataset root does not exist: {}".format(root_path))

    labels_dir = os.path.join(root_path, 'annotations')
    videos_dir = os.path.join(root_path, 'sequences')

    # os.listdir() order is arbitrary; sort so that ids are deterministic,
    # and skip stray files that are not sequence directories.
    video_names = sorted(
        name for name in os.listdir(videos_dir)
        if os.path.isdir(os.path.join(videos_dir, name))
    )

    dataset = {
        'categories': _visdrone_categories(),
        'annotations': [],
        'images': [],
        'videos': [],
    }

    ann_id = 0    # running annotation id (0-based, as before)
    image_id = 0  # running global image id (first image gets id 1)

    for video_id, video_name in enumerate(video_names, start=1):
        print("------processing video-------:", video_name)
        # Use the same 1-based id that the images store in 'video_id'.
        dataset['videos'].append({'id': video_id, 'file_name': video_name})

        video_dir = os.path.join(videos_dir, video_name)
        # Frames must be in ascending order: frame_id, the prev/next links
        # and the "frame index + offset" annotation mapping all rely on it.
        image_names = _list_image_files(video_dir)
        num_frames = len(image_names)
        # Global image id of the frame just before this video's first frame.
        image_id_offset = image_id

        for frame_id, image_name in enumerate(image_names, start=1):
            print("processing image:", image_name)
            img_path = os.path.join(video_dir, image_name)
            im = cv2.imread(img_path)
            if im is None:
                # cv2.imread signals failure by returning None.
                raise ValueError("cannot read image: {}".format(img_path))
            height, width = im.shape[:2]

            image_id += 1
            dataset['images'].append({
                'file_name': os.path.join(video_name, image_name),
                'id': image_id,
                'frame_id': frame_id,
                # -1 marks "no neighbour" at either end of the video.
                'prev_image_id': image_id - 1 if frame_id > 1 else -1,
                'next_image_id': image_id + 1 if frame_id < num_frames else -1,
                'video_id': video_id,
                'width': width,
                'height': height,
            })

        # One label file per video, covering all of its frames.
        label_path = os.path.join(labels_dir, video_name + '.txt')
        with open(label_path, 'r') as fr:
            new_annotations = _parse_annotation_lines(
                fr, image_id_offset, ann_id)
        dataset['annotations'].extend(new_annotations)
        ann_id += len(new_annotations)

    if out_path is None:
        out_path = os.path.join(root_path, 'result.json')
    with open(out_path, 'w') as json_file:
        json.dump(dataset, json_file, indent=4)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description="Convert a VisDrone2019-MOT split to COCO-style JSON.")
    parser.add_argument(
        '--root', default="VisDrone2019-MOT-val",
        help="dataset split directory (contains sequences/ and annotations/)")
    parser.add_argument(
        '--out', default=None,
        help="output JSON path (default: <root>/result.json)")
    args = parser.parse_args()
    VisdroneMOT2coco(args.root, args.out)