- Cityscapes数据集则是由奔驰主推,提供无人驾驶环境下的图像分割数据集。用于评估视觉算法在城区场景语义理解方面的性能。Cityscapes包含50个城市不同场景、不同背景、不同季节的街景,提供5000张精细标注的图像、20000张粗略标注的图像、30类标注物体。用PASCAL VOC标准的 intersection-over-union (IoU)得分来对算法性能进行评价。 Cityscapes数据集共有fine和coarse两套评测标准,前者提供5000张精细标注的图像,后者提供5000张精细标注外加20000张粗糙标注的图像。
- 该数据集包含如下:images_base和annotations_base分别对应文件夹leftImg8bit(5,030 items, totalling 11.6 GB,实际为5000张图像)和gtFine(30,030 items, totalling 1.1 GB)。里面都包含三个文件夹:train、val、test。总共5000张精细标注的图像:2975张训练图,500张验证图和1525张测试图。
- 在leftImg8bit/train下有18个子文件夹对应德国的16个城市,法国一个城市和瑞士一个城市;在leftImg8bit/val下有3个子文件夹对应德国的3个城市;在leftImg8bit/test下有6个子文件夹对应德国的6个城市。
- 在gtFine/train下有18个子文件夹,与leftImg8bit/train里面的文件夹一一对应;不同的是,leftImg8bit里面的一张原图,在gtFine里面对应着6个文件,分别是color.png、instanceIds.png、instanceTrainIds.png、labelIds.png、labelTrainIds.png、polygons.json(实际从官网下载到的数据集只有4个文件:color.png、instanceIds.png、labelIds.png、polygons.json)。
- 数据集的测试集没有公开标签,只有训练集和验证集带标签,想得到测试结果需要提交模型在线测试。
可以参考Cityscapes使用方法,需要进入官网注册登录之后进行下载。
下载原图leftImg8bit_trainvaltest.zip和标签gtFine_trainvaltest.zip,实际下载得到的原图和标签的压缩包大小分别为11.6GB和252.6MB。
下载完成后进行解压。
gtFine_trainvaltest目录结构
leftImg8bit_trainvaltest目录结构
详细的类别和数量信息可以参考Cityscapes数据集介绍。
参考cityscapes-to-voc,根据我下载的数据路径进行了修改,使用python进行转换的代码如下。
由于测试集没有标签,只有验证集带标签,这里将原cityscapes的train转成了voc格式中的trainval,将原cityscapes的val转成了voc的test,方便在本地直接进行训练和测试。
from pascal_voc_writer import Writer
import matplotlib.pyplot as plt
import numpy as np
import os
import json
import glob
import time
from shutil import move, copy
#------------------------
#function to make folder
#------------------------
def make_dir(path):
    """Create directory ``path``, including missing parents, if it is absent.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # exist_ok=True replaces the separate isdir() check, which could race
    # with another process creating the directory between check and create.
    os.makedirs(path, exist_ok=True)
#----------------------------------------------------------------------------------------------------------------
#convert polygon to bounding box
#code from:
#https://stackoverflow.com/questions/46335488/how-to-efficiently-find-the-bounding-box-of-a-collection-of-points
#----------------------------------------------------------------------------------------------------------------
def polygon_to_bbox(polygon):
    """Return the axis-aligned box ``[xmin, ymin, xmax, ymax]`` around ``polygon``.

    ``polygon`` is an iterable of ``(x, y)`` pairs.
    """
    # Transpose the points into one sequence of x values and one of y values.
    xs, ys = zip(*polygon)
    left, right = min(xs), max(xs)
    top, bottom = min(ys), max(ys)
    return [left, top, right, bottom]
# --------------------------------------------
# read a json file and convert to voc format
# --------------------------------------------
def read_json(file):
    """Extract VOC-style bounding boxes from one polygon annotation JSON file.

    Reads the module-level ``classes`` mapping (source label -> name written
    to the VOC annotation), which the script section of this file defines
    before calling this function.

    Args:
        file: Path to the JSON annotation file. It must contain an
            ``'objects'`` list whose items have ``'label'`` and ``'polygon'``.

    Returns:
        A ``(data, relevant_file)`` tuple. ``data`` is a list of
        ``[class_name, xmin, ymin, xmax, ymax]`` entries, one per object whose
        label is a key of ``classes``. ``relevant_file`` is ``True`` when at
        least one such object was found, so callers can skip images without
        relevant objects.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file, 'r', encoding='utf-8') as f:
        file_data = json.load(f)

    data = []
    for obj in file_data['objects']:
        label, polygon = obj['label'], obj['polygon']
        # Process only labels that have a VOC counterpart.
        if label not in classes:
            continue
        # An object without points has no bounding box; skip it rather than
        # letting the bbox computation fail on an empty sequence.
        if len(polygon) == 0:
            continue
        data.append([classes[label]] + polygon_to_bbox(polygon))

    return data, bool(data)
#---------------------------
#function to save xml file
#---------------------------
def save_xml(img_path, img_shape, data, save_path):
    """Write one Pascal VOC annotation XML file for an image.

    Args:
        img_path: Path of the image the annotation describes.
        img_shape: Image array shape in ``(height, width, ...)`` order, which
            is what the caller gets from ``plt.imread(...).shape``.
        data: Iterable of ``[name, xmin, ymin, xmax, ymax]`` entries.
        save_path: Destination path of the XML file.
    """
    height, width = img_shape[0], img_shape[1]
    # pascal_voc_writer.Writer expects (path, width, height). Passing the
    # array shape positionally would record the two dimensions swapped.
    writer = Writer(img_path, width, height)
    for element in data:
        writer.addObject(*element[:5])
    writer.save(save_path)
if __name__ == "__main__":
    # Convert the labelled Cityscapes splits into a VOC2007-style directory:
    # Annotations/*.xml, JPEGImages/*.png and ImageSets/Main/{trainval,test}.txt.
    # 'train' becomes VOC 'trainval'; every other labelled split becomes 'test'.

    # ----------
    # arguments
    # ----------
    cityscapes_dir = '/home/ecust/txx/dataset/object_detection_open_dataset/VOC/cityscapes'
    save_path = os.path.join(cityscapes_dir, "cityscapes_voc_format")
    cityscapes_dir_gt = os.path.join(cityscapes_dir, 'gtFine_trainvaltest', 'gtFine')
    cityscapes_dir_img = os.path.join(cityscapes_dir, 'leftImg8bit_trainvaltest', 'leftImg8bit')

    # --------------------------------------------------------------
    # Valid classes dictionary: source label -> name written to the XML.
    # 'motorcycle' is not renamed to 'motorbike'; change the value here
    # if that renaming is required.
    # --------------------------------------------------------------
    classes = {'bicycle': 'bicycle', 'bus': 'bus', 'car': 'car', 'motorcycle': 'motorcycle',
               'person': 'person', 'rider': 'rider', 'train': 'train', 'truck': 'truck'}
    classes_keys = list(classes.keys())

    # The base name is obtained by cutting a fixed number of trailing
    # characters; this assumes every globbed file ends with this suffix.
    json_suffix = '_gtFine_polygons.json'

    # ------------------------------------------
    # reading json files from each subdirectory
    # ------------------------------------------
    valid_files = []     # (source image path, target image file name)
    trainval_files = []  # image ids listed in trainval.txt
    test_files = []      # image ids listed in test.txt

    # make the Annotations target directory if it doesn't already exist
    ann_dir = os.path.join(save_path, 'VOC2007', 'Annotations')
    make_dir(ann_dir)

    count = 0
    start = time.time()
    # Sorted traversal keeps the generated lists reproducible across runs.
    for category in sorted(os.listdir(cityscapes_dir_gt)):
        category_dir = os.path.join(cityscapes_dir_gt, category)
        # no GT for test data; also ignore stray non-directory entries
        if category == 'test' or not os.path.isdir(category_dir):
            continue

        for city in sorted(os.listdir(category_dir)):
            files = sorted(glob.glob(os.path.join(category_dir, city) + '/*.json'))

            for file in files:
                data, relevant_file = read_json(file)
                # images without any relevant object get no xml and are not copied
                if not relevant_file:
                    continue

                count += 1
                base_filename = os.path.basename(file)[:-len(json_suffix)]
                img_name = base_filename + '_leftImg8bit.png'
                xml_filepath = os.path.join(ann_dir, base_filename + '_leftImg8bit.xml')
                img_path = os.path.join(cityscapes_dir_img, category, city, img_name)
                # The image is decoded only to learn its shape for the XML header.
                img_shape = plt.imread(img_path).shape
                valid_files.append([img_path, img_name])

                # image id (file name without '.png') goes into the split list
                image_id = img_name[:-4]
                if category == 'train':
                    trainval_files.append(image_id)
                else:
                    test_files.append(image_id)

                # save xml file
                save_xml(img_path, img_shape, data, xml_filepath)

    elapsed = time.time() - start
    print('Total Time taken: ', elapsed)
    print('file nums=', count)

    # ----------------------------
    # copy files into target path
    # ----------------------------
    images_savepath = os.path.join(save_path, 'VOC2007', 'JPEGImages')
    make_dir(images_savepath)
    for src_path, dst_name in valid_files:
        copy(src_path, os.path.join(images_savepath, dst_name))

    # ---------------------------------------------
    # create text files of trainval and test files
    # ---------------------------------------------
    print("len trainval=", len(trainval_files))
    print("len test=", len(test_files))
    textfiles_savepath = os.path.join(save_path, 'VOC2007', 'ImageSets', 'Main')
    make_dir(textfiles_savepath)

    with open(os.path.join(textfiles_savepath, 'trainval.txt'), 'w') as f:
        f.writelines(image_id + '\n' for image_id in trainval_files)

    with open(os.path.join(textfiles_savepath, 'test.txt'), 'w') as f:
        f.writelines(image_id + '\n' for image_id in test_files)
Tips:
- 由于cityscapes的test中没有标签,因此在转换过程中没有使用test数据;
- 转换过程中剔除掉了负样本;
- 转换前cityscapes的train对应voc格式的trainval,共2965张图像;转换前cityscapes的val对应voc格式的test,共492张图像;
- 总数量为3457;
下载foggy_cityscapes dataset
下载leftImg8bit_trainvaltest_foggy.zip
解压后,leftImg8bit_trainvaltest_foggy的目录结构
foggy_cityscapes的标签就用cityscapes的标签gtFine_trainvaltest.zip,拷贝一份即可;
参考上述cityscapes转voc的流程进行转换,按照同样的数据集划分方式,得到转换后的trainval数量为2965,test的数量为492,总数量为3457。
区别如下:
- foggy_cityscapes没有标签,直接使用cityscapes的标签;
- cityscapes的一张原图对应到foggy_cityscapes中有3张图,beta参数不同,分别为0.005,0.01和0.02,分别进行转换可以得到3个foggy_cityscapes数据集,根据需要进行选择;
转换代码如下:
from pascal_voc_writer import Writer
import matplotlib.pyplot as plt
import numpy as np
import os
import json
import glob
import time
from shutil import move, copy
#------------------------
#function to make folder
#------------------------
def make_dir(path):
    """Create directory ``path``, including missing parents, if it is absent.

    Args:
        path: Directory path to create.

    Raises:
        FileExistsError: If ``path`` exists but is not a directory.
    """
    # A single makedirs call with exist_ok=True has no window between the
    # existence check and the creation, unlike an isdir() test followed by
    # makedirs().
    os.makedirs(path, exist_ok=True)
#----------------------------------------------------------------------------------------------------------------
#convert polygon to bounding box
#code from:
#https://stackoverflow.com/questions/46335488/how-to-efficiently-find-the-bounding-box-of-a-collection-of-points
#----------------------------------------------------------------------------------------------------------------
def polygon_to_bbox(polygon):
    """Compute the bounding box ``[xmin, ymin, xmax, ymax]`` of ``polygon``.

    ``polygon`` is an iterable of ``(x, y)`` pairs.
    """
    # Split the point list column-wise: all x values, then all y values.
    columns = tuple(zip(*polygon))
    xs, ys = columns
    lower = [min(xs), min(ys)]
    upper = [max(xs), max(ys)]
    return lower + upper
# --------------------------------------------
# read a json file and convert to voc format
# --------------------------------------------
def read_json(file):
    """Extract VOC-style bounding boxes from one polygon annotation JSON file.

    Reads the module-level ``classes`` mapping (source label -> name written
    to the VOC annotation), which the script section of this file defines
    before calling this function.

    Args:
        file: Path to the JSON annotation file. It must contain an
            ``'objects'`` list whose items have ``'label'`` and ``'polygon'``.

    Returns:
        A ``(data, relevant_file)`` tuple. ``data`` is a list of
        ``[class_name, xmin, ymin, xmax, ymax]`` entries, one per object whose
        label is a key of ``classes``. ``relevant_file`` is ``True`` when at
        least one such object was found, so callers can skip images without
        relevant objects.
    """
    # JSON is UTF-8 by specification; do not depend on the platform default.
    with open(file, 'r', encoding='utf-8') as f:
        file_data = json.load(f)

    data = []
    for obj in file_data['objects']:
        label, polygon = obj['label'], obj['polygon']
        # Process only labels that have a VOC counterpart.
        if label not in classes:
            continue
        # An object without points has no bounding box; skip it rather than
        # letting the bbox computation fail on an empty sequence.
        if len(polygon) == 0:
            continue
        data.append([classes[label]] + polygon_to_bbox(polygon))

    return data, bool(data)
#---------------------------
#function to save xml file
#---------------------------
def save_xml(img_path, img_shape, data, save_path):
    """Write one Pascal VOC annotation XML file for an image.

    Args:
        img_path: Path of the image the annotation describes.
        img_shape: Image array shape in ``(height, width, ...)`` order, which
            is what the caller gets from ``plt.imread(...).shape``.
        data: Iterable of ``[name, xmin, ymin, xmax, ymax]`` entries.
        save_path: Destination path of the XML file.
    """
    height, width = img_shape[0], img_shape[1]
    # pascal_voc_writer.Writer expects (path, width, height). Passing the
    # array shape positionally would record the two dimensions swapped.
    writer = Writer(img_path, width, height)
    for element in data:
        writer.addObject(*element[:5])
    writer.save(save_path)
if __name__ == "__main__":
    # Convert the labelled Foggy Cityscapes splits (one beta value per run)
    # into a VOC2007-style directory: Annotations/*.xml, JPEGImages/*.png and
    # ImageSets/Main/{trainval,test}.txt. 'train' becomes VOC 'trainval';
    # every other labelled split becomes 'test'.

    # ----------
    # arguments
    # ----------
    # foggy_cityscapes
    beta = 0.02
    foggy_cityscapes_dir = '/home/ecust/txx/dataset/object_detection_open_dataset/VOC/foggy_cityscapes'
    save_path = os.path.join(foggy_cityscapes_dir, "foggy_cityscapes_beta_{}_voc_format".format(beta))
    foggy_cityscapes_dir_gt = os.path.join(foggy_cityscapes_dir, 'gtFine_trainvaltest', 'gtFine')
    foggy_cityscapes_dir_img = os.path.join(foggy_cityscapes_dir, 'leftImg8bit_trainvaltest_foggy',
                                            'leftImg8bit_foggy')

    # --------------------------------------------------------------
    # Valid classes dictionary: source label -> name written to the XML.
    # 'motorcycle' is not renamed to 'motorbike'; change the value here
    # if that renaming is required.
    # --------------------------------------------------------------
    classes = {'bicycle': 'bicycle', 'bus': 'bus', 'car': 'car', 'motorcycle': 'motorcycle',
               'person': 'person', 'rider': 'rider', 'train': 'train', 'truck': 'truck'}
    classes_keys = list(classes.keys())

    # The base name is obtained by cutting a fixed number of trailing
    # characters; this assumes every globbed file ends with this suffix.
    json_suffix = '_gtFine_polygons.json'

    # ------------------------------------------
    # reading json files from each subdirectory
    # ------------------------------------------
    valid_files = []     # (source image path, target image file name)
    trainval_files = []  # image ids listed in trainval.txt
    test_files = []      # image ids listed in test.txt

    # make the Annotations target directory if it doesn't already exist
    ann_dir = os.path.join(save_path, 'VOC2007', 'Annotations')
    make_dir(ann_dir)

    count = 0
    start = time.time()
    # Sorted traversal keeps the generated lists reproducible across runs.
    for category in sorted(os.listdir(foggy_cityscapes_dir_gt)):
        category_dir = os.path.join(foggy_cityscapes_dir_gt, category)
        # no GT for test data; also ignore stray non-directory entries
        if category == 'test' or not os.path.isdir(category_dir):
            continue

        for city in sorted(os.listdir(category_dir)):
            files = sorted(glob.glob(os.path.join(category_dir, city) + '/*.json'))

            for file in files:
                data, relevant_file = read_json(file)
                # images without any relevant object get no xml and are not copied
                if not relevant_file:
                    continue

                count += 1
                base_filename = os.path.basename(file)[:-len(json_suffix)]
                # Target names drop the beta suffix; the source file keeps it.
                img_name = base_filename + '_leftImg8bit_foggy.png'
                xml_filepath = os.path.join(ann_dir, base_filename + '_leftImg8bit_foggy.xml')
                img_path = os.path.join(foggy_cityscapes_dir_img, category, city,
                                        base_filename + '_leftImg8bit_foggy_beta_{}.png'.format(beta))
                # The image is decoded only to learn its shape for the XML header.
                img_shape = plt.imread(img_path).shape
                valid_files.append([img_path, img_name])

                # image id (file name without '.png') goes into the split list
                image_id = img_name[:-4]
                if category == 'train':
                    trainval_files.append(image_id)
                else:
                    test_files.append(image_id)

                # save xml file
                # NOTE(review): the XML is written from the beta-suffixed source
                # path while the image is copied as img_name; presumably the file
                # name recorded in the XML differs from the copied file. Confirm
                # whether downstream loaders read it.
                save_xml(img_path, img_shape, data, xml_filepath)

    elapsed = time.time() - start
    print('Total Time taken: ', elapsed)
    print('file nums=', count)

    # ----------------------------
    # copy files into target path
    # ----------------------------
    images_savepath = os.path.join(save_path, 'VOC2007', 'JPEGImages')
    make_dir(images_savepath)
    for src_path, dst_name in valid_files:
        copy(src_path, os.path.join(images_savepath, dst_name))

    # ---------------------------------------------
    # create text files of trainval and test files
    # ---------------------------------------------
    print("len trainval=", len(trainval_files))
    print("len test=", len(test_files))
    textfiles_savepath = os.path.join(save_path, 'VOC2007', 'ImageSets', 'Main')
    make_dir(textfiles_savepath)

    with open(os.path.join(textfiles_savepath, 'trainval.txt'), 'w') as f:
        f.writelines(image_id + '\n' for image_id in trainval_files)

    with open(os.path.join(textfiles_savepath, 'test.txt'), 'w') as f:
        f.writelines(image_id + '\n' for image_id in test_files)
[1]:语义分割数据集详解(PASCAL-VOC2012,Vocbenchmark,Cityscapes)
[2]:深度学习数据集介绍及相互转换
[3]:Cityscapes数据集介绍
[4]:Cityscapes使用方法
[5]:cityscapes官网下载地址
[6]:cityscapes-to-voc