Semantic Drone Dataset数据集下载地址
该语义无人机数据集专注于城市场景的语义理解,以提高无人机自主飞行和着陆程序的安全性。图像以天底(垂直向下的鸟瞰)视角拍摄,拍摄高度距地面 5 至 30 米,涵盖 20 多座房屋。图像由高分辨率相机采集,尺寸为 6000x4000px(24Mpx)。训练集包含 400 张公开可用的图像,测试集包含 200 张私有图像。
该语义分割数据集包括20个种类:
由于该数据集未提供训练时使用的掩膜图(每个像素值为类别编号的单通道图),因此需要自己根据标签图的RGB值去还原掩膜图。可以通过以下代码把彩色标签图转换成掩膜图,或者给掩膜图重新上色。
'''
colorTransformer.py
'''
import numpy as np
class ColorTransformer:
    """Convert Semantic Drone Dataset labels between RGB colours and class ids.

    The class id of a label is its position in the colour table built by
    ``createColorTable`` (``unlabeled`` is 0, ``conflicting`` is 23).
    """

    # Each 8-bit channel has 256 possible values, so 256 is the smallest base
    # that packs (r, g, b) into a single integer without collisions. With a
    # base of 255, (0, 1, 0) and (255, 0, 0) would share the same id.
    _CHANNEL_BASE = 256

    def __init__(self):
        """Build the colour table and the matching packed-colour id table.

        Both tables are printed to stdout as they are built.
        """
        # color table.
        self.clr_tab = self.createColorTable()
        print(self.clr_tab)
        print(self.clr_tab.keys())
        # id table: class name -> packed integer of its colour.
        self.id_tab = {
            name: self.clr2id(clr) for name, clr in self.clr_tab.items()
        }
        print(self.id_tab)

    def createColorTable(self):
        """Return the class-name -> [R, G, B] table.

        Insertion order matters: it defines the integer class ids used by
        ``transform`` and ``inverse_transform``.
        """
        return {
            'unlabeled': [0, 0, 0],
            'paved-area': [128, 64, 128],
            'dirt': [130, 76, 0],
            'grass': [0, 102, 0],
            'gravel': [112, 103, 87],
            'water': [28, 42, 168],
            'rocks': [48, 41, 30],
            'pool': [0, 50, 89],
            'vegetation': [107, 142, 35],
            'roof': [70, 70, 70],
            'wall': [102, 102, 156],
            'window': [254, 228, 12],
            'door': [254, 148, 12],
            'fence': [190, 153, 153],
            'fence-pole': [153, 153, 153],
            'person': [255, 22, 96],
            'dog': [102, 51, 0],
            'car': [9, 143, 150],
            'bicycle': [119, 11, 32],
            'tree': [51, 51, 0],
            'bald-tree': [190, 250, 190],
            'ar-marker': [112, 150, 146],
            'obstacle': [2, 135, 115],
            'conflicting': [255, 0, 0],
        }

    def colorTable(self):
        """Return the class-name -> [R, G, B] table built in ``__init__``."""
        return self.clr_tab

    def clr2id(self, clr):
        """Pack a 3-element colour into one integer that is unique per colour.

        The channels are converted to Python ints first so that uint8 inputs
        cannot overflow during the multiplication.
        """
        base = self._CHANNEL_BASE
        return int(clr[0]) + int(clr[1]) * base + int(clr[2]) * base * base

    def transform(self, label, dtype=np.int32):
        """Convert a colour label image into a 2-D map of class ids.

        Args:
            label: array indexed as (row, column, channel); the first three
                channels are compared against the colour table.
            dtype: dtype of the returned map (int32 by default; uint8 is
                enough for the 24 classes in the table).

        Returns:
            Array of shape (height, width). Pixels whose colour is not in
            the colour table keep the value 0, the id of ``unlabeled``.
        """
        height, width = label.shape[:2]
        newLabel = np.zeros((height, width), dtype=dtype)
        # Widen before packing so the arithmetic cannot overflow uint8.
        channels = label.astype(np.uint64)
        base = self._CHANNEL_BASE
        id_label = (channels[:, :, 0]
                    + channels[:, :, 1] * base
                    + channels[:, :, 2] * base * base)
        for tid, key in enumerate(self.clr_tab):
            newLabel[id_label == self.id_tab[key]] = tid
        return newLabel

    def inverse_transform(self, label):
        """Convert a 2-D map of class ids back into a 3-channel uint8 image.

        Ids that do not correspond to an entry of the colour table are left
        as [0, 0, 0].
        """
        label_img = np.zeros(shape=(label.shape[0], label.shape[1], 3),
                             dtype=np.uint8)
        for tid, val in enumerate(self.clr_tab.values()):
            label_img[label == tid] = val
        return label_img
'''
prepareTrainIdFiles.py
'''
import os
import os.path as osp
import numpy as np
from colorTransformer import ColorTransformer
from PIL import Image
clrEnc = ColorTransformer()


def prepareTrainIDForDir(lbl_paths="training_set/gt/semantic/label_images/",
                         saveDirPath="training_set/gt/semantic/label_TrainId/"):
    """Convert every colour label image in a directory into a class-id mask.

    Args:
        lbl_paths: directory holding the colour label images.
        saveDirPath: directory the uint8 class-id masks are written to; it
            is created if it does not exist yet.

    Each mask is saved under the source file's base name with a ``.png``
    extension, so the ids are always stored losslessly.
    """
    os.makedirs(saveDirPath, exist_ok=True)
    for lbl_p in sorted(os.listdir(lbl_paths)):
        lbl_path = osp.join(lbl_paths, lbl_p)
        if not osp.isfile(lbl_path):
            # Sub-directories cannot be opened as images.
            continue
        print(lbl_p)
        # Force three channels so palette or RGBA files still decode to the
        # (row, column, channel) layout that ColorTransformer.transform indexes.
        with Image.open(lbl_path) as lbl_img:
            gt = np.array(lbl_img.convert("RGB"))
        trainId = clrEnc.transform(gt, dtype=np.uint8)
        trainId_path = osp.join(saveDirPath, osp.splitext(lbl_p)[0] + ".png")
        Image.fromarray(trainId).save(trainId_path)


if __name__ == '__main__':
    prepareTrainIDForDir()
使用前需要根据自己存放数据的路径调整 lbl_paths 变量(彩色标签图所在文件夹),并提前建立好用于保存掩膜图的文件夹,再将 saveDirPath 变量改为该文件夹的路径。
生成的掩模图如下:
如果想从掩膜图片中恢复出label图,可以通过以下代码:
'''
transformertest.py
'''
import os
import os.path as osp
import numpy as np
from colorTransformer import ColorTransformer
from PIL import Image
clrEnc = ColorTransformer()


def prepareTrainIDForDir(trainId_path="training_set/gt/semantic/label_TrainId/598.png",
                         color_path="598.png"):
    """Re-colour a single class-id mask and save it as a colour label image.

    Args:
        trainId_path: path of the class-id mask to read.
        color_path: path the re-coloured label image is written to.
    """
    with Image.open(trainId_path) as mask_img:
        trainId = np.array(mask_img)
    # inverse_transform maps every class id back to its colour from the table.
    color_label = clrEnc.inverse_transform(trainId)
    Image.fromarray(color_label).save(color_path)


if __name__ == '__main__':
    prepareTrainIDForDir()
使用前需要修改路径。此代码只实现了单张图的上色,可以根据需要改成批量处理的代码。例如,根据上述掩膜图重新生成标签图,可得到如下图像,与 ground truth 没有差别。
'''
dataset.py
'''
# camera-ready
import torch
import torch.utils.data
import numpy as np
import cv2
import os
class DatasetTrain(torch.utils.data.Dataset):
    """Training dataset that pairs images with their class-id label masks.

    Images are read from ``<base_dir>/images/`` and labels from
    ``<base_dir>/gt/semantic/label_TrainId/``; the label of an image shares
    its base name and has a ``.png`` extension.
    """

    # Lower-case extensions of the files in the image directory that are
    # treated as images; anything else is skipped.
    _IMG_EXTENSIONS = (".jpg", ".jpeg", ".png")

    def __init__(self, base_dir, new_img_h=512, new_img_w=1024):
        """Collect the (image, label) path pairs found under ``base_dir``.

        Args:
            base_dir: dataset root, with or without a trailing separator.
            new_img_h: height every image and label is resized to.
            new_img_w: width every image and label is resized to.
        """
        self.base_dir = base_dir
        # The trailing "" keeps the separator at the end of both attributes.
        self.img_dir = os.path.join(base_dir, "images", "")
        self.label_dir = os.path.join(
            base_dir, "gt", "semantic", "label_TrainId", "")

        self.new_img_h = new_img_h
        self.new_img_w = new_img_w

        self.examples = []
        # Sorted so the example order does not depend on the file system.
        for file_name in sorted(os.listdir(self.img_dir)):
            stem, ext = os.path.splitext(file_name)
            if ext.lower() not in self._IMG_EXTENSIONS:
                continue
            self.examples.append({
                "img_path": os.path.join(self.img_dir, file_name),
                "label_img_path": os.path.join(self.label_dir, stem + ".png"),
            })

        self.num_examples = len(self.examples)

    def __getitem__(self, index):
        """Load, resize and normalise the example at ``index``.

        Returns:
            ``(img, label_img)``: ``img`` is a float32 tensor laid out as
            (channel, height, width); ``label_img`` is the resized label
            mask as a (height, width) tensor.

        Raises:
            FileNotFoundError: if the image or its label cannot be read.
        """
        example = self.examples[index]

        img_path = example["img_path"]
        # IMREAD_UNCHANGED is the named constant for the flag value -1.
        img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)
        if img is None:
            # cv2.imread signals failure by returning None, not by raising.
            raise FileNotFoundError(
                "could not read image: {}".format(img_path))
        # NOTE(review): nearest-neighbour is kept for the image as well; an
        # area or linear filter may downsample more smoothly - confirm
        # before changing, since it alters the network input.
        img = cv2.resize(img, (self.new_img_w, self.new_img_h),
                         interpolation=cv2.INTER_NEAREST)

        label_img_path = example["label_img_path"]
        label_img = cv2.imread(label_img_path, cv2.IMREAD_GRAYSCALE)
        if label_img is None:
            raise FileNotFoundError(
                "could not read label: {}".format(label_img_path))
        # Labels are class ids, so they must never be interpolated.
        label_img = cv2.resize(label_img, (self.new_img_w, self.new_img_h),
                               interpolation=cv2.INTER_NEAREST)

        # OpenCV decodes colour images as BGR, while the mean/std below are
        # listed in RGB order, so reorder the channels before normalising.
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        # normalize the img (with the mean and std for the pretrained ResNet):
        img = img / 255.0
        img = img - np.array([0.485, 0.456, 0.406])
        img = img / np.array([0.229, 0.224, 0.225])
        img = np.transpose(img, (2, 0, 1))  # (H, W, C) -> (C, H, W)
        img = img.astype(np.float32)

        # convert numpy -> torch:
        img = torch.from_numpy(img)
        label_img = torch.from_numpy(label_img)

        return (img, label_img)

    def __len__(self):
        """Return the number of examples found at construction time."""
        return self.num_examples
if __name__ == "__main__":
    # Smoke test: iterate once over the training set in batches.
    base_dir = "training_set/"
    train_dataset = DatasetTrain(base_dir=base_dir)
    train_loader = torch.utils.data.DataLoader(dataset=train_dataset,
                                               batch_size=32, shuffle=True,
                                               num_workers=1, drop_last=True)

    # Fall back to the CPU so the script also runs without a GPU.
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    for step, (imgs, label_imgs) in enumerate(train_loader):
        # Tensors are moved directly; the deprecated Variable wrapper is
        # no longer needed.
        imgs = imgs.to(device)  # (shape: (batch_size, 3, img_h, img_w))
        # print(imgs.shape)
        label_imgs = label_imgs.long().to(device)  # (shape: (batch_size, img_h, img_w))
        # print(label_imgs.shape)
需要根据自己的路径设置base_dir变量。