def __init__(self, root, list_path, max_iters=None, crop_size=(321, 321), mean=(128, 128, 128), scale=True, mirror=True, ignore_label=255):
    """Index the image/label pairs named in ``list_path``.

    Args:
        root: Dataset directory. Images are looked up under
            ``<root>/images/<name>`` and labels under ``<root>/labels/<name>``.
        list_path: Text file holding one image file name per line. Blank
            lines are skipped.
        max_iters: When not ``None``, the id list is repeated
            ``ceil(max_iters / number_of_ids)`` times so that it contains at
            least ``max_iters`` entries.
        crop_size: Stored as ``self.crop_size``.
        mean: Stored as ``self.mean``.
        scale: Stored as ``self.scale``.
        mirror: Stored as ``self.is_mirror``.
        ignore_label: Stored as ``self.ignore_label``.
    """
    self.root = root
    self.list_path = list_path
    self.crop_size = crop_size
    self.scale = scale
    self.ignore_label = ignore_label
    self.mean = mean
    self.is_mirror = mirror

    # Read the id list inside a context manager so the file handle is
    # closed deterministically; drop blank lines so a trailing newline in
    # the list file does not create an empty, unusable id.
    with open(list_path) as list_file:
        stripped_lines = (line.strip() for line in list_file)
        self.img_ids = [img_id for img_id in stripped_lines if img_id]

    if max_iters is not None:
        repeats = int(np.ceil(float(max_iters) / len(self.img_ids)))
        self.img_ids = self.img_ids * repeats

    # Raw label id -> train id (0..18).
    self.id_to_trainid = {
        7: 0, 8: 1, 11: 2, 12: 3, 13: 4, 17: 5,
        19: 6, 20: 7, 21: 8, 22: 9, 23: 10, 24: 11, 25: 12,
        26: 13, 27: 14, 28: 15, 31: 16, 32: 17, 33: 18}

    # One record per (possibly repeated) id: image path, label path, id.
    self.files = [
        {
            "img": osp.join(self.root, "images/%s" % name),
            "label": osp.join(self.root, "labels/%s" % name),
            "name": name,
        }
        for name in self.img_ids
    ]
这里主要是把各个参数保存为属性,
同时根据最大迭代次数扩充图片列表——将最大迭代次数除以图片个数后向上取整,再把图片 id 列表重复这么多次,得到长度不小于最大迭代次数的 img_ids。
self.id_to_trainid:一张映射表,把原始标签 id 映射为 0–18 的训练类别 id(与 Cityscapes 的标签格式保持一致),从而统一两个域的标签。
# Build one record per image id: the image path, the label path and the id.
for name in self.img_ids:
    img_file = osp.join(self.root, "images/%s" % name)
    label_file = osp.join(self.root, "labels/%s" % name)
    self.files.append({
        "img": img_file,
        "label": label_file,
        "name": name
    })
下面用一个小例子看看把字典追加(append)到列表里的效果:
# Demo: a list may hold dictionaries that do not share the same keys.
image = [1, 2, 3]
lable1, name1, a = 2, 3, 1

first_record = {"img": image, "lable": lable1, "name": name1}
second_record = {"a": a}
files = [first_record, second_record]

# Index 1 is the second dictionary that was stored.
b = files[1]

for shown in (files, b):
    print(shown)
结果:
》》[{'img': [1, 2, 3], 'lable': 2, 'name': 3}, {'a': 1}]
》》{'a': 1}
def __getitem__(self, index):
    """Load sample ``index`` and return it as arrays.

    Args:
        index: Position in ``self.files``.

    Returns:
        tuple: ``(image, label, size, name)`` where

        * ``image`` is a float32 array in channel-first layout with the
          channels in BGR order and ``self.mean`` subtracted,
        * ``label`` is a float32 array of train ids; pixels whose raw id
          is not a key of ``self.id_to_trainid`` hold ``self.ignore_label``,
        * ``size`` is the shape of the resized image array before the
          channel-first transpose,
        * ``name`` is the sample's entry from the list file.
    """
    datafiles = self.files[index]

    image = Image.open(datafiles["img"]).convert('RGB')
    label = Image.open(datafiles["label"])
    name = datafiles["name"]

    # Resize both to crop_size. The label uses nearest-neighbour so that
    # no new (interpolated) label values are created.
    image = image.resize(self.crop_size, Image.BICUBIC)
    label = label.resize(self.crop_size, Image.NEAREST)

    image = np.asarray(image, np.float32)
    label = np.asarray(label, np.float32)  # convert to a float32 array

    # Re-assign labels to match the format of Cityscapes. Start from a mask
    # filled with the ignore label (the constructor's value instead of a
    # hard-coded 255) and overwrite every pixel whose raw id is mapped.
    label_copy = np.full(label.shape, self.ignore_label, dtype=np.float32)
    for raw_id, train_id in self.id_to_trainid.items():
        label_copy[label == raw_id] = train_id

    size = image.shape
    image = image[:, :, ::-1]  # change to BGR
    image -= self.mean
    image = image.transpose((2, 0, 1))  # H,W,C -> C,H,W

    return image.copy(), label_copy.copy(), np.array(size), name
Image.BICUBIC:双三次插值,是 resize 使用的重采样方式,这里用它把图像缩放到 crop_size(默认 321x321)
Image.NEAREST:最近邻插值,同样是 resize 使用的重采样方式;它不会产生新的像素值,所以用来把标签图缩放到 crop_size(默认 321x321),保证标签 id 不被插值破坏
介绍下转置操作transpose
本文链接:https://blog.csdn.net/xiuxin121/article/details/79082548
arr = np.arange(16).reshape((2, 2, 4))
arr的array是这样的
array([[[ 0, 1, 2, 3],
[ 4, 5, 6, 7]],
[[ 8, 9, 10, 11],
[12, 13, 14, 15]]])
我们对arr进行transpose转置,arr2 = arr.transpose((1,0,2)),结果是这样:
array([[[ 0, 1, 2, 3],
[ 8, 9, 10, 11]],
[[ 4, 5, 6, 7],
[12, 13, 14, 15]]])
这是怎么来的呢。
arr.transpose((1,0,2))中的1,0,2三个数表示新数组的各个轴依次取自原数组的第1、0、2轴。初始的shape是(2,2,4),也就是2个2 x 4的矩阵,三个轴的索引分别是shape的[0],[1],[2];arr.transpose((1,0,2))之后,新的shape就是(shape[1], shape[0], shape[2]),对应的值仍是(2,2,4),所以数组形状不变。
与此同时,我们矩阵的索引也发生了类似变化,如arr中的4,索引是arr[0,1,0],arr中的5是arr[0,1,1],变成arr2后,4的位置应该是在[1,0,0],5的位置变成[1,0,1],同理8的索引从[1,0,0]变成[0,1,0]。
这里针对opencv和caffe图像格式转化说明一下:
caffe存储图片使用的是:C×H×W
opencv存储图片使用的是:H×W×C
假设通过opencv处理的图像的索引为image([0],[1],[2]).故转化成caffe的格式为image([2],[0],[1]).
使用的函数便是image.transpose(2,0,1)
————————————————
版权声明:本文为CSDN博主「xiuxin121」的原创文章,遵循 CC 4.0 BY-SA 版权协议,转载请附上原文出处链接及本声明。
原文链接:https://blog.csdn.net/xiuxin121/article/details/79082548
不过这里有问题。这里的图片展示是每个batch展示首张图片呢?还是整个数据集的首张图片进行展示?希望大佬能指点下!
if __name__ == '__main__':
    # Smoke test: load the first batch and display its images as one grid.
    # NOTE(review): the __init__ shown earlier in this file takes
    # (root, list_path, ...) and has no is_transform parameter -- confirm
    # that this call matches the real GTA5DataSet signature.
    dst = GTA5DataSet("./data", is_transform=True)
    trainloader = data.DataLoader(dst, batch_size=4)
    # The loop variable is not called `data`, so the `data` name used for
    # DataLoader above is not overwritten.
    for i, batch in enumerate(trainloader):
        # Only the first two fields are used; extra fields (if the dataset
        # returns any) are ignored instead of breaking the unpacking.
        imgs, labels, *_ = batch
        if i == 0:
            # make_grid tiles every image of this batch into one picture,
            # and this branch runs for the first batch only.
            img = torchvision.utils.make_grid(imgs).numpy()
            img = np.transpose(img, (1, 2, 0))  # C,H,W -> H,W,C
            img = img[:, :, ::-1]  # reverse the channel order
            plt.imshow(img)
            plt.show()
            # Nothing is displayed after the first batch, so stop loading.
            break