1. SUN label.npy to tensor
BiSeNet 在 CamVid 数据集上输入的 label 是 RGB 图片,就是说每一个类都已经用对应的 R,G,B 替换了
然后在模型训练阶段,对于每个像素点,采用的是 one-hot 格式,一个 batch (5, 38, 480, 640) 是我们需要的格式
而 SUN 数据集的 label 是 npy 格式,每个文件 np.load 后是一个 (480, 640) 的矩阵,矩阵中每个元素是取值范围为 [0, 37] 的类别编号
所以需要先转化成 one-hot 格式
for i, sample in enumerate(train_loader):
    image = sample['image'].cuda()
    # Add a trailing axis so the class ids can be used as a scatter index,
    # e.g. (1, 530, 730) -> [1, 530, 730, 1].
    label = sample['label'].long().unsqueeze(3)
    # One-hot encode along the last axis. The batch dimension is read from the
    # label itself: the final batch of an epoch can hold fewer samples than
    # args.batch_size (unless the loader drops it), and a buffer sized with
    # args.batch_size would then no longer match `image`.
    label = torch.zeros(label.size(0), image_h, image_w, args.num_classes).scatter_(3, label, 1)
    # (N, H, W, C) -> (N, C, H, W)
    label = label.permute(0, 3, 1, 2)
    label = label.cuda()
- 完整测试 npy → torch tensor
import sys

import numpy as np
import torch

# Print arrays in full. Current NumPy rejects threshold=np.nan with a
# ValueError, so use sys.maxsize to disable summarisation instead.
np.set_printoptions(threshold=sys.maxsize, linewidth=10000)

label = np.load('label.npy').astype('int')  # class ids, [0, 37] in the notes
print(label.shape)  # (530, 730)

# Class ids are used directly as indices into the one-hot axis, so that axis
# needs max + 1 entries. (max - min + 1 is too small whenever class 0 is
# absent from this particular label file, and scatter_ then fails.)
num_class = int(np.max(label)) + 1
print(num_class)  # 38

# print(label[0])
print(label[200])

# look at a few class values
print(label[200][0])
print(label[200][48])
print(label[200][240])

# (530, 730) -> (1, 530, 730): add a leading batch axis
label = label[np.newaxis, :, :]
print(label.shape)

# (1, 530, 730) -> [1, 530, 730, 1]: trailing axis used as the scatter index
label = torch.LongTensor(label).unsqueeze(3)
print(label.shape)

# [1, 530, 730, 1] -> [1, 530, 730, num_class]: one-hot along the last axis
label = torch.zeros(label.shape[0], label.shape[1], label.shape[2], num_class).scatter_(3, label, 1).long()
print(label.shape)

# the same pixels as above, now as one-hot vectors
print(label[0][200][0])
print(label[0][200][48])
print(label[0][200][240])

# [1, 530, 730, num_class] -> [1, num_class, 530, 730]
label = label.permute(0, 3, 1, 2)
print(label.shape)
- 关于 one-hot 向量的生成
label = torch.zeros(2, 2, 3, num_class).scatter_(3, label, 1)
import numpy as np
import torch

# Toy class-index map: two identical "images" of 2 x 3 pixels, ids 1..6.
rows = [[1, 2, 3],
        [4, 5, 6]]
label = torch.LongTensor(np.array([rows, rows]))
print(label.shape)

# Trailing axis so the ids can act as the index argument of scatter_.
label = label.unsqueeze(3)
print(label.shape)

num_class = 7
# Zero buffer of shape (2, 2, 3, num_class); scatter_ writes a 1 at each id.
one_hot = torch.zeros(*label.shape[:3], num_class)
label = one_hot.scatter_(3, label, 1)
print(label)
print(label.shape)  # ([2, 2, 3, 7])

# (N, H, W, C) -> (N, C, H, W)
label = label.permute(0, 3, 1, 2)
print(label.shape)
2. 将 SUN label 写成 CamVid class_dict.csv 格式文件
write_csv.py
import csv

# RGB colour of every class id; index 0 is the background class.
label_colors = [
    (0, 0, 0),  # 0=background
    (148, 65, 137), (255, 116, 69), (86, 156, 137), (202, 179, 158), (155, 99, 235),
    (161, 107, 108), (133, 160, 103), (76, 152, 126), (84, 62, 35), (44, 80, 130),
    (31, 184, 157), (101, 144, 77), (23, 197, 62), (141, 168, 145), (142, 151, 136),
    (115, 201, 77), (100, 216, 255), (57, 156, 36), (88, 108, 129), (105, 129, 112),
    (42, 137, 126), (155, 108, 249), (166, 148, 143), (81, 91, 87), (100, 124, 51),
    (73, 131, 121), (157, 210, 220), (134, 181, 60), (221, 223, 147), (123, 108, 131),
    (161, 66, 179), (163, 221, 160), (31, 146, 98), (99, 121, 30), (49, 89, 240),
    (116, 108, 9), (161, 176, 169), (80, 29, 135), (177, 105, 197), (139, 110, 246),
]

# Class name of every class id, aligned index-by-index with label_colors.
label_names = [
    'background',
    'wall', 'floor', 'cabinet', 'bed', 'chair',
    'sofa', 'table', 'door', 'window', 'bookshelf',
    'picture', 'counter', 'blinds', 'desk', 'shelves',
    'curtain', 'dresser', 'pillow', 'mirror', 'floor_mat',
    'clothes', 'ceiling', 'books', 'fridge', 'tv',
    'paper', 'towel', 'shower_curtain', 'box', 'whiteboard',
    'person', 'night_stand', 'toilet', 'sink', 'lamp',
    'bathtub', 'bag', '38', '39', '40',
]

# Write a header plus one "name, r, g, b" row for class ids 1..37; the
# background entry (id 0) and the placeholder ids 38..40 are left out.
# The context manager closes the file even if a write fails.
with open('seg37_class_dict.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['name', 'r', 'g', 'b'])
    for name, (r, g, b) in zip(label_names[1:38], label_colors[1:38]):
        writer.writerow([name, r, g, b])
3. 将 SUN 数据集的图片 制作成 CamVid 格式
在已经有了图片路径的情况下,只需要 shutil.copyfile
就行了,其中
- train:5285
- test:5050
make_sun.py
import os
import shutil

import cv2
import numpy as np


def _read_lines(list_file):
    """Return the lines of ``list_file`` with their line terminators removed."""
    with open(list_file, 'r') as f:
        return f.read().splitlines()


def _copy_split(sources, out_dirs, extensions):
    """Copy sample ``i`` of every modality to ``<out_dir><i><extension>``.

    ``sources`` holds one list of source paths per modality; ``out_dirs`` and
    ``extensions`` are aligned with it. Output directories are created when
    missing, because ``shutil.copyfile`` does not create them.

    Raises:
        ValueError: if the path lists do not all have the same length.
    """
    sizes = [len(paths) for paths in sources]
    if len(set(sizes)) != 1:
        raise ValueError('path lists differ in length: %s' % sizes)
    for out_dir in out_dirs:
        os.makedirs(out_dir, exist_ok=True)
    for i, sample in enumerate(zip(*sources)):
        for src, out_dir, ext in zip(sample, out_dirs, extensions):
            shutil.copyfile(src=src, dst=out_dir + str(i) + ext)
        print(i)


# train
img_dir_train_file = '../data/img_dir_train.txt'
depth_dir_train_file = '../data/depth_dir_train.txt'
label_dir_train_file = '../data/label_train.txt'

# test
img_dir_test_file = '../data/img_dir_test.txt'
depth_dir_test_file = '../data/depth_dir_test.txt'
label_dir_test_file = '../data/label_test.txt'

# One source path per line in each list file.
img_dir_train = _read_lines(img_dir_train_file)
depth_dir_train = _read_lines(depth_dir_train_file)
label_dir_train = _read_lines(label_dir_train_file)

img_dir_test = _read_lines(img_dir_test_file)
depth_dir_test = _read_lines(depth_dir_test_file)
label_dir_test = _read_lines(label_dir_test_file)

# out train path
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'

# out test path
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'

# File extension per modality, in the order image / depth / label.
extensions = ('.jpg', '.png', '.npy')

_copy_split(
    (img_dir_train, depth_dir_train, label_dir_train),
    (train_img_path, train_depth_path, train_label_path),
    extensions,
)
_copy_split(
    (img_dir_test, depth_dir_test, label_dir_test),
    (test_img_path, test_depth_path, test_label_path),
    extensions,
)
测试,从新保存的路径下,随机取图片展示
test_sun.py
import numpy as np
import matplotlib.pyplot as plt
import cv2
# RGB colour for every class id (41 entries); the list index is the class id.
label_colors = [(0, 0, 0), # 0=background
(148, 65, 137), (255, 116, 69), (86, 156, 137), (202, 179, 158), (155, 99, 235),
(161, 107, 108), (133, 160, 103), (76, 152, 126), (84, 62, 35), (44, 80, 130),
(31, 184, 157), (101, 144, 77), (23, 197, 62), (141, 168, 145), (142, 151, 136),
(115, 201, 77), (100, 216, 255), (57, 156, 36), (88, 108, 129), (105, 129, 112),
(42, 137, 126), (155, 108, 249), (166, 148, 143), (81, 91, 87), (100, 124, 51),
(73, 131, 121), (157, 210, 220), (134, 181, 60), (221, 223, 147), (123, 108, 131),
(161, 66, 179), (163, 221, 160), (31, 146, 98), (99, 121, 30), (49, 89, 240),
(116, 108, 9), (161, 176, 169), (80, 29, 135), (177, 105, 197), (139, 110, 246)]
# Class name for every class id, aligned index-by-index with label_colors.
# The last three entries ('38', '39', '40') are numeric placeholders.
label_names = ['background',
'wall', 'floor', 'cabinet', 'bed', 'chair',
'sofa', 'table', 'door', 'window', 'bookshelf',
'picture', 'counter', 'blinds', 'desk', 'shelves',
'curtain', 'dresser', 'pillow', 'mirror', 'floor_mat',
'clothes', 'ceiling', 'books', 'fridge', 'tv',
'paper', 'towel', 'shower_curtain', 'box', 'whiteboard',
'person', 'night_stand', 'toilet', 'sink', 'lamp',
'bathtub', 'bag', '38', '39', '40']
# Output folders of the train split (trailing slash required: file names are
# appended by plain string concatenation).
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'
# Output folders of the test split.
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'
# Number of samples in each split; used as the exclusive upper bound when
# drawing a random sample index.
num_train = 5285
num_test = 5050
def _read_or_raise(path):
    """Read an image with OpenCV and fail loudly when it cannot be decoded."""
    img = cv2.imread(path)
    if img is None:  # cv2.imread reports failure by returning None
        raise FileNotFoundError('cannot read image: ' + path)
    return img


def random_image(test=False, idx=None):
    """Show the image, depth map and coloured label of one sample.

    Args:
        test: pick the sample from the test split instead of the train split.
        idx: index of the sample to show. When None (the default) an index is
            drawn at random from the chosen split. Pass ``idx=2059`` to
            reproduce the sample that used to be hard-coded here.

    Side effects:
        Prints the index and the classes present in the label, opens three
        matplotlib windows and writes the coloured label to ``test1.png``.

    Raises:
        FileNotFoundError: if the image or depth file cannot be read.
    """
    if test:
        num_samples = num_test
        img_path, depth_path, label_path = test_img_path, test_depth_path, test_label_path
    else:
        num_samples = num_train
        img_path, depth_path, label_path = train_img_path, train_depth_path, train_label_path

    if idx is None:
        idx = np.random.randint(0, num_samples)
    print(idx)

    # OpenCV decodes to BGR, matplotlib expects RGB.
    img = cv2.cvtColor(_read_or_raise(img_path + str(idx) + '.jpg'), cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.show()

    depth = _read_or_raise(depth_path + str(idx) + '.png')
    plt.imshow(depth)
    plt.show()

    label = np.load(label_path + str(idx) + '.npy').astype('int')
    # List the classes that occur in this label, in ascending id order.
    for i in np.unique(label).tolist():
        print('%2d %s' % (i, label_names[i]))

    # Map every class id to its colour with one table lookup: (H, W) -> (H, W, 3), RGB.
    colored = np.asarray(label_colors, dtype='uint8')[label]
    plt.imshow(colored)
    plt.show()

    # cv2.imwrite expects BGR channel order.
    colored = cv2.cvtColor(colored, cv2.COLOR_RGB2BGR)
    cv2.imwrite('test1.png', colored)


if __name__ == '__main__':
    random_image(test=True)
后来又将 label npy 文件转化成了 img 重新保存,更直观
这样做了之后,文件夹的名字也改了
- label.npy 对应 label_npy
- label.png 对应 label
colored_label = np.vectorize(lambda x: label_colors[int(x)]) # maps a class index to its RGB color (looked up in label_colors)
# RGB colour for every class id; the list index is the class id.
label_colors = [
    (0, 0, 0),  # 0=background
    (148, 65, 137), (255, 116, 69), (86, 156, 137), (202, 179, 158), (155, 99, 235),
    (161, 107, 108), (133, 160, 103), (76, 152, 126), (84, 62, 35), (44, 80, 130),
    (31, 184, 157), (101, 144, 77), (23, 197, 62), (141, 168, 145), (142, 151, 136),
    (115, 201, 77), (100, 216, 255), (57, 156, 36), (88, 108, 129), (105, 129, 112),
    (42, 137, 126), (155, 108, 249), (166, 148, 143), (81, 91, 87), (100, 124, 51),
    (73, 131, 121), (157, 210, 220), (134, 181, 60), (221, 223, 147), (123, 108, 131),
    (161, 66, 179), (163, 221, 160), (31, 146, 98), (99, 121, 30), (49, 89, 240),
    (116, 108, 9), (161, 176, 169), (80, 29, 135), (177, 105, 197), (139, 110, 246),
]

# Element-wise class-id -> colour function. Kept for existing users of the
# name; the conversion below uses an array lookup instead, which avoids one
# Python call per pixel.
colored_label = np.vectorize(lambda x: label_colors[int(x)])


def _save_colored_labels(npy_dir, png_dir, count):
    """Convert ``<npy_dir><i>.npy`` to a colour image ``<png_dir><i>.png``.

    Runs for ``i`` in ``range(count)`` and prints each index when done.

    Raises:
        IOError: if OpenCV reports that an image could not be written.
    """
    lut = np.asarray(label_colors, dtype='uint8')  # (num_colors, 3), RGB
    for i in range(count):
        label = np.load(npy_dir + str(i) + '.npy').astype('int')
        colored = lut[label]  # (H, W) -> (H, W, 3), RGB
        # cv2.imwrite expects BGR channel order.
        colored = cv2.cvtColor(colored, cv2.COLOR_RGB2BGR)
        png_file = png_dir + str(i) + '.png'
        # cv2.imwrite signals failure through its return value only.
        if not cv2.imwrite(png_file, colored):
            raise IOError('cannot write image: ' + png_file)
        print(i)


_save_colored_labels(train_label_path, train_label_img_path, len(label_dir_train))
_save_colored_labels(test_label_path, test_label_img_path, len(label_dir_test))
4. 传统的 validset 制作方式
在机器学习算法中,我们常常将原始数据集分为三部分:training data、validation data、testing data。
- training data:计算梯度更新权重
- validation data:用于调整超参数、监控并避免过拟合(过拟合的表现是:在 public set 上效果好,而在 private set 上效果差)
- testing data:给出一个 accuracy 以判断网络的好坏
在训练过程中,通常用 validation data 来确定一些超参数,比如根据 validation data 上的 accuracy 来确定 early stopping 的 epoch 大小、根据 validation data 确定learning rate 等等。
那为啥不直接在 testing data 上做这些呢?因为如果在 testing data 做这些,那么随着训练的进行,我们的网络实际上就是在一点一点地overfitting 我们的 testing data,导致最后得到的 testing accuracy 没有任何参考意义。
根据 validation 上的 accuracy 选择模型,然后在 test 上验证准确度。
传统制作方式:给定的 train、test 都是已经 label 好的,然后设定一个比例,比如 validset 中 0.6 的图片取自 trainset,其余 0.4 取自 testset
make_val.py
import numpy as np
import shutil
import os
"""
random create num_val imgs with train ratio
"""
# train path
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'
train_label_npy_path = '/temp_disk/xs/sun/train/label_npy/'

# test path
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'
test_label_npy_path = '/temp_disk/xs/sun/test/label_npy/'

# val path
val_path = '/temp_disk/xs/sun/val'
val_img_path = '/temp_disk/xs/sun/val/image/'
val_depth_path = '/temp_disk/xs/sun/val/depth/'
val_label_path = '/temp_disk/xs/sun/val/label/'
val_label_npy_path = '/temp_disk/xs/sun/val/label_npy/'

# Folders and file extensions per modality, all in the same order:
# image / depth / colour label / npy label.
train_dirs = (train_img_path, train_depth_path, train_label_path, train_label_npy_path)
test_dirs = (test_img_path, test_depth_path, test_label_path, test_label_npy_path)
val_dirs = (val_img_path, val_depth_path, val_label_path, val_label_npy_path)
extensions = ('.jpg', '.png', '.png', '.npy')


def _copy_random_samples(src_dirs, population, count, tag, first_row):
    """Copy ``count`` distinct random samples from ``src_dirs`` into the val folders.

    Indices are drawn without replacement from ``range(population)``, so the
    draw is duplicate-free by construction and raises ValueError instead of
    looping forever when ``count`` exceeds ``population``. ``tag`` is inserted
    between the index and the extension of every copied file; ``first_row``
    is the first running number printed next to each index.

    Returns:
        The list of chosen indices, in copy order.
    """
    chosen = np.random.choice(population, size=count, replace=False).tolist()
    for row, idx in enumerate(chosen, start=first_row):
        for src_dir, dst_dir, ext in zip(src_dirs, val_dirs, extensions):
            shutil.copyfile(src=src_dir + str(idx) + ext, dst=dst_dir + str(idx) + tag + ext)
        print(row, idx)
    return chosen


# Start from an empty val folder on every run.
if os.path.exists(val_path):
    shutil.rmtree(val_path)
for val_dir in val_dirs:
    os.makedirs(val_dir)

num_val = 1000
ratio = 0.6  # share of the val set that is taken from the train split
num_val_from_train = int(num_val * ratio)

num_train = 5285
num_test = 5050

print('train')
train_idx = _copy_random_samples(train_dirs, num_train, num_val_from_train, '', 0)

print('test')
# Samples taken from the test split get a 't' before the extension so they
# cannot collide with a train sample that has the same index.
test_idx = _copy_random_samples(test_dirs, num_test, num_val - num_val_from_train, 't', num_val_from_train)
后来想扩大 trainset,就把 test 中已经标记好的图片也都移了过去
mv_test2_train.py
import shutil
# Destination folders (train split).
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'
train_label_npy_path = '/temp_disk/xs/sun/train/label_npy/'

# Source folders (test split).
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'
test_label_npy_path = '/temp_disk/xs/sun/test/label_npy/'

num_train = 5285
num_test = 5050

# (source folder, destination folder, extension) for every modality, in the
# order the files are moved.
moves = (
    (test_img_path, train_img_path, '.jpg'),
    (test_depth_path, train_depth_path, '.png'),
    (test_label_path, train_label_path, '.png'),
    (test_label_npy_path, train_label_npy_path, '.npy'),
)

# Test sample i becomes train sample num_train + i, i.e. it is appended
# after the existing train samples.
for i in range(num_test):
    new_idx = num_train + i
    for src_dir, dst_dir, ext in moves:
        shutil.move(src=src_dir + str(i) + ext, dst=dst_dir + str(new_idx) + ext)
    print(new_idx)
但这样就得重新制作 validset 了。
5. 每次验证模型时 随机从 trainset 取出 num_val 张图片作为 validset
mk_random_val.py
import numpy as np
import shutil
import os
"""
randomly get img from train set to make val set
"""
def mk_random_val(num_val, train_root='/temp_disk/xs/sun/train',
                  val_root='/temp_disk/xs/sun/val', num_train=10335):
    """Rebuild the validation set from ``num_val`` random train samples.

    ``val_root`` is deleted if it exists and recreated with ``image`` and
    ``label`` sub-folders. ``num_val`` distinct indices are then drawn from
    ``range(num_train)`` and ``<idx>.jpg`` / ``<idx>.png`` are copied from the
    matching sub-folders of ``train_root``. Depth maps and npy labels are not
    copied.

    Args:
        num_val: number of samples to copy.
        train_root: folder holding the train ``image`` and ``label`` folders.
        val_root: folder to (re)create for the validation set.
        num_train: number of train samples; indices run from 0 to num_train - 1.

    Raises:
        ValueError: if ``num_val`` is negative or larger than ``num_train``.
    """
    if not 0 <= num_val <= num_train:
        raise ValueError('num_val must be in [0, %d], got %d' % (num_train, num_val))

    # (sub-folder, file extension) of every modality that is copied
    modalities = (('image', '.jpg'), ('label', '.png'))

    if os.path.exists(val_root):
        shutil.rmtree(val_root)
    for sub_dir, _ in modalities:
        os.makedirs(os.path.join(val_root, sub_dir))

    # Sampling without replacement yields distinct indices directly, instead
    # of re-drawing until an unused index turns up.
    chosen = np.random.choice(num_train, size=num_val, replace=False)
    for idx in chosen.tolist():
        for sub_dir, ext in modalities:
            file_name = str(idx) + ext
            shutil.copyfile(src=os.path.join(train_root, sub_dir, file_name),
                            dst=os.path.join(val_root, sub_dir, file_name))
6. 其他功能
- 下载的 CamVid Seq1 里面有原始图片和添加了后缀 _L 的分割 label 图,分别拼接成 avi 视频
import cv2
import os
img_dir = r'C:\Users\Shuai\Desktop\CamSeq01'
video_w, video_h = 960, 720
fourcc = cv2.VideoWriter_fourcc(*'XVID')

# One video for the original frames, one for the "_L" label frames.
# NOTE(review): frames are written as read, without resizing — presumably the
# writers expect every frame to be video_w x video_h; confirm for other data.
vw1 = cv2.VideoWriter('cam.avi', fourcc, 20, (video_w, video_h))
vw2 = cv2.VideoWriter('cam_label.avi', fourcc, 20, (video_w, video_h))

try:
    # os.listdir returns names in arbitrary order; sort them so the frames are
    # appended in file-name order.
    for img_name in sorted(os.listdir(img_dir)):
        img = cv2.imread(os.path.join(img_dir, img_name))
        if img is None:  # not a readable image (e.g. a stray text file)
            print('skip ' + img_name)
            continue
        if '_L' in img_name:  # label
            vw2.write(img)
        else:  # original image
            vw1.write(img)
        print(img_name)
finally:
    # Release both writers even when a frame fails, so the files are finalised.
    vw1.release()
    vw2.release()
- cv2 读取的图片用 plt 显示,要先进行 BGR2RGB
import cv2
import matplotlib.pyplot as plt
# img_path.txt lists one image path per line.
with open('img_path.txt', 'r') as f:
    img_path = f.read().splitlines()

for p in img_path:
    # OpenCV decodes to BGR while matplotlib expects RGB, so convert the
    # channel order before showing the image.
    img = cv2.cvtColor(cv2.imread(p), cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.show()
    print(p)
- 制作测试集,将SUNRGBDTest 图片移到同一位置,并按数目重命名,目录结构如下:
import glob
import os
"""
1. 创建 txt 图片路径 文件
"""
cnt = 0
pattern = r'E:\Dataset\SUN-RGBD\SUNRGBDLSUNTest\SUNRGBDv2Test\*\*\image\*.jpg'

# The context manager closes (and therefore flushes) both list files, even if
# the loop fails half-way.
with open('test_img.txt', 'w') as f_img, open('test_depth.txt', 'w') as f_depth:
    # glob gives no ordering guarantee; sort so that the numbering is
    # reproducible between runs.
    for img_path in sorted(glob.glob(pattern)):
        # The depth path is derived by text substitution on the image path.
        # NOTE(review): this replaces every 'image' / 'jpg' occurrence in the
        # path, not only the folder name and the extension — confirm that no
        # other path component contains them.
        depth_path = img_path.replace('image', 'depth').replace('jpg', 'png')
        # One path per line, terminated by a real newline (a bare '\r' is not
        # a line break for most tools).
        f_img.write(img_path + '\n')
        f_depth.write(depth_path + '\n')
        print(cnt, img_path)
        print(cnt, depth_path)
        cnt += 1
"""
2. 移动图片,使用 shutil copyfile 即可
"""