BiSeNet on Sun-RGBD utils

1. SUN label.npy to tensor

BiSeNet 在 CamVid 数据集上输入的 label 是 RGB 图片，就是说每一个类都已经用对应的 R,G,B 替换了
然后在模型训练阶段，对于每个像素点，采用的是 one-hot 格式，一个 batch (5, 38, 480, 640) 是我们需要的格式

而 SUN 数据集的 label 是 npy 格式，每个文件 np.load 后是一个 (480, 640) 的矩阵，矩阵中每个元素是取值范围为 [0, 37] 的类别索引
所以需要先转化成 one-hot 格式

for i, sample in enumerate(train_loader):
    image = sample['image'].cuda()
    # Class-index map from the loader; add a trailing axis so it can be used
    # as the scatter index: (B, H, W) -> (B, H, W, 1).
    label = sample['label'].long().unsqueeze(3)
    # Size the one-hot buffer from the label itself instead of
    # args.batch_size / image_h / image_w: with a smaller final batch the old
    # buffer kept extra all-zero rows that no longer lined up with `image`.
    batch, height, width, _ = label.shape
    label = torch.zeros(batch, height, width, args.num_classes).scatter_(3, label, 1)  # one-hot
    label = label.permute(0, 3, 1, 2)  # (B, H, W, C) -> (B, C, H, W)
    label = label.cuda()

完整测试 npy → torch tensor

import numpy as np
import torch

# Print arrays in full instead of the truncated "..." summary.
# threshold=np.nan is rejected by current NumPy ("threshold must be non-NAN"),
# so use an unbounded numeric threshold instead.
np.set_printoptions(threshold=np.inf, linewidth=10000)

label = np.load('label.npy').astype('int')  # class indices, expected in [0, 37]

print(label.shape)  # (530, 730)

# Fixed class count of the label set. Deriving it from max - min + 1 breaks
# as soon as one image lacks class 0 or the highest class: the scatter target
# becomes too narrow and scatter_ fails on an out-of-range index.
num_class = 38
label_min, label_max = int(np.min(label)), int(np.max(label))
if label_min < 0 or label_max >= num_class:
    raise ValueError('label values must lie in [0, %d], got [%d, %d]'
                     % (num_class - 1, label_min, label_max))
print(num_class)  # 38

# print(label[0])
print(label[200])
# see some class value
print(label[200][0])
print(label[200][48])
print(label[200][240])

# (530, 730) -> (1, 530, 730): add a leading batch axis
label = label[np.newaxis, :, :]
print(label.shape)

# (1, 530, 730) -> [1, 530, 730, 1]: trailing axis holds the scatter index
label = torch.LongTensor(label).unsqueeze(3)
print(label.shape)

# [1, 530, 730, 1] -> [1, 530, 730, 38]: write 1 at each pixel's class index
label = torch.zeros(label.shape[0], label.shape[1], label.shape[2], num_class).scatter_(3, label, 1).long()
print(label.shape)

# see the class value -> one-hot tensor
print(label[0][200][0])
print(label[0][200][48])
print(label[0][200][240])

# [1, 530, 730, 38] -> [1, 38, 530, 730]: channels-first layout
label = label.permute(0, 3, 1, 2)

print(label.shape)

关于 one-hot 向量的生成

# one-hot: start from zeros and write 1 along dim 3 at the position given by each class index in `label`
label = torch.zeros(2, 2, 3, num_class).scatter_(3, label, 1)

import numpy as np
import torch

# Toy class-index map: two 2 x 3 "images" with class ids 1..6.
class_ids = np.array([
    [[1, 2, 3], [4, 5, 6]],
    [[1, 2, 3], [4, 5, 6]],
])

label = torch.LongTensor(class_ids)
print(label.shape)

# Trailing singleton axis so the ids can act as the scatter index.
label = label.unsqueeze(-1)
print(label.shape)

num_class = 7

# Start from zeros and write a 1 at each pixel's class position.
one_hot = torch.zeros(2, 2, 3, num_class)
one_hot.scatter_(3, label, 1)
label = one_hot

print(label)
print(label.shape)  # ([2, 2, 3, 7])

# Channels-last -> channels-first.
label = label.permute(0, 3, 1, 2)
print(label.shape)

2. 将 SUN label 写成 CamVid class_dict.csv 格式文件

write_csv.py

import csv

# RGB color for each class index; entry i belongs to label_names[i].
label_colors = [(0, 0, 0),  # 0=background
                (148, 65, 137), (255, 116, 69), (86, 156, 137), (202, 179, 158), (155, 99, 235),
                (161, 107, 108), (133, 160, 103), (76, 152, 126), (84, 62, 35), (44, 80, 130),
                (31, 184, 157), (101, 144, 77), (23, 197, 62), (141, 168, 145), (142, 151, 136),
                (115, 201, 77), (100, 216, 255), (57, 156, 36), (88, 108, 129), (105, 129, 112),
                (42, 137, 126), (155, 108, 249), (166, 148, 143), (81, 91, 87), (100, 124, 51),
                (73, 131, 121), (157, 210, 220), (134, 181, 60), (221, 223, 147), (123, 108, 131),
                (161, 66, 179), (163, 221, 160), (31, 146, 98), (99, 121, 30), (49, 89, 240),
                (116, 108, 9), (161, 176, 169), (80, 29, 135), (177, 105, 197), (139, 110, 246)]

label_names = ['background',
               'wall', 'floor', 'cabinet', 'bed', 'chair',
               'sofa', 'table', 'door', 'window', 'bookshelf',
               'picture', 'counter', 'blinds', 'desk', 'shelves',
               'curtain', 'dresser', 'pillow', 'mirror', 'floor_mat',
               'clothes', 'ceiling', 'books', 'fridge', 'tv',
               'paper', 'towel', 'shower_curtain', 'box', 'whiteboard',
               'person', 'night_stand', 'toilet', 'sink', 'lamp',
               'bathtub', 'bag', '38', '39', '40']

# Write classes 1..37 (background and the placeholder entries 38-40 are left
# out) as "name,r,g,b" rows. The context manager closes the file even if a
# write fails part-way through.
with open('seg37_class_dict.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['name', 'r', 'g', 'b'])
    for name, color in zip(label_names[1:38], label_colors[1:38]):
        writer.writerow([name, *color])

seg37_class_dict.csv

3. 将 SUN 数据集的图片制作成 CamVid 格式

在已经有了图片路径的情况下，只需要 shutil.copyfile 就行了，其中

train：5285
test：5050

make_sun.py

import numpy as np
import shutil
import cv2

def _read_lines(path):
    """Return the lines of the text file at `path` without line terminators."""
    with open(path, 'r') as f:
        return f.read().splitlines()


# Text files listing the source paths of the train split, one path per line.
img_dir_train_file = '../data/img_dir_train.txt'
depth_dir_train_file = '../data/depth_dir_train.txt'
label_dir_train_file = '../data/label_train.txt'

# Same for the test split.
img_dir_test_file = '../data/img_dir_test.txt'
depth_dir_test_file = '../data/depth_dir_test.txt'
label_dir_test_file = '../data/label_test.txt'

img_dir_train = _read_lines(img_dir_train_file)
depth_dir_train = _read_lines(depth_dir_train_file)
label_dir_train = _read_lines(label_dir_train_file)

img_dir_test = _read_lines(img_dir_test_file)
depth_dir_test = _read_lines(depth_dir_test_file)
label_dir_test = _read_lines(label_dir_test_file)

# Output directories for the train split.
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'

# Output directories for the test split.
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'

# Copy every sample to "<index>.<ext>", numbering by position in the image list.
for i, img_src in enumerate(img_dir_train):
    stem = str(i)
    shutil.copyfile(src=img_src, dst=train_img_path + stem + '.jpg')
    shutil.copyfile(src=depth_dir_train[i], dst=train_depth_path + stem + '.png')
    shutil.copyfile(src=label_dir_train[i], dst=train_label_path + stem + '.npy')
    print(i)

for i, img_src in enumerate(img_dir_test):
    stem = str(i)
    shutil.copyfile(src=img_src, dst=test_img_path + stem + '.jpg')
    shutil.copyfile(src=depth_dir_test[i], dst=test_depth_path + stem + '.png')
    shutil.copyfile(src=label_dir_test[i], dst=test_label_path + stem + '.npy')
    print(i)

测试，从新保存的路径下，随机取图片展示

test_sun.py

import numpy as np
import matplotlib.pyplot as plt
import cv2

# RGB color per class index; entry i is the display color of label_names[i].
label_colors = [(0, 0, 0),  # 0=background
                (148, 65, 137), (255, 116, 69), (86, 156, 137), (202, 179, 158), (155, 99, 235),
                (161, 107, 108), (133, 160, 103), (76, 152, 126), (84, 62, 35), (44, 80, 130),
                (31, 184, 157), (101, 144, 77), (23, 197, 62), (141, 168, 145), (142, 151, 136),
                (115, 201, 77), (100, 216, 255), (57, 156, 36), (88, 108, 129), (105, 129, 112),
                (42, 137, 126), (155, 108, 249), (166, 148, 143), (81, 91, 87), (100, 124, 51),
                (73, 131, 121), (157, 210, 220), (134, 181, 60), (221, 223, 147), (123, 108, 131),
                (161, 66, 179), (163, 221, 160), (31, 146, 98), (99, 121, 30), (49, 89, 240),
                (116, 108, 9), (161, 176, 169), (80, 29, 135), (177, 105, 197), (139, 110, 246)]
# Class name per class index (same ordering as label_colors); entries
# '38'-'40' are unnamed placeholders.
label_names = ['background',
               'wall', 'floor', 'cabinet', 'bed', 'chair',
               'sofa', 'table', 'door', 'window', 'bookshelf',
               'picture', 'counter', 'blinds', 'desk', 'shelves',
               'curtain', 'dresser', 'pillow', 'mirror', 'floor_mat',
               'clothes', 'ceiling', 'books', 'fridge', 'tv',
               'paper', 'towel', 'shower_curtain', 'box', 'whiteboard',
               'person', 'night_stand', 'toilet', 'sink', 'lamp',
               'bathtub', 'bag', '38', '39', '40']

# out train path (directories holding "<index>.jpg" / ".png" / ".npy" files)
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'

# out test path
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'

# Number of samples in each split; used as the exclusive upper bound when
# drawing a random sample index.
num_train = 5285
num_test = 5050


def _read_image(path):
    """Read an image with OpenCV, raising if the file is missing or unreadable."""
    image = cv2.imread(path)
    if image is None:  # cv2.imread reports failure by returning None, not by raising
        raise FileNotFoundError('cannot read image: ' + path)
    return image


def random_image(test=False, idx=None):
    """Show one sample (RGB image, depth map, colorized label) and save the label as 'test1.png'.

    Args:
        test: pick from the test split when True, otherwise from the train split.
        idx: index of the sample to show. When None (the default) a random
            index in [0, num_test) or [0, num_train) is drawn. This replaces
            the former hard-coded ``idx = 2059`` debugging override, which made
            every call show the same sample.

    Raises:
        FileNotFoundError: if the image or depth file cannot be read.
    """
    if test:
        count = num_test
        img_path, depth_path, label_path = test_img_path, test_depth_path, test_label_path
    else:
        count = num_train
        img_path, depth_path, label_path = train_img_path, train_depth_path, train_label_path

    if idx is None:
        idx = np.random.randint(0, count)
    print(idx)

    # OpenCV loads BGR; matplotlib expects RGB.
    img = cv2.cvtColor(_read_image(img_path + str(idx) + '.jpg'), cv2.COLOR_BGR2RGB)
    plt.imshow(img)
    plt.show()

    # NOTE(review): read with OpenCV's default flags, as before; if the depth
    # PNGs are 16-bit this loses precision - confirm whether that matters here.
    depth = _read_image(depth_path + str(idx) + '.png')
    plt.imshow(depth)
    plt.show()

    label = np.load(label_path + str(idx) + '.npy').astype('int')
    # List the classes present in this label map.
    for class_id in np.unique(label).tolist():
        print('%2d %s' % (class_id, label_names[class_id]))

    # Palette lookup: (H, W) class ids -> (H, W, 3) RGB, in one vectorized step.
    palette = np.asarray(label_colors, dtype='uint8')
    colored = palette[label]
    plt.imshow(colored)
    plt.show()
    colored = cv2.cvtColor(colored, cv2.COLOR_RGB2BGR)  # imwrite expects BGR
    cv2.imwrite('test1.png', colored)


if __name__ == '__main__':
    random_image(test=True)

后来又将 label npy 文件转化成了 img 重新保存，更直观
这样做了之后，文件夹的名字也改了

label.npy 对应 label_npy
label.png 对应 label

colored_label = np.vectorize(lambda x: label_colors[int(x)])  # maps each class index to its RGB color tuple from label_colors

# RGB color per class index.
label_colors = [(0, 0, 0),  # 0=background
                (148, 65, 137), (255, 116, 69), (86, 156, 137), (202, 179, 158), (155, 99, 235),
                (161, 107, 108), (133, 160, 103), (76, 152, 126), (84, 62, 35), (44, 80, 130),
                (31, 184, 157), (101, 144, 77), (23, 197, 62), (141, 168, 145), (142, 151, 136),
                (115, 201, 77), (100, 216, 255), (57, 156, 36), (88, 108, 129), (105, 129, 112),
                (42, 137, 126), (155, 108, 249), (166, 148, 143), (81, 91, 87), (100, 124, 51),
                (73, 131, 121), (157, 210, 220), (134, 181, 60), (221, 223, 147), (123, 108, 131),
                (161, 66, 179), (163, 221, 160), (31, 146, 98), (99, 121, 30), (49, 89, 240),
                (116, 108, 9), (161, 176, 169), (80, 29, 135), (177, 105, 197), (139, 110, 246)]

# Per-pixel mapper from class index to color tuple. Kept so existing uses of
# the name still work; the conversion below uses the faster palette lookup.
colored_label = np.vectorize(lambda x: label_colors[int(x)])

# Same table as a uint8 array so a whole label map can be colorized by indexing.
label_palette = np.asarray(label_colors, dtype='uint8')


def _save_colored_labels(npy_dir, png_dir, count):
    """Convert '<i>.npy' class-index maps in `npy_dir` to color PNGs in `png_dir` for i in [0, count)."""
    for i in range(count):
        label = np.load(npy_dir + str(i) + '.npy').astype('int')
        colored = label_palette[label]  # (H, W) class ids -> (H, W, 3) RGB
        colored = cv2.cvtColor(colored, cv2.COLOR_RGB2BGR)  # imwrite expects BGR
        out_path = png_dir + str(i) + '.png'
        # cv2.imwrite returns False instead of raising when it cannot write
        # (for example when the output directory does not exist).
        if not cv2.imwrite(out_path, colored):
            raise IOError('failed to write ' + out_path)
        print(i)


_save_colored_labels(train_label_path, train_label_img_path, len(label_dir_train))
_save_colored_labels(test_label_path, test_label_img_path, len(label_dir_test))

4. 传统的 validset 制作方式

在机器学习算法中，我们常常将原始数据集分为三部分：training data、validation data、testing data。

training data：计算梯度更新权重
validation data：用于监控并避免过拟合（过拟合即模型在 public set 上效果好，而在 private set 上效果差）
testing data：给出一个 accuracy 以判断网络的好坏

在训练过程中，通常用 validation data 来确定一些超参数，比如根据 validation data 上的 accuracy 来确定 early stopping 的 epoch 大小、根据 validation data 确定learning rate 等等。

那为啥不直接在 testing data 上做这些呢？因为如果在 testing data 做这些，那么随着训练的进行，我们的网络实际上就是在一点一点地overfitting 我们的 testing data，导致最后得到的 testing accuracy 没有任何参考意义。

根据 validation 上的 accuracy 选择模型，然后在 test 上验证准确度。

传统制作方式：train、test 都是已经 label 好的，先设定一个比例，例如 validset 中 60% 的图片取自 trainset，其余取自 testset

make_val.py

import numpy as np
import shutil
import os

"""
random create num_val imgs with train ratio
"""

# train path
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'
train_label_npy_path = '/temp_disk/xs/sun/train/label_npy/'

# test path
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'
test_label_npy_path = '/temp_disk/xs/sun/test/label_npy/'

# val path
val_path = '/temp_disk/xs/sun/val'
val_img_path = '/temp_disk/xs/sun/val/image/'
val_depth_path = '/temp_disk/xs/sun/val/depth/'
val_label_path = '/temp_disk/xs/sun/val/label/'
val_label_npy_path = '/temp_disk/xs/sun/val/label_npy/'

# Start from an empty validation directory tree on every run.
if os.path.exists(val_path):
    shutil.rmtree(val_path)

for val_dir in (val_path, val_img_path, val_depth_path, val_label_path, val_label_npy_path):
    os.mkdir(val_dir)

num_val = 1000
ratio = 0.6  # fraction of the validation set drawn from the train split
num_val_from_train = int(num_val * ratio)

num_train = 5285
num_test = 5050

# Draw distinct indices in one call. The previous "redraw while already in the
# list" loop rescanned a list on every draw and never terminated when more
# samples were requested than exist; replace=False raises ValueError instead.
train_idx = np.random.choice(num_train, size=num_val_from_train, replace=False).tolist()
test_idx = np.random.choice(num_test, size=num_val - num_val_from_train, replace=False).tolist()

print('train')
for i, idx in enumerate(train_idx):
    stem = str(idx)
    shutil.copyfile(src=train_img_path + stem + '.jpg', dst=val_img_path + stem + '.jpg')
    shutil.copyfile(src=train_depth_path + stem + '.png', dst=val_depth_path + stem + '.png')
    shutil.copyfile(src=train_label_path + stem + '.png', dst=val_label_path + stem + '.png')
    shutil.copyfile(src=train_label_npy_path + stem + '.npy', dst=val_label_npy_path + stem + '.npy')
    print(i, idx)

print('test')
# Files taken from the test split get a 't' suffix so they cannot overwrite a
# train-derived file that has the same index.
for i, idx in enumerate(test_idx, start=num_val_from_train):
    stem = str(idx)
    shutil.copyfile(src=test_img_path + stem + '.jpg', dst=val_img_path + stem + 't.jpg')
    shutil.copyfile(src=test_depth_path + stem + '.png', dst=val_depth_path + stem + 't.png')
    shutil.copyfile(src=test_label_path + stem + '.png', dst=val_label_path + stem + 't.png')
    shutil.copyfile(src=test_label_npy_path + stem + '.npy', dst=val_label_npy_path + stem + 't.npy')
    print(i, idx)

后来想扩大 trainset，就把 test 中已经标记好的图片也都移了过去

mv_test2_train.py

import shutil

# Destination: directories of the train split.
train_img_path = '/temp_disk/xs/sun/train/image/'
train_depth_path = '/temp_disk/xs/sun/train/depth/'
train_label_path = '/temp_disk/xs/sun/train/label/'
train_label_npy_path = '/temp_disk/xs/sun/train/label_npy/'

# Source: directories of the test split.
test_img_path = '/temp_disk/xs/sun/test/image/'
test_depth_path = '/temp_disk/xs/sun/test/depth/'
test_label_path = '/temp_disk/xs/sun/test/label/'
test_label_npy_path = '/temp_disk/xs/sun/test/label_npy/'

num_train = 5285
num_test = 5050

# (source dir, destination dir, file extension) for each kind of file.
moves = (
    (test_img_path, train_img_path, '.jpg'),
    (test_depth_path, train_depth_path, '.png'),
    (test_label_path, train_label_path, '.png'),
    (test_label_npy_path, train_label_npy_path, '.npy'),
)

# Test sample i becomes train sample num_train + i, appended after the
# existing train indices.
for i in range(num_test):
    new_idx = num_train + i
    for src_dir, dst_dir, ext in moves:
        shutil.move(src=src_dir + str(i) + ext, dst=dst_dir + str(new_idx) + ext)
    print(new_idx)

但这样就得重新制作 validset 了。

5. 每次验证模型时随机从 trainset 取出 num_val 张图片作为 validset

mk_random_val.py

import numpy as np
import shutil
import os

"""
randomly get img from train set to make val set
"""


def mk_random_val(num_val, num_train=10335, root='/temp_disk/xs/sun'):
    """Rebuild the validation set from `num_val` distinct random training samples.

    Deletes any existing ``<root>/val`` directory, recreates ``val/image`` and
    ``val/label``, then copies ``<idx>.jpg`` from ``train/image`` and
    ``<idx>.png`` from ``train/label`` for every sampled index. Depth maps and
    .npy labels are not copied.

    Args:
        num_val: number of samples to draw.
        num_train: number of training samples; indices are drawn from
            [0, num_train).
        root: dataset root directory that contains the ``train`` directory.

    Returns:
        The sampled indices as a list, in the order they were copied.

    Raises:
        ValueError: if `num_val` is negative or larger than `num_train`.
    """
    # Validate before deleting anything. The former redraw-until-unique loop
    # never terminated when num_val exceeded num_train.
    if not 0 <= num_val <= num_train:
        raise ValueError('num_val must be in [0, %d], got %d' % (num_train, num_val))

    train_img_path = os.path.join(root, 'train', 'image')
    train_label_path = os.path.join(root, 'train', 'label')

    val_path = os.path.join(root, 'val')
    val_img_path = os.path.join(val_path, 'image')
    val_label_path = os.path.join(val_path, 'label')

    # Start from an empty validation directory on every call.
    if os.path.exists(val_path):
        shutil.rmtree(val_path)

    os.mkdir(val_path)
    os.mkdir(val_img_path)
    os.mkdir(val_label_path)

    # One draw without replacement gives distinct indices directly.
    train_idx = np.random.choice(num_train, size=num_val, replace=False).tolist()

    for idx in train_idx:
        img_name = str(idx) + '.jpg'
        label_name = str(idx) + '.png'
        shutil.copyfile(src=os.path.join(train_img_path, img_name), dst=os.path.join(val_img_path, img_name))
        shutil.copyfile(src=os.path.join(train_label_path, label_name), dst=os.path.join(val_label_path, label_name))

    return train_idx

6. 其他功能

下载的 CamVid Seq1 里面有原始图片和添加了后缀 _L 的分割 label 图，分别拼接成 avi 视频

import cv2
import os

img_dir = r'C:\Users\Shuai\Desktop\CamSeq01'

video_w, video_h = 960, 720

fourcc = cv2.VideoWriter_fourcc(*'XVID')
vw1 = cv2.VideoWriter('cam.avi', fourcc, 20, (video_w, video_h))  # original frames
vw2 = cv2.VideoWriter('cam_label.avi', fourcc, 20, (video_w, video_h))  # '_L' label frames

try:
    # os.listdir returns names in arbitrary order, so sort explicitly.
    # Assumes the file names sort in frame order - TODO confirm for this dataset.
    for img_name in sorted(os.listdir(img_dir)):
        img = cv2.imread(os.path.join(img_dir, img_name))
        if img is None:  # not a readable image (cv2.imread returns None on failure)
            print('skip', img_name)
            continue
        if '_L' in img_name:  # label
            vw2.write(img)
        else:  # ori img
            vw1.write(img)
        print(img_name)
finally:
    # Release the writers even if a frame fails, so the output files are finalized.
    vw1.release()
    vw2.release()

cv2 读取的图片用 plt 显示，要先进行 BGR2RGB

import cv2
import matplotlib.pyplot as plt

with open('img_path.txt', 'r') as f:  # one image path per line
    img_path = f.read().splitlines()

for path in img_path:
    # OpenCV loads images as BGR; matplotlib expects RGB, so convert before showing.
    rgb = cv2.cvtColor(cv2.imread(path), cv2.COLOR_BGR2RGB)
    plt.imshow(rgb)
    plt.show()
    print(path)

制作测试集，将SUNRGBDTest 图片移到同一位置，并按数目重命名，目录结构如下：

目录结构

import glob
import os

"""
1. 创建 txt 图片路径 文件
"""
img_pattern = r'E:\Dataset\SUN-RGBD\SUNRGBDLSUNTest\SUNRGBDv2Test\*\*\image\*.jpg'

cnt = 0
# Context managers make sure both list files are flushed and closed.
with open('test_img.txt', 'w') as f_img, open('test_depth.txt', 'w') as f_depth:
    # glob returns matches in arbitrary order; sort so the numbering is reproducible.
    for img_path in sorted(glob.glob(img_pattern)):
        # NOTE(review): replaces every 'image' / 'jpg' substring in the path,
        # not just the directory name and the extension - confirm no other
        # path component contains them.
        depth_path = img_path.replace('image', 'depth').replace('jpg', 'png')
        # Terminate each entry with '\n'; the previous bare '\r' produced
        # old-Mac-style line endings.
        f_img.write(img_path + '\n')
        f_depth.write(depth_path + '\n')
        print(cnt, img_path)
        print(cnt, depth_path)
        cnt += 1

"""
2. 移动图片，使用 shutil copyfile 即可
"""