阅读小样本分割(FSS)论文时,经常会看到实验中使用 Pascal-5i 和 COCO 数据集,但一般的开源代码并没有说明这些数据集是如何构建的,因此如何构建数据集本身也是一个问题。
Pascal-5i 是在论文《One-shot learning for semantic segmentation》中提出的。
#!/usr/bin/env python
#encoding: utf-8
# Martin Kersner, [email protected]
# 2016/03/17
from __future__ import print_function
import os
import sys
import glob
from PIL import Image as PILImage
from utils import mat2png_hariharan
def main():
    """Convert every ``*.mat`` file in the input directory to a PNG image.

    The input and output directories are taken from the command line via
    ``process_arguments``. When either one is not an existing directory,
    ``help`` is called with an error message and nothing is converted.
    """
    src_dir, dst_dir = process_arguments(sys.argv)

    both_exist = os.path.isdir(src_dir) and os.path.isdir(dst_dir)
    if not both_exist:
        help('Input or output path does not exist!\n')
        return

    # glob.glob returns the list of all paths matching the pattern.
    pattern = os.path.join(src_dir, '*.mat')
    convert_mat2png(glob.glob(pattern), dst_dir)
def process_arguments(argv):
    """Extract the input and output paths from the command line.

    Args:
        argv: Full argument vector, program name included.

    Returns:
        ``(input_path, output_path)`` when exactly two arguments follow the
        program name. Otherwise ``help`` is called, and ``(None, None)`` is
        returned should that call come back.
    """
    if len(argv) != 3:
        help()
        return None, None

    return argv[1], argv[2]
def convert_mat2png(mat_files, output_path):
    """Write one PNG into ``output_path`` for each Matlab file given.

    Args:
        mat_files: Paths of the ``.mat`` files to convert. If the list is
            empty, ``help`` is called with an explanatory message.
        output_path: Directory that receives the PNG files.
    """
    if not mat_files:
        help('Input directory does not contain any Matlab files!\n')

    for mat_path in mat_files:
        segmentation = mat2png_hariharan(mat_path)
        image = PILImage.fromarray(segmentation)
        # The PNG keeps the Matlab file's base name, only the extension changes.
        target = os.path.join(output_path, modify_image_name(mat_path, 'png'))
        image.save(target)
# Extract name of image from given path, replace its extension with specified one
# and return new name only, not path.
def modify_image_name(path, ext):
    """Return the file name of ``path`` with its extension replaced by ``ext``.

    Only the last extension is replaced, so a name that itself contains dots
    (``a.b.mat``) keeps its full stem (``a.b.png``). Cutting at the first dot
    instead would map different inputs onto the same output name.

    Args:
        path: Path of the source file; any directory part is discarded.
        ext: New extension, without the leading dot.

    Returns:
        The new file name only, not a path.
    """
    stem, _old_ext = os.path.splitext(os.path.basename(path))
    return stem + '.' + ext
def help(msg=''):
    """Print ``msg`` followed by the usage text to stderr and terminate.

    Args:
        msg: Optional error message printed in front of the usage text.

    Raises:
        SystemExit: Always, with exit status 1. Every call site reports a
            usage error, so the process must not exit with a success status.
    """
    usage = (
        'Usage: python mat2png.py INPUT_PATH OUTPUT_PATH\n'
        'INPUT_PATH denotes path containing Matlab files for conversion.\n'
        'OUTPUT_PATH denotes path where converted Png files ar going to be saved.'
    )
    print(msg + usage, file=sys.stderr)
    # sys.exit rather than the bare exit(): the latter is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist.
    sys.exit(1)
# Run the conversion only when the file is executed as a script.
if __name__ == "__main__":
    main()
python mat2png.py 你的cls文件夹地址 你的cls_aug文件夹地址
比如我的是:
python mat2png.py E:\deepLearningTest\datasets\benchmark_RELEASE\dataset\cls E:\deepLearningTest\datasets\benchmark_RELEASE\dataset\cls_aug
等程序运行完成后,就会发现 cls_aug 文件夹里生成了很多 PNG 图片。注意:cls_aug 文件夹需要事先创建,否则脚本会提示路径不存在并退出。
from __future__ import print_function
import os
import sys
import numpy as np
from PIL import Image
def main():
    """Re-save every listed label image as a plain 8-bit array image.

    Command-line arguments (see ``process_arguments``) give the directory
    holding the label PNGs, a text file with one image name per line
    (without extension), and the directory that receives the converted
    files. The output directory is created when it does not exist.

    Each image is loaded into a NumPy array, cast to ``uint8`` and written
    back under the same name.
    NOTE(review): for palette-mode VOC labels this presumably stores the
    class index of each pixel as its grey value - confirm against the data.
    """
    ext = '.png'

    path, txt_file, path_converted = process_arguments(sys.argv)

    # Create dir for converted labels
    if not os.path.isdir(path_converted):
        os.makedirs(path_converted)

    with open(txt_file, 'r') as f:
        for line in f:
            img_base_name = line.strip()
            if not img_base_name:
                # A blank (e.g. trailing) line would resolve to "<PATH>/.png"
                # and abort the whole run on a missing file.
                continue
            print(img_base_name)

            src_name = os.path.join(path, img_base_name) + ext
            # Context manager releases the file handle as soon as the pixel
            # data has been copied into the array.
            with Image.open(src_name) as src_img:
                # A single cast is enough: going through int32 first does not
                # change the result for integer-valued masks.
                labels = np.array(src_img).astype('uint8')

            converted = Image.fromarray(labels)
            converted.save(os.path.join(path_converted, img_base_name + ext))
def process_arguments(argv):
    """Pick the three positional arguments out of the command line.

    Args:
        argv: Full argument vector, program name included.

    Returns:
        ``(path, list_file, new_path)`` taken from positions 1, 2 and 3.
        ``help`` is called first when the vector does not have exactly
        four entries.
    """
    expected_len = 4
    if len(argv) != expected_len:
        help()

    return argv[1], argv[2], argv[3]
def help():
    """Print the usage text to stderr and terminate.

    Raises:
        SystemExit: Always, with exit status 1. The only call site reports a
            wrong number of arguments, so the process must not exit with a
            success status.
    """
    usage = (
        'Usage: python convert_labels.py PATH LIST_FILE NEW_PATH\n'
        'PATH points to directory with segmentation image labels.\n'
        'LIST_FILE denotes text file containing names of images in PATH.\n'
        'Names do not include extension of images.\n'
        'NEW_PATH points to directory where converted labels will be stored.'
    )
    print(usage, file=sys.stderr)
    # sys.exit rather than the bare exit(): the latter is injected by the
    # ``site`` module for interactive use and is not guaranteed to exist.
    sys.exit(1)
# Run the label conversion only when the file is executed as a script.
if __name__ == "__main__":
    main()
python convert_labels.py 你的SegmentationClass文件夹地址 你的ImageSets/Segmentation/trainval.txt文件地址 你的SegClassGray文件夹地址
比如我的是:
python convert_labels.py E:\deepLearningTest\datasets\VOCdevkit\VOC2012\SegmentationClass E:\deepLearningTest\datasets\VOCdevkit\VOC2012\ImageSets\Segmentation\trainval.txt E:\deepLearningTest\datasets\VOCdevkit\VOC2012\SegClassGray
等到程序运行完成即可,这个时候就会发现SegClassGray里面有很多图片了
最终SegmentationClassAug的图片有12031张,包含有20个类的物体,每个物体对应的灰度值在1-20之间。
本篇文章代码参考这里
第2步中所引用的utils代码
#!/usr/bin/env python
# Martin Kersner, [email protected]
# 2016/03/11
import scipy.io
import struct
import numpy as np
def pascal_classes():
    """Return a new dict mapping each PASCAL class name to its id (1..20)."""
    ordered_names = (
        'aeroplane', 'bicycle', 'bird', 'boat',
        'bottle', 'bus', 'car', 'cat',
        'chair', 'cow', 'diningtable', 'dog',
        'horse', 'motorbike', 'person', 'potted-plant',
        'sheep', 'sofa', 'train', 'tv/monitor',
    )
    # Ids start at 1 and follow the order of the names above.
    return {name: class_id for class_id, name in enumerate(ordered_names, start=1)}
def pascal_palette():
    """Return a new dict mapping each palette colour (r, g, b) to its index.

    Index 0 is black; indices 1..20 follow the order of the colours below.
    """
    colors_by_index = (
        (0, 0, 0),
        (128, 0, 0),
        (0, 128, 0),
        (128, 128, 0),
        (0, 0, 128),
        (128, 0, 128),
        (0, 128, 128),
        (128, 128, 128),
        (64, 0, 0),
        (192, 0, 0),
        (64, 128, 0),
        (192, 128, 0),
        (64, 0, 128),
        (192, 0, 128),
        (64, 128, 128),
        (192, 128, 128),
        (0, 64, 0),
        (128, 64, 0),
        (0, 192, 0),
        (128, 192, 0),
        (0, 64, 128),
    )
    return {color: index for index, color in enumerate(colors_by_index)}
def pascal_palette_invert():
    """Return the palette colours flattened into one tuple of channel values.

    The result has the form ``(r0, g0, b0, r1, g1, b1, ...)`` with the
    colours ordered by their palette index.

    The order is taken from the index stored in ``pascal_palette`` rather
    than from dict iteration order, which is arbitrary on interpreters that
    do not preserve insertion order (Python 2, CPython < 3.7).
    """
    by_index = sorted(pascal_palette().items(), key=lambda item: item[1])
    return tuple(channel for color, _index in by_index for channel in color)
def pascal_mean_values():
    """Return the three mean values as a new float32 array.

    NOTE(review): presumably per-channel image means - the channel order is
    not stated here; confirm against the consumer.
    """
    means = (103.939, 116.779, 123.68)
    return np.asarray(means, dtype=np.float32)
def strstr(str1, str2):
    """Return True when ``str2`` occurs somewhere in ``str1``, else False."""
    # str.find yields the match position, or -1 when there is no match.
    position = str1.find(str2)
    return position >= 0
# Mat to png conversion for http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html
# 'GTcls' key is for class segmentation
# 'GTinst' key is for instance segmentation
def mat2png_hariharan(mat_file, key='GTcls'):
    """Load a Matlab file and return the ``Segmentation`` field of ``key``.

    Args:
        mat_file: Path of the ``.mat`` file.
        key: Name of the top-level struct to read.

    Returns:
        The ``Segmentation`` attribute of that struct, as loaded by scipy.
    """
    contents = scipy.io.loadmat(
        mat_file,
        struct_as_record=False,  # structs become objects with attribute access
        squeeze_me=True,
        mat_dtype=True,
    )
    ground_truth = contents[key]
    return ground_truth.Segmentation
def convert_segmentation_mat2numpy(mat_file):
    """Load a Matlab file and reduce it to a 2-D map of argmax indices.

    The array stored under the default key of ``load_mat`` is reduced with
    an argmax over axis 2, then flipped left-right and rotated by 90
    degrees.
    NOTE(review): axis 2 presumably holds per-class scores - confirm.
    """
    volume = load_mat(mat_file)
    winners = np.argmax(volume, axis=2)
    mirrored = np.fliplr(winners)
    return np.rot90(mirrored)
def load_mat(mat_file, key='data'):
    """Load a Matlab file and return the variable stored under ``key``.

    Args:
        mat_file: Path of the ``.mat`` file.
        key: Name of the variable to return.
    """
    contents = scipy.io.loadmat(
        mat_file,
        struct_as_record=False,
        squeeze_me=True,
        mat_dtype=True,
    )
    return contents[key]
# Python version of script in code/densecrf/my_script/LoadBinFile.m
def load_binary_segmentation(bin_file, dtype='int16'):
    """Read a binary segmentation file into a 2-D ``uint8`` array.

    The file starts with three native-order C ints (rows, cols, channels),
    followed by ``rows * cols`` values of type ``dtype``. Only one channel
    is expected, so the channel count is read but not used.

    Args:
        bin_file: Path of the binary file.
        dtype: NumPy dtype of the stored values. The default matches the
            16-bit signed values that were previously always assumed.

    Returns:
        Array of shape ``(rows, cols)`` and dtype ``uint8``. Values outside
        0..255 wrap around instead of raising.

    Raises:
        ValueError: If the header holds a negative dimension.
        struct.error: If the file is shorter than its header announces.
    """
    value_dtype = np.dtype(dtype)
    header = struct.Struct('iii')

    with open(bin_file, 'rb') as bf:
        raw_header = bf.read(header.size)
        if len(raw_header) != header.size:
            raise struct.error('truncated header in %s' % bin_file)
        rows, cols, _channels = header.unpack(raw_header)
        if rows < 0 or cols < 0:
            raise ValueError('negative dimensions in %s' % bin_file)

        num_values = rows * cols  # expect only one channel in segmentation output
        expected_bytes = num_values * value_dtype.itemsize
        payload = bf.read(expected_bytes)

    if len(payload) != expected_bytes:
        raise struct.error('truncated data in %s' % bin_file)

    # One bulk read replaces a struct.unpack call per pixel. astype wraps
    # out-of-range values, whereas np.uint8(<python int>) raises on NumPy 2.
    values = np.frombuffer(payload, dtype=value_dtype, count=num_values)
    out = values.astype(np.uint8)  # expect only values between 0 and 255

    # Flip plus rotate amounts to a transpose of the (cols, rows) layout.
    return np.rot90(np.fliplr(out.reshape((cols, rows))))
def convert_from_color_segmentation(arr_3d):
    """Translate a colour-coded segmentation into a 2-D map of indices.

    Args:
        arr_3d: Array whose last axis (axis 2) holds the three colour
            channels of each pixel.

    Returns:
        ``uint8`` array covering the first two axes of ``arr_3d``. Each
        pixel holds the index that ``pascal_palette`` assigns to its colour;
        pixels whose colour is not in the palette stay 0.
    """
    n_rows = arr_3d.shape[0]
    n_cols = arr_3d.shape[1]
    label_map = np.zeros((n_rows, n_cols), dtype=np.uint8)

    for color, index in pascal_palette().items():
        # Broadcast the colour over all pixels; a pixel matches only when
        # all three channels agree.
        reference = np.array(color).reshape(1, 1, 3)
        matches = np.all(arr_3d == reference, axis=2)
        label_map[matches] = index

    return label_map
def create_lut(class_ids, max_id=256):
    """Build a lookup table that renumbers the given ids as 1, 2, 3, ...

    Args:
        class_ids: Ids to keep, in the order in which they get new indices.
        max_id: Length of the table.

    Returns:
        ``uint8`` array of length ``max_id``; entries not listed in
        ``class_ids`` are 0.
    """
    # Index 0 is the first index used in caffe for denoting labels.
    # Therefore, index 0 is considered as default.
    lut = np.zeros(max_id, dtype=np.uint8)

    for new_index, class_id in enumerate(class_ids, start=1):
        lut[class_id] = new_index

    return lut
def get_id_classes(classes):
    """Return the ids of the given class names, in the same order.

    Args:
        classes: Iterable of class names as used by ``pascal_classes``.

    Raises:
        KeyError: If a name is not a known class.
    """
    name_to_id = pascal_classes()

    id_classes = []
    for class_name in classes:
        id_classes.append(name_to_id[class_name])
    return id_classes