在一些计算机视觉任务中,需要对模型的输出做一些后处理以优化视觉效果,连通域分析就是一种常见的后处理方式。尤其对于分割任务,有时输出的mask会存在一些假阳性区域(小的无用轮廓),通过3D连通域找出体积较小的独立轮廓并去除,可以有效地提升视觉效果。
二维图像连通域一般包括4连通、8连通。对于三维数据一般包括6连通、18连通和26连通。
下面的代码只保留最大3D连通域。
# -*- coding: utf-8 -*-
# @file : prob2label.py
# @Time : 2021-10-19 9:35
# @Author : wmz
import os
import SimpleITK as sitk
from glob import glob
import numpy as np
def getFiles(path, suffix):
    """Recursively collect the files under *path* whose names end with *suffix*.

    Args:
        path: Root directory handed to ``os.walk``.
        suffix: File-name ending to match, e.g. ``".nrrd"``. It is passed
            straight to ``str.endswith``, so a tuple of suffixes also works.

    Returns:
        A list of ``os.path.join(root, name)`` paths in ``os.walk`` order.
        The list is empty when nothing matches.
    """
    return [
        os.path.join(root, name)
        for root, _dirs, names in os.walk(path)
        for name in names
        if name.endswith(suffix)
    ]
def connected_domain_2(image, mask=True):
    """Keep only the largest connected component of a binary array.

    The input is cast to ``uint8`` and labelled with SimpleITK's
    ``ConnectedComponentImageFilter`` in fully-connected mode. The component
    with the most voxels is kept and every other voxel is cleared.

    Args:
        image: NumPy array; non-zero values (after the ``uint8`` cast) are
            foreground.
        mask: If true, foreground is written as 1; otherwise as 255.

    Returns:
        A ``uint8`` array with the same shape as *image*. An input without any
        foreground yields an all-zero array instead of raising ``IndexError``.
    """
    binary = image.astype(np.uint8)
    if not binary.any():
        # Nothing to label: there is no "largest" component to pick.
        return np.zeros(binary.shape, dtype=np.uint8)

    cca = sitk.ConnectedComponentImageFilter()
    cca.SetFullyConnected(True)
    labeled = cca.Execute(sitk.GetImageFromArray(binary))

    stats = sitk.LabelShapeStatisticsImageFilter()
    stats.Execute(labeled)

    # Labels run from 1 to GetObjectCount(). Walking them from high to low
    # makes ties resolve to the highest label id, as the former
    # sort-then-reverse did.
    num_label = cca.GetObjectCount()
    largest_label = max(range(num_label, 0, -1), key=stats.GetNumberOfPixels)

    # NOTE: a disabled variant of this routine also kept every component with
    # at least one tenth of the largest component's voxel count.
    keep = sitk.GetArrayFromImage(labeled) == largest_label

    foreground = 1 if mask else 255
    return keep.astype(np.uint8) * np.uint8(foreground)
def save_prob2label(prob_dir, save_labeldir, threshold=None):
    """Convert every ``.nrrd`` probability map under *prob_dir* to a label map.

    Each file is thresholded, reduced to its largest connected component via
    ``connected_domain_2`` and written as ``.nii.gz`` under *save_labeldir*,
    mirroring the sub-directory layout of *prob_dir*. Origin, spacing and
    direction are copied from the source image.

    Args:
        prob_dir: Root directory searched recursively for ``.nrrd`` files.
        save_labeldir: Root directory for the generated ``.nii.gz`` files.
        threshold: Voxels strictly greater than this value become foreground.
            When omitted, the module-level ``Dice_value`` is used if it exists,
            otherwise 0.5.
    """
    if threshold is None:
        # Backward compatibility: this function used to read the global
        # ``Dice_value`` that the script entry point defines.
        threshold = globals().get("Dice_value", 0.5)

    src_suffix = ".nrrd"
    all_prob_seg = getFiles(prob_dir, src_suffix)
    total = len(all_prob_seg)
    for index, file in enumerate(all_prob_seg, start=1):
        print("processing", index, '/', total, file)

        # Swap only the root directory and the trailing extension; a blanket
        # str.replace could also rewrite matching text elsewhere in the path.
        rel_path = os.path.relpath(file, prob_dir)
        label_file = os.path.join(
            save_labeldir, rel_path[:-len(src_suffix)] + ".nii.gz"
        )

        prob_img = sitk.ReadImage(file)
        prob_arr = sitk.GetArrayFromImage(prob_img)
        label_arr = connected_domain_2((prob_arr > threshold) * 1)

        label_img = sitk.GetImageFromArray(label_arr)
        label_img.SetOrigin(prob_img.GetOrigin())
        # Spacing must be carried over too; otherwise the written label map
        # does not keep the source image's voxel size.
        label_img.SetSpacing(prob_img.GetSpacing())
        label_img.SetDirection(prob_img.GetDirection())

        # os.path.dirname works with any path separator, not just '\\'.
        dst_dir = os.path.dirname(label_file)
        if dst_dir:
            os.makedirs(dst_dir, exist_ok=True)
        sitk.WriteImage(label_img, label_file)
if __name__ == '__main__':
    # Script entry point: convert the probability maps under the input
    # directory into label maps under the output directory.
    prob_nrrd_dir = r'C:\Users\wmz\Desktop\input'
    save_label_dir = r'C:\Users\wmz\Desktop\test'
    # Module-level threshold; save_prob2label reads this global.
    Dice_value = 0.5
    save_prob2label(prob_nrrd_dir, save_label_dir)
参考:python实现3D连通域后处理
1.内容
步骤:
1.读取Dicom序列
2.设置固定阈值为100,把骨骼和心脏及主动脉都分割出来
3.形态学开运算+最大连通域提取,得到粗略的心脏和主动脉图像
4.将第2步的结果与第3步的结果相减,得到骨骼部分
5.最大连通域提取,去除小连接
6.将得到的图像与原始图像进行逻辑与操作
数据地址:
链接:https://pan.baidu.com/s/198H5g30LSKrKInJfgV1xFQ
提取码:a3nw
import SimpleITK as sitk
# 最大连通域提取
def GetLargestConnectedCompont(binarysitk_image):
    """Return a 0/255 mask of the largest connected component of an image.

    Components are labelled with ``sitk.ConnectedComponent`` and ranked by the
    physical size reported by ``LabelIntensityStatisticsImageFilter``.

    Args:
        binarysitk_image: SimpleITK image whose non-zero voxels are foreground.

    Returns:
        A SimpleITK image with the direction, spacing and origin of the input
        and the pixel type of the label image. Voxels of the largest component
        are 255 and all others 0. If the input has no foreground, the result
        is all zeros.
    """
    cc = sitk.ConnectedComponent(binarysitk_image)
    stats = sitk.LabelIntensityStatisticsImageFilter()
    # NOTE(review): by its name this sets a process-wide default rather than a
    # per-filter thread count - confirm that is intended.
    stats.SetGlobalDefaultNumberOfThreads(8)
    stats.Execute(cc, binarysitk_image)

    # The first label with the greatest physical size wins. 0 is the
    # background label and doubles as "no component found".
    maxlabel = max(stats.GetLabels(), key=stats.GetPhysicalSize, default=0)

    labelmaskimage = sitk.GetArrayFromImage(cc)
    # Excluding label 0 keeps an empty input from turning the whole background
    # into foreground when maxlabel is still 0.
    keep = (labelmaskimage == maxlabel) & (labelmaskimage != 0)
    outmask = keep.astype(labelmaskimage.dtype) * 255

    outmask_sitk = sitk.GetImageFromArray(outmask)
    outmask_sitk.SetDirection(binarysitk_image.GetDirection())
    outmask_sitk.SetSpacing(binarysitk_image.GetSpacing())
    outmask_sitk.SetOrigin(binarysitk_image.GetOrigin())
    return outmask_sitk
# 逻辑与操作
def GetMaskImage(sitk_src, sitk_mask, replacevalue=0):
    """Overwrite the voxels of *sitk_src* that lie outside *sitk_mask*.

    Args:
        sitk_src: Source SimpleITK image.
        sitk_mask: SimpleITK mask image; voxels equal to 0 are treated as
            outside the mask.
        replacevalue: Value written to every voxel outside the mask.

    Returns:
        A new SimpleITK image with the direction, spacing and origin of
        *sitk_src*. Voxels inside the mask keep their source value.
    """
    # GetArrayFromImage already returns a fresh array, so writing into it
    # leaves sitk_src untouched and no extra copy is needed.
    array_out = sitk.GetArrayFromImage(sitk_src)
    array_mask = sitk.GetArrayFromImage(sitk_mask)
    array_out[array_mask == 0] = replacevalue

    outmask_sitk = sitk.GetImageFromArray(array_out)
    outmask_sitk.SetDirection(sitk_src.GetDirection())
    outmask_sitk.SetSpacing(sitk_src.GetSpacing())
    outmask_sitk.SetOrigin(sitk_src.GetOrigin())
    return outmask_sitk
# Read the DICOM series found in pathDicom into a single image.
pathDicom = 'D:/PyCharm 2019.3.3/data/LIDC_nodul'
reader = sitk.ImageSeriesReader()
filenamesDICOM = reader.GetGDCMSeriesFileNames(pathDicom)
reader.SetFileNames(filenamesDICOM)
sitk_src = reader.Execute()
# step1. Fixed threshold with a lower bound of 100 and an upper bound of 3000:
# voxels in range become 255, the rest 0. Per the original note this segments
# bone together with the heart and aorta.
sitk_seg = sitk.BinaryThreshold(sitk_src, lowerThreshold=100, upperThreshold=3000, insideValue=255, outsideValue=0)
sitk.WriteImage(sitk_seg, 'step1.mha')
# step2. Morphological opening followed by largest-connected-component
# extraction gives a rough heart + aorta image.
# NOTE(review): the second argument (2) is presumably the kernel radius -
# confirm against the SimpleITK signature.
sitk_open = sitk.BinaryMorphologicalOpening(sitk_seg != 0, 2)
sitk_open = GetLargestConnectedCompont(sitk_open)
sitk.WriteImage(sitk_open, 'step2.mha')
# step3. Subtract the step2 result from the step1 result to get the bone part.
# NOTE(review): this is a plain NumPy subtraction; if both arrays are unsigned,
# a voxel set in step2 but not in step1 would wrap around - confirm that
# cannot happen.
array_open = sitk.GetArrayFromImage(sitk_open)
array_seg = sitk.GetArrayFromImage(sitk_seg)
array_mask = array_seg - array_open
sitk_mask = sitk.GetImageFromArray(array_mask)
# Re-attach the geometry of the step1 image to the array-built image.
sitk_mask.SetDirection(sitk_seg.GetDirection())
sitk_mask.SetSpacing(sitk_seg.GetSpacing())
sitk_mask.SetOrigin(sitk_seg.GetOrigin())
sitk.WriteImage(sitk_mask, 'step3.mha')
# step4. Largest-connected-component extraction to remove small attached pieces.
skeleton_mask = GetLargestConnectedCompont(sitk_mask)
sitk.WriteImage(skeleton_mask, 'step4.mha')
# step5. Mask the original image with the result: voxels outside the mask are
# set to -1500.
sitk_skeleton = GetMaskImage(sitk_src, skeleton_mask, replacevalue=-1500)
sitk.WriteImage(sitk_skeleton, 'step5.mha')
参考:【医学图像处理】之腹部骨骼提取(SimpleITK)