Dice
其实就是F1-score
,即预测结果(prediction)
和实际gt
区域的overlap的面积(area)(或体积(volume))的2倍与二者
面积(area)(或体积(volume))之和的比值,即 $Dice=\frac{2|P\cap G|}{|P|+|G|}$(注意分母是二者面积之和,而不是union的面积)。Dice一般是没有单位的。
def dice(seg, gt):
    """Return the Dice coefficient of two binary masks.

    Dice = 2 * |seg ∩ gt| / (|seg| + |gt|), computed with elementwise
    product and ``sum`` so that it works for 0/1 integer or boolean arrays.

    Args:
        seg: Predicted binary mask (array-like object supporting ``*`` and
            ``.sum()``, e.g. a numpy array).
        gt: Ground-truth binary mask with the same shape as ``seg``.

    Returns:
        The Dice score in [0, 1]; ``0`` when both masks are empty.
    """
    denominator = seg.sum() + gt.sum()
    # Both masks empty: the ratio is undefined, keep the original convention of 0.
    if denominator == 0:
        return 0
    # The original snippet computed this value but never returned it.
    return 2 * (seg * gt).sum() / denominator
或者使用medpy库
pip install medpy
from medpy.metric import binary
def dice(seg, gt):
    """Compute the Dice coefficient of ``seg`` against ``gt`` via medpy's ``binary.dc``."""
    score = binary.dc(seg, gt)
    return score
如图所示,ASSD计算公式为:
$$ASSD=\frac{\sum_{x\in X}\min_{y\in Y}d(x, y)+\sum_{y\in Y}\min_{x\in X}d(y, x)}{len(X) + len(Y)}$$
或者
$$\frac{\frac{\sum_{x\in X}\min_{y\in Y}d(x, y)}{len(X)}+\frac{\sum_{y\in Y}\min_{x\in X}d(y, x)}{len(Y)}}{2}$$
即对于每个在X
上的点,求其与边缘Y
的最小距离,然后将这些距离求和。对于每个在Y
上的点,求其与边缘X
的最小距离,然后将这些距离求和。最后两项和相加除以X
和Y
中点的总数目。
医学图像中,医生最后需要告诉病人哪个部位有多大,因此需要一个单位来度量大小,比如毫米(mm)等。而产生的医学图像在计算机中查看时只能用像素或体素来度量,因此最后求得的像素值需要转换为世界坐标系中来度量。转换过程其实就是一个线性的变换,以一个像素点(像素原点)为参照点,又称为仿射变换(Affine transformation):
$$x = i * spacing_x + x_{origin} \\ y = j * spacing_y + y_{origin} \\ z = k * spacing_z + z_{origin}$$
反过来由现实世界中的坐标点转为计算机系统中的像素点为:
$$i = (x-x_{origin}) / spacing_x \\ j = (y-y_{origin}) / spacing_y \\ k = (z-z_{origin}) / spacing_z$$
相关代码为:
def voxelToReal(pt):
    """Map voxel indices (i, j, k) to real-world coordinates (x, y, z).

    Applies ``x = i * spacing_x + x_origin`` (and likewise for y and z) as a
    single 4x4 affine matrix product. ``spacing`` and ``origin`` are read
    from the enclosing module scope and must each hold three values.

    Args:
        pt: Voxel coordinates whose first axis has length 3 (``[i, j, k]``)
            or 4 (homogeneous ``[i, j, k, 1]``). Either a single point (1-D)
            or many points stored column-wise (shape ``(3, n)`` / ``(4, n)``).

    Returns:
        numpy.ndarray: Real-world coordinates, shape ``(3,)`` or ``(3, n)``.

    Raises:
        ValueError: If the first axis of ``pt`` is neither 3 nor 4 long.
    """
    affine_matrix = np.array([
        [spacing[0], 0, 0, origin[0]],
        [0, spacing[1], 0, origin[1]],
        [0, 0, spacing[2], origin[2]],  # z row uses the z spacing, not spacing[1]
        [0, 0, 0, 1],
    ], dtype=float)

    pt = np.asarray(pt, dtype=float)
    if pt.ndim == 0 or pt.shape[0] not in (3, 4):
        raise ValueError("pt must have 3 (or 4, homogeneous) entries along its first axis")
    if pt.shape[0] == 3:
        # Append the homogeneous coordinate so the translation column is applied.
        pt = np.concatenate([pt, np.ones((1,) + pt.shape[1:])], axis=0)

    # Matrix product; ``*`` would multiply elementwise and is not an affine map.
    real = affine_matrix @ pt
    return real[:3]
def realToVoxel(real):
    """Map real-world coordinates (x, y, z) back to voxel indices (i, j, k).

    Inverts the affine map ``x = i * spacing_x + x_origin`` (and likewise for
    y and z). ``spacing`` and ``origin`` are read from the enclosing module
    scope and must each hold three values.

    Args:
        real: Real-world coordinates whose first axis has length 3
            (``[x, y, z]``) or 4 (homogeneous ``[x, y, z, 1]``). Either a
            single point (1-D) or many points stored column-wise.

    Returns:
        numpy.ndarray: Fractional voxel coordinates, shape ``(3,)`` or
        ``(3, n)``. They are not rounded; the caller decides how to snap
        them to integer indices.

    Raises:
        ValueError: If the first axis of ``real`` is neither 3 nor 4 long.
    """
    affine_matrix = np.array([
        [spacing[0], 0, 0, origin[0]],
        [0, spacing[1], 0, origin[1]],
        [0, 0, spacing[2], origin[2]],  # z row uses the z spacing, not spacing[1]
        [0, 0, 0, 1],
    ], dtype=float)
    affine_matrix_inv = np.linalg.inv(affine_matrix)

    real = np.asarray(real, dtype=float)
    if real.ndim == 0 or real.shape[0] not in (3, 4):
        raise ValueError("real must have 3 (or 4, homogeneous) entries along its first axis")
    if real.shape[0] == 3:
        # Append the homogeneous coordinate so the translation column is applied.
        real = np.concatenate([real, np.ones((1,) + real.shape[1:])], axis=0)

    # Matrix product; ``*`` would multiply elementwise and is not an affine map.
    voxel = affine_matrix_inv @ real
    return voxel[:3]
使用KD树来快速查找和搜索:
from sklearn.neighbors import KDTree
def distance_A_to_B(A, B):
    """For every point in ``A`` find its nearest neighbour in ``B``.

    Args:
        A: Query points, array-like of shape ``(n_a, dim)``.
        B: Reference points, array-like of shape ``(n_b, dim)``.

    Returns:
        tuple: ``(distances, indices)`` as returned by ``KDTree.query`` with
        ``k=1``: for each point of ``A`` the distance to, and the index of,
        its closest point in ``B``. Both arrays have shape ``(n_a, 1)``.
    """
    tree_B = KDTree(np.array(B))
    # Take each element of A and look up its closest element in B, so the
    # result has len(A) * k entries. ``k`` is an argument of ``query``; the
    # original passed it to ``np.array`` by mistake.
    distances, indices = tree_B.query(np.array(A), k=1)
    return distances, indices
求ASSD的完整代码:
from scipy import ndimage
def voxelToReal(pt, affine_matrix=None):
    """Map voxel indices (i, j, k) to real-world coordinates (x, y, z).

    Args:
        pt: Voxel coordinates whose first axis has length 3 (``[i, j, k]``)
            or 4 (homogeneous). Either a single point (1-D) or many points
            stored column-wise (shape ``(3, n)`` / ``(4, n)``).
        affine_matrix: Optional 4x4 voxel-to-world matrix. When ``None`` it
            is built from the module-level ``spacing`` and ``origin``.

    Returns:
        numpy.ndarray: Real-world coordinates, shape ``(3,)`` or ``(3, n)``.

    Raises:
        ValueError: If the first axis of ``pt`` is neither 3 nor 4 long.
    """
    if affine_matrix is None:
        affine_matrix = np.array([
            [spacing[0], 0, 0, origin[0]],
            [0, spacing[1], 0, origin[1]],
            [0, 0, spacing[2], origin[2]],  # z row uses the z spacing
            [0, 0, 0, 1],
        ], dtype=float)
    else:
        # Honour the caller's matrix instead of silently overwriting it.
        affine_matrix = np.asarray(affine_matrix, dtype=float)

    pt = np.asarray(pt, dtype=float)
    if pt.ndim == 0 or pt.shape[0] not in (3, 4):
        raise ValueError("pt must have 3 (or 4, homogeneous) entries along its first axis")
    if pt.shape[0] == 3:
        # Append the homogeneous coordinate so the translation column is applied.
        pt = np.concatenate([pt, np.ones((1,) + pt.shape[1:])], axis=0)

    # Matrix product; ``*`` would multiply elementwise and is not an affine map.
    real = affine_matrix @ pt
    return real[:3]


def distance_A_to_B(A, B):
    """Return ``(distances, indices)`` of the nearest point in ``B`` for each point in ``A``.

    Both inputs are array-likes of shape ``(n, dim)``; the outputs come from
    ``KDTree.query`` and have shape ``(len(A), 1)``.
    """
    tree_B = KDTree(np.array(B))
    distances, indices = tree_B.query(np.array(A))
    return distances, indices


def ASSD(seg, gt, affine_matrix=None):
    """Average symmetric surface distance between two 3-D binary masks.

    Border voxels of both masks are extracted, converted to real-world
    coordinates, and the nearest-neighbour distances in both directions are
    averaged over all border points.

    Args:
        seg: Predicted 3-D binary mask.
        gt: Ground-truth 3-D binary mask.
        affine_matrix: Optional 4x4 voxel-to-world matrix forwarded to
            ``voxelToReal``; ``None`` uses the module-level ``spacing`` and
            ``origin``.

    Returns:
        float: The ASSD, in the unit of the affine matrix (typically mm).

    Raises:
        ValueError: If either mask has no foreground voxel.
    """
    seg = np.asarray(seg, dtype=bool)
    gt = np.asarray(gt, dtype=bool)
    if not seg.any() or not gt.any():
        raise ValueError("ASSD is undefined when seg or gt has no foreground voxels")

    struct = ndimage.generate_binary_structure(3, 1)
    # Mask XOR its erosion leaves only the outermost voxel layer (the border).
    ref_border = gt ^ ndimage.binary_erosion(gt, struct, border_value=0)
    seg_border = seg ^ ndimage.binary_erosion(seg, struct, border_value=0)
    # Border voxel coordinates as (n, dim) arrays, one row per voxel.
    ref_border_voxels = np.argwhere(ref_border)
    seg_border_voxels = np.argwhere(seg_border)

    # Convert border coordinates to real-world values (usually mm).
    # voxelToReal works column-wise, hence the transposes.
    ref_border_voxels_real = voxelToReal(ref_border_voxels.T, affine_matrix).T
    seg_border_voxels_real = voxelToReal(seg_border_voxels.T, affine_matrix).T

    dist_seg_to_ref, _ = distance_A_to_B(seg_border_voxels_real, ref_border_voxels_real)
    dist_ref_to_seg, _ = distance_A_to_B(ref_border_voxels_real, seg_border_voxels_real)

    assd = (dist_seg_to_ref.sum() + dist_ref_to_seg.sum()) / (len(dist_seg_to_ref) + len(dist_ref_to_seg))
    return assd
使用pydicom
包
import pydicom
# Read the DICOM file into a pydicom dataset.
ds = pydicom.dcmread(filename)
ds.keys() # list every tag (key) present in the dataset
print(ds[("0008", "0060")]) # look an element up by its (group, element) tag; (0008, 0060) is Modality
print(ds["Modality"]) # the same element looked up by keyword
具体键值对信息可见dicom general series
注意计算z-spacing
时千万不能使用ds["SliceThickness"]
,因为不同的slice之间可能有重叠或者有gap
,因此正确的计算z-spacing
的方法是使用ImageOrientationPatient
和ImagePositionPatient
:
# ImagePositionPatient holds the (x, y, z) position in mm of the first voxel of a slice.
# ds_1 / ds_n are the datasets of the first and last slice, n is the number of slices.
position_1 = ds_1.ImagePositionPatient
position_n = ds_n.ImagePositionPatient
# Average signed step along z between consecutive slices. Only the z component
# is used, which presumably assumes axial (non-oblique) slices; confirm for
# your data.
zspacing = (position_n[2] - position_1[2]) / (n - 1)
所以上述完整的affine_matrix
的构建方式为:
def voxelToRead(dicom_files):
    """Build the 4x4 voxel-to-world affine matrix of a DICOM series.

    The first two columns come from the row/column direction cosines scaled
    by the in-plane pixel spacing. The third column is the average step
    between consecutive slices, derived from the positions of the first and
    last slice (not from SliceThickness, since slices may overlap or have
    gaps). The last column is the position of the first voxel.

    A voxel ``(i, j, k)`` (``i`` = column index, ``j`` = row index,
    ``k`` = slice index) is mapped to patient coordinates with
    ``real = (affine_matrix @ np.array([i, j, k, 1]))[:3]``.

    Args:
        dicom_files: Paths of the slices of one series. Assumed to be ordered
            by slice position, first slice first; this is not checked here.

    Returns:
        numpy.ndarray: The 4x4 affine matrix.

    Raises:
        ValueError: If fewer than two files are given, because the slice
            step cannot be derived from a single position.
    """
    n = len(dicom_files)
    if n < 2:
        raise ValueError("at least two DICOM slices are needed to derive the slice spacing")

    ds_first = pydicom.dcmread(dicom_files[0])
    ds_last = pydicom.dcmread(dicom_files[-1])
    position_0 = [float(v) for v in ds_first.ImagePositionPatient]
    position_n = [float(v) for v in ds_last.ImagePositionPatient]
    x_origin, y_origin, z_origin = position_0
    x_last, y_last, z_last = position_n

    # PixelSpacing is (spacing between rows, spacing between columns). Moving
    # along a row changes the column index, so the row direction cosine is
    # scaled by the column spacing, and vice versa.
    y_spacing, x_spacing = (float(v) for v in ds_first.PixelSpacing)

    img_orientation = [float(v) for v in ds_first.ImageOrientationPatient]
    row_ori = img_orientation[:3]  # usually (1, 0, 0)
    col_ori = img_orientation[3:]  # usually (0, 1, 0)

    affine_matrix = np.array([
        [row_ori[0] * x_spacing, col_ori[0] * y_spacing, (x_last - x_origin) / (n - 1), x_origin],
        [row_ori[1] * x_spacing, col_ori[1] * y_spacing, (y_last - y_origin) / (n - 1), y_origin],
        [row_ori[2] * x_spacing, col_ori[2] * y_spacing, (z_last - z_origin) / (n - 1), z_origin],
        [0, 0, 0, 1],
    ])
    return affine_matrix
通常情况下,affine_matrix
为:
[[x_spacing, 0, 0, x_origin],
[0, y_spacing, 0, y_origin],
[0, 0, z_spacing, z_origin],
[0, 0, 0, 1]]
或者使用medpy库
from medpy.metric import binary
# Average symmetric surface distance computed by medpy.
# Vseg / Vref are presumably the binary segmentation and reference volumes, and
# voxelspacing the per-axis voxel size in the same axis order as the arrays;
# confirm against the caller.
binary.assd(Vseg, Vref, voxelspacing=voxelspacing, connectivity=1)
Hausdorff的计算方法和上述ASSD计算方法类似,只不过不是求平均,而是求最大值。
from scipy import ndimage
def voxelToReal(pt, affine_matrix=None):
    """Map voxel indices (i, j, k) to real-world coordinates (x, y, z).

    Args:
        pt: Voxel coordinates whose first axis has length 3 (``[i, j, k]``)
            or 4 (homogeneous). Either a single point (1-D) or many points
            stored column-wise (shape ``(3, n)`` / ``(4, n)``).
        affine_matrix: Optional 4x4 voxel-to-world matrix. When ``None`` it
            is built from the module-level ``spacing`` and ``origin``.

    Returns:
        numpy.ndarray: Real-world coordinates, shape ``(3,)`` or ``(3, n)``.

    Raises:
        ValueError: If the first axis of ``pt`` is neither 3 nor 4 long.
    """
    if affine_matrix is None:
        affine_matrix = np.array([
            [spacing[0], 0, 0, origin[0]],
            [0, spacing[1], 0, origin[1]],
            [0, 0, spacing[2], origin[2]],  # z row uses the z spacing
            [0, 0, 0, 1],
        ], dtype=float)
    else:
        # Honour the caller's matrix instead of silently overwriting it.
        affine_matrix = np.asarray(affine_matrix, dtype=float)

    pt = np.asarray(pt, dtype=float)
    if pt.ndim == 0 or pt.shape[0] not in (3, 4):
        raise ValueError("pt must have 3 (or 4, homogeneous) entries along its first axis")
    if pt.shape[0] == 3:
        # Append the homogeneous coordinate so the translation column is applied.
        pt = np.concatenate([pt, np.ones((1,) + pt.shape[1:])], axis=0)

    # Matrix product; ``*`` would multiply elementwise and is not an affine map.
    real = affine_matrix @ pt
    return real[:3]


def distance_A_to_B(A, B):
    """Return ``(distances, indices)`` of the nearest point in ``B`` for each point in ``A``.

    Both inputs are array-likes of shape ``(n, dim)``; the outputs come from
    ``KDTree.query`` and have shape ``(len(A), 1)``.
    """
    tree_B = KDTree(np.array(B))
    distances, indices = tree_B.query(np.array(A))
    return distances, indices


def hd(seg, gt, affine_matrix=None):
    """Hausdorff distance between the surfaces of two 3-D binary masks.

    Computed like the average symmetric surface distance, except that the
    maximum of all nearest-neighbour border distances is taken instead of
    their mean.

    Args:
        seg: Predicted 3-D binary mask.
        gt: Ground-truth 3-D binary mask.
        affine_matrix: Optional 4x4 voxel-to-world matrix forwarded to
            ``voxelToReal``; ``None`` uses the module-level ``spacing`` and
            ``origin``.

    Returns:
        float: The Hausdorff distance, in the unit of the affine matrix
        (typically mm).

    Raises:
        ValueError: If either mask has no foreground voxel.
    """
    seg = np.asarray(seg, dtype=bool)
    gt = np.asarray(gt, dtype=bool)
    if not seg.any() or not gt.any():
        raise ValueError("Hausdorff distance is undefined when seg or gt has no foreground voxels")

    struct = ndimage.generate_binary_structure(3, 1)
    # Mask XOR its erosion leaves only the outermost voxel layer (the border).
    ref_border = gt ^ ndimage.binary_erosion(gt, struct, border_value=0)
    seg_border = seg ^ ndimage.binary_erosion(seg, struct, border_value=0)
    # Border voxel coordinates as (n, dim) arrays, one row per voxel.
    ref_border_voxels = np.argwhere(ref_border)
    seg_border_voxels = np.argwhere(seg_border)

    # Convert border coordinates to real-world values (usually mm).
    # voxelToReal works column-wise, hence the transposes.
    ref_border_voxels_real = voxelToReal(ref_border_voxels.T, affine_matrix).T
    seg_border_voxels_real = voxelToReal(seg_border_voxels.T, affine_matrix).T

    dist_seg_to_ref, _ = distance_A_to_B(seg_border_voxels_real, ref_border_voxels_real)
    dist_ref_to_seg, _ = distance_A_to_B(ref_border_voxels_real, seg_border_voxels_real)

    # The two arrays generally differ in length, so np.stack would fail;
    # compare the two directional maxima instead.
    hausdorff = max(dist_seg_to_ref.max(), dist_ref_to_seg.max())
    return hausdorff
而计算Hausdorff95则不是直接取最大值,而是排序后求95
分位的值:
from scipy import ndimage
def voxelToReal(pt, affine_matrix=None):
    """Map voxel indices (i, j, k) to real-world coordinates (x, y, z).

    Args:
        pt: Voxel coordinates whose first axis has length 3 (``[i, j, k]``)
            or 4 (homogeneous). Either a single point (1-D) or many points
            stored column-wise (shape ``(3, n)`` / ``(4, n)``).
        affine_matrix: Optional 4x4 voxel-to-world matrix. When ``None`` it
            is built from the module-level ``spacing`` and ``origin``.

    Returns:
        numpy.ndarray: Real-world coordinates, shape ``(3,)`` or ``(3, n)``.

    Raises:
        ValueError: If the first axis of ``pt`` is neither 3 nor 4 long.
    """
    if affine_matrix is None:
        affine_matrix = np.array([
            [spacing[0], 0, 0, origin[0]],
            [0, spacing[1], 0, origin[1]],
            [0, 0, spacing[2], origin[2]],  # z row uses the z spacing
            [0, 0, 0, 1],
        ], dtype=float)
    else:
        # Honour the caller's matrix instead of silently overwriting it.
        affine_matrix = np.asarray(affine_matrix, dtype=float)

    pt = np.asarray(pt, dtype=float)
    if pt.ndim == 0 or pt.shape[0] not in (3, 4):
        raise ValueError("pt must have 3 (or 4, homogeneous) entries along its first axis")
    if pt.shape[0] == 3:
        # Append the homogeneous coordinate so the translation column is applied.
        pt = np.concatenate([pt, np.ones((1,) + pt.shape[1:])], axis=0)

    # Matrix product; ``*`` would multiply elementwise and is not an affine map.
    real = affine_matrix @ pt
    return real[:3]


def distance_A_to_B(A, B):
    """Return ``(distances, indices)`` of the nearest point in ``B`` for each point in ``A``.

    Both inputs are array-likes of shape ``(n, dim)``; the outputs come from
    ``KDTree.query`` and have shape ``(len(A), 1)``.
    """
    tree_B = KDTree(np.array(B))
    distances, indices = tree_B.query(np.array(A))
    return distances, indices


def hd95(seg, gt, affine_matrix=None):
    """95th-percentile Hausdorff distance between two 3-D binary masks.

    Same procedure as the plain Hausdorff distance, but instead of the
    maximum of all nearest-neighbour border distances the 95th percentile
    (as computed by ``np.percentile``) is returned.

    Args:
        seg: Predicted 3-D binary mask.
        gt: Ground-truth 3-D binary mask.
        affine_matrix: Optional 4x4 voxel-to-world matrix forwarded to
            ``voxelToReal``; ``None`` uses the module-level ``spacing`` and
            ``origin``.

    Returns:
        float: The HD95, in the unit of the affine matrix (typically mm).

    Raises:
        ValueError: If either mask has no foreground voxel.
    """
    seg = np.asarray(seg, dtype=bool)
    gt = np.asarray(gt, dtype=bool)
    if not seg.any() or not gt.any():
        raise ValueError("HD95 is undefined when seg or gt has no foreground voxels")

    struct = ndimage.generate_binary_structure(3, 1)
    # Mask XOR its erosion leaves only the outermost voxel layer (the border).
    ref_border = gt ^ ndimage.binary_erosion(gt, struct, border_value=0)
    seg_border = seg ^ ndimage.binary_erosion(seg, struct, border_value=0)
    # Border voxel coordinates as (n, dim) arrays, one row per voxel.
    ref_border_voxels = np.argwhere(ref_border)
    seg_border_voxels = np.argwhere(seg_border)

    # Convert border coordinates to real-world values (usually mm).
    # voxelToReal works column-wise, hence the transposes.
    ref_border_voxels_real = voxelToReal(ref_border_voxels.T, affine_matrix).T
    seg_border_voxels_real = voxelToReal(seg_border_voxels.T, affine_matrix).T

    dist_seg_to_ref, _ = distance_A_to_B(seg_border_voxels_real, ref_border_voxels_real)
    dist_ref_to_seg, _ = distance_A_to_B(ref_border_voxels_real, seg_border_voxels_real)

    all_distances = np.concatenate((dist_seg_to_ref.ravel(), dist_ref_to_seg.ravel()))
    return np.percentile(all_distances, 95)
注意np.percentile的计算方法为
:
例如有一个数组[ 2, 3, 4, 6, 7, 11, 14]
,则2为0
分位, 14
为100
分位,中间间隔为100/6
, 因此计算60分位数公式为: $\frac{60}{100/6}=3.6$,也就是第3.6个数(下标从0开始),其值为: $6+\frac{0.6\times (7-6)}{1}=6.6$。而70分位数为: $\frac{70}{100/6}=4.2$,因此值为: $7+\frac{0.2\times(11-7)}{1}=7.8$。
或者使用medpy
计算
from medpy.metric import binary
# Hausdorff distance and its 95th-percentile variant computed by medpy.
# voxelspacing must list the voxel size per axis in the same axis order as
# Vseg / Vref.
# NOTE(review): these assignments rebind the names `hd` and `hd95`, which are
# also used for the hand-written functions earlier in this file.
hd=binary.hd(Vseg, Vref, voxelspacing=voxelspacing)
hd95=binary.hd95(Vseg, Vref, voxelspacing=voxelspacing)
注意voxelspacing
的顺序。
使用erosion
和dilation
# 3-D structuring element with connectivity 1 (face neighbours only).
struct = ndimage.generate_binary_structure(3, 1)
# XOR of the mask with its erosion keeps only the outermost voxel layer, i.e. the border.
# Assumes gt is a boolean (or 0/1 integer) array; confirm against the caller.
ref_border = gt ^ ndimage.binary_erosion(gt, struct, border_value=0)
# Coordinates of the gt border voxels as an (n, dim) array, one row per voxel.
# np.array(np.where(...)) would give the transposed (dim, n) layout instead.
ref_border_voxels = np.argwhere(ref_border)
参考: 图像分割评估指标