Digital Imaging and Communications in Medicine (DICOM)是医学图像的标准格式。
1.加载第三方包
IS_LOCAL = False
import numpy as np
import pandas as pd
from skimage.io import imread
import seaborn as sns
import matplotlib.pyplot as plt
from glob import glob
if(IS_LOCAL):
import pydicom as dicom
else:
import dicom
import os
2.参数设置和文件路径
# Pick the dataset root according to IS_LOCAL, then list what it contains.
PATH = "../input/siim-medical-image/" if IS_LOCAL else "../input/"
print(os.listdir(PATH))
3.读取数据
# Load the overview table stored next to the images and report its size.
data_df = pd.read_csv(os.path.join(PATH, "overview.csv"))
print("CT Medical images - rows:", data_df.shape[0], " columns:", data_df.shape[1])
4.读取TIFF格式数据
# Count the entries of the TIFF folder, then gather the *.tif paths in a table.
print(
    "Number of TIFF images:",
    len(os.listdir(os.path.join(PATH, "tiff_images"))),
)
tiff_data = pd.DataFrame(
    [{'path': tif_path} for tif_path in glob(PATH + 'tiff_images/*.tif')]
)
5.处理TIFF数据
def process_data(path):
    """Build a table of image files whose attributes are parsed from their names.

    The glob pattern ``PATH + path`` is expanded and each matching file name
    is split on ``_``. Field 1 becomes ``ID``, field 3 ``Age``, field 5 the
    ``Contrast`` flag, and field 6 (minus its extension) ``Modality``. An
    illustrative name of that shape: ``ID_0000_AGE_0060_CONTRAST_1_CT.tif``.

    Args:
        path: Glob pattern relative to the module-level ``PATH``,
            e.g. ``'tiff_images/*.tif'``.

    Returns:
        A DataFrame with the columns ``path``, ``file``, ``ID`` (str),
        ``Age`` (int), ``Contrast`` (bool) and ``Modality`` (str), one row
        per matching file, ordered by path. When nothing matches, an empty
        DataFrame that still carries those columns.

    Raises:
        IndexError, ValueError: If a matching file name does not follow the
            underscore-separated layout described above.
    """
    columns = ['path', 'file', 'ID', 'Age', 'Contrast', 'Modality']
    records = []
    # glob() order depends on the filesystem; sorting keeps the table (and
    # anything that takes "the first N rows") reproducible between runs.
    for filepath in sorted(glob(PATH + path)):
        filename = os.path.basename(filepath)
        # Split once and reuse the fields for every derived column.
        fields = filename.split('_')
        records.append({
            'path': filepath,
            'file': filename,
            'ID': str(fields[1]),
            'Age': int(fields[3]),
            'Contrast': bool(int(fields[5])),
            'Modality': str(fields[6].split('.')[-2]),
        })
    # Passing the column list explicitly keeps the schema intact even when
    # no file matched, so callers can index the columns without a KeyError.
    return pd.DataFrame(records, columns=columns)
# Parse every TIFF file name into a row of tiff_data.
tiff_data = process_data(path='tiff_images/*.tif')
6.检查数据集信息
# Preview the first ten parsed TIFF rows (a notebook renders this as a table).
tiff_data.head(n=10)
7.读取DICOM数据集
# Count the entries of the DICOM folder.
print(
    "Number of DICOM files:",
    len(os.listdir(os.path.join(PATH, "dicom_dir"))),
)
8.处理DICOM数据
# Parse every DICOM file name into a row of dicom_data.
dicom_data = process_data(path='dicom_dir/*.dcm')
9.检查信息
# Preview the first ten parsed DICOM rows (a notebook renders this as a table).
dicom_data.head(n=10)
10.检查数据的一致性
def countplot_comparison(feature):
    """Plot the value counts of one column for the three tables side by side.

    Draws one count plot each for the module-level ``data_df``,
    ``tiff_data`` and ``dicom_data`` so their distributions can be compared
    visually, then shows the figure.

    Args:
        feature: Name of a column present in all three tables.
    """
    _, axes = plt.subplots(1, 3, figsize=(16, 4))
    panels = (
        (data_df, "Overview data"),
        (tiff_data, "Tiff files data"),
        (dicom_data, "Dicom files data"),
    )
    for ax, (frame, title) in zip(axes, panels):
        # Pass the column as x=...: current seaborn releases interpret the
        # only positional argument as `data`, not as the variable to count.
        sns.countplot(x=frame[feature], ax=ax)
        ax.set_title(title)
    plt.show()
11.显示TIFF图像
def show_images(data, dim=16, imtype='TIFF'):
    """Display the first ``dim`` images of ``data`` in a four-column grid.

    Args:
        data: DataFrame with a ``path`` column plus the ``Modality``,
            ``Age``, ``ID`` and ``Contrast`` columns used in the titles.
        dim: Maximum number of rows of ``data`` to display.
        imtype: ``'TIFF'`` to load each path with ``imread`` or ``'DICOM'``
            to load it with ``dicom.read_file`` and show its pixel array.

    Raises:
        ValueError: If ``imtype`` is neither ``'TIFF'`` nor ``'DICOM'``.
    """
    if imtype not in ('TIFF', 'DICOM'):
        raise ValueError(
            "imtype must be 'TIFF' or 'DICOM', got {!r}".format(imtype))

    # 'records' yields one dict per row and, unlike transposing and keying by
    # index label, does not merge rows that share an index label.
    img_data = data[:dim].to_dict('records')

    # Size the grid from the number of images actually selected; a fixed 4x4
    # grid overflowed as soon as more than 16 images were requested.
    n_cols = 4
    n_rows = max(1, (len(img_data) + n_cols - 1) // n_cols)
    # 16 images still give the 4x4, 16x20-inch figure used previously.
    _, axes = plt.subplots(
        n_rows, n_cols, figsize=(16, 5 * n_rows), squeeze=False)
    panels = axes.ravel()

    for panel, data_row in zip(panels, img_data):
        if imtype == 'TIFF':
            panel.matshow(imread(data_row['path']), cmap='gray')
        else:
            dataset = dicom.read_file(data_row['path'])
            panel.imshow(dataset.pixel_array, cmap=plt.cm.bone)
        panel.axis('off')
        panel.set_title('Modality: {Modality} Age: {Age}\nSlice: {ID} Contrast: {Contrast}'.format(**data_row))

    # Blank the grid cells that received no image.
    for panel in panels[len(img_data):]:
        panel.axis('off')
    plt.show()
应用函数,显示图像
# Render the first 16 TIFF images.
show_images(tiff_data, dim=16, imtype='TIFF')
12.显示DICOM数据
# extract voxel data
def extract_voxel_data(list_of_dicom_files):
    """Read the given DICOM files and combine their slices into one array.

    Args:
        list_of_dicom_files: Iterable of paths readable by
            ``dicom.read_file``.

    Returns:
        The voxel array, i.e. the first of the two values returned by
        ``dicom_numpy.combine_slices``; the second value is discarded.

    Raises:
        dicom_numpy.DicomImportException: Propagated unchanged when the
            datasets are not valid DICOM data for combining.
    """
    # The file never imports dicom_numpy at module level, so import it here;
    # this also keeps the dependency optional for code paths that never
    # call this function.
    import dicom_numpy

    datasets = [dicom.read_file(f) for f in list_of_dicom_files]
    voxel_ndarray, _ = dicom_numpy.combine_slices(datasets)
    return voxel_ndarray
13.查看DICOM的更多信息
# Read the first file listed in dicom_data and display its whole dataset
# (the bare expression on the last line is what a notebook renders).
dicom_file_path = dicom_data['path'].iloc[0]
dicom_file_dataset = dicom.read_file(dicom_file_path)
dicom_file_dataset
输出中包含更多的信息:医院、病人的年龄、性别、姓名、ID以及诊断方式等
14.我们可以修改可视化函数,来显示参数
def show_dicom_images(data):
    """Show up to 16 DICOM images in a 4x4 grid, titled from the file itself.

    Modality and patient age in each title are read from the DICOM dataset;
    the slice ID and contrast flag come from the ``ID`` and ``Contrast``
    columns of ``data``.

    Args:
        data: DataFrame with ``path``, ``ID`` and ``Contrast`` columns.
    """
    rows = list(data[:16].T.to_dict().values())
    _, grid = plt.subplots(4, 4, figsize=(16, 20))
    caption = 'Modality: {} Age: {}\nSlice: {} Contrast: {}'
    # grid.flat walks the 4x4 axes row by row, matching [i // 4, i % 4].
    for panel, row in zip(grid.flat, rows):
        dataset = dicom.read_file(row['path'])
        modality = dataset.Modality
        age = dataset.PatientAge
        panel.imshow(dataset.pixel_array, cmap=plt.cm.bone)
        panel.axis('off')
        panel.set_title(
            caption.format(modality, age, row['ID'], row['Contrast']))
    plt.show()
参考资料:https://www.kaggle.com/gpreda/visualize-ct-dicom-data