处理实验室数据时遇到新的需求:统计所有病人每次MRI都有哪些序列?
***本来已经实现的功能是在单个study下保存每个病人的每次MRI的序列,并生成.csv文件。原始数据文件结构如下:
python代码实现-在每个病例中生成存储MRI序列的.csv文件
1.导入包
import glob
import os
import pydicom
import re
import pandas as pd
import json
2.在单个病例号下生成csv文件,保存每个病人的每次MRI的序列
def get_Dcm_info(folderPath):
    """Write one series-summary CSV per study found under *folderPath*.

    Expected layout: ``<folderPath>/<study>/MR/<session>/<DICOM files>``.
    For every study a table is built with one column per MRI session; each
    column lists the distinct ``SeriesDescription`` values read from that
    session's DICOM files. The table is saved as
    ``<study>_Seriers_Info.csv`` inside the study's ``MR`` folder.

    Args:
        folderPath: Directory whose entries are the study folders.

    Returns:
        None. Progress and the number of processed studies are printed.
    """
    # Names in the MR folder that are not sessions: anything containing "~$"
    # (presumably Office lock files) and CSV reports written by earlier runs.
    not_a_session = re.compile(r'\~\$.*|.*\.csv')
    count_study = 0
    for every_study in os.listdir(folderPath):
        save_MRIInfo_path = os.path.join(folderPath, every_study, "MR")
        if not os.path.isdir(save_MRIInfo_path):
            # Stray files or studies without MR data must not abort the run.
            print(save_MRIInfo_path, "is not dir")
            continue
        count_study += 1

        # Filter by building a new list; removing the regex *match text* from
        # the list being walked fails whenever the match is not the full name.
        MRI_list = sorted(
            name for name in os.listdir(save_MRIInfo_path)
            if not not_a_session.search(name)
        )
        print(len(MRI_list), MRI_list)

        series_by_session = {}
        for session in MRI_list:
            tmp_MRI = os.path.join(save_MRIInfo_path, session)
            print(tmp_MRI)
            descriptions = set()
            for dicom_name in os.listdir(tmp_MRI):
                # Only header information is needed, so skip the pixel data.
                dcm = pydicom.dcmread(
                    os.path.join(tmp_MRI, dicom_name),
                    stop_before_pixels=True,
                )
                descriptions.add(dcm.SeriesDescription)
            # Sessions rarely have the same number of series; wrapping each
            # column in a Series lets pandas pad the shorter ones with NaN
            # instead of raising on a length mismatch.
            series_by_session[session] = pd.Series(
                sorted(descriptions), dtype=object
            )

        dcm_info = pd.DataFrame(series_by_session)
        dcm_info.to_csv(
            os.path.join(save_MRIInfo_path, every_study + "_Seriers_Info.csv"),
            encoding="utf_8_sig",
        )
    print('共有', count_study, '个study')
3.设置文件夹路径,调用函数执行
# Root folder that holds one sub-folder per study.
MRfolder_path = r'D:\FinishedDataTotal\part3_3.0T\2019-12-11'
if not os.path.isdir(MRfolder_path):
    # Report a wrong or missing root folder instead of calling the function.
    print(MRfolder_path, "is not dir")
else:
    get_Dcm_info(MRfolder_path)
4.生成的csv文件和结果
结果说明:00225718为病例号,20140903、20141124为不同时期病人的MRI影像文件夹,生成的csv表格中有不同的序列有t1、t2、DWI、DCE增强序列等。
重点来了:python代码实现-统计所有病人的MRI序列,并最终保存在一个json文件中
5.函数定义
def get_dcmSeries_info(folderPath, study_ids=None,
                       output_path="D:\\FinishedDataTotal\\Series_result.json"):
    """Collect the MRI series of selected studies into one JSON file.

    Expected layout: ``<folderPath>/<study>/MR/<session>/<DICOM files>``.
    For every selected study the distinct ``SeriesDescription`` values of each
    session are gathered. The result is a list with one
    ``{study: [{session: [series, ...]}, ...]}`` entry per study.

    Args:
        folderPath: Directory whose entries are the study folders.
        study_ids: Iterable of study folder names to include. When omitted,
            the module-level ``final_study`` list is used.
        output_path: JSON file to write. Entries already stored in it are
            kept and the new ones are added after them.

    Returns:
        None. Progress and the number of processed studies are printed.

    Raises:
        json.JSONDecodeError: If *output_path* exists but is not valid JSON.
    """
    if study_ids is None:
        # Backward compatible: fall back to the predefined list of studies.
        study_ids = final_study
    wanted = set(study_ids)

    # Names in the MR folder that are not sessions: anything containing "~$"
    # (presumably Office lock files) and CSV reports written by earlier runs.
    not_a_session = re.compile(r'\~\$.*|.*\.csv')
    count_study = 0
    total_List = []
    for every_study in sorted(os.listdir(folderPath)):
        if every_study not in wanted:
            continue
        mr_dir = os.path.join(folderPath, every_study, "MR")
        if not os.path.isdir(mr_dir):
            # A selected study without MR data must not abort the whole run.
            print(mr_dir, "is not dir")
            continue
        count_study += 1

        # Filter by building a new list; removing the regex *match text* from
        # the list being walked fails whenever the match is not the full name.
        MRI_list = sorted(
            name for name in os.listdir(mr_dir)
            if not not_a_session.search(name)
        )
        print(every_study, len(MRI_list), MRI_list)

        tmp_allSeries_List = []
        for session in MRI_list:
            tmp_MRI = os.path.join(mr_dir, session)
            descriptions = set()
            for dicom_name in os.listdir(tmp_MRI):
                # Only header information is needed, so skip the pixel data.
                dcm = pydicom.dcmread(
                    os.path.join(tmp_MRI, dicom_name),
                    stop_before_pixels=True,
                )
                descriptions.add(dcm.SeriesDescription)
            # Sorted so that repeated runs produce identical output.
            tmp_allSeries_List.append({session: sorted(descriptions)})
        total_List.append({every_study: tmp_allSeries_List})

    # Appending a second JSON document to the file would make it unparsable,
    # so earlier results are loaded, extended and written back as one list.
    previous = []
    if os.path.isfile(output_path) and os.path.getsize(output_path) > 0:
        with open(output_path, encoding="utf-8") as f:
            previous = json.load(f)
        if not isinstance(previous, list):
            previous = [previous]
    with open(output_path, 'w', encoding="utf-8") as f:
        json.dump(previous + total_List, f, indent=4)
    print('共有', count_study, '个study')
5.定义文件路径并调用函数
# Root folder that holds one sub-folder per study.
MRfolder_path = r'D:\FinishedDataTotal\part3_3.0T\2019-12-11'
if not os.path.isdir(MRfolder_path):
    # Report a wrong or missing root folder instead of calling the function.
    print(MRfolder_path, "is not dir")
else:
    get_dcmSeries_info(MRfolder_path)
6.生成的json文件和结果
结果说明:多个病人的多次MRI序列全部存储在json文件中,00216624为病例号,20120917、20120306为两次MRI,红色方框内的是对应MRI的序列。
为什么选择json文件:json是半结构化的数据形式,层次清晰,便于程序写入和读取。
说明:本文为原创文章,转载或引用请注明网址;欢迎一起学习交流,有不正确之处欢迎批评指正[email protected]