# common utils
import os
import math
import itertools
import warnings
import numpy as np
import pandas as pd
from collections import Counter
import multiprocessing
import pydicom # 用于读取dcm文件
import glob
import scipy.misc
import functools
# pytorch utils
import torch
import torchvision
from torch.utils.data import DataLoader,Dataset
import torch.nn as nn
import cv2
from torchvision import models
import torch.nn.functional as F
import pytorch_lightning as pl
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import train_test_split
# others
import matplotlib.pyplot as plt
from PIL import Image
# IPython magic: render matplotlib figures inline. This line (and the %config
# line below) is only valid when the file is run under IPython/Jupyter.
%matplotlib inline
# Default figure size, in inches, for plots that do not pass their own figsize.
plt.rcParams['figure.figsize'] = (3.5, 2.5)
# IPython magic: emit inline figures as JPEG.
%config InlineBackend.figure_format = 'jpg'
# Silence every warning for the rest of the session.
warnings.filterwarnings("ignore")
# NOTE(review): `end` is not referenced anywhere in the visible part of this file.
end = ''
# Root directory that is searched for the .dcm files.
DataPath = '/media/yuansh/14THHD/mimic_cxr/dataset/'
# Directory holding derived artifacts such as dcm_list.npy.
OutPath = '/media/yuansh/14THHD/mimic_cxr/out_path/'
# One-off step that produced dcm_list.npy (kept for reference, disabled):
# os.listdir(DataPath + '/p15/p15/p15610631/s59150383')
#
# # Collect the path of every sample image and save the list
# dcm_list = glob.glob(DataPath + '/p*/p*/p*/s*/*.dcm')
# dcm_list = np.array(dcm_list)
# np.save(OutPath+'dcm_list.npy',dcm_list)

# Load the saved list of DICOM file paths.
dcm_list = list(np.load(OutPath + 'dcm_list.npy'))

# Target JPG path for each DICOM file. Only the file extension is swapped;
# a plain str.replace('dcm', 'jpg') would also rewrite any "dcm" that happens
# to occur inside a directory or file name.
jpg_list = [os.path.splitext(i)[0] + '.jpg' for i in dcm_list]

# File name without directory and without extension, used as the sample id.
file_id = [os.path.splitext(os.path.basename(i))[0] for i in dcm_list]
from skimage.io import imread

# Preview the first eight files in a 2x4 grid, each panel titled with its
# index in dcm_list.
# NOTE(review): this relies on skimage.io.imread being able to decode DICOM
# files in the current environment - confirm, or read the pixels via pydicom.
f, ax = plt.subplots(2, 4, figsize=(12, 4))
for i in range(8):
    img = imread(dcm_list[i])
    panel = ax[i // 4, i % 4]  # row-major position of image i in the grid
    panel.imshow(img, cmap='gray')
    panel.set_title(str(i))
    panel.axis('off')
    panel.set_aspect('auto')
plt.show()
# [Figure: output_5_0.jpg - the image link (img-iOeWzfgL-1619081767426) failed to transfer from the source site; save the image and upload it directly to restore it.]
# Header fields copied into the info dictionary:
# patient ID, study ID, study date, study time.
_HEADER_FIELDS = ('PatientID', 'StudyID', 'StudyDate', 'StudyTime')
# Placeholder stored when a header field is absent from the file.
_MISSING_VALUE = -999


def _scale_to_uint8(pixels):
    """Linearly rescale ``pixels`` so min..max maps onto 0..255, as uint8."""
    if pixels.dtype == np.uint8:
        return pixels
    # Work in Python floats so max - min cannot overflow a narrow integer type.
    lowest = float(pixels.min())
    span = float(pixels.max()) - lowest
    if span == 0:
        span = 1.0  # constant image: avoid dividing by zero
    scaled = (pixels.astype(np.float64) - lowest) * (255.0 / span)
    return (scaled.clip(0, 255) + 0.5).astype(np.uint8)


def getDicomInfoAndSaveJpg(filename, file_id, out_path):
    """Read one DICOM file, save its pixels as an image, and return header info.

    Args:
        filename: path of the DICOM file to read.
        file_id: identifier of the sample; returned unchanged so results can
            be matched to their inputs.
        out_path: path the image is written to.

    Returns:
        A ``(file_id, info)`` tuple where ``info`` maps each of
        ``PatientID``, ``StudyID``, ``StudyDate`` and ``StudyTime`` to the
        value found in the file, or to -999 when the field is missing.

    Notes:
        * ``scipy.misc.imsave`` is no longer available in current SciPy, so
          the pixel data is rescaled to 8 bit here and written with Pillow.
          The min/max rescaling is intended to match what ``imsave`` did for
          2-D arrays - NOTE(review): compare against previously written files
          if exact equality matters.
        * No ``functools.lru_cache``: every call handles a different file, so
          a one-entry cache would never be hit.
    """
    dcm = pydicom.dcmread(filename)

    # Collect the header fields, falling back to the placeholder when absent.
    info = {
        field: getattr(dcm, field, _MISSING_VALUE) for field in _HEADER_FIELDS
    }

    # Extract the pixel matrix and write it to out_path.
    img = _scale_to_uint8(dcm.pixel_array)
    Image.fromarray(img).save(out_path)
    return file_id, info
# One (filename, file_id, out_path) tuple per image, in the parameter order of
# getDicomInfoAndSaveJpg.
# NOTE(review): args_list was referenced here without any visible definition;
# this construction is inferred from the worker's signature - confirm that
# each JPG should be written to the matching jpg_list entry.
args_list = list(zip(dcm_list, file_id, jpg_list))

# Process the files in parallel with a pool of worker processes.
p = multiprocessing.Pool(28)
try:
    infos = p.starmap(getDicomInfoAndSaveJpg, args_list)
finally:
    # Always shut the pool down, even when a worker raised.
    p.close()
    p.join()
# Optional step (disabled): copy every JPG into one flat directory.
# if not os.path.exists('/media/yuansh/1THHD/Yuansh_Share/mimic_cxr/jpg'):
#     os.makedirs('/media/yuansh/1THHD/Yuansh_Share/mimic_cxr/jpg')
# import shutil
# args_list = [
#     ('/media/yuansh/1THHD/Yuansh_Share/mimic_cxr/jpg/' + file_id[i] + '.jpg',
#      jpg_list[i]) for i in range(len(jpg_list))
# ]
#
# def CopyJPG(to_file, jpg_list):
#     shutil.copy(jpg_list, to_file)
#
#
# p = multiprocessing.Pool(28)
# p.starmap(CopyJPG, args_list)
# p.close()  # remember to close the pool
# p.join()
# Each entry of infos is a (file_id, info_dict) pair; split them apart.
Name_info = [i[0] for i in infos]
targ_info = [i[1] for i in infos]

# Pull every header field into its own column.
PatientID = [i['PatientID'] for i in targ_info]
StudyID = [i['StudyID'] for i in targ_info]
StudyDate = [i['StudyDate'] for i in targ_info]
StudyTime = [i['StudyTime'] for i in targ_info]

# One row per file, indexed by file id.
df = pd.DataFrame(
    {
        'PatientID': PatientID,
        'StudyID': StudyID,
        'StudyDate': StudyDate,
        'StudyTime': StudyTime,
    },
    index=Name_info,
)
df.to_csv('/media/yuansh/1THHD/Yuansh_Share/mimic_cxr/infos/Sample_infos.csv')