影像组学特征提取 — 原始图像为dicom格式,mask图像为nrrd格式

原始图像为dicom格式,mask图像为nrrd格式

1. 读取 dicom 和 nrrd

2. 设置 logger 和 setting

3. 初始化特征提取器,设置图像空间和特征类型

4. 特征提取和保存显示

单个subject影像组学特征提取

from __future__ import print_function
import logging
import SimpleITK as sitk
import radiomics
from radiomics import featureextractor
import six

# --- Load the image and the mask ---------------------------------------------
# The image is a DICOM series stored in a single directory; GDCM resolves the
# ordered list of slice files that make up the series.
readerC = sitk.ImageSeriesReader()
dicom_names = readerC.GetGDCMSeriesFileNames('/project/patient/000000')
readerC.SetFileNames(dicom_names)
# Ask the reader to load private tags and keep the per-slice metadata.
readerC.LoadPrivateTagsOn()
readerC.MetaDataDictionaryArrayUpdateOn()
imageName = readerC.Execute()

# The mask is a single NRRD volume.
maskName = sitk.ReadImage('/project/mask/000000/mask.nrrd')

# --- Logging -----------------------------------------------------------------
# Send everything PyRadiomics logs (DEBUG and above) to a file that is
# truncated on every run.
formatter = logging.Formatter("%(levelname)s:%(name)s: %(message)s")
handler = logging.FileHandler(filename='testLog.txt', mode='w')
handler.setFormatter(formatter)
logger = radiomics.logger
logger.setLevel(logging.DEBUG)
logger.addHandler(handler)

# --- Extraction settings -----------------------------------------------------
# resampledPixelSpacing=None leaves the voxel spacing untouched; a value such
# as [3, 3, 3] would resample to 3x3x3 mm voxels.
# correctMask / geometryTolerance are set so that very small geometry
# differences between the DICOM image and the NRRD mask do not abort the
# extraction (see the PyRadiomics settings documentation for exact semantics).
settings = {
    'binWidth': 25,
    'resampledPixelSpacing': None,
    'interpolator': sitk.sitkBSpline,
    'correctMask': True,
    'geometryTolerance': 1,
}

# --- Feature extractor -------------------------------------------------------
extractor = featureextractor.RadiomicsFeatureExtractor(**settings)

# Image types: enable every available one. To pick a subset instead, use e.g.
#   extractor.enableImageTypes(Original={}, LoG={}, Wavelet={})
extractor.enableAllImageTypes()

# Feature classes: explicitly enable first-order statistics. Alternatives:
#   extractor.disableAllFeatures()
#   extractor.enableFeaturesByName(firstorder=['Mean', 'Skewness'])
#   extractor.enableAllFeatures()
extractor.enableFeatureClassByName('firstorder')

# --- Run and report ----------------------------------------------------------
print("Calculating features")
featureVector = extractor.execute(imageName, maskName)

# Keep only real features; keys starting with 'diagnostics' are skipped.
header = [key for key in featureVector if not key.startswith('diagnostics')]
radiomicsList = [str(featureVector[key]) for key in header]
print(header)
print(radiomicsList)

# Dump every entry of the result, diagnostics included.
for featureName, featureValue in featureVector.items():
    print("Computed %s: %s" % (featureName, featureValue))

从excel中读取subject名字,进行批量处理

问题一:由于一个 subject 存在多个 mask,所以 excel 中的 subject ID 一列存在重复项

解决方案:读取到一个新的 subject ID 之后,遍历该文件夹内的所有 mask,进行组学特征提取;若当前读取的subject ID为已处理过的重复项,则跳过

问题二:原始图像为 dicom,mask为 nrrd ,存在 dicom 尺寸和 mask 尺寸不匹配的情况

解决方案:读取 dicom 序列文件,计算其层数和尺寸,读取nrrd文件,计算其层数和尺寸,如果两者相等,则计算组学特征并保存,如果不相等,则跳过

问题三:影像组学计算要求 image 和 mask 的空间位置严格匹配,但由于保存方式的原因,dicom 和 nrrd 的坐标原点可能存在极微小的差异,例如 2.99999 和 3.0

解决方案:在 settings 中设置 correctMask 和 geometryTolerance 选项,进行位置自动匹配调整

问题四:需要将 subject ID,mask name,和计算出来的组学特征写入 csv 文件,并在第一行保存标题

解决方案:要考虑到问题二中的情况,尺寸不匹配时只写入 subject ID 和 mask name;要设置一个计数器 counter,仅当 counter = 0(尚未写入任何特征行)时才写入标题;要读取 mask 的文件名,写入 mask name 一列

import numpy as np
import xlrd
import os
import nrrd
import six
import csv
import SimpleITK as sitk
from radiomics import featureextractor


def count_file_number(filepath, filetype):
    """Find every file under *filepath* whose extension equals *filetype*.

    The directory tree is searched recursively.

    Args:
        filepath: Root directory to search.
        filetype: Extension to match, including the leading dot
            (for example ``'.nrrd'``). The comparison is case-sensitive.

    Returns:
        A ``(count, filenames)`` tuple. ``filenames`` is the sorted list of
        matching files expressed relative to *filepath*, so each entry can be
        passed to ``os.path.join(filepath, entry)``; files directly inside
        *filepath* appear as their bare name. ``count`` is ``len(filenames)``.
        A missing or empty directory yields ``(0, [])``.
    """
    matches = []
    for root, _dirnames, filenames in os.walk(filepath):
        for filename in filenames:
            if os.path.splitext(filename)[1] == filetype:
                # Store the path relative to the search root so that files
                # found in sub-directories remain addressable by the caller.
                matches.append(
                    os.path.relpath(os.path.join(root, filename), filepath))
    # os.walk order depends on the file system; sort for reproducible output.
    matches.sort()
    return len(matches), matches


def dcmseriesread(dicompath):
    """Read the DICOM series stored in *dicompath* as one SimpleITK image.

    Args:
        dicompath: Directory containing the DICOM slice files of the series.

    Returns:
        The image produced by ``sitk.ImageSeriesReader.Execute()``.
    """
    series_reader = sitk.ImageSeriesReader()
    # Let GDCM work out which files in the directory belong to the series.
    slice_files = series_reader.GetGDCMSeriesFileNames(dicompath)
    series_reader.SetFileNames(slice_files)
    # Load private tags and keep the per-slice metadata.
    series_reader.LoadPrivateTagsOn()
    series_reader.MetaDataDictionaryArrayUpdateOn()
    return series_reader.Execute()


def radiomics_feature_extractor(image, mask):
    """Run PyRadiomics on one image/mask pair and return names and values.

    Args:
        image: Image passed to ``RadiomicsFeatureExtractor.execute``.
        mask: Mask (region of interest) passed to the same call.

    Returns:
        A ``(header, radiomicsList)`` tuple of two equally long lists:
        ``header`` holds the feature names and ``radiomicsList`` the matching
        values converted with ``str``. Entries whose key starts with
        ``'diagnostics'`` are left out.
    """
    # resampledPixelSpacing=None keeps the original spacing; [3, 3, 3] would
    # resample to 3x3x3 mm voxels. correctMask / geometryTolerance are set so
    # that tiny image/mask geometry differences do not abort the extraction.
    settings = {
        'binWidth': 25,
        'resampledPixelSpacing': None,
        'interpolator': sitk.sitkBSpline,
        'correctMask': True,
        'geometryTolerance': 1,
    }
    extractor = featureextractor.RadiomicsFeatureExtractor(**settings)
    extractor.enableAllImageTypes()
    # Feature classes are left as configured by the constructor; to restrict
    # them, call e.g. extractor.enableFeatureClassByName('firstorder') here.
    featureVector = extractor.execute(image, mask)

    # Collect (name, value) pairs in result order, dropping diagnostics.
    selected = [
        (name, str(value))
        for name, value in six.iteritems(featureVector)
        if not name.startswith('diagnostics')
    ]
    header = [name for name, _ in selected]
    radiomicsList = [value for _, value in selected]
    return header, radiomicsList


# Batch driver: for every subject listed in the index workbook, extract
# radiomics features for each of its masks and append one CSV row per mask.
path = '/project/'
outputCsv = os.path.join('/Desktop/data_files', 'radiomics_feature_all.csv')


def _append_csv_rows(csvPath, rows):
    """Append *rows* (a list of lists) to the CSV file at *csvPath*."""
    with open(csvPath, 'a', newline='') as outcsv:
        csv.writer(outcsv).writerows(rows)


# NOTE(review): xlrd 2.0 and later only read .xls files; opening 'index.xlsx'
# needs xlrd < 2.0 - confirm the installed version.
SKindex = xlrd.open_workbook(os.path.join(path, 'index.xlsx')).sheets()[0]
# Column 0 holds the subject IDs; the first 12 rows are skipped (presumably
# header rows - verify against the workbook).
subIDtemp = np.array(SKindex.col_values(0))[12:]
# Drop the last two characters of each entry (presumably the '.0' left by
# numeric cells converted to text - verify) and left-pad to six digits so the
# ID matches the folder names.
subID = [x[:-2].zfill(6) for x in subIDtemp]
# print(subID)

counter = 0          # number of feature rows written so far
processedIDs = set()  # subject IDs already handled, adjacent or not
pendingRows = []     # mismatch rows held back until the header is written

for currentID in subID:
    # A subject appears once per mask in the index; all of its masks are
    # handled on first sight, so any later occurrence is skipped.
    if currentID in processedIDs:
        continue
    processedIDs.add(currentID)

    patientPath = os.path.join(path, 'patient', currentID)
    maskPath = os.path.join(path, 'mask', currentID)
    dicomSlices, dicomNames = count_file_number(patientPath, '.dcm')
    originalImage = dcmseriesread(patientPath)
    maskNumber, maskNames = count_file_number(maskPath, '.nrrd')

    for maskName in maskNames:
        # Only NRRD files are masks; ignore anything else in the listing.
        if os.path.splitext(maskName)[1] != '.nrrd':
            continue
        print([str(currentID), maskName, 'processing....'])
        maskFile = os.path.join(maskPath, maskName)
        maskMatrix, options = nrrd.read(maskFile)
        # The last array axis is taken as the slice axis.
        maskSlices = maskMatrix.shape[-1]

        if maskSlices == dicomSlices:
            maskImage = sitk.ReadImage(maskFile)
            header, radiomicsList = radiomics_feature_extractor(originalImage, maskImage)
            rows = []
            if counter == 0:
                # The column names are only known after the first successful
                # extraction: write the header first, then any mismatch rows
                # that were waiting for it.
                rows.append(['patientID', 'maskName'] + header)
                rows.extend(pendingRows)
                pendingRows = []
            rows.append([str(currentID), maskName] + radiomicsList)
            _append_csv_rows(outputCsv, rows)
            counter += 1
        elif counter == 0:
            # Slice counts differ and no header exists yet: hold the row back
            # so the header still ends up above it.
            pendingRows.append([str(currentID), maskName])
        else:
            # Slice counts differ: record the subject and mask without features.
            _append_csv_rows(outputCsv, [[str(currentID), maskName]])

# No extraction succeeded at all: still record the mismatches, under a header
# that names the two columns that are present.
if pendingRows:
    _append_csv_rows(outputCsv, [['patientID', 'maskName']] + pendingRows)

你可能感兴趣的:(Python应用,python)