1、【ADNI】数据预处理(1)SPM,CAT12
2、【ADNI】数据预处理(2)获取 subject slices
3、【ADNI】数据预处理(3)CNNs
4、【ADNI】数据预处理(4)Get top k slices according to CNNs
5、【ADNI】数据预处理(5)Get top k slices (pMCI_sMCI) according to CNNs
6、【ADNI】数据预处理(6)ADNI_slice_dataloader ||| show image
Author: Chaoqun Hou
where: hcq_research
[ hcq@research:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$ ]
date: 20180407
#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import time
import datetime
import shutil
import random
from hcq_lib import *
# Fractions of the subjects (not of the slices) assigned to each split.
# partition_slice_train_val_test() reads train_percentage and val_percentage;
# the test split receives whatever subjects remain, so test_percentage is not
# referenced by the code visible in this file and only documents the intent
# (0.75 + 0.2 + 0.05 = 1.0).
train_percentage = 0.75
val_percentage = 0.2
test_percentage = 0.05
## AD/NC (alternative configuration, currently disabled)
# root_txt_path = "/home/hcq/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/AD_NC/AD_NC_GM_subject_id"
# dataset_path = os.path.join(root_txt_path, "AD_NC_GM_subject_id_20180403")
## pMCI/sMCI (active configuration)
# Directory holding the slice-path listings, the top-k slice list and the log folder.
root_txt_path = "/home/hcq/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset"
# Root of the source slice images; the per-class folders are joined onto it in __main__.
dataset_path = os.path.join(root_txt_path, "sMCI_pMCI_GM_subject_id_20180405")
# Text file read by move_slice(); the part of each line before '|||' is used as a slice id.
top_k_silces_id_txt = os.path.join(root_txt_path, "top_k_slices.txt")
# Log file passed to every hcq_write() call.
log_path = os.path.join(root_txt_path, "random_log", "random_log.txt")
# Destination: move_slice() copies into <root_new_path>/<dataset_name>/<split>/<label>/.
root_new_path = "/home/hcq/alzheimer_disease/ADNI_825/experiments_FineTunning/"
dataset_name = "majority_select_slices_folder_01_pMCI_sMCI"
def partition_slice_train_val_test(silce_txt, dataset_dir, label):
    """Randomly split the subjects of one class and copy their top-k slices.

    Rules:
      old_name = dataset_dir + subject_id + axis folder + top_k_slice_id
      subject_id: selected at random
      top_k_slice_id: taken from top_k_silces_id_txt (see move_slice)

    Args:
        silce_txt: path of a text file with one slice-folder path per line;
            the 4th '/'-separated component of each line is used as the
            subject id (e.g. "./<dataset>/<class>/<subject_id>/YSlice").
        dataset_dir: directory containing one folder per subject id.
        label: class name; used in the log and as the destination sub-folder.

    Raises:
        IndexError: if a non-blank line has fewer than four '/'-separated
            components.
    """
    ## step1: collect the subject ids and partition them into train/val/test
    ## with the ratio (7.5 : 2 : 0.5)
    ## added by hcq 20180404

    # Unique subject ids in first-seen order. The set makes the duplicate
    # check O(1); the list preserves the order the shuffle indexes into.
    subject_id_list = []
    seen_subject_ids = set()
    with open(silce_txt, "r") as silce_txt_list:
        for item in silce_txt_list:
            item = item.replace("\n", "").replace("\r", "")
            if not item.strip():
                # A blank line carries no path; indexing it would raise.
                continue
            subject_id = item.split('/')[3]
            if subject_id not in seen_subject_ids:
                seen_subject_ids.add(subject_id)
                subject_id_list.append(subject_id)

    len_slice_list = len(subject_id_list)
    # Random permutation of the indices; it is logged so that a split can be
    # reconstructed later from the log file.
    rondom_list = random.sample(range(0, len_slice_list), len_slice_list)
    hcq_write(log_path, True, True, "rondom_list [{}]".format(label))
    hcq_write(log_path, False, False, rondom_list)

    # The first int(n * train_percentage) shuffled subjects go to train, the
    # next int(n * val_percentage) to validation, and the remainder to test.
    train_quota = int(len_slice_list * train_percentage)
    val_quota = int(len_slice_list * val_percentage)
    shuffled_subject_ids = [subject_id_list[index] for index in rondom_list]
    train_subject_id = shuffled_subject_ids[:train_quota]
    val_subject_id = shuffled_subject_ids[train_quota:train_quota + val_quota]
    test_subject_id = shuffled_subject_ids[train_quota + val_quota:]

    # Log the sizes actually obtained rather than the quotas.
    num_train = len(train_subject_id)
    num_val = len(val_subject_id)
    num_test = len(test_subject_id)
    hcq_write(log_path, True, True, "[len_slice_list] {}".format(len_slice_list))
    hcq_write(log_path, True, True, "[num_train] {}".format(num_train))
    hcq_write(log_path, True, True, "[num_val] {}".format(num_val))
    hcq_write(log_path, True, True, "[num_test] {} \n".format(num_test))

    ### step2: according to top_k_silces_id_txt, copy the top k slices of
    ### every subject into its split folder
    ### added by hcq 20180404
    move_slice(train_subject_id, dataset_dir, "train", label)
    move_slice(val_subject_id, dataset_dir, "validation", label)
    move_slice(test_subject_id, dataset_dir, "test", label)
def move_slice(subject_id_folder_list, dataset_dir, folder_name, label):
    """Copy the listed top-k slices of each subject into one split folder.

    Each slice is copied from
        <dataset_dir>/<subject_id>/<X|Y|Z>Slice/<slice_id>.jpg
    to
        <root_new_path>/<dataset_name>/<folder_name>/<label>/<subject_id>_<slice_id>.jpg
    and every destination path is written to the log.

    Args:
        subject_id_folder_list: subject ids whose slices are copied.
        dataset_dir: directory containing one folder per subject id.
        folder_name: split folder name (e.g. "train", "validation", "test").
        label: class sub-folder name inside the split folder.

    Raises:
        ValueError: if a slice id in top_k_silces_id_txt contains none of
            "X", "Y" or "Z", so its source folder cannot be determined.
    """
    new_name_path = os.path.join(root_new_path, dataset_name, folder_name, label)
    hcq_create_dir(new_name_path)

    if not subject_id_folder_list:
        # Nothing to copy; the (empty) destination folder is still created.
        return

    # The slice list is identical for every subject, so parse it once here
    # instead of re-opening the file for each subject.
    top_k_slices = []
    with open(top_k_silces_id_txt, "r") as top_k_silces_id_txt_list:
        for item in top_k_silces_id_txt_list:
            item = item.replace("\n", "").replace("\r", "")
            if not item.strip():
                # Blank lines carry no slice id.
                continue
            # Only the part before the '|||' separator is the slice id.
            slice_id = item.split('|||')[0] + ".jpg"
            if "X" in slice_id:
                axis_folder = "XSlice"
            elif "Y" in slice_id:
                axis_folder = "YSlice"
            elif "Z" in slice_id:
                axis_folder = "ZSlice"
            else:
                # Without this check the source path would be undefined, or
                # left over from the previous slice, and the wrong image
                # would be copied under this slice's name.
                raise ValueError(
                    "cannot determine X/Y/Z folder for slice id: {!r}".format(slice_id))
            top_k_slices.append((axis_folder, slice_id))

    for subject_id in subject_id_folder_list:
        for axis_folder, slice_id in top_k_slices:
            old_name = os.path.join(dataset_dir, subject_id, axis_folder, slice_id)
            # Prefix with the subject id so that slices of different subjects
            # do not overwrite each other in the shared destination folder.
            slice_name = subject_id + "_" + slice_id
            new_name = os.path.join(new_name_path, slice_name)
            hcq_write(log_path, True, True, new_name)
            shutil.copyfile(old_name, new_name)
if __name__ == "__main__":
    # Separator line marking the start of a new run in the log.
    hcq_write(log_path, True, True, "=" * 40)

    # Slice-path listing of each class; both are echoed before any copying.
    pMCI_silce_txt = os.path.join(root_txt_path, "pMCI_gray_matter_Slices_path.txt")
    print("pMCI_silce_txt = {}".format(pMCI_silce_txt))
    sMCI_silce_txt = os.path.join(root_txt_path, "sMCI_gray_matter_Slices_path.txt")
    print("sMCI_silce_txt = {}".format(sMCI_silce_txt))

    # Split and copy pMCI first, then sMCI; each class reads from its own
    # sub-folder of dataset_path.
    partition_slice_train_val_test(
        pMCI_silce_txt,
        os.path.join(dataset_path, "pMCI_gray_matter_Slices"),
        "pMCI",
    )
    partition_slice_train_val_test(
        sMCI_silce_txt,
        os.path.join(dataset_path, "sMCI_gray_matter_Slices"),
        "sMCI",
    )
hcq@research:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$ ls
get_top_k_slices_MCI.py hcq_lib.pyc random_log sMCI_pMCI_GM_subject_id_20180405 top_k_slices.txt
hcq_lib.py pMCI_gray_matter_Slices_path.txt sMCI_gray_matter_Slices_path.txt sMCI_pMCI_GM_subject_id_20180405.zip
hcq@research:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset$
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/014_S_0563/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/051_S_1331/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/ZSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/126_S_1077/XSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/136_S_0695/YSlice
./sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices/136_S_0695/ZSlice
hcq@research:~/alzheimer_disease/alzheimer_disease_DL/ADNI_majority_selection/pMCI_sMCI/dataset/sMCI_pMCI_GM_subject_id_20180405/pMCI_gray_matter_Slices$ tree -L 2
.
├── 002_S_0729
│ ├── XSlice
│ ├── YSlice
│ └── ZSlice
├── 002_S_0954
│ ├── XSlice
│ ├── YSlice
│ └── ZSlice
├── 002_S_1070
│ ├── XSlice
│ ├── YSlice
│ └── ZSlice
├── 003_S_1057
hcq@research:~/alzheimer_disease/ADNI_825/experiments_FineTunning/majority_select_slices_folder_01_pMCI_sMCI$ tree -L 2
.
├── test
│ ├── pMCI
│ └── sMCI
├── train
│ ├── pMCI
│ └── sMCI
└── validation
├── pMCI
└── sMCI
9 directories, 0 files