clear;clc
NumROI = 37; % number of ROIs (FC matrices are NumROI x NumROI)
NumCon = 605; % number of connections (features) kept per fold
% Patient group data folder
% path1 = spm_select(1,'dir','please select patients dir');
path1 ='D:\siying\42ML_day3\nnnnnn\FC\Patient'; % use this or spm_select above; edit the path manually
file1 = dir([path1,filesep, '*.txt']);
if isempty(file1)
% Fall back to .mat connectivity files when no .txt files exist.
% (BUG FIX: the original retried '*.txt', which could never succeed;
% the read loops below already support both .txt and .mat.)
file1 = dir([path1,filesep, '*.mat']);
end
if isempty(file1)
error('No .txt or .mat connectivity files found in %s', path1);
end
% Healthy-control group data folder
% path2 = spm_select(1,'dir','please select hc dir');
path2 ='D:\siying\42ML_day3\nnnnnn\FC\HC';
file2 = dir([path2,filesep, '*.txt']);
if isempty(file2)
% Same fallback for the control group.
file2 = dir([path2,filesep, '*.mat']);
end
if isempty(file2)
error('No .txt or .mat connectivity files found in %s', path2);
end
% Optional connection mask: use the whole brain or a pre-built mask
% restricted to the connections of interest (see step1 for how the mask
% is created). The mask .mat file must contain a matrix named R.
choice = questdlg('Add connection mask?', 'Mask', 'YES','NO','YES');
if isequal(choice,'YES')
[FileName,PathName] = uigetfile('*.mat','select connection mask');
% fullfile avoids the doubled separator the original produced
% (uigetfile's PathName already ends with a file separator).
load(fullfile(PathName,FileName)); % mask matrix is named R
conn_msk = double(R~=0);
clear PathName FileName
else
% No mask: keep every connection.
conn_msk = ones(NumROI);
end
% Linear indices of the strict upper triangle (each connection once).
Ind_01 = find(triu(ones(NumROI),1));
% Positions within the upper triangle that survive the mask.
Ind_02 = find(conn_msk(Ind_01) ~= 0);
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
%% Read functional-connectivity data
% Each subject file holds an FC matrix; only the masked upper-triangle
% entries (Ind_01(Ind_02)) are kept as that subject's feature vector.
DATA_sad = zeros(length(file1), length(Ind_02));
for i = 1:length(file1)
[~,~,ext] = fileparts(file1(i).name);
switch ext
case '.mat'
% .mat files store the FC matrix in a variable named corROI4
load([path1,filesep, file1(i).name])
DATA_sad(i,:) = corROI4(Ind_01(Ind_02));
case '.txt'
corROI4 = load([path1,filesep, file1(i).name]);
DATA_sad(i,:) = corROI4(Ind_01(Ind_02));
end
end
DATA_hc = zeros(length(file2), length(Ind_02));
for i = 1:length(file2)
[~,~,ext] = fileparts(file2(i).name);
switch ext
case '.mat'
% FC matrix variable is named corROI4
load([path2,filesep, file2(i).name])
DATA_hc(i,:) = corROI4(Ind_01(Ind_02));
case '.txt'
corROI4 = load([path2,filesep, file2(i).name]);
DATA_hc(i,:) = corROI4(Ind_01(Ind_02));
end
end
% Stack both groups: patients first (+1), controls second (-1).
DATA = [DATA_sad;DATA_hc];
label = [ones(size(DATA_sad,1),1); -1*ones(size(DATA_hc,1),1)];
%% SVM classification
% Leave-one-out cross-validation (LOOCV): in each fold one subject is
% held out, features are ranked by F-score on the remaining subjects,
% the linear-SVM cost c is grid-searched with an inner 5-fold CV, and
% the held-out subject is predicted. Requires libsvm's svmtrain /
% svmpredict on the MATLAB path.
% Preallocate the per-fold SVM feature weights (one row per fold).
w = zeros(size(DATA,1), NumCon);
h = waitbar(0,'please wait..');
for i = 1:size(DATA,1)
waitbar(i/size(DATA,1),h,[num2str(i),'/',num2str(size(DATA,1))])
new_DATA = DATA;
new_label = label;
% Hold out subject i; the rest is the training set.
test_data = DATA(i,:);new_DATA(i,:) = []; train_data = new_DATA;
test_label = label(i,:);new_label(i,:) = [];train_label = new_label;
% F_score feature selection
% training data in the sad group
data_sad = train_data(train_label==1,:);
% training data in the hc group
data_hc = train_data(train_label==-1,:);
% Per-feature group means and overall mean.
x_sad = mean(data_sad);
x_hc = mean(data_hc);
x_data = mean(train_data);
% F-score per feature: between-group scatter over within-group variance.
% NOTE: rows j<=size(data_sad,1) are assumed to be patients — this holds
% because DATA stacks patients before controls and only one row was removed.
for k = 1:size(train_data,2)
for j = 1:size(train_data,1)
if j<=size(data_sad,1)
% squared deviation of patient j from the patient mean of feature k
AA_sad(j) = (train_data(j,k)-x_sad(k))^2;
sum_sad(k) = sum(AA_sad);
else
% squared deviation of control (j - nPatients) from the HC mean
BB_hc(j-size(data_sad,1)) = (train_data(j,k)-x_hc(k))^2;
sum_hc(k) = sum(BB_hc);
end
end
F(i,k) = ((x_sad(k)-x_data(k))^2+(x_hc(k)-x_data(k))^2)/((sum_sad(k)/(size(data_sad,1)-1))+(sum_hc(k)/(size(data_hc,1)-1)));
end
clear x_sad x_hc x_data AA_sad sum_sad BB_hc sum_hc
% % t-score for feature selection (alternative)
% [~,~,~,stat] = ttest2(train_data(train_label==1,:),train_data(train_label==-1,:));
% F(i,:) = abs(stat.tstat);
% clear stat
% % correlation for feature selection (alternative)
% r = corr(train_data, train_label,'type','Spearman');
% F(i,:) = abs(r);
% Rank features of this fold by descending F-score.
[~,IX] = sort(F(i,:),'descend');
% Save the feature ranking of fold i.
order(i,:) = IX;
% % parameter selection (alternative helper, replaced by the grid below)
% [bestacc,bestc] = SVMcgForClass_NoDisplay_linear(train_label,train_data(:,IX(1:NumCon)),-5,5,5,1);
% cmd = ['-t 0', '-c ',num2str(bestc)];
% % cmd = '-t 0';
% parameter selection: grid search of the SVM cost hyperparameter
bestcv = 0; % best inner-CV ACCURACY so far (classification: higher is better; for regression/mse one would start from inf instead)
for log2c = -5:0.2:5 % search range and step for log2(c); adjust to your study
% -v 5: 5-fold inner cross-validation on the training set
% -s 0 -t 0: C-SVC with a linear kernel
% -c: SVM cost (penalty) parameter
cmd = ['-v 5 -s 0 -t 0 -c ', num2str(2^log2c)];
% With -v, svmtrain returns the cross-validation accuracy (a scalar).
cv = svmtrain(train_label, train_data(:,IX(1:NumCon)), cmd);% top NumCon ranked features
% Keep the parameter with the highest inner-CV accuracy.
if (cv > bestcv)
bestcv = cv; bestc = 2^log2c;
fprintf('%g %g %g (best c=%g, rate=%g)\n', log2c, cv, bestc, bestcv);
end
end
% Retrain on the full training fold with the best hyperparameter.
bestcmd=['-s 0 -t 0 -c ',num2str(bestc)]; % -s 0: classification (-s 3 would be regression), -t 0: linear kernel
model = svmtrain(train_label,train_data(:,IX(1:NumCon)), bestcmd);
w(i,:) = model.SVs'*model.sv_coef; % primal weight vector of the linear SVM (in ranked-feature order)
[predicted_label, accuracy, deci] = svmpredict(test_label,test_data(:,IX(1:NumCon)),model);
acc(i) = accuracy(1); % per-fold accuracy: 0 or 100 (one test subject)
deci_value(i) = deci; % decision value for the ROC curve
p_label(i)=predicted_label; % predicted label of the held-out subject
clear k j model cmd
end
close(h)
% Overall LOOCV accuracy (mean of per-fold 0/100 accuracies).
acc_final = mean(acc);
%% ROC curve and AUC from the LOOCV decision values (positive class = 1).
[X,Y,~,AUC] = perfcurve(label,deci_value,1);
disp(['AUC= ', num2str(AUC)]);
figure;plot(X,Y);hold on;plot(X,X,'-');
xlabel('False positive rate'); ylabel('True positive rate');
% Operating point maximising specificity*sensitivity = (1-FPR)*TPR.
% (Vectorised; the original computed this element-by-element in a loop.)
Cut_off = (1-X).*Y;
[~,maxind] = max(Cut_off);
disp(['Specificity= ', num2str(1-X(maxind))]);
disp(['Sensitivity= ', num2str(Y(maxind))]); % fixed typo: was 'Sensitivty'
% Confusion matrix. Classes are sorted, so row/col 1 = -1 (HC) and
% row/col 2 = +1 (patient). p_label(:) forces a column vector to match
% the orientation of label.
figure;
cmc = confusionchart(label, p_label(:));
% cm = cmc.NormalizedValues;
cm = confusionmat(label, p_label(:));
sensitivity = cm(2,2) / sum(cm(2,:)); % true-positive rate (patients)
specificity = cm(1,1) / sum(cm(1,:)); % true-negative rate (controls)
disp(['sensitivity: ',num2str(sensitivity)]);
disp(['specificity: ',num2str(specificity)]);
%% Find consensus features across the LOOCV folds
% For each fold, rebuild a symmetric NumROI x NumROI weight matrix from
% the fold's selected connections and SVM weights.
for i=1:size(order,1)
mat_i = zeros(NumROI);
% Place the fold's weights at the selected positions: upper triangle,
% inside the mask, in the fold's F-score ranking order.
mat_i(Ind_01(Ind_02((order(i,1:NumCon))))) = w(i,:);
% Symmetrise the upper-triangle matrix and store it as fold i's map.
cons_feature(:,:,i) = mat_i + mat_i';
clear mat_i
end
% Mean weight map across folds (computed once, reused below).
mean_w = mean(cons_feature,3);
% Consensus mask: connections selected (non-zero) in every fold.
cons_feature_mask = double(sum(cons_feature ~= 0,3)==size(DATA,1));
% Sign (+/-) of the consensus connections' mean weights.
cons_feature_sign = sign(mean_w).*cons_feature_mask;
% Mean weights restricted to the consensus connections.
cons_feature_mean = mean_w.*cons_feature_mask;
% Absolute mean weights of the consensus connections.
cons_feature_abs = abs(mean_w).*cons_feature_mask;
figure;imagesc(cons_feature_mean);colorbar;
% List the consensus connections: row, column, and weight.
[lx,ly,lz]=find(triu(cons_feature_mean));
cons_feature_label=[lx,ly,lz]; % lx = row, ly = column, lz = weight
%% Permutation test
% Repeat the whole LOOCV pipeline with randomly shuffled labels to build
% a null distribution of the mean accuracy; the p-value is the fraction
% of permutations reaching the observed accuracy.
permut = 10; % number of permutations (increase, e.g. to 1000, for a stable p-value)
acc_final_rand = zeros(permut,1);
h = waitbar(0,'please wait..');
for i=1:permut
waitbar(i/permut,h,['permutation:',num2str(i),'/',num2str(permut)]);
% Randomly shuffle the group labels for this permutation.
randlabel = randperm(length(label));
label_r = label(randlabel);
acc_r = zeros(1,size(DATA,1)); % per-fold accuracy under permuted labels
for j=1:size(DATA,1)
new_DATA = DATA;
new_label = label_r;
test_data = new_DATA(j,:); new_DATA(j,:) = []; train_data = new_DATA;
test_label = new_label(j,:); new_label(j,:) = []; train_label = new_label;
% F_score feature selection (same statistic as the main LOOCV loop).
data_sad = train_data(train_label==1,:);
data_hc = train_data(train_label==-1,:);
x_sad = mean(data_sad);
x_hc = mean(data_hc);
x_data = mean(train_data);
% BUG FIX: the original accumulated squared deviations by row position,
% assuming the first size(data_sad,1) rows were patients. That holds for
% the original labels but NOT for permuted ones; compute the group sums
% directly from data_sad / data_hc instead.
F_fold = zeros(1,size(train_data,2));
for k = 1:size(train_data,2)
var_sad = sum((data_sad(:,k)-x_sad(k)).^2)/(size(data_sad,1)-1);
var_hc = sum((data_hc(:,k)-x_hc(k)).^2)/(size(data_hc,1)-1);
F_fold(k) = ((x_sad(k)-x_data(k))^2+(x_hc(k)-x_data(k))^2)/(var_sad+var_hc);
end
clear x_sad x_hc x_data var_sad var_hc
% BUG FIX: rank this fold's scores as a vector. The original called
% sort(F,'descend') on a matrix, which sorts each COLUMN, so IX(1:NumCon)
% contained row indices (1..i) instead of feature indices for i > 1.
[~,IX] = sort(F_fold,'descend');
% parameter selection: grid search of the SVM cost hyperparameter
bestcv = 0; % best inner-CV ACCURACY so far (classification: higher is better)
for log2c = -5:0.2:5
% -v 5: 5-fold inner CV; -s 0 -t 0: linear C-SVC; -c: cost parameter
cmd = ['-v 5 -s 0 -t 0 -c ', num2str(2^log2c)];
% With -v, svmtrain returns the cross-validation accuracy (a scalar).
cv = svmtrain(train_label, train_data(:,IX(1:NumCon)), cmd);
if (cv > bestcv)
bestcv = cv; bestc = 2^log2c;
fprintf('%g %g %g (best c=%g, rate=%g)\n', log2c, cv, bestc, bestcv);
end
end
% Retrain with the best hyperparameter and predict the held-out subject.
bestcmd=['-s 0 -t 0 -c ',num2str(bestc)]; % -s 0: classification, -t 0: linear kernel
model = svmtrain(train_label,train_data(:,IX(1:NumCon)), bestcmd);
[predicted_label, accuracy, deci] = svmpredict(test_label,test_data(:,IX(1:NumCon)),model);
acc_r(j) = accuracy(1);
clear IX cmd bestc F_fold model
end
acc_final_rand(i) = mean(acc_r);
clear randlabel acc_r label_r
end
close(h);
% Permutation p-value: fraction of accuracies (permuted + observed) that
% reach the observed accuracy; including the observed value keeps p > 0.
acc_pvalue = mean([acc_final_rand; acc_final] >= acc_final);
% save('FC_svm_roc_result.mat','acc_final','acc_pvalue','AUC','cons_feature_mean','cons_feature_mask','cons_feature_label');
% ----------------------------------------------------------------------
% Script summary
% This script classifies patients vs. healthy controls from functional
% connectivity (FC) data with a support vector machine (SVM), using
% leave-one-out cross-validation, feature selection, hyperparameter
% tuning, consensus-feature analysis, and a permutation test:
%   1. Set the number of ROIs (NumROI) and connections kept (NumCon).
%   2. Load per-subject FC matrices (.txt or .mat) for both groups,
%      optionally apply a connection mask, and merge the two groups into
%      DATA with labels in `label` (+1 = patient, -1 = control).
%   3. LOOCV: in each fold, rank features by F-score, grid-search the SVM
%      cost c with an inner 5-fold CV, train, and predict the held-out
%      subject; store per-fold accuracies (acc), decision values
%      (deci_value), and predicted labels (p_label). The mean accuracy is
%      acc_final.
%   4. Compute ROC curve and AUC, sensitivity/specificity at the chosen
%      cutoff, and the confusion matrix.
%   5. Consensus features: per-fold weight matrices (cons_feature) are
%      combined into a consensus mask (cons_feature_mask), a signed mask
%      (cons_feature_sign), mean weights (cons_feature_mean), and their
%      connection list (cons_feature_label).
%   6. Permutation test (`permut` iterations): shuffle the labels and
%      rerun feature selection, tuning, and training each time, recording
%      the mean accuracy per permutation in acc_final_rand; acc_pvalue is
%      the resulting significance of the observed accuracy.
%   7. Results can be saved to FC_svm_roc_result.mat (see the commented
%      'save' line above).
% ----------------------------------------------------------------------