# -*- coding: UTF-8 -*-
import scipy.io as sio
import numpy as np
from scipy import stats
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2
from sklearn.feature_selection.univariate_selection import f_classif
# Load the test and training splits from their MATLAB .mat files, show which
# variables each file contains, and stack the two arrays row-wise into one
# sample matrix (test rows first, training rows after).
test_mat = sio.loadmat('/run/media/zlf/WMU1/Examples/PathologicalSection/PulmonarySquamousCellCarcinoma/OpticalParameters/CrossValidation1/WithoutAugment/SVM/dataset_test.mat')
print(test_mat.keys())
test_set = test_mat['dataset_test']

train_mat = sio.loadmat('/run/media/zlf/WMU1/Examples/PathologicalSection/PulmonarySquamousCellCarcinoma/OpticalParameters/CrossValidation1/WithoutAugment/SVM/dataset_train.mat')
print(train_mat.keys())
train_set = train_mat['dataset_train']

data_set = np.vstack([test_set, train_set])
# `a` is the (rows, columns) shape; later code indexes it as a[0] and a[1].
a = data_set.shape
print(a)
# Split the samples by class label. Column 0 holds the label: rows whose label
# equals 0 go to `normal`, every other row goes to `cancer`.
is_normal = data_set[:, 0] == 0
normal_num = int(np.count_nonzero(is_normal))
cancer_num = a[0] - normal_num

# Boolean-mask indexing replaces the two Python-level passes over the rows
# (one to count, one to copy). The float64 cast keeps the dtype that the
# previously pre-allocated np.zeros buffers produced.
normal = data_set[is_normal].astype(np.float64)
cancer = data_set[~is_normal].astype(np.float64)

# Per-column mean and standard deviation (np.std default, ddof=0) of each
# group. Column 0 is the label itself, so its entries are not features.
print("normal mean", np.mean(normal, axis=0))
print("cancer mean", np.mean(cancer, axis=0))
print("normal std", np.std(normal, axis=0))
print("cancer std", np.std(cancer, axis=0))
# Levene's test for equality of variances between the two groups, run on one
# feature column at a time (column 0 is the label and is skipped).
# Decision rule: a p-value above 0.05 means equal variances cannot be
# rejected; a p-value below 0.05 indicates the variances differ.
print("levene检验P值:")
for col in range(1, a[1]):
    print(stats.levene(normal[:, col], cancer[:, col]))

# Two-sample t-test per feature column. equal_var=False selects Welch's
# t-test, which does not assume the two groups share the same variance.
print("T检验P值:")
for col in range(1, a[1]):
    print(stats.ttest_ind(normal[:, col], cancer[:, col], equal_var=False))
# Univariate feature scoring with scikit-learn's SelectKBest. Only the
# per-feature p-values are printed, so fit() is sufficient; the reduced
# feature matrix that fit_transform() returns was never used.
features = data_set[:, 1:]  # every column except the label
labels = data_set[:, 0]     # column 0 is the class label

# Chi-squared statistic between each feature and the class label.
# NOTE(review): sklearn's chi2 expects non-negative features such as counts or
# frequencies -- confirm it is appropriate for these continuous columns.
model1 = SelectKBest(chi2, k=2)  # k = number of best features to keep
model1.fit(features, labels)
print("卡方检验P值:")
print(model1.pvalues_)

# f_classif computes the one-way ANOVA F-statistic per feature, i.e. it tests
# whether the class means differ. It is NOT the variance-ratio F-test.
# Background from the original note: before a two-sample t-test one checks
# homogeneity of variance (that is what the variance-ratio F-test or Levene's
# test is for); if the variances are equal the ordinary t-test applies,
# otherwise use the t'-test (Welch), a variable transformation, or a rank-sum
# test.
# NOTE(review): f_classif is imported from the private module
# sklearn.feature_selection.univariate_selection; the public import path is
# sklearn.feature_selection.f_classif -- confirm against the installed version.
model1 = SelectKBest(f_classif, k=2)  # k = number of best features to keep
model1.fit(features, labels)
print("F检验P值:")
print(model1.pvalues_)
# Sample output (运行结果) from a Python 2 run of the script above:
# ['dataset_test', '__version__', '__header__', '__globals__']
# ['__version__', '__header__', 'dataset_train', '__globals__']
# (1391, 6)
# ('normal mean', array([ 0. , 0.33674211, 0.04838059, 0.98179978, 1.46426259,
# 1.99153562]))
# ('cancer mean', array([ 1. , 0.2734623 , 0.06471866, 0.97679533, 1.63713706,
# 2.23947017]))
# ('normal std', array([ 0. , 0.12841319, 0.01553056, 0.00642903, 0.30787207,
# 0.40030149]))
# ('cancer std', array([ 0. , 0.10698916, 0.01872662, 0.00916271, 0.27047442,
# 0.41267499]))
# levene检验P值:
# LeveneResult(statistic=29.085042884065722, pvalue=8.1320081891629079e-08)
# LeveneResult(statistic=22.282187812606857, pvalue=2.5915957909583776e-06)
# LeveneResult(statistic=33.988727314573119, pvalue=6.883408903605969e-09)
# LeveneResult(statistic=19.719964608688482, pvalue=9.676453499580445e-06)
# LeveneResult(statistic=0.053959413955603178, pvalue=0.81634571171496351)
# T检验P值:
# Ttest_indResult(statistic=9.4405805829989511, pvalue=2.8221198413129869e-20)
# Ttest_indResult(statistic=-17.538192041319256, pvalue=9.8692548824623449e-62)
# Ttest_indResult(statistic=11.924177538503574, pvalue=3.0575194937779589e-31)
# Ttest_indResult(statistic=-10.585331590339845, pvalue=7.1713527494476559e-25)
# Ttest_indResult(statistic=-11.038431648240406, pvalue=5.8138480585835281e-27)
# 卡方检验P值:
# [ 0.03618659 0.22331863 0.92726836 0.01286069 0.00226231]
# F检验P值:
# [ 2.65290709e-22 1.96399866e-57 8.74814259e-27 9.18920268e-27
# 7.60914713e-27]