Python Levene检验、T检验、卡方检验、F检验


# -*- coding: UTF-8 -*-
import scipy.io as sio
import numpy as np
from scipy import stats
from sklearn.feature_selection import SelectKBest  
from sklearn.feature_selection import chi2 
from sklearn.feature_selection.univariate_selection import f_classif

# Load the held-out test split; loadmat returns a dict keyed by MATLAB
# variable name, so list the keys before pulling out the array we need.
test_mat = sio.loadmat('/run/media/zlf/WMU1/Examples/PathologicalSection/PulmonarySquamousCellCarcinoma/OpticalParameters/CrossValidation1/WithoutAugment/SVM/dataset_test.mat')
print(test_mat.keys())
test_set = test_mat['dataset_test']


# Same procedure for the training split.
train_mat = sio.loadmat('/run/media/zlf/WMU1/Examples/PathologicalSection/PulmonarySquamousCellCarcinoma/OpticalParameters/CrossValidation1/WithoutAugment/SVM/dataset_train.mat')
print(train_mat.keys())
train_set = train_mat['dataset_train']


# The statistics below are computed over all samples, so stack both splits
# row-wise (test rows first, then training rows).
data_set = np.vstack([test_set, train_set])
a = data_set.shape  # (number of samples, number of columns)
print(a)

# Column 0 holds the class label: rows whose label equals 0 are "normal",
# every other row is treated as "cancer".
normal_mask = data_set[:, 0] == 0

# Boolean-mask indexing selects each group in one pass, replacing the manual
# count-then-copy loops. The float64 conversion keeps the dtype that the
# previously pre-allocated np.zeros buffers had.
normal = np.asarray(data_set[normal_mask], dtype=np.float64)
cancer = np.asarray(data_set[~normal_mask], dtype=np.float64)

# Per-class sample counts, kept under their original names.
normal_num = normal.shape[0]
cancer_num = cancer.shape[0]


# Column-wise descriptive statistics for each class (axis=0 reduces over
# the samples, leaving one value per column).
normal_mean = np.mean(normal, axis=0)
cancer_mean = np.mean(cancer, axis=0)
print("normal mean", normal_mean)
print("cancer mean", cancer_mean)

normal_std = np.std(normal, axis=0)
cancer_std = np.std(cancer, axis=0)
print("normal std", normal_std)
print("cancer std", cancer_std)


# Levene's test for equality of variances, run once per feature column.
# Column 0 is the class label, so the features start at column 1.
print("levene检验P值:")
for col in range(1, a[1]):
    # A p-value above 0.05 means the two groups' variances can be
    # considered equal for that feature.
    # print(...) with a single argument works on both Python 2 and 3;
    # the former `print x` statement is a SyntaxError on Python 3.
    print(stats.levene(normal[:, col], cancer[:, col]))


# Independent two-sample t-test per feature. equal_var=False requests
# Welch's t-test, which does not assume equal group variances.
print("T检验P值:")
for col in range(1, a[1]):
    print(stats.ttest_ind(normal[:, col], cancer[:, col], equal_var=False))


# Univariate feature scoring with scikit-learn: features are columns 1..,
# the class label is column 0. Only the per-feature p-values are reported,
# so the selector is merely fitted; the transformed matrix that
# fit_transform would return was never used.
#
# chi2: chi-squared statistic between each feature and the class label.
#   NOTE(review): scikit-learn documents chi2 as requiring non-negative
#   feature values - confirm this holds for the data.
#
# f_classif: one-way ANOVA F-test, which compares the group *means* of each
#   feature across the classes. It is NOT the variance-ratio F-test for
#   homogeneity of variance that is checked ahead of a two-sample t-test
#   (equal variances -> ordinary t-test; unequal -> Welch's t'-test, a
#   variable transformation, or a rank-sum test). Homogeneity of variance is
#   what the Levene test in this script examines.
for title, score_func in (("卡方检验P值:", chi2), ("F检验P值:", f_classif)):
    model1 = SelectKBest(score_func, k=2)  # k = number of best features to keep
    model1.fit(data_set[:, 1:], data_set[:, 0])
    print(title)
    print(model1.pvalues_)

运行结果:

['dataset_test', '__version__', '__header__', '__globals__']
['__version__', '__header__', 'dataset_train', '__globals__']
(1391, 6)
('normal mean', array([ 0.        ,  0.33674211,  0.04838059,  0.98179978,  1.46426259,
        1.99153562]))
('cancer mean', array([ 1.        ,  0.2734623 ,  0.06471866,  0.97679533,  1.63713706,
        2.23947017]))
('normal std', array([ 0.        ,  0.12841319,  0.01553056,  0.00642903,  0.30787207,
        0.40030149]))
('cancer std', array([ 0.        ,  0.10698916,  0.01872662,  0.00916271,  0.27047442,
        0.41267499]))
levene检验P值:
LeveneResult(statistic=29.085042884065722, pvalue=8.1320081891629079e-08)
LeveneResult(statistic=22.282187812606857, pvalue=2.5915957909583776e-06)
LeveneResult(statistic=33.988727314573119, pvalue=6.883408903605969e-09)
LeveneResult(statistic=19.719964608688482, pvalue=9.676453499580445e-06)
LeveneResult(statistic=0.053959413955603178, pvalue=0.81634571171496351)
T检验P值:
Ttest_indResult(statistic=9.4405805829989511, pvalue=2.8221198413129869e-20)
Ttest_indResult(statistic=-17.538192041319256, pvalue=9.8692548824623449e-62)
Ttest_indResult(statistic=11.924177538503574, pvalue=3.0575194937779589e-31)
Ttest_indResult(statistic=-10.585331590339845, pvalue=7.1713527494476559e-25)
Ttest_indResult(statistic=-11.038431648240406, pvalue=5.8138480585835281e-27)
卡方检验P值:
[ 0.03618659  0.22331863  0.92726836  0.01286069  0.00226231]
F检验P值:
[  2.65290709e-22   1.96399866e-57   8.74814259e-27   9.18920268e-27
   7.60914713e-27]



你可能感兴趣的:(Python零基础)