# 接上一篇代谢组数据,简单地写了一个T检验的程序,若需要优化,望告知!
#导入python统计包
#import numpy as np
import scipy.stats as stats
#import scipy.optimize as opt
# Run Welch's t-test on every data row of ./metabolites.txt and append the
# results to ./T.txt.
#
# Each row is split on single spaces; field 0 is printed as a label and the
# following fields are read as floats in consecutive groups of three.  The
# group starting at column i is compared with the group starting at column
# i + 6 (i.e. two groups further along).
#
# NOTE(review): the " " field separator and the " " written after each record
# are kept exactly as found.  If the input is really tab-separated, or one
# record per line is wanted in T.txt, change these deliberately.
#
# The output file is opened in append mode ("a"), so results from earlier
# runs are kept rather than overwritten.  `with` guarantees both files are
# closed even if a float conversion or the test itself raises.
with open("./metabolites.txt", "r") as f1, open("./T.txt", "a") as f2:
    # Skip the first line (presumably a header row -- confirm against the data).
    next(f1, None)
    for line in f1:
        LINE_1 = line.strip().split(" ")
        # Step through the value columns one group of three at a time.
        for i in range(1, len(LINE_1), 3):
            G1 = LINE_1[i:i+3]
            G2 = LINE_1[i+6:i+9]
            # Convert both groups to lists of floats.
            X = [float(x) for x in G1]
            Y = [float(y) for y in G2]
            # Sample sizes/variances may differ between the groups, so use
            # Welch's t-test (equal_var=False) instead of Student's t-test.
            stat_var, p_var = stats.ttest_ind(X, Y, equal_var=False)
            # Same record goes to the console and to the output file.
            record = "%s %d %d: %f,%f" % (LINE_1[0], i, i+6, stat_var, p_var)
            print(record)
            f2.write(record + " ")
            # Column 28 is the last group start that is compared; stop here
            # so later groups are not tested against columns that do not
            # exist.  NOTE(review): 28 is tied to the input's column count.
            if i >= 28:
                break