DNA甲基化芯片:可以自行提取GenomeStudio检测的数据,通常有40多万行,每一行是一个CpG位点,每一列是一个标本,需要逐行(逐位点)按列分组进行差异分析
一些其他微阵列芯片数据,按列分组进行差异分析
其他需要对多行分别进行按列分组差异分析的情况
# Simulate example data: one row per feature (e.g. a CpG site), one column
# per sample. Samples come in three groups whose values are drawn from
# non-overlapping uniform ranges, so every row differs between groups.
# Call set.seed() beforehand if reproducible values are needed.
n_sites <- 50L      # number of rows (features)
n_per_group <- 10L  # number of sample columns in each group
group_ranges <- list(
  Low = c(0, 0.333),
  Med = c(0.334, 0.666),
  High = c(0.667, 1)
)

# Draw the columns group by group and sample by sample (Low1..Low10,
# Med1..Med10, High1..High10) so the random number stream is consumed in the
# same order as thirty consecutive runif() calls would consume it.
sim_columns <- do.call(
  c,
  lapply(names(group_ranges), function(grp) {
    bounds <- group_ranges[[grp]]
    cols <- lapply(seq_len(n_per_group), function(k) {
      runif(n_sites, min = bounds[1], max = bounds[2])
    })
    setNames(cols, paste0(grp, seq_len(n_per_group)))
  })
)

# Keep the per-sample vectors available under their own names (Low1, ...,
# High10) for any code that refers to them directly.
list2env(sim_columns, envir = environment())

# Assemble the columns into the data frame used by the rest of the script.
data <- as.data.frame(sim_columns)
# Original column names of the data, one per sample, in column order.
cn <- colnames(data)
# Group label for each sample column, in the same order as the columns:
# ten "low", then ten "medium", then ten "high".
g <- rep(c("low", "medium", "high"), each = 10)
# data.frame() silently recycles a vector whose length evenly divides the
# longer one, so make sure there is exactly one label per data column.
stopifnot(length(g) == ncol(data))
# Design table that maps every sample column to its group; used later to
# group the values of each row.
design <- data.frame(cn, g)
# The input here is already a data frame, but real data often arrives as a
# matrix, so coerce it to a data frame explicitly.
x <- as.data.frame(data)
# One p-value slot per row. Filled with NA rather than 0 so that a slot left
# untouched by an interrupted run cannot pass a `< 0.05` filter.
PvalueF <- rep(NA_real_, nrow(x))
Pvaluekw <- rep(NA_real_, nrow(x))
对数据逐行处理:取出第i行的30个数值作为ab,按设计表中的分组b按列分组(前十列为低组,中间十列为中等组,后十列为高组),把ab与b合并成数据框aa,再对aa进行差异分析。所有行共用同一份分组设计。下面三个循环依次演示:只做单因素方差分析、只做Kruskal-Wallis检验、在同一个循环中同时做两种检验(得到的P值与前两个循环相同)。
# One-way ANOVA for every row of `x`: the values of a row are split into the
# groups given by `design$g`, and the p-value of the F test is stored in
# PvalueF (one entry per row).
# The sample columns are taken from the design table (its rows are in column
# order) instead of a hard-coded 1:30.
sample_idx <- seq_len(nrow(design))
groups <- factor(design$g)
# seq_len() yields an empty sequence for a zero-row input, where 1:nrow(x)
# would iterate over c(1, 0).
PvalueF <- vapply(
  seq_len(nrow(x)),
  function(i) {
    row_df <- data.frame(ab = as.numeric(x[i, sample_idx]), b = groups)
    fit <- aov(ab ~ b, data = row_df)
    # First row of the ANOVA table is the group term; pick its p-value by
    # column name rather than by position.
    summary(fit)[[1]][["Pr(>F)"]][1]
  },
  numeric(1)
)
# Kruskal-Wallis rank sum test for every row of `x`: the values of a row are
# split into the groups given by `design$g`, and the test's p-value is stored
# in Pvaluekw (one entry per row).
# The sample columns are taken from the design table (its rows are in column
# order) instead of a hard-coded 1:30.
sample_idx <- seq_len(nrow(design))
groups <- factor(design$g)
# seq_len() yields an empty sequence for a zero-row input, where 1:nrow(x)
# would iterate over c(1, 0).
Pvaluekw <- vapply(
  seq_len(nrow(x)),
  function(i) {
    row_df <- data.frame(ab = as.numeric(x[i, sample_idx]), b = groups)
    kruskal.test(ab ~ b, data = row_df)$p.value
  },
  numeric(1)
)
# Combined pass: for every row of `x`, run both the one-way ANOVA and the
# Kruskal-Wallis test on the same grouped values. This yields the same
# PvalueF and Pvaluekw as running the ANOVA-only and Kruskal-Wallis-only
# loops separately, but builds the per-row data frame only once.
# The sample columns are taken from the design table (its rows are in column
# order) instead of a hard-coded 1:30.
sample_idx <- seq_len(nrow(design))
groups <- factor(design$g)
# seq_len() yields an empty sequence for a zero-row input, where 1:nrow(x)
# would iterate over c(1, 0).
row_pvalues <- vapply(
  seq_len(nrow(x)),
  function(i) {
    row_df <- data.frame(ab = as.numeric(x[i, sample_idx]), b = groups)
    anova_fit <- aov(ab ~ b, data = row_df)
    kw_test <- kruskal.test(ab ~ b, data = row_df)
    # First row of the ANOVA table is the group term; pick its p-value by
    # column name rather than by position.
    c(summary(anova_fit)[[1]][["Pr(>F)"]][1], kw_test$p.value)
  },
  numeric(2)
)
# row_pvalues has one column per row of `x`: row 1 holds the ANOVA p-values,
# row 2 the Kruskal-Wallis p-values.
PvalueF <- row_pvalues[1, ]
Pvaluekw <- row_pvalues[2, ]
# Append the two p-value vectors to the data as extra columns.
Totaloutcome <- cbind(data, PvalueF, Pvaluekw)
# Keep the rows whose p-value is below 0.05 for each test. which() drops
# NA/NaN p-values; a bare logical subscript containing NA would instead add
# all-NA rows to the result.
# NOTE(review): these are raw p-values; with many rows a multiple-testing
# correction such as p.adjust() is probably wanted before thresholding.
wantF <- Totaloutcome[which(Totaloutcome$PvalueF < 0.05), ]
wantKW <- Totaloutcome[which(Totaloutcome$Pvaluekw < 0.05), ]
[1]: 网址 https://mp.weixin.qq.com/s?__biz=MzUzMjYyMDE2OQ==&mid=2247484291&idx=1&sn=39b8f25c31286632ba096b59e955c558&chksm=fab13565cdc6bc7337d400a054ac142697605f6178ec495d8e8ed0c5efc1966d6b76f940fe34&mpshare=1&scene=1&srcid=0519ovSQdmsTXJhq2T1ur5tr&key=680668475bb5566e0b020314321fb7c4a3405700348f55b5013a5317da1b7eec0d2e455db638cba02d8bc0746a0241b76e4cf2d351fc91f1eea274440a0961a7188091704bdde399f698c8fcb10a8498&ascene=0&uin=MjUwNzM1NzUwMQ%3D%3D&devicetype=iMac+MacBookAir7%2C2+OSX+OSX+10.12.6+build(16G29)&version=12010310&nettype=WIFI&lang=zh_CN&fontScale=100&pass_ticket=kKe%2FY0RakLyhPzNU1UAzu%2BNgUZ99e6LQVYB5P8O0uangpGcpSMXna8zFInUJ7kw0
何世伟
厦门大学公共卫生硕士
研究方向:表观遗传流行病学、循证医学、生物信息学
联系方式:[email protected]