转录组专题:edgeR以raw count数据为起点做差异表达分析

参考:

  • https://www.jianshu.com/p/5f94ae79f298
library(edgeR)
# Load the raw count table from a CSV file chosen interactively.
# NOTE(review): the whole data frame is later passed through as.matrix()
# into DGEList(), so this assumes every column is a numeric per-sample count
# column. If the first column holds gene IDs it must be moved to the row
# names first (e.g. via row.names = 1) -- confirm against the input file.
raw_count <- read.csv(file.choose(), header = TRUE, sep = ",")
head(raw_count)

# Build the DGEList object.
# Condition of each sample, in the column order of the count table:
# 2 ASD, 4 Healthy, 1 ASD, 2 Healthy, 3 ASD (12 samples in total).
group <- factor(
  rep(
    c("ASD", "Healthy", "ASD", "Healthy", "ASD"),
    times = c(2, 4, 1, 2, 3)
  ),
  levels = c("ASD", "Healthy")
)
genelist <- DGEList(counts = as.matrix(raw_count), group = group)

# Filter out low-count genes on the CPM scale: a gene is kept when its
# counts-per-million value exceeds 0.5 in at least 2 samples.
above_cutoff <- cpm(genelist) > 0.5
keep <- rowSums(above_cutoff) >= 2
table(keep)
# keep.lib.sizes = FALSE: library sizes are recomputed from the retained genes.
genelist.filted <- genelist[keep, , keep.lib.sizes = FALSE]

# Normalise the filtered DGEList with the TMM method.
genelist.norm <- calcNormFactors(genelist.filted, method = "TMM")

# Experimental design matrix: no intercept, one indicator column per group.
# The columns are renamed to the bare group levels ("ASD", "Healthy").
design <- model.matrix(~ 0 + group)
colnames(design) <- levels(group)
design

# Estimate the dispersions and plot the biological coefficient of variation.
genelist.Disp <- estimateDisp(
  y = genelist.norm,
  design = design,
  robust = TRUE
)
plotBCV(genelist.Disp)

# Quasi-likelihood (QL) fit of the negative binomial model, used to account
# for gene-specific variability from biological and technical sources.
fit <- glmQLFit(
  y = genelist.Disp,
  design = design,
  robust = TRUE
)
head(fit$coefficients)

# Differential expression test: build the contrast and run the QL F-test.
# The design columns are named after the group levels ("ASD" and "Healthy"),
# so the control arm has to be referenced as `Healthy`; the former name
# `control` is not a column of `design` and made makeContrasts() fail.
# The contrast is Healthy minus ASD, so a positive logFC means higher
# expression in the Healthy group.
# NOTE(review): the object name `cntr.vs.KD` is kept unchanged so any other
# code referring to it keeps working, although the comparison is Healthy vs ASD.
cntr.vs.KD <- makeContrasts(Healthy - ASD, levels = design)
res <- glmQLFTest(fit, contrast = cntr.vs.KD)
# Rows of the result table whose BH-adjusted p-value is below 0.05.
ig.edger <- res$table[p.adjust(res$table$PValue, method = "BH") < 0.05, ]

# Inspect the significantly differentially expressed genes:
# show the 10 top-ranked genes, then classify every gene as up / down / not
# significant (BH-adjusted p-value cutoff 0.05) and tabulate the result.
topTags(res, n = 10)
is.de <- decideTestsDGE(res, adjust.method = "BH", p.value = 0.05)
summary(is.de)

# Mean-difference plot; genes with status 1 are drawn in red, -1 in blue.
plotMD(
  res,
  status = is.de,
  values = c(1, -1),
  col = c("red", "blue"),
  legend = "topright"
)

你可能感兴趣的:(转录组专题:edgeR以raw count数据为起点做差异表达分析)