TCGA数据库挖掘(2)DEG


title: "三大R包差异分析"
output: html_document
editor_options:
chunk_output_type: console


knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
knitr::opts_chunk$set(fig.width = 8,fig.height = 6,collapse = TRUE)
knitr::opts_chunk$set(message = FALSE)

三大R包差异分析

if(!require(stringr))install.packages('stringr') #检查R包安装了没
if(!require(ggplotify))install.packages("ggplotify")
if(!require(patchwork))install.packages("patchwork")
if(!require(cowplot))install.packages("cowplot")
if(!require(DESeq2))BiocManager::install('DESeq2')
if(!require(edgeR))BiocManager::install('edgeR')
if(!require(limma))BiocManager::install('limma')

rm(list = ls())
load("TCGA-CHOL_gdc.Rdata") ##加载数据
table(Group)  ##查看分组数据
#deseq2----
library(DESeq2)
colData <- data.frame(row.names =colnames(exp),   ##生成一个数据框,数据框是以表达矩阵的列名和分组信息来组成的
                      condition=Group)
if(!file.exists(paste0(cancer_type,"_dd.Rdata"))){
  dds <- DESeqDataSetFromMatrix(
  countData = exp,
  colData = colData,
  design = ~ condition)
  dds <- DESeq(dds)
  save(dds,file = paste0(cancer_type,"_dd.Rdata"))
}
load(paste0(cancer_type,"_dd.Rdata"))

# 两两比较
res <- results(dds, contrast = c("condition",rev(levels(Group))))
resOrdered <- res[order(res$pvalue),] # 按照P值排序
DEG <- as.data.frame(resOrdered)
head(DEG)

#添加change列标记基因上调下调
logFC_cutoff <- with(DEG,mean(abs(log2FoldChange)) + 2*sd(abs(log2FoldChange)) )
#logFC_cutoff <- 2
k1 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange < -logFC_cutoff)
k2 = (DEG$pvalue < 0.05)&(DEG$log2FoldChange > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
head(DEG)

DESeq2_DEG <- DEG
#edgeR----
library(edgeR)

dge <- DGEList(counts=exp,group=Group)
dge$samples$lib.size <- colSums(dge$counts)
dge <- calcNormFactors(dge) 

design <- model.matrix(~0+Group)
rownames(design)<-colnames(dge)
colnames(design)<-levels(Group)

dge <- estimateGLMCommonDisp(dge, design)
dge <- estimateGLMTrendedDisp(dge, design)
dge <- estimateGLMTagwiseDisp(dge, design)

fit <- glmFit(dge, design)
fit2 <- glmLRT(fit, contrast=c(-1,1)) 

DEG=topTags(fit2, n=nrow(exp))
DEG=as.data.frame(DEG)
logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )

#logFC_cutoff <- 2
k1 = (DEG$PValue < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$PValue < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))

head(DEG)
table(DEG$change)
edgeR_DEG <- DEG
###limma----
library(limma)

design <- model.matrix(~0+Group)
colnames(design)=levels(Group)
rownames(design)=colnames(exp)

dge <- DGEList(counts=exp)
dge <- calcNormFactors(dge)

v <- voom(dge,design, normalize="quantile")
fit <- lmFit(v, design)

constrasts = paste(rev(levels(Group)),collapse = "-")
cont.matrix <- makeContrasts(contrasts=constrasts,levels = design) 
fit2=contrasts.fit(fit,cont.matrix)
fit2=eBayes(fit2)

DEG = topTable(fit2, coef=constrasts, n=Inf)
DEG = na.omit(DEG)

logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)) )

#logFC_cutoff <- 2
k1 = (DEG$P.Value < 0.05)&(DEG$logFC < -logFC_cutoff)
k2 = (DEG$P.Value < 0.05)&(DEG$logFC > logFC_cutoff)
DEG$change = ifelse(k1,"DOWN",ifelse(k2,"UP","NOT"))
table(DEG$change)
head(DEG)

limma_voom_DEG <- DEG
tj = data.frame(deseq2 = as.integer(table(DESeq2_DEG$change)),
           edgeR = as.integer(table(edgeR_DEG$change)),
           limma_voom = as.integer(table(limma_voom_DEG$change)),
           row.names = c("down","not","up")
          );tj
save(DESeq2_DEG,edgeR_DEG,limma_voom_DEG,Group,tj,file = paste0(cancer_type,"_DEG.Rdata"))

可视化

rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
load("TCGA-CHOL_DEG.Rdata")
if(!require(tinyarray))devtools::install_local("tinyarray-master.zip",upgrade = F)
library(ggplot2)
library(tinyarray)
exp[1:4,1:4]
dat = log2(exp+1)
pca.plot = draw_pca(dat,Group);pca.plot
save(pca.plot,file = paste0(cancer_type,"_pcaplot.Rdata"))

cg1 = rownames(DESeq2_DEG)[DESeq2_DEG$change !="NOT"]
cg2 = rownames(edgeR_DEG)[edgeR_DEG$change !="NOT"]
cg3 = rownames(limma_voom_DEG)[limma_voom_DEG$change !="NOT"]

h1 = draw_heatmap(dat[cg1,],Group,scale_before = T)
h2 = draw_heatmap(dat[cg2,],Group,scale_before = T)
h3 = draw_heatmap(dat[cg3,],Group,scale_before = T)

m2d = function(x){
  mean(abs(x))+2*sd(abs(x))
}

v1 = draw_volcano(DESeq2_DEG,pkg = 1,logFC_cutoff = m2d(DESeq2_DEG$log2FoldChange))
v2 = draw_volcano(edgeR_DEG,pkg = 2,logFC_cutoff = m2d(edgeR_DEG$logFC))
v3 = draw_volcano(limma_voom_DEG,pkg = 3,logFC_cutoff = m2d(limma_voom_DEG$logFC))


library(patchwork)
(h1 + h2 + h3) / (v1 + v2 + v3) +plot_layout(guides = 'collect') &theme(legend.position = "none")

ggsave(paste0(cancer_type,"_heat_vo.png"),width = 15,height = 10)

三大R包差异基因对比

rm(list = ls())
load("TCGA-CHOL_gdc.Rdata")
load("TCGA-CHOL_DEG.Rdata")
load("TCGA-CHOL_pcaplot.Rdata")
UP=function(df){
  rownames(df)[df$change=="UP"]
}
DOWN=function(df){
  rownames(df)[df$change=="DOWN"]
}

up = intersect(intersect(UP(DESeq2_DEG),UP(edgeR_DEG)),UP(limma_voom_DEG))
down = intersect(intersect(DOWN(DESeq2_DEG),DOWN(edgeR_DEG)),DOWN(limma_voom_DEG))
dat = log2(exp+1)
hp = draw_heatmap(dat[c(up,down),],Group,scale_before = T)

#上调、下调基因分别画维恩图
up_genes = list(Deseq2 = UP(DESeq2_DEG),
          edgeR = UP(edgeR_DEG),
          limma = UP(limma_voom_DEG))

down_genes = list(Deseq2 = DOWN(DESeq2_DEG),
          edgeR = DOWN(edgeR_DEG),
          limma = DOWN(limma_voom_DEG))

up.plot <- draw_venn(up_genes,"UPgene")
down.plot <- draw_venn(down_genes,"DOWNgene")

#维恩图拼图,终于搞定

library(patchwork)
#up.plot + down.plot
# 拼图
pca.plot + hp+up.plot +down.plot+ plot_layout(guides = "collect")
ggsave(paste0(cancer_type,"_heat_ve_pca.png"),width = 15,height = 10)

GEO转录组数据的差异分析

image.png
library(GEOquery)
eSet = getGEO("GSE162550",
              destdir = ".",
              getGPL = F)
pd = pData(eSet[[1]])
tmp = read.table("GSE162550_gene_sample_count_with_symbol.xls",
                  sep = "\t",row.names=1,
                 header = T)
exp = as.matrix(tmp[,-(1:2)])
Group = str_remove(pd$title,"\\d")
Group = factor(Group,levels = c("DMSO","DHA"))

exp = exp[apply(exp, 1, function(x) sum(x > 0) > 3), ]
dim(exp)

save(exp,Group,file = "GSE162550.Rdata")

你可能感兴趣的:(TCGA数据库挖掘(2)DEG)