帮老婆写代谢组的热图和火山图

rm(list=ls())
setwd('/Users/zhangjuxiang/Desktop/R time seq/')
#Bioconductor 安装 edgeR
#install.packages('BiocManager')  #需要首先安装 BiocManager,如果尚未安装请先执行该步
BiocManager::install('edgeR',force=T)
#读取基因表达矩阵
targets <- read.csv('rawdata.csv')
as.matrix(targets)
rownames(targets) <- targets[,1]
targets <- targets[,-1]
#指定分组,注意要保证表达矩阵中的样本顺序和这里的分组顺序是一一对应的
#对照组在前,处理组在后
group <- rep(c('BC', 'EC'),c(121,192))





library(edgeR)
#数据预处理
#(1)构建 DGEList 对象
dgelist <- DGEList(counts = targets, group = group)

#(2)过滤 low count 数据,例如 CPM 标准化(推荐)
keep <- rowSums(cpm(dgelist) > 1 ) >= 2
dgelist <- dgelist[keep,, keep.lib.sizes = FALSE]

#(3)标准化,以 TMM 标准化为例
dgelist_norm <- calcNormFactors(dgelist, method = 'TMM')






#差异表达基因分析
#首先根据分组信息构建试验设计矩阵,分组信息中一定要是对照组在前,处理组在后
design <- model.matrix(~group)

#(1)估算基因表达值的离散度
dge <- estimateDisp(dgelist_norm, design, robust = TRUE)

#(2)模型拟合,edgeR 提供了多种拟合算法
#负二项广义对数线性模型
fit <- glmFit(dge, design, robust = TRUE)
lrt <- topTags(glmLRT(fit), n = nrow(dgelist$counts))

write.table(lrt, 'control_treat.glmLRT.txt', sep = '\t', col.names = NA, quote = FALSE)

#拟似然负二项广义对数线性模型
fit <- glmQLFit(dge, design, robust = TRUE)
lrt <- topTags(glmQLFTest(fit), n = nrow(dgelist$counts))

write.table(lrt, 'control_treat.glmQLFit.txt', sep = '\t', col.names = NA, quote = FALSE)








##筛选差异表达基因
#读取上述输出的差异倍数计算结果
gene_diff <- read.delim('control_treat.glmLRT.txt', row.names = 1, sep = '\t', check.names = FALSE)

#首先对表格排个序,按 FDR 值升序排序,相同 FDR 值下继续按 log2FC 降序排序
gene_diff <- gene_diff[order(gene_diff$FDR, gene_diff$logFC, decreasing = c(FALSE, TRUE)), ]

#log2FC≥1 & FDR<0.01 标识 up,代表显著上调的基因
#log2FC≤-1 & FDR<0.01 标识 down,代表显著下调的基因
#其余标识 none,代表非差异的基因
gene_diff[which(gene_diff$logFC >= 1 & gene_diff$FDR < 0.05),'sig'] <- 'up'
gene_diff[which(gene_diff$logFC <= -1 & gene_diff$FDR < 0.05),'sig'] <- 'down'
gene_diff[which(abs(gene_diff$logFC) <= 1 | gene_diff$FDR >= 0.05),'sig'] <- 'none'

#输出选择的差异基因总表
gene_diff_select <- subset(gene_diff, sig %in% c('up', 'down'))
write.table(gene_diff_select, file = 'control_treat.glmQLFit.select.txt', sep = '\t', col.names = NA, quote = FALSE)

#根据 up 和 down 分开输出
gene_diff_up <- subset(gene_diff, sig == 'up')
gene_diff_down <- subset(gene_diff, sig == 'down')

write.table(gene_diff_up, file = 'control_treat.glmQLFit.up.txt', sep = '\t', col.names = NA, quote = FALSE)
write.table(gene_diff_down, file = 'control_treat.glmQLFit.down.txt', sep = '\t', col.names = NA, quote = FALSE)







install.packages('pheatmap')
library(pheatmap)
{
  tmp = gene_diff_select[gene_diff_select$PValue < 0.05,]
  #差异结果需要先根据p值挑选
  nrDEG_Z = tmp[ order( tmp$logFC ), ]
  nrDEG_F = tmp[ order( -tmp$logFC ), ]
  choose_gene = c( rownames( nrDEG_Z )[1:100], rownames( nrDEG_F )[1:100] )
  choose_matrix = targets[ choose_gene, ]
  choose_matrix = t( scale( t( choose_matrix ) ) )
  
  choose_matrix[choose_matrix > 2] = 2
  choose_matrix[choose_matrix < -2] = -2
  
  annotation_col = data.frame( CellType = factor( group ) )
  rownames( annotation_col ) = colnames( targets )
  choose_matrix <- na.omit(choose_matrix)
  pheatmap( fontsize = 2, choose_matrix, annotation_col = annotation_col, show_rownames = F, annotation_legend = F, filename = "heatmap_BRCA_medianexp2.png")
}








install.packages('ggplot2')
library( "ggplot2" )
nrDEG <- gene_diff
logFC_cutoff <- with( nrDEG, mean( abs( logFC ) ) + 2 * sd( abs( logFC ) ) )
logFC_cutoff
logFC_cutoff = 1
{
  nrDEG$change = as.factor( ifelse( nrDEG$PValue < 0.01 & abs(nrDEG$logFC) > logFC_cutoff,
                                    ifelse( nrDEG$logFC > logFC_cutoff , 'UP', 'DOWN' ), 'NOT' ) )
  
  save( nrDEG, file = "nrDEG_array_medianexp.Rdata" )
  
  this_tile <- paste0( 'Cutoff for logFC is ', round( logFC_cutoff, 3 ),
                       ' The number of up M/Z is ', nrow(nrDEG[ nrDEG$change =='UP', ] ),
                       ' The number of down M/Z is ', nrow(nrDEG[ nrDEG$change =='DOWN', ] ) )
  
  volcano = ggplot(data = nrDEG, aes( x = logFC, y = -log10(PValue), color = change)) +
    geom_point( alpha = 0.4, size = 1.75) +
    theme_set( theme_set( theme_minimal( base_size = 15 ) ) ) +
    xlab( "log2 fold change" ) + ylab( "-log10 p-value" ) +
    theme(legend.title = element_text(colour="black", size=6, face="bold")) +
    theme(legend.text = element_text(colour="black", size = 7, face = "bold")) +
    theme(axis.title.x = element_text(size = 9, color = "black", face = "bold")) +
    theme(axis.title.y = element_text(size = 9, color = "black", face = "bold")) +
    ggtitle( this_tile ) + theme( plot.title = element_text( size = 8, hjust = 0.5, face = "bold" )) +
    theme(legend.position=c(1.2, 0.8)) +
    theme(aspect.ratio=1) +
    scale_colour_manual( values = c('green','black','red') ) + theme(panel.grid.major = element_line(colour = "white", 
                                                                                                     linetype = "blank"), panel.grid.minor = element_line(colour = "white"), 
                                                                     panel.background = element_rect(fill = "aliceblue", 
                                                                                                     colour = "white"), plot.background = element_rect(colour = "azure1"))
  print( volcano ) 
  ggsave( volcano, filename = 'volcano_BRCA_medianexp.tiff' )
  dev.off()
}

你可能感兴趣的:(帮老婆写代谢组的热图和火山图)