# Setup: clear the workspace, set options and move to the project directory.
# NOTE(review): rm(list = ls()) and the machine-specific setwd() make this
# script non-portable; they are kept because the relative file names used
# below are resolved against this directory.
rm(list = ls())
options(stringsAsFactors = FALSE)
setwd('C:/Users/Administrator/Desktop/LSM测序总结/volcant/volcano测试4-Deseq2')
library(DESeq2)

# Read the count table; the first column is used as row names and the
# remaining columns are treated as per-sample counts.
data1 <- read.csv(file = "Count-rna.csv", header = TRUE, sep = ",")
rownames(data1) <- data1[, 1]
data2 <- data1[, -1, drop = FALSE]
dat <- data2

# Data filtering: keep only rows whose total count across samples exceeds 10.
data <- dat[which(rowSums(dat) > 10), , drop = FALSE]
dat <- data

# Sample table. The layout is positional: the first three count columns are
# assumed to be WT and the next three ko, with WT as the reference level.
coldata <- data.frame(
  condition = factor(rep(c("WT", "ko"), each = 3), levels = c("WT", "ko"))
)

# Fail early with a clear message if the count table does not match the
# hard-coded 3 + 3 layout.
if (ncol(dat) != nrow(coldata)) {
  stop(
    "Count-rna.csv has ", ncol(dat), " sample columns but the design expects ",
    nrow(coldata), " (3 WT followed by 3 ko)",
    call. = FALSE
  )
}
# Tie each condition entry to the sample column it describes.
rownames(coldata) <- colnames(dat)
# Step 1: build the DESeqDataSet object from the filtered counts.
dds <- DESeqDataSetFromMatrix(
  countData = dat,
  colData = coldata,
  design = ~ condition
)

# Step 2: fit the model to obtain fold changes and p-values.
# Note: parallel = TRUE runs multi-threaded and is worth enabling for large
# data sets.
dds1 <- DESeq(
  dds,
  fitType = "mean",
  minReplicatesForReplace = 7,
  parallel = FALSE
)

# The contrast lists the treatment level first and the control level second,
# so the reported changes are ko relative to WT.
res <- results(dds1, contrast = c("condition", "ko", "WT"))
res

# Convert to a plain data frame and write the full result table.
res1 <- data.frame(res, stringsAsFactors = FALSE, check.names = FALSE)
write.table(
  res1,
  "volcant.DESeq2.txt",
  sep = "\t",
  quote = FALSE,
  col.names = NA
)
## Select differentially expressed genes
# Sort by padj ascending; ties are broken by log2FoldChange descending.
# A per-key `decreasing` vector is only supported by the radix method, so it
# is requested explicitly rather than relying on method = "auto" choosing it.
res1 <- res1[order(res1$padj, res1$log2FoldChange,
                   decreasing = c(FALSE, TRUE), method = "radix"), ]

# Classification thresholds:
#   log2FC >=  1 & padj < 0.05 -> "up"   (significantly up-regulated)
#   log2FC <= -1 & padj < 0.05 -> "down" (significantly down-regulated)
#   everything else            -> "none" (not differentially expressed)
lfc_cutoff <- 1
padj_cutoff <- 0.05

# Start from "none" so every gene gets a label, including genes whose padj is
# NA, then mark the significant ones. Assigning "none" first (instead of last
# with abs(log2FC) <= 1) keeps genes sitting exactly on the +/-1 boundary
# labelled as up/down.
res1$sig <- rep("none", nrow(res1))
is_significant <- !is.na(res1$padj) & res1$padj < padj_cutoff
res1$sig[which(is_significant & res1$log2FoldChange >= lfc_cutoff)] <- "up"
res1$sig[which(is_significant & res1$log2FoldChange <= -lfc_cutoff)] <- "down"
# Write the combined table of selected (up or down) genes.
is_selected <- res1$sig %in% c("up", "down")
res1_select <- res1[is_selected, , drop = FALSE]
write.table(
  res1_select,
  file = "volcant.DESeq2.select.txt",
  sep = "\t",
  col.names = NA,
  quote = FALSE
)

# Write the up- and down-regulated genes to separate files.
# which() skips rows whose label is NA, the same way subset() does.
res1_up <- res1[which(res1$sig == "up"), , drop = FALSE]
res1_down <- res1[which(res1$sig == "down"), , drop = FALSE]
write.table(
  res1_up,
  file = "volcant.DESeq2.up.txt",
  sep = "\t",
  col.names = NA,
  quote = FALSE
)
write.table(
  res1_down,
  file = "volcant.DESeq2.down.txt",
  sep = "\t",
  col.names = NA,
  quote = FALSE
)
library(ggplot2)
# Volcano plot: x axis shows log2FoldChange, y axis shows -log10(padj),
# points are coloured by the up/none/down label in `sig`.
p <- ggplot(data = res1, aes(x = log2FoldChange, y = -log10(padj), color = sig)) +
  # Scatter layer
  geom_point(size = 1) +
  # Custom point colours, listed in legend order
  scale_color_manual(values = c('red', 'gray', 'green'), limits = c('up', 'none', 'down')) +
  # Axis and plot titles
  labs(x = 'log2 Fold Change', y = '-log10 adjust p-value', title = 'control vs treat', color = '') +
  # Theme tweaks: centred title, no grid lines, framed transparent panel
  theme(plot.title = element_text(hjust = 0.5, size = 14), panel.grid = element_blank(),
        panel.background = element_rect(color = 'black', fill = 'transparent'),
        legend.key = element_rect(fill = 'transparent')) +
  # Threshold guides. The horizontal line sits at padj = 0.05, the cut-off
  # used to assign `sig`, so the guides match the point colours.
  geom_vline(xintercept = c(-1, 1), lty = 3, color = 'black') +
  geom_hline(yintercept = -log10(0.05), lty = 3, color = 'black') +
  # Axis limits; points outside these ranges are dropped by ggplot2
  xlim(-12, 12) + ylim(0, 35)
p
# Second volcano plot: raw p-values, with the most significant genes labelled.
# The plot is built from `res1` (the DESeq2 result table). `data` holds the
# filtered count matrix and has no logFC / PValue / regulated / SYMBOL
# columns, so it cannot be plotted here.
colnames(res1)
p <- ggplot(data = res1, aes(x = log2FoldChange, y = -log10(pvalue), color = sig)) +
  # alpha must lie in [0, 1]; the out-of-range value is dropped and points
  # are drawn fully opaque.
  geom_point(size = 2.5) +
  theme_bw() +
  xlab("log2FC") + ylab("-log10(Pvalue)") +
  # Named values so each colour is bound to its label explicitly.
  scale_colour_manual(values = c(down = '#6666CC', none = '#999999', up = '#FF6666')) +
  # NOTE(review): these guides mark |log2FC| = log2(1.5) and raw p = 0.05,
  # while `sig` is assigned with |log2FC| >= 1 and padj < 0.05 - confirm
  # which thresholds this figure is meant to show.
  geom_vline(xintercept = c(-(log2(1.5)), log2(1.5)), lty = 4, col = "black", lwd = 0.8) +
  geom_hline(yintercept = -log10(0.05), lty = 4, col = "black", lwd = 0.8)
p

# Label the 10 genes with the smallest p-values (fewer if the table is
# shorter). Row names of the result table are used as the label text.
label <- head(res1[order(res1$pvalue), , drop = FALSE], 10)
label$SYMBOL <- rownames(label)
p1 <- p + geom_point(size = 3, shape = 1, data = label) +
  ggrepel::geom_label_repel(aes(label = SYMBOL), data = label, color = "black")
p1