李老师RNA-seq PRC1.6-Gene和PRC1.6-Repeat数据分析
读取要处理的文件
library(ggplot2)
library (VennDiagram)
# Differential-expression result tables to plot, grouped by analysis level.
gene_diff_files <- c(
  "PRC1.6-Gene/Rif1.KO.diff.csv",
  "PRC1.6-Gene/RNF2.KO.diff.csv"
)
repeat_diff_files <- c(
  "PRC1.6-Repeat/Rif1.KO.rmsk.diff.csv",
  "PRC1.6-Repeat/RNF2.KO.rmsk.diff.csv"
)
# Gene-level tables first, then repeat-level tables.
file.list <- c(gene_diff_files, repeat_diff_files)
火山图统计上下调基因数
# Draw one volcano plot per table in `file.list`, print it, and save it as a
# PNG next to the input file. The plot title reports the cutoffs used and the
# number of up-/down-regulated rows. Each table must provide the columns
# `pvalue` and `log2FoldChange`.
pvalue_cutoff <- 0.05
for (file in file.list) {
  DEG <- read.table(file, sep = ",", header = TRUE)
  # Data-driven alternative, kept for reference:
  # log2FoldChange_cutoff <- with(
  #   DEG, mean(abs(log2FoldChange)) + 2 * sd(abs(log2FoldChange))
  # )
  log2FoldChange_cutoff <- 1

  # A row with a missing p-value or fold change can never be called
  # significant. Without this guard such rows become NA in `change`, and
  # logical subsetting with NA would count them as both UP and DOWN.
  is_significant <- !is.na(DEG$pvalue) & !is.na(DEG$log2FoldChange) &
    DEG$pvalue < pvalue_cutoff &
    abs(DEG$log2FoldChange) > log2FoldChange_cutoff

  # Fixed level order so colours and counts do not depend on which classes
  # happen to be present in a given table.
  DEG$change <- factor(
    ifelse(
      is_significant,
      ifelse(DEG$log2FoldChange > log2FoldChange_cutoff, "UP", "DOWN"),
      "NOT"
    ),
    levels = c("DOWN", "NOT", "UP")
  )

  n_up <- sum(DEG$change == "UP")
  n_down <- sum(DEG$change == "DOWN")

  this_title <- paste0(
    "Cutoff for log2FoldChange is ", round(log2FoldChange_cutoff, 3),
    "\nCutoff for pvalue is ", pvalue_cutoff,
    "\nThe number of up gene is ", n_up,
    "\nThe number of down gene is ", n_down
  )

  g <- ggplot(
    data = DEG,
    aes(x = log2FoldChange, y = -log10(pvalue), color = change)
  ) +
    geom_point(alpha = 0.4, size = 1.75) +
    # Add the theme to this plot only; do not touch the global default theme.
    theme_bw(base_size = 20) +
    xlab("log2 fold change") +
    ylab("-log10 p-value") +
    ggtitle(this_title) +
    theme(plot.title = element_text(size = 15, hjust = 0.5)) +
    # Named values bind each colour to its class regardless of level order.
    scale_colour_manual(
      values = c(DOWN = "blue", NOT = "black", UP = "red")
    )
  print(g)

  # The cutoff tag in the file name follows the cutoff actually used
  # ("cutoff1" for a cutoff of 1).
  cutoff_tag <- paste0("cutoff", round(log2FoldChange_cutoff, 3))
  ggsave(
    filename = paste(file, cutoff_tag, "volcano.png", sep = "."),
    plot = g
  )
}
log2FC cutoff=1,pvalue=0.05时,Rif1上调基因数为759,下调基因数为410
log2FC cutoff=1,pvalue=0.05时,RNF2上调基因数为1461,下调基因数为1189
根据数据,计算log2FC cutoff值
log2FC cutoff=1.105,pvalue=0.05时,Rif1上调基因数为635,下调基因数为227
log2FC cutoff=2.034,pvalue=0.05时,RNF2上调基因数为612,下调基因数为144
repeat数据计算出来的log2FC cutoff值为3左右,得到的差异重复太少了,故直接取1
log2FC cutoff=1,pvalue=0.05时,Rif1上调Repeat数为254,下调Repeat数为171
log2FC cutoff=1,pvalue=0.05时,RNF2上调Repeat数为155,下调Repeat数为31
韦恩图
# Collect, for each knockout table, the IDs of significantly up- and
# down-regulated rows. The results are stored in `up.list` and `down.list`,
# keyed by the first four characters of the file name ("Rif1", "RNF2").
file.list <- c(
  "PRC1.6-Gene/Rif1.KO.diff.csv",
  "PRC1.6-Gene/RNF2.KO.diff.csv"
)
# Repeat-level tables are currently disabled:
# "PRC1.6-Repeat/Rif1.KO.rmsk.diff.csv",
# "PRC1.6-Repeat/RNF2.KO.rmsk.diff.csv",
up.list <- list()
down.list <- list()
for (file in file.list) {
  DEG <- read.table(file, sep = ",", header = TRUE)

  # Per-table cutoff: mean + 2 * sd of |log2FC|. Missing fold changes are
  # ignored; otherwise a single NA would turn the cutoff (and every call
  # below) into NA.
  abs_lfc <- abs(DEG$log2FoldChange)
  log2FoldChange_cutoff <- mean(abs_lfc, na.rm = TRUE) +
    2 * sd(abs_lfc, na.rm = TRUE)
  # Fixed alternative, kept for reference:
  # log2FoldChange_cutoff <- 1

  # Rows with a missing p-value or fold change are never significant. Leaving
  # them as NA would insert NA entries into both gene lists and inflate the
  # overlap shown in the Venn diagram.
  is_significant <- !is.na(DEG$pvalue) & !is.na(DEG$log2FoldChange) &
    DEG$pvalue < 0.05 &
    abs(DEG$log2FoldChange) > log2FoldChange_cutoff

  DEG$change <- factor(
    ifelse(
      is_significant,
      ifelse(DEG$log2FoldChange > log2FoldChange_cutoff, "UP", "DOWN"),
      "NOT"
    ),
    levels = c("DOWN", "NOT", "UP")
  )

  # NOTE(review): `X` is presumably the unnamed ID column of the CSV as
  # labelled by read.table -- confirm against the input files.
  sample_name <- substr(basename(file), start = 1, stop = 4)
  up.list[[sample_name]] <- DEG[DEG$change == "UP", "X"]
  down.list[[sample_name]] <- DEG[DEG$change == "DOWN", "X"]
}
#
# Write a Venn diagram of `gene_sets` to the PNG file `png_path`, using the
# shared layout settings of this analysis and `title` as the main heading.
# The fill/alpha/label settings supply two values each, i.e. one per set.
draw_two_set_venn <- function(gene_sets, png_path, title) {
  venn.diagram(
    gene_sets,
    png_path,
    # Image size and format
    height = 450,
    width = 450,
    resolution = 300,
    imagetype = "png",
    # Circle appearance
    col = "white",
    fill = c(colors()[616], colors()[38]),
    alpha = c(0.6, 0.6),
    lwd = c(1, 1),
    # Count labels
    cex = 0.6,
    # Set-name labels
    cat.dist = c(-0.07, -0.07),
    cat.pos = c(300, 60),
    cat.cex = 0.7,
    # Heading
    main = title,
    main.cex = 0.5
  )
}

draw_two_set_venn(up.list, "up.png", "Venn diagram of Rif1 and RNF2 up genes")
draw_two_set_venn(
  down.list, "down.png", "Venn diagram of Rif1 and RNF2 down genes"
)