大家有没有遇到过这样一种情况:读入数据时,因为文件的分隔符是 \t(制表符),如果没有正确指定分隔符,就会导致读入数据失败。
# Load the tab-separated count table and build the numeric expression matrix
# `data` that is used for the downstream limma analysis.
#
# NOTE(review): setwd() is kept so that the relative file name below (and any
# later relative paths) resolve exactly as before; prefer full paths in new
# code. The former rm(list = ls()) was dropped: every object used below is
# assigned before it is read, so wiping the caller's workspace is unnecessary.
setwd('/lab412C/LSM/蛋白质谱/蛋白质谱-RNA seq/guofan')
counts <- read.table(file = "gf-oocyte-9.3.tsv", header = TRUE, sep = "\t")

# Columns 2-16 are coerced to numeric; apply() returns a numeric matrix with
# one column per input column. Values that cannot be parsed become NA.
oocyte_counts <- apply(counts[, 2:16], 2, as.numeric)
# Column 1 supplies the row names (presumably gene identifiers - confirm).
rownames(oocyte_counts) <- counts[, 1]
typeof(oocyte_counts)

data2 <- oocyte_counts

# Heuristic on the value distribution (min, quartiles, 99th percentile, max)
# to decide whether the values still need a log2 transform.
qx <- as.numeric(
  quantile(data2, c(0, 0.25, 0.5, 0.75, 0.99, 1), na.rm = TRUE)
)
LogC <- (qx[5] > 100) ||
  (qx[6] - qx[1] > 50 && qx[2] > 0) ||
  (qx[2] > 0 && qx[2] < 1 && qx[4] > 1 && qx[4] < 2)
LogC

# Filter on the raw values first, then log-transform the filtered matrix.
# Previously the log2 result was immediately overwritten by the filtered raw
# matrix, so the transform never reached the model.
# which() also drops rows whose sum is NA.
keep_rows <- which(rowSums(data2) > 1)
data <- data2[keep_rows, , drop = FALSE]
if (isTRUE(LogC)) {
  # +1 pseudo-count keeps zero counts finite after the log.
  data <- log2(data + 1)
}
运行这段代码时,你会发现又会因为数据的类型不对而出现问题;可以用 typeof() 来查看数据的类型。
所以这里利用 apply() 配合 as.numeric,
把各列的数据类型改为数值型,最后再用 limma 包进行计算就可以啦。
# Differential expression with limma, followed by volcano plots and CSV export.
# Expects `data`: a numeric matrix with one column per sample (15 columns).
library(limma)
library(ggpubr)
library(ggthemes)

# One group label per sample column, in column order (5 groups x 3 samples).
group_list <- c("1", "1", "1", "2", "2", "2", "3", "3", "3",
                "4", "4", "4", "5", "5", "5")
stopifnot(length(group_list) == ncol(data))

design <- model.matrix(~ factor(group_list))
fit <- lmFit(data, design)
fit <- eBayes(fit)
# With the default treatment contrasts, coefficient 2 is group "2" versus the
# reference group "1"; the other groups are not tested here.
deg <- topTable(fit, coef = 2, number = Inf)

# Different thresholds select different numbers of differential genes, which
# in turn changes downstream hypergeometric (enrichment) test results a lot.
logFC_t <- 1
p_cutoff <- 0.05
# Use the same p-value column for labelling and for the volcano y-axis, so the
# point colours agree with what the plot shows. The labelling previously used
# the raw P.Value while the axis used adj.P.Val.
p_col <- "adj.P.Val"

change <- ifelse(
  deg[[p_col]] > p_cutoff,
  "stable",
  ifelse(
    deg$logFC > logFC_t,
    "up",
    ifelse(deg$logFC < -logFC_t, "down", "stable")
  )
)
deg$logP <- -log10(deg[[p_col]])

ggscatter(deg, x = "logFC", y = "logP") + theme_base()

# Base assignment instead of mutate(): no implicit dplyr dependency, and the
# gene row names of `deg` are kept untouched.
deg$change <- change
table(deg$change)

# Named palette: colours are matched by label, so they stay correct even when
# one of the categories is absent from the data.
ggscatter(
  deg,
  x = "logFC",
  y = "logP",
  color = "change",
  palette = c(down = "#9999FF", stable = "gray", up = "#FF9999"),
  size = 1
) + theme_base()

write.csv(x = deg, file = '/lab412C/LSM/蛋白质谱/蛋白质谱-RNA seq/guofan/deg.csv')