differential expression matrix

照搬了biotrainee的代码,谢谢曾建明学长。


rm(list = ls())
#BiocInstaller::biocLite('CLL')
#install.packages('corrplot')
#install.packages('gpairs')
#install.packages('vioplot')

library(CLL)
library(ggplot2)
library(reshape2)
library(gpairs)
library(corrplot)

data(sCLLex)
sCLLex = sCLLex[,1:8]
group_list = sCLLex$Disease 
exprSet= exprs(sCLLex)
head(exprSet)


exprSet_L = melt(exprSet)
colnames(exprSet_L) = c('probe','sample','value')
exprSet_L$group = rep(group_list,each = nrow(exprSet))
head(exprSet_L)

p = ggplot(exprSet_L,aes(x=sample,y=value,fill=group)) + geom_boxplot()
print(p)
p = p + stat_summary(fun.y = "mean",geom = "point",shape = 23, size = 3, fill="red")
p = p + theme_set(theme_set(theme_bw(base_size = 20)))
p = p + theme(text = element_text(face = 'bold'),axis.text.x = element_text(angle = 30,hjust = 1),axis.title = element_blank())
print(p)

p = ggplot(exprSet_L,aes(x=sample,y=value,fill=group)) + geom_violin()
print(p)

p = ggplot(exprSet_L,aes(value,fill=group)) + geom_histogram(bins = 200)+facet_wrap(~sample,nrow = 4)
print(p)

p = ggplot(exprSet_L,aes(value,col=group)) + geom_density()
print(p)

##gpairs
gpairs(exprSet)

##clusterhclust()
out.dist = dist(t(exprSet),method = 'euclidean')
out.hclust = hclust(out.dist,method = 'complete')
plot(out.hclust)

 ##PCA
pc <- prcomp(t(exprSet),scale =TRUE)
pcx = data.frame(pc$x)
pcr = cbind(samples=rownames(pcx),group_list,pcx)
p=ggplot(pcr, aes(PC1,PC2)) + geom_point(size=5, aes(color=group_list)) + geom_text(aes(label=samples),hjust=-0.1,vjust=-0.3)
print(p)

##heatmap
choose_gene = names(sort(apply(exprSet,1,mad),decreasing = T)[1:50])
choose_matrix = exprSet[choose_gene,]
choose_matrix = scale(choose_matrix)
heatmap(choose_matrix)

library(pheatmap)
pheatmap(choose_matrix)

##DEG && volcano plot
library(limma)
design = model.matrix(~factor(group_list))
fit = lmFit(exprSet,design)
fit = eBayes(fit)
DEG = topTable(fit,coef = 2,n=Inf)
with(DEG,plot(logFC,-log10(P.Value),pch = 20,main = "Volcano plot"))

logFC_cutoff <- with(DEG,mean(abs(logFC)) + 2*sd(abs(logFC)))
DEG$change = as.factor(ifelse(DEG$P.Value < 0.05 & abs(DEG$logFC) > logFC_cutoff,ifelse(DEG$logFC > logFC_cutoff, 'UP','DOWN'),'NOT'))
this_title <- paste0('Cutoff for logFC is ',round(logFC_cutoff,3), '\nThe number of up gene is ',nrow(DEG[DEG$change=='UP',]),'\nThe number of down gene is ',nrow(DEG[DEG$change=='DOWN',]))
g = ggplot(data=DEG, aes(x=logFC, y = -log10(P.Value),color=change)) + geom_point(alpha=0.4,size = 1.75) + theme_set(theme_set(theme_bw(base_size = 20))) + xlab("log2 fold change") + ylab("-log10 p-value") + ggtitle(this_title)+ theme(plot.title = element_text(size=15,hjust = 0.5)) + scale_color_manual(values = c('blue','black','red'))
print(g)

yuwq
2017/5/23


你可能感兴趣的:(differential expression matrix)