GEO数据挖掘之绘制PCA热图和差异基因火山图

PCA和差异基因图是生信技能树[生信爆款入门课程]GEO数据挖掘的重点。为拓展课堂所学知识,现在找一个数据集对他们做下练习总结。

1.主成分PCA 图----

> dat=as.data.frame(t(exp))
> library(FactoMineR)
> library(factoextra)
> dat.pca <- PCA(dat, graph = FALSE)
> pca_plot <- fviz_pca_ind(dat.pca,
+                          geom.ind = "point", # show points only (nbut not "text")
+                          col.ind = Group, # color by groups
+                          palette = c("#00AFBB", "#E7B800"),
+                          addEllipses = TRUE, # Concentration ellipses
+                          legend.title = "Groups"
+ )
> pca_plot
> ggsave(plot = pca_plot,filename = paste0(gse_number,"_PCA.png"))
Saving 6.4 x 3.77 in image
> save(pca_plot,file = "pca_plot.Rdata")
image.png

2.top 1000 sd 热图----

> cg=names(tail(sort(apply(exp,1,sd)),1000))
> n=exp[cg,]
> library(pheatmap)
Warning message:
程辑包‘pheatmap’是用R版本4.0.3 来建造的 
> annotation_col=data.frame(group=Group)
> rownames(annotation_col)=colnames(n)
> ## 2.自行标准化再画热图
> n2 = t(scale(t(n)))
> pheatmap(n2,
+          show_colnames =F,
+          show_rownames = F,
+          cluster_cols = F,
+          annotation_col=annotation_col,
+          breaks = seq(-3,3,length.out = 100)
+ )
image.png

3.相关性热图

> pheatmap::pheatmap(cor(exp),
+                    annotation_col = annotation_col)
> 
image.png
  1. 差异基因火山图

1.火山图----

> library(dplyr)
> library(ggplot2)
> dat  = deg
> p <- ggplot(data = dat, 
+             aes(x = logFC, 
+                 y = -log10(P.Value))) +
+   geom_point(alpha=0.4, size=3.5, 
+              aes(color=change)) +
+   ylab("-log10(Pvalue)")+
+   scale_color_manual(values=c("blue", "grey","red"))+
+   geom_vline(xintercept=c(-logFC_t,logFC_t),lty=4,col="black",lwd=0.8) +
+   geom_hline(yintercept = -log10(P.Value_t),lty=4,col="black",lwd=0.8) +
+   theme_bw()
> p
> load(file = 'step2output.Rdata')
> if(T){
+   #全部差异基因
+   cg = deg$probe_id[deg$change !="stable"]
+   length(cg)
+ }else{
+   #取前30上调和前30下调
+   x=deg$logFC[deg$change !="stable"] 
+   names(x)=deg$probe_id[deg$change !="stable"] 
+   cg=names(c(head(sort(x),30),tail(sort(x),30)))
+   length(cg)
+ }
[1] 1616
> n=exp[cg,]
> dim(n)
[1] 1616   22
image.png

差异基因热图

> library(pheatmap)
> annotation_col=data.frame(group=Group)
> rownames(annotation_col)=colnames(n)
> heatmap_plot <- pheatmap(n,show_colnames =F,
+                          show_rownames = F,
+                          scale = "row",
+                          cluster_cols = F, 
+                          annotation_col=annotation_col,
+                          breaks = seq(-3,3,length.out = 100)
+ )
> heatmap_plot
> ggsave(heatmap_plot,filename = paste0(gse_number,"_heatmap.png"))
Saving 6.4 x 3.77 in image
> load("pca_plot.Rdata")
image.png

3.拼图

> library(patchwork)
Warning message:
程辑包‘patchwork’是用R版本4.0.3 来建造的 
> library(ggplotify)
Warning message:
程辑包‘ggplotify’是用R版本4.0.3 来建造的 
> (pca_plot + p +as.ggplot(heatmap_plot))
>
image.png

你可能感兴趣的:(GEO数据挖掘之绘制PCA热图和差异基因火山图)