1.目的
要把免疫细胞与某些基因计算相关性,并画出热图,例如(图)
2.思路
其实就是把完整的相关性矩阵切出一部分,像这样
只拿左上角或者右下角来画热图就好啦。
3.代码实现
3.1. ssGSEA 计算免疫细胞丰度
ssGSEA的代码已经在上一篇有过啦,这里就不再赘述。
rm(list = ls())
if(!require(tinyarray))devtools::install_github("xjsun1221/tinyarray")
library(tinyarray)
es = geo_download("GSE42872")
split_list(es)
#exp = log2(exp+1)
ids = idmap(gpl)
ids = ids[!duplicated(ids$symbol),]
exp = trans_array(exp,ids)
exp[1:4,1:4]
## GSM1052615 GSM1052616 GSM1052617 GSM1052618
## LINC01128 8.75126 8.61650 8.81149 8.32067
## SAMD11 8.39069 8.52617 8.43338 9.17284
## KLHL17 8.20228 8.30886 8.18518 8.13322
## PLEKHN1 8.41004 8.37679 8.27521 8.34524
geneset = rio::import("mmc3.xlsx",skip = 2)
geneset = split(geneset$Metagene,geneset$`Cell type`)
lapply(geneset[1:3], head)
## $`Activated B cell`
## [1] "ADAM28" "CD180" "CD79B" "BLK" "CD19" "MS4A1"
##
## $`Activated CD4 T cell`
## [1] "AIM2" "BIRC3" "BRIP1" "CCL20" "CCL4" "CCL5"
##
## $`Activated CD8 T cell`
## [1] "ADRM1" "AHSA1" "C1GALT1C1" "CCT6B" "CD37" "CD3D"
library(GSVA)
re <- gsva(exp, geneset, method="ssgsea",
mx.diff=FALSE, verbose=FALSE
)
3.2.计算相关性系数和p值
cor函数可以方便的计算相关性系数,而p值则需要写循环。我不想写循环,就去搜了一下,有函数可以实现哦,出自Hmisc包。
library(Hmisc)
identical(colnames(re),colnames(exp))
## [1] TRUE
gs = c("CD36", "DUSP6", "DCT", "SPRY2", "MOXD1", "ETV4", "DTL", "NUPR1",
"ETV5", "ST6GALNAC2", "LDLR", "CCND1", "IER3", "TXNIP", "AREG",
"RNF150", "SCRG1", "SPRY4", "SERPINF1", "FST", "UBASH3B", "MR1",
"TGFA", "SESN3", "KIAA0040", "AOAH", "SLCO4A1", "AZGP1", "LCTL",
"CD24")
nc = t(rbind(re,exp[gs,]))
nc[1:4,1:4]
## Activated B cell Activated CD4 T cell Activated CD8 T cell
## GSM1052615 -0.3720872 0.19193682 -0.07031845
## GSM1052616 -0.3542791 0.17935420 -0.07245836
## GSM1052617 -0.3741143 0.18833815 -0.07231844
## GSM1052618 -0.4096034 0.06878724 -0.11710947
## Activated dendritic cell
## GSM1052615 0.09408956
## GSM1052616 0.09695546
## GSM1052617 0.09016797
## GSM1052618 0.09480261
m = rcorr(nc)$r[1:nrow(re),(ncol(nc)-length(gs)+1):ncol(nc)]
m[1:4,1:4]
## CD36 DUSP6 DCT SPRY2
## Activated B cell -0.9016301 0.8992479 -0.9067670 0.8978868
## Activated CD4 T cell -0.9861614 0.9863182 -0.9848700 0.9887454
## Activated CD8 T cell -0.9855525 0.9869912 -0.9905654 0.9870644
## Activated dendritic cell 0.3144122 -0.3142938 0.2868775 -0.3116635
p = rcorr(nc)$P[1:nrow(re),(ncol(nc)-length(gs)+1):ncol(nc)]
p[1:4,1:4]
## CD36 DUSP6 DCT SPRY2
## Activated B cell 0.014039022 0.0147151178 0.0126333720 0.0151082852
## Activated CD4 T cell 0.000285934 0.0002795077 0.0003416444 0.0001892849
## Activated CD8 T cell 0.000311588 0.0002527423 0.0001330987 0.0002499140
## Activated dendritic cell 0.543922358 0.5440823180 0.5814885810 0.5476413900
上面取子集的操作就是把本文开头的相关性矩阵左上角取了下来哦,
3.3 画图
原图是行名在右边的,而pheatmap默认行名在右边且无法修改。网上有大佬把pheatmap函数修改了一下,让它无缝跑到左边去。代码在https://stackoverflow.com/questions/57729914/how-can-you-show-the-rownames-in-pheatmap-on-the-left-side-of-the-graph。 我把他存在了modified_pheatmap.R脚本里。
library(dplyr)
tmp = matrix(case_when(p<0.01~"**",
p<0.05~"*",
T~""),nrow = nrow(p))
source("modified_pheatmap.R")
pheatmap(m,
display_numbers =tmp,
angle_col =45,
color = colorRampPalette(c("#92b7d1", "white", "#d71e22"))(100),
border_color = "white",
treeheight_col = 0,
treeheight_row = 0)
齐活儿!