免疫细胞丰度与基因表达量的相关性热图

1.目的

要计算免疫细胞丰度与某些基因表达量之间的相关性，并画出热图，例如（图）

2.思路

其实就是把完整的相关性矩阵切出一部分，像这样

只拿左上角或者右下角来画热图就好啦。

3.代码实现

3.1. ssGSEA 计算免疫细胞丰度

ssGSEA的代码已经在上一篇有过啦，这里就不再赘述。

# Start from an empty workspace: this removes every object in the global
# environment.
rm(list = ls())
# Install tinyarray from GitHub only when it is missing. requireNamespace()
# tests availability without attaching the package and without the warning
# that require() emits on failure; library() below does the attaching.
if (!requireNamespace("tinyarray", quietly = TRUE)) {
  devtools::install_github("xjsun1221/tinyarray")
}
library(tinyarray)
es <- geo_download("GSE42872")
# NOTE(review): split_list() appears to create the objects used below (`exp`,
# `gpl`) in the workspace from the elements of `es` - confirm against the
# tinyarray documentation.
split_list(es)
# log2 transform intentionally left disabled:
#exp = log2(exp+1)
ids <- idmap(gpl)
# Keep only the first annotation row for each gene symbol.
ids <- ids[!duplicated(ids$symbol), ]
exp <- trans_array(exp, ids)
# Preview the top-left corner of the resulting expression matrix.
exp[1:4, 1:4]

##           GSM1052615 GSM1052616 GSM1052617 GSM1052618
## LINC01128    8.75126    8.61650    8.81149    8.32067
## SAMD11       8.39069    8.52617    8.43338    9.17284
## KLHL17       8.20228    8.30886    8.18518    8.13322
## PLEKHN1      8.41004    8.37679    8.27521    8.34524

# Read the marker-gene table, skipping the first two rows of the sheet.
marker_table <- rio::import("mmc3.xlsx", skip = 2)
# Turn the table into a named list: one character vector of genes
# (column `Metagene`) per value of the `Cell type` column.
geneset <- split(marker_table[["Metagene"]], marker_table[["Cell type"]])
# Show the first few genes of the first three gene sets.
lapply(geneset[1:3], head)

## $`Activated B cell`
## [1] "ADAM28" "CD180"  "CD79B"  "BLK"    "CD19"   "MS4A1" 
## 
## $`Activated CD4 T cell`
## [1] "AIM2"  "BIRC3" "BRIP1" "CCL20" "CCL4"  "CCL5" 
## 
## $`Activated CD8 T cell`
## [1] "ADRM1"     "AHSA1"     "C1GALT1C1" "CCT6B"     "CD37"      "CD3D"

library(GSVA)
# Score every sample against every gene set with ssGSEA.
# GSVA >= 1.50 expects a parameter object built with ssgseaParam(); the
# gsva(expr, gset.idx.list, method = ...) form only works on older releases,
# so pick the call that matches the installed version.
if (utils::packageVersion("GSVA") >= "1.50.0") {
  re <- gsva(ssgseaParam(exp, geneset), verbose = FALSE)
} else {
  re <- gsva(exp, geneset, method = "ssgsea",
             mx.diff = FALSE, verbose = FALSE)
}

3.2.计算相关性系数和p值

cor函数可以方便地计算相关性系数，而p值则需要写循环。我不想写循环，就去搜了一下，发现Hmisc包里的rcorr函数可以同时给出相关性系数和p值。

library(Hmisc)
# Sanity check before stacking the two matrices: the ssGSEA scores and the
# expression matrix must list the samples (columns) in the same order.
identical(colnames(exp), colnames(re))

## [1] TRUE

# Genes whose expression is correlated with the immune-cell scores.
gs <- c(
  "CD36", "DUSP6", "DCT", "SPRY2", "MOXD1", "ETV4", "DTL", "NUPR1",
  "ETV5", "ST6GALNAC2", "LDLR", "CCND1", "IER3", "TXNIP", "AREG",
  "RNF150", "SCRG1", "SPRY4", "SERPINF1", "FST", "UBASH3B", "MR1",
  "TGFA", "SESN3", "KIAA0040", "AOAH", "SLCO4A1", "AZGP1", "LCTL",
  "CD24"
)
# One row per sample; the cell-type score columns come first, followed by
# one expression column per gene in `gs`.
nc <- cbind(t(re), t(exp[gs, ]))
nc[1:4, 1:4]

##            Activated B cell Activated CD4 T cell Activated CD8 T cell
## GSM1052615       -0.3720872           0.19193682          -0.07031845
## GSM1052616       -0.3542791           0.17935420          -0.07245836
## GSM1052617       -0.3741143           0.18833815          -0.07231844
## GSM1052618       -0.4096034           0.06878724          -0.11710947
##            Activated dendritic cell
## GSM1052615               0.09408956
## GSM1052616               0.09695546
## GSM1052617               0.09016797
## GSM1052618               0.09480261

# Correlation coefficients between every cell-type score (rows) and every
# selected gene (columns). The block is selected by name rather than by
# position arithmetic, so it stays correct if the column layout of `nc`
# changes; drop = FALSE keeps a matrix even for a single row or column.
m <- rcorr(nc)$r[rownames(re), gs, drop = FALSE]
m[1:4, 1:4]

##                                CD36      DUSP6        DCT      SPRY2
## Activated B cell         -0.9016301  0.8992479 -0.9067670  0.8978868
## Activated CD4 T cell     -0.9861614  0.9863182 -0.9848700  0.9887454
## Activated CD8 T cell     -0.9855525  0.9869912 -0.9905654  0.9870644
## Activated dendritic cell  0.3144122 -0.3142938  0.2868775 -0.3116635

# P-values matching the coefficients: cell-type scores in rows, selected
# genes in columns. The block is selected by name rather than by position
# arithmetic; drop = FALSE keeps a matrix even for a single row or column.
p <- rcorr(nc)$P[rownames(re), gs, drop = FALSE]
p[1:4, 1:4]

##                                 CD36        DUSP6          DCT        SPRY2
## Activated B cell         0.014039022 0.0147151178 0.0126333720 0.0151082852
## Activated CD4 T cell     0.000285934 0.0002795077 0.0003416444 0.0001892849
## Activated CD8 T cell     0.000311588 0.0002527423 0.0001330987 0.0002499140
## Activated dendritic cell 0.543922358 0.5440823180 0.5814885810 0.5476413900

上面取子集的操作就是把本文开头的相关性矩阵左上角取了下来哦：行是免疫细胞，列是基因。

3.3 画图

原图的行名在左边，而pheatmap默认把行名放在右边且无法修改。网上有大佬把pheatmap函数修改了一下，让行名无缝跑到左边去。代码在https://stackoverflow.com/questions/57729914/how-can-you-show-the-rownames-in-pheatmap-on-the-left-side-of-the-graph。我把它存在了modified_pheatmap.R脚本里。

library(dplyr)
# Significance labels, one per cell of `p`: "**" for p < 0.01, "*" for
# p < 0.05, empty otherwise. case_when() returns a plain vector, so it is
# reshaped to the row count of `p`. TRUE is spelled out because `T` is an
# ordinary variable that can be reassigned.
tmp <- matrix(
  case_when(
    p < 0.01 ~ "**",
    p < 0.05 ~ "*",
    TRUE ~ ""
  ),
  nrow = nrow(p)
)
# Load the patched pheatmap() saved in this script (per the text above, the
# version that draws row names on the left).
source("modified_pheatmap.R")
# 100-step palette running from blue through white to red.
heat_colors <- colorRampPalette(c("#92b7d1", "white", "#d71e22"))(100)
# Heatmap of the correlation matrix with the significance labels drawn in the
# cells; both dendrogram heights are set to 0.
pheatmap(
  m,
  color = heat_colors,
  border_color = "white",
  display_numbers = tmp,
  angle_col = 45,
  treeheight_row = 0,
  treeheight_col = 0
)

齐活儿！