R语言- ComplexHeatmap 绘制复杂热图示例

热图（Heatmap）作为展示基因表达模式最直观的工具。它经常用在分子生物学文章里（尤其是microarray, RNA-seq相关论文）直观地呈现多样本多个基因的全局表达量变化，和呈现多样本或多基因表达量的聚类关系。绘制复杂热图最好用的是 ComplexHeatmap包，，它提供了灵活、高效、易于定制的方法来绘制多种类型的热图，并支持多种数据类型和数据格式，可以处理大型数据集，并在短时间内生成高质量的热图。

本文重点介绍如何用ComplexHeatmap::Heatmap的方法在热图上展示全样本的基因表达量情况的同时，在样本组水平上展示簇间的聚类关系（如果样本太多，则展示样本间的聚类关系将变得非常不直观）。

簇级聚类表达热图

1、示例数据准备

future::plan("multiprocess", workers = 6);options(future.globals.maxSize = 100000 * 1024^5) #设置任务多线程
##Data process >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
scRNA <- readRDS("test.RDS")  #加载Seurat对象数据
### 计算seurat_cluster间的 差异基因
markers <- FindAllMarkers(object = scRNA, only.pos = FALSE, min.pct = 0.25, logfc.threshold = 0.25)
### 挑选每个cluster top5 的基因画表达热图
select.features <- markers %>% group_by(cluster) %>% top_n(n = 5, wt = avg_log2FC) 
### 随机为 细胞样本分配一个模拟标签
[email protected]$sample <- sample(c("species.1","species.2","species.3","species.4"),size = ncol(scRNA),replace = T) 
### 提取Seurat绘制热图的矩阵数据
data <- Seurat::DoHeatmap(scRNA, features = select.features$gene, group.by = "seurat_clusters", group.bar = T, size = 4)$data 
### 提取细胞标识
cell.meta <- [email protected] %>% tibble::rownames_to_column(var="Cell") %>% 
  select(Cell,seurat_clusters,sample) %>% 
  arrange(seurat_clusters) %>% mutate(Identity = seurat_clusters)

### 长数据转换宽数据
counts <- data %>% select(!Identity) %>% 
  tidyr::pivot_wider(names_from =  Cell, values_from = Expression) %>% 
  data.frame() %>% tibble::column_to_rownames(var = "Feature") %>% select(cell.meta$Cell)
mat <- as.matrix(counts) #格式转换为矩阵

预处理后用于热图展示的数据集

2、安装本示例需要使用的软件包

BiocManager::install("ComplexHeatmap"); library(ComplexHeatmap) #画热图的包
BiocManager::install("dendextend"); library(dendextend) #用于树聚类的包
BiocManager::install("magick") #用于图像元光栅化处理的包
ht_opt$message = FALSE #忽略ComplexHeatmap包的提示信息

3、创建本示例需要使用的调色板

colormaps <- c(RColorBrewer::brewer.pal(name="Dark2", n = 8),RColorBrewer::brewer.pal(name="Paired", n = 12),RColorBrewer::brewer.pal(name="Set1", n = 9))
scales::show_col(colormaps)

29色调色板

4、创建热图注释块

簇级聚类树

# Dg_tree
### 创建表达矩阵的样本组间聚类树【计算组内样本均值进行建树】
dend1 = cluster_between_groups(mat, cell.meta$Identity)
### 你希望这些样本被聚类成几簇 【按树枝颜色区分】
dend1 = color_branches(dend1, k = 5)  
### 树样式调整
dend1 = dend1 %>% set("branches_lwd", 2) # 聚类树树枝线条 厚度
### dend1 = dend1 %>% raise.dendrogram (3) #聚类树底端线条厚度
### dend1 = dend1 %>% highlight_branches_col(viridis::viridis(100)) #聚类树颜色调整
### dend1 = dend1 %>% highlight_branches_col(rev(viridis::magma(1000)))  #聚类树颜色调整

顶部列注释- 1 - 插入一个空注释行

# 注释1 empty
ha_top_1 <- HeatmapAnnotation(
  empty = anno_empty(border = FALSE,height = unit(0.1, "cm")), #添加空的注释块
  annotation_name_side = "left",which = "column" 
)

顶部列注释- 2 - 插入块注释显示样本的簇编号

# 注释2 Group
### 获取树聚类后的矩阵样本的排列顺序
HM <- Heatmap(mat,cluster_columns = dend1)
HM = draw(HM)
### 根据树聚类的样本排列顺序 来排列细胞信息表cell.meta
group.data <- cell.meta[column_order(HM),] 
### 提取按树聚类排布的样本簇标签顺序
group_order_label <- unique(group.data$Identity,fromLast = F) %>% as.vector()

### 创建样本簇标识 色板
color.cl <- colormaps[seq(length(unique(cell.meta$Identity)))] 
### 创建簇标识色块注释对象
ha_top_2 <- HeatmapAnnotation( 
  Group = anno_block(gp = gpar(fill = color.cl,col = 0),
                     labels = group_order_label, #块注释标签
                     labels_gp = gpar(col = "white", fontface = "bold") , #注释文本样式
                     show_name = TRUE , #显示注释对象名
                     height = unit(0.5,"cm") # 注释对象的整体高度
                     # weight = unit(10,"cm") # 注释对象的整体宽度
                     ),
  annotation_name_side = "left",#注释对象名显示方向
  which = "column"
  )
### anno_block 块注释的图例构建
lgd_Group <- Legend(title = "Group", labels = group_order_label,legend_gp = gpar(fill = color.cl))

顶部列注释- 3 - 添加样本的sample标签注释

# 注释3 Batch
ha_top_3 <- HeatmapAnnotation(Batch = cell.meta$sample, 
                              annotation_legend_param = list(Batch = list(title = "Batch",ncol=1)), #注释图例参数调整
                              annotation_name_side = "left",which = "column")

右侧行注释- 4 - 统计每个基因的表达量(基于seurat标准化后的data矩阵)

### 统计每个基因的表达量
sum_Normexpr <- scRNA@assays$RNA@data[rownames(mat),] %>% Matrix::rowSums()
ha_rig = rowAnnotation(sum_Normexpr = anno_barplot(sum_Normexpr, bar_width = 1,gp = gpar(fill = "yellow",col="red"),
                                                   border=F, #行注释对象外侧边框
                                                   width = unit(2,"cm"), # 行注释的宽度
                                                   axis_param =(list(side = "top",gp=gpar(fontsize=5,col="red"))) # 坐标轴参数
                                                   ),
                       show_annotation_name = FALSE, #不显示注释对象标题
                       annotation_name_side = "bottom",# 注释标题旋转位置
                       annotation_name_gp= gpar(fontsize = 8), #注释标题大小
                       annotation_name_rot = 0 #注释标题旋转
                       )
### anno_block注释图例对象创建
lgd_sumExpr <- Legend(title = "sum_Norm_expr",at = "",legend_gp = gpar(fill = "yellow"))

5、合并模块创建热图

Heatmap(mat,
        cluster_columns = dend1, #列方向添加 簇级 树聚类
        column_split = length(unique(cell.meta$Identity)), #热图列方向按簇拆分
        #热图主体
        column_dend_height = unit(2, "cm"), #树的高度
        clustering_method_columns = "spearson", #树的聚类方法
        column_title = "_OH_MY_Doheatmap_", #列方向大标题
        column_title_side = "bottom",
        column_title_gp = gpar(fontsize = 15, fontface = "bold"), #列方向大标题样式
        
        name = "Expr", #热图名称，表达量图例名
        cluster_rows = FALSE, #关闭行方向聚类
        show_column_names = FALSE, #关闭显示列名
        show_row_names = TRUE, #打开显示行名
        col = viridis::viridis(200), #表达量梯度颜色设置
        na_col = "black", #空值单元格的颜色
        row_title = "cluster_between_groups", #行方向大标题
        row_title_gp = grid::gpar(fontsize = 20,fontface="bold"), #行方向大标题样式
        row_names_side = "left", #行名显示方向
        row_names_gp = grid::gpar(fontsize = 6,fontface="bold"), #行名大小调整
        border = TRUE, #热图图像外边框显示
        
        # 表达量图例 样式设置
        heatmap_legend_param = list(
          title = "Exp",
          border = "red",
          direction = "vertical",
          title_position = "topleft"
          # legend_height = unit(12, "cm") # 热图表达量图例大小
        ),
        # 顶部注释
        top_annotation = c(ha_top_1,ha_top_2,ha_top_3), # 合并多个注释对象
        # 右注释
        right_annotation = ha_rig,
        
        ##图像 光栅化转换
        use_raster = TRUE, raster_quality = 5
) %>% draw(merge_legend = TRUE,padding = unit(c(1, 1, 2, 1), "cm"), # panding:图像编剧下-左-上-右
           annotation_legend_list = list(lgd_Group,lgd_sumExpr) # 添加 自己创建的 legend 对象
           )
decorate_column_dend("Expr", {grid.yaxis()}) # 树聚类 修饰

6、转换 `Heatmap` 为 `ggplot` 并保存出PDF

p <- grid.grabExpr(draw(ht.p))
ggsave(filename = ".pdf",plot = p,width = *,height = *)

ok,以上就是首图所示热图样式的绘制的所有具体代码了，如有不懂欢迎留言一起讨论...

热图注释 -
树聚类简介Introduction to dendextend (r-project.org)
viridis color maps 调色板简介
Chapter 5 Legends | ComplexHeatmap Complete Reference (jokergoo.github.io)
Chapter 14 More Examples | ComplexHeatmap Complete Reference (jokergoo.github.io)
Cluster groups in ComplexHeatmap - A Bioinformagician (jokergoo.github.io)