热图(Heatmap)是展示基因表达模式最直观的工具。它经常用在分子生物学文章里(尤其是 microarray、RNA-seq 相关论文),用于直观地呈现多样本、多基因的全局表达量变化,以及多样本或多基因表达量的聚类关系。绘制复杂热图最好用的是 ComplexHeatmap 包,它提供了灵活、高效、易于定制的方法来绘制多种类型的热图,并支持多种数据类型和数据格式,可以处理大型数据集,并在短时间内生成高质量的热图。
本文重点介绍如何用 `ComplexHeatmap::Heatmap` 的方法在热图上展示全样本的基因表达量情况的同时,在样本组水平上展示簇间的聚类关系(如果样本太多,则展示样本间的聚类关系将变得非常不直观)。
1、示例数据准备
# Example-data preparation: load a Seurat object, find cluster markers, and
# build the gene x cell expression matrix (`mat`) plus the per-cell metadata
# table (`cell.meta`) used by the heatmap sections below.
library(Seurat) # FindAllMarkers(), DoHeatmap()
library(dplyr)  # %>%, group_by(), select(), arrange(), mutate(), all_of()

# Run the heavy Seurat steps in parallel. The "multiprocess" strategy is
# deprecated/defunct in current releases of the future package, so the
# portable "multisession" backend is used instead.
future::plan("multisession", workers = 6)
# Very large cap so that big objects can be exported to the workers.
options(future.globals.maxSize = 100000 * 1024^5)

## Data process >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
scRNA <- readRDS("test.RDS") # load the Seurat object

### Differentially expressed genes between the Seurat clusters
markers <- FindAllMarkers(
  object = scRNA,
  only.pos = FALSE,
  min.pct = 0.25,
  logfc.threshold = 0.25
)

### Keep the top 5 genes (by avg_log2FC) of every cluster for the heatmap
select.features <- markers %>%
  group_by(cluster) %>%
  top_n(n = 5, wt = avg_log2FC) %>%
  ungroup()

### Randomly assign a simulated sample label to every cell
scRNA@meta.data$sample <- sample(
  c("species.1", "species.2", "species.3", "species.4"),
  size = ncol(scRNA),
  replace = TRUE
)

### Extract the long-format data that Seurat uses to draw its own heatmap
data <- Seurat::DoHeatmap(
  scRNA,
  features = select.features$gene,
  group.by = "seurat_clusters",
  group.bar = TRUE,
  size = 4
)$data

### Per-cell identity table, ordered by cluster
cell.meta <- scRNA@meta.data %>%
  tibble::rownames_to_column(var = "Cell") %>%
  select(Cell, seurat_clusters, sample) %>%
  arrange(seurat_clusters) %>%
  mutate(Identity = seurat_clusters)

### Long -> wide: one row per gene, one column per cell
# as.data.frame() keeps the cell barcodes untouched; data.frame() would run
# them through make.names() (e.g. "-1" becomes ".1") and the column selection
# by barcode below would then fail.
counts <- data %>%
  select(!Identity) %>%
  tidyr::pivot_wider(names_from = Cell, values_from = Expression) %>%
  as.data.frame() %>%
  tibble::column_to_rownames(var = "Feature") %>%
  select(all_of(cell.meta$Cell)) # same column order as cell.meta
mat <- as.matrix(counts) # convert to a matrix
2、安装本示例需要使用的软件包
# Install the packages used in this example only when they are missing, so that
# re-running the script does not reinstall them every time.
#   ComplexHeatmap - heatmap drawing
#   dendextend     - dendrogram manipulation
#   magick         - rasterisation of the heatmap body
for (pkg in c("ComplexHeatmap", "dendextend", "magick")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    BiocManager::install(pkg)
  }
}
library(ComplexHeatmap)
library(dendextend)
ht_opt$message <- FALSE # silence ComplexHeatmap's informational messages
3、创建本示例需要使用的调色板
# Palette for this example: the Dark2 (8), Paired (12) and Set1 (9) ColorBrewer
# palettes concatenated in that order, then previewed as a swatch grid.
colormaps <- unlist(
  Map(
    RColorBrewer::brewer.pal,
    n = c(8, 12, 9),
    name = c("Dark2", "Paired", "Set1")
  ),
  use.names = FALSE
)
scales::show_col(colormaps)
4、创建热图注释块
- 簇级聚类树
# Dg_tree: group-level dendrogram for the heatmap columns.
# Steps, in order:
#   1. build a dendrogram between the sample groups of the expression matrix
#      (original note: the tree is built from the within-group sample means);
#   2. colour the branches by the number of clusters you want (k = 5 here);
#   3. set the line width of the branches.
dend1 <- cluster_between_groups(mat, cell.meta$Identity) %>%
  color_branches(k = 5) %>%
  set("branches_lwd", 2)
### Optional tweaks (left disabled):
### dend1 <- dend1 %>% raise.dendrogram(3) # original note: adjusts the bottom end of the tree
### dend1 <- dend1 %>% highlight_branches_col(viridis::viridis(100)) # recolour the tree
### dend1 <- dend1 %>% highlight_branches_col(rev(viridis::magma(1000))) # recolour the tree
- 顶部列注释- 1 - 插入一个空注释行
# Annotation 1 (empty): a thin, border-less empty column annotation.
ha_top_1 <- HeatmapAnnotation(
  empty = anno_empty(
    height = unit(0.1, "cm"),
    border = FALSE
  ),
  which = "column",
  annotation_name_side = "left"
)
- 顶部列注释- 2 - 插入块注释显示样本的簇编号
# Annotation 2 (Group): coloured blocks labelled with the cluster of each slice.

### Draw a plain heatmap once to obtain the column order produced by the tree
HM <- draw(Heatmap(mat, cluster_columns = dend1))

### Re-order the cell table to follow that column order
group.data <- cell.meta[column_order(HM), ]

### Cluster labels in the order in which they appear along the tree
group_order_label <- as.vector(unique(group.data$Identity, fromLast = FALSE))

### One palette colour per cluster
color.cl <- colormaps[seq_len(length(unique(cell.meta$Identity)))]

### Block annotation carrying the cluster labels
ha_top_2 <- HeatmapAnnotation(
  Group = anno_block(
    gp = gpar(fill = color.cl, col = 0),
    labels = group_order_label,                          # block labels
    labels_gp = gpar(col = "white", fontface = "bold"),  # label text style
    show_name = TRUE,                                    # show the annotation name
    height = unit(0.5, "cm")                             # overall annotation height
    # width = unit(10, "cm")                             # overall annotation width
  ),
  annotation_name_side = "left", # side on which the annotation name is shown
  which = "column"
)

### anno_block does not create a legend on its own, so build one by hand
lgd_Group <- Legend(
  title = "Group",
  labels = group_order_label,
  legend_gp = gpar(fill = color.cl)
)
- 顶部列注释- 3 - 添加样本的sample标签注释
# Annotation 3 (Batch): per-cell sample label shown as a column annotation,
# with a single-column legend titled "Batch".
ha_top_3 <- HeatmapAnnotation(
  Batch = cell.meta$sample,
  which = "column",
  annotation_name_side = "left",
  annotation_legend_param = list(
    Batch = list(title = "Batch", ncol = 1)
  )
)
- 右侧行注释- 4 - 统计每个基因的表达量(基于seurat标准化后的data矩阵)
### Per-gene total of the values stored in the RNA assay's `data` slot
### (the Seurat-normalised matrix, according to the section heading),
### restricted to the genes shown in the heatmap.
sum_Normexpr <- Matrix::rowSums(scRNA@assays$RNA@data[rownames(mat), ])

### Right-hand row annotation: one bar per gene
ha_rig <- rowAnnotation(
  sum_Normexpr = anno_barplot(
    sum_Normexpr,
    bar_width = 1,
    gp = gpar(fill = "yellow", col = "red"),
    border = FALSE,          # no outer border around the annotation
    width = unit(2, "cm"),   # width of the row annotation
    axis_param = list(       # axis settings
      side = "top",
      gp = gpar(fontsize = 5, col = "red")
    )
  ),
  show_annotation_name = FALSE,            # hide the annotation title
  annotation_name_side = "bottom",         # title position
  annotation_name_gp = gpar(fontsize = 8), # title size
  annotation_name_rot = 0                  # title rotation
)

### Hand-made legend entry for the barplot annotation
lgd_sumExpr <- Legend(
  title = "sum_Norm_expr",
  at = "",
  legend_gp = gpar(fill = "yellow")
)
5、合并模块创建热图
# Assemble and draw the final heatmap. The drawn heatmap list is stored in
# `ht.p` so that it can be re-drawn or exported afterwards.
ht.p <- Heatmap(
  mat,
  cluster_columns = dend1, # group-level dendrogram on the columns
  column_split = length(unique(cell.meta$Identity)), # split the columns into one slice per cluster
  # Heatmap body
  column_dend_height = unit(2, "cm"), # height of the column dendrogram
  # No clustering_method_columns here: "spearson" is not a valid method name,
  # and the setting is not needed because a ready-made dendrogram is supplied.
  column_title = "_OH_MY_Doheatmap_", # column title
  column_title_side = "bottom",
  column_title_gp = gpar(fontsize = 15, fontface = "bold"), # column title style
  name = "Expr", # heatmap name; also used by decorate_column_dend() below
  cluster_rows = FALSE,      # no row clustering
  show_column_names = FALSE, # hide column names
  show_row_names = TRUE,     # show row names
  col = viridis::viridis(200), # expression colour gradient
  na_col = "black",            # colour of NA cells
  row_title = "cluster_between_groups", # row title
  row_title_gp = grid::gpar(fontsize = 20, fontface = "bold"), # row title style
  row_names_side = "left", # side of the row names
  row_names_gp = grid::gpar(fontsize = 6, fontface = "bold"), # row name size
  border = TRUE, # draw the outer border of the heatmap
  # Style of the expression legend
  heatmap_legend_param = list(
    title = "Exp",
    border = "red",
    direction = "vertical",
    title_position = "topleft"
    # legend_height = unit(12, "cm") # size of the expression legend
  ),
  # Top annotations
  top_annotation = c(ha_top_1, ha_top_2, ha_top_3), # combine several annotation objects
  # Right annotation
  right_annotation = ha_rig,
  # Rasterise the heatmap body
  use_raster = TRUE,
  raster_quality = 5
) %>%
  draw(
    merge_legend = TRUE,
    padding = unit(c(1, 1, 2, 1), "cm"), # padding: bottom, left, top, right
    annotation_legend_list = list(lgd_Group, lgd_sumExpr) # add the hand-made legends
  )
decorate_column_dend("Expr", {
  grid.yaxis()
}) # decorate the column dendrogram with a y axis
6、将 Heatmap 转换为 grid 图形对象(grob)并保存为 PDF
# Capture the heatmap drawing as a grid grob and write it to a PDF file.
# `ht.p` must hold the heatmap object to export, i.e. the result of the
# Heatmap(...) call from step 5 assigned to `ht.p`.
p <- grid.grabExpr(draw(ht.p))
# Adjust the file name and the page size (in inches) to your needs.
ggplot2::ggsave(filename = "heatmap.pdf", plot = p, width = 12, height = 8)
ok,以上就是首图所示热图样式的绘制的所有具体代码了,如有不懂欢迎留言一起讨论...
热图注释 -
树聚类简介Introduction to dendextend (r-project.org)
viridis color maps 调色板简介
Chapter 5 Legends | ComplexHeatmap Complete Reference (jokergoo.github.io)
Chapter 14 More Examples | ComplexHeatmap Complete Reference (jokergoo.github.io)
Cluster groups in ComplexHeatmap - A Bioinformagician (jokergoo.github.io)