pheatmap
是绘制热图的经典R包。其中一些细节参数设置,之前每次遇到都是网上搜索。这次系统整理下常用用法,为以后绘图提供方便。
0、示例数据与R包加载
1、聚类相关参数
2、热图的颜色
3、行,列的注释
4、热图的格子相关
5、行名与列名的调整
6、热图的分割
7、转为ggplot2对象
8、按行按列归一化
0、示例数据与R包加载
(1)模拟示例数据
- 假设有两组样本,每组5个的30个基因的表达数据(15个上调,15个下调)
# Simulate a 30-gene x 10-sample matrix of standard-normal background noise.
exp = matrix(rnorm(30 * 10), nrow = 30, ncol = 10)
# Genes 1-15 get an extra N(4, 1) signal in samples 1-5.
first_genes = 1:15
first_samples = 1:5
exp[first_genes, first_samples] = exp[first_genes, first_samples] +
  matrix(rnorm(75, mean = 4), nrow = 15, ncol = 5)
# Genes 16-30 get an extra N(3, 1) signal in samples 6-10.
second_genes = 16:30
second_samples = 6:10
exp[second_genes, second_samples] = exp[second_genes, second_samples] +
  matrix(rnorm(75, mean = 3), nrow = 15, ncol = 5)
# Keep two decimals and label the dimensions Sample1..Sample10 / Gene1..Gene30.
exp = round(exp, digits = 2)
colnames(exp) = paste0("Sample", 1:10)
rownames(exp) = paste0("Gene", 1:30)
head(exp)
# Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Sample7 Sample8 Sample9 Sample10
# Gene1 0.89 5.00 2.37 4.59 2.71 -0.89 0.21 -0.98 -0.41 0.19
# Gene2 4.05 5.58 3.51 5.45 5.47 -0.77 2.34 -0.67 0.83 -3.54
# Gene3 5.39 6.53 5.25 4.08 5.76 0.98 -0.56 -0.37 -0.56 1.21
# Gene4 4.16 2.29 2.58 3.87 5.17 0.92 -0.80 1.00 -1.23 0.29
# Gene5 3.66 2.30 2.54 2.90 5.37 -0.29 2.29 0.10 -0.33 0.81
# Gene6 2.07 4.19 3.87 1.82 4.82 -0.49 0.37 1.48 0.01 -0.22
(2)加载R包
# install.packages("pheatmap")
library(pheatmap)
packageVersion("pheatmap")
# [1] ‘1.0.12’
(3)基础绘图
# Draw the heatmap with every option left at its default.
pheatmap(mat = exp)
1、聚类相关参数
如上图默认会分别对行、列计算两两间的距离,再进行聚类
1.1 聚类算法
- 对于两两间距离计算参数:
clustering_distance_rows=
,clustering_distance_cols=
。默认为"euclidean"
,备选方法有"correlation"
- 聚类方法的参数:
clustering_method=
。默认为"complete"
,备选方法有"ward.D", "ward.D2", "single", "average" (= UPGMA), "mcquitty" (= WPGMA), "median" (= WPGMC) or "centroid" (= UPGMC)
1.2 不聚类
# Turn off row clustering; rows are then drawn in their original matrix order.
# The argument is spelled out in full (`cluster_rows`) instead of relying on
# R's partial argument matching of `cluster_row`.
pheatmap(exp, cluster_rows = FALSE)
1.3 聚类但不想显示树
# Rows are still clustered, but the row dendrogram is given zero height.
pheatmap(mat = exp, treeheight_row = 0)
其实
treeheight_row
参数是用来调整树的显示尺寸的;设置为0,也就是不显示树了。
1.4 提取热图的表达矩阵
由于聚类会调整原始数据的行列顺序,如果想要获得热图里的行列顺序数据,可如下调整
# Keep the object returned by pheatmap so the clustering results can be read.
ph = pheatmap(mat = exp)
# Row / column positions in the order in which the heatmap displays them.
row_idx = ph$tree_row$order
col_idx = ph$tree_col$order
row_idx
col_idx
# Reorder the original matrix to match the displayed heatmap.
ph_exp = exp[row_idx, col_idx]
ph_exp[seq_len(4), seq_len(4)]
# Sample3 Sample2 Sample4 Sample1
# Gene2 3.51 5.58 5.45 4.05
# Gene12 6.12 5.19 4.04 3.73
# Gene3 5.25 6.53 4.08 5.39
# Gene10 5.36 4.47 4.42 4.54
2、热图的颜色
color=
#Default
# Default palette: the reversed 7-class "RdYlBu" Brewer palette, interpolated
# to 100 colours.
colours = colorRampPalette(rev(RColorBrewer::brewer.pal(n = 7, name = "RdYlBu")))(100)
str(colours)
# chr [1:100] "#4575B4" "#4979B6" "#4E7DB8" "#5282BB" "#5786BD" "#5C8BBF" "#608FC2" ...
# Custom palette: blue -> white -> red in 10 steps. This is the palette that
# produces the str() output printed below.
colours = colorRampPalette(c("#3288bd", "white", "#d53e4f"))(10)
str(colours)
# chr [1:10] "#3288BD" "#5FA2CB" "#8DBCDA" "#BAD7E9" "#E8F1F7" "#FAE9EB" "#F1BEC4" ...
pheatmap(exp, color = colours)
# Alternative custom palette built from named colours:
#colours = colorRampPalette(c("navy", "white", "firebrick3"))(10)
3、行,列的注释
- 为
annotation_col=
,annotation_row=
参数提供data.frame:annotation_col的行名需与表达矩阵的列名一致,annotation_row的行名需与表达矩阵的行名一致;data.frame各列的内容为分组信息。
# 构建列注释信息(行名与表达矩阵的列名col保持一致)
# Column annotation: one row per sample, row names equal to colnames(exp).
sample_group = rep(c("Group_A", "Group_B"), each = 5)
annotation_col = data.frame(group = sample_group, row.names = colnames(exp))
head(annotation_col)
# Row annotation: one row per gene, row names equal to rownames(exp).
gene_type = rep(c("Up", "Down"), each = 15)
annotation_row = data.frame(Type = gene_type, row.names = rownames(exp))
head(annotation_row)
# Draw the heatmap with both annotation tracks attached.
pheatmap(mat = exp,
         annotation_row = annotation_row,
         annotation_col = annotation_col)
- 为
annotation_colors=
参数提供一个list对象,可修改注释分组的颜色
#修改注释标签的颜色
# One named colour vector per annotation column; the vector names are the
# levels used in that column.
group_palette = c("Group_A" = "#e66101", "Group_B" = "#5e3c99")
type_palette = c("Up" = "#e7298a", "Down" = "#66a61e")
ann_colors = list(group = group_palette, Type = type_palette)
# Same annotated heatmap, now using the custom annotation colours.
pheatmap(mat = exp,
         annotation_row = annotation_row,
         annotation_col = annotation_col,
         annotation_colors = ann_colors)
4、热图的格子相关
- 格子的边框颜色
border_color=
;无边框可设置border_color = NA
- 格子的长宽
cellwidth = 15
,cellheight = 12
# White cell borders, with a fixed 9 x 9 cell size.
pheatmap(mat = exp,
         cellwidth = 9,
         cellheight = 9,
         border_color = "white")
- 格子内的文本注释
# Print each value inside its cell, in blue, in scientific notation.
pheatmap(mat = exp,
         display_numbers = TRUE,
         number_format = "%.1e", #default "%.2f"
         number_color = "blue")
# Mark only the cells whose value exceeds 5 with a star.
star_marks = ifelse(exp > 5, "*", "")
pheatmap(mat = exp, display_numbers = matrix(star_marks, nrow(exp)))
# Show the value itself only in the cells whose value exceeds 5.
high_values = ifelse(exp > 5, exp, "")
pheatmap(mat = exp, display_numbers = matrix(high_values, nrow(exp)))
5、行名与列名的调整
- (1)不显示行名与列名
# Hide both row and column names. FALSE is written out because the shorthand
# `F` is an ordinary variable that can be reassigned.
pheatmap(exp, show_rownames = FALSE, show_colnames = FALSE)
- (2)仅特定显示部分行名
注意:由于聚类的原因,一定要注意对应的顺序!
# Start from one empty label per row, then fill in the rows to be labelled.
# Positions refer to rows of `exp` in their original (pre-clustering) order.
labels_row = character(nrow(exp))
shown_rows = c(5, 8, 16)
labels_row[shown_rows] = c("gene_A", "gene_B", "gene_C")
pheatmap(mat = exp, labels_row = labels_row)
-
fontsize_row = 12
,fontsize_col = 8
可修改行名与列名的大小; -
angle_col = 45
可修改列名的角度;该参数只能用于列名(不能调整行名),并且取值只能是"270"、"0"、"45"、"90"、"315"中的一种
6、热图的分割
- (1)根据聚类树,按指定数目进行分割
# Split the heatmap along the dendrograms: 4 row clusters and 2 column clusters.
pheatmap(mat = exp, cutree_rows = 4, cutree_cols = 2)
- (2)指定行/列数进行分割,前提是要取消聚类
# With clustering switched off on both axes, insert gaps after rows 10 and 20
# and after column 5.
pheatmap(mat = exp,
         cluster_cols = FALSE,
         cluster_rows = FALSE,
         gaps_col = 5,
         gaps_row = c(10, 20))
7、转为ggplot2对象
library(ggplot2)
library(ggplotify)
# Wrap the heatmap as a ggplot object so ggplot2 layers can be added to it.
g = as.ggplot(pheatmap(mat = exp))
g + ggtitle(label = "This is a ggplot object")
8、按行按列归一化
- 默认按照原始值可视化,可通过
scale =
参数设置按照行或者列进行归一化(即z-score标准化:减去均值后除以标准差)之后的结果绘图
# Plot row-scaled values instead of the raw matrix.
pheatmap(mat = exp, scale = "row")
# Reference sketch of the row scaling (subtract each row's mean, divide by its
# standard deviation); presumably mirrors what scale = "row" does internally.
# scale_rows = function(x){
# m = apply(x, 1, mean, na.rm = T)
# s = apply(x, 1, sd, na.rm = T)
# return((x - m) / s)
# }