【R】热图绘制

获得数据

导入数据(示例数据请私信)
# Read the table (first CSV column becomes the row names) and keep only
# rows 1-22 and columns 2-22 for the examples that follow.
data <- read.csv(
  "pros_with_name.csv",
  header = TRUE,
  row.names = 1
)[1:22, 2:22]
宽数据转长数据
library(reshape2)

# Give every row a short identifier (a, b, c, ...). Note that these letters
# replace the original row names as the row label used in the plots.
data[["ID"]] <- letters[seq_len(nrow(data))]

# Wide -> long. Columns named in id.vars are not melted: they are kept as
# complete columns, while every other column is stacked into the
# variable/value pair.
data_long <- melt(data, id.vars = "ID")

图形绘制

theme: 是处理图美观的一个函数,可以调整横纵轴 label 的选择、图例的位置等。
这里将 X 轴标签旋转 45 度。
hjust 和 vjust 调整标签的相对位置,
具体见下图。
简单说,hjust 是水平的对齐方式,0 为左,1 为右,0.5 居中,0-1 之间可以取任意值。
vjust 是垂直对齐方式,0 底对齐,1 为顶对齐,0.5 居中,0-1 之间可以取任意值。

library(ggplot2)
library(reshape2)
library(scales)

# Base heatmap: one tile per (variable, ID) pair, filled by the cell value.
# The x-axis labels are rotated 45 degrees and right/top aligned.
p <- ggplot(data = data_long, mapping = aes(x = variable, y = ID)) +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1)
  ) +
  geom_tile(mapping = aes(fill = value))
p
image.png
颜色设置
# Two-colour gradient: supply the colours for the lowest and highest values.
# muted() comes from the "scales" package.
p1 <- p +
  scale_fill_gradient(high = muted("red"), low = muted("green"))
p1

# Three-colour (diverging) gradient with a white midpoint.
p1 <- p +
  scale_fill_gradient2(
    high = muted("red"),
    mid = "white",
    low = muted("green")
  )
p1
调整 legend 的位置

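legend.position 可以接受的值有 "top"、"bottom"、"left"、"right"、"none",或一个坐标 c(0.05, 0.8)(图例位于左上角;坐标是相对于图的左下角(即原点)计算的相对位置,取值范围为 0-1)。注意:自 ggplot2 3.5.0 起,坐标形式应写为 legend.position = "inside" 并配合 legend.position.inside = c(0.05, 0.8)。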

# Move the legend above the plot. The theme element is the single name
# `legend.position`; writing `theme(legend, position = "top")` instead passes
# an undefined object `legend` plus an unknown `position` argument and fails.
p1 <- p1 + theme(legend.position = "top")
调整背景、背景格线以及X轴,Y轴标题
# Restart from the base plot with a blue -> red fill, set the x-axis title,
# switch to the black-and-white theme and hide the major grid lines.
p1 <- p +
  scale_fill_gradient(low = muted("blue"), high = "red") +
  xlab("aa") +
  theme_bw() +
  theme(panel.grid.major = element_blank())
p1
横轴标签旋转 -45 度
# Tilt the x-axis labels by -45 degrees, left-aligned against their ticks.
p1 <- p1 + theme(
  axis.text.x = element_text(angle = -45, hjust = 0, vjust = 1)
)

# Overlay a point and a numeric label on every tile: the point colour uses the
# value rounded to an integer, the label shows the value to one decimal place.
p2 <- p1 +
  geom_point(aes(color = round(value)), size = 3) +
  geom_text(aes(label = round(value, digits = 1)), size = 1.5)
p2
只用点
# Dot-only variant: each cell is drawn as a large point coloured by its value
# (white -> red), with the value rounded to two decimals printed on top.
# All theme tweaks are collected in a single theme() call.
p <- ggplot(data_long, aes(x = variable, y = ID)) +
  geom_point(aes(color = value), size = 11.5) +
  geom_text(
    aes(label = round(value, digits = 2)),
    family = "Times",
    size = 3.5
  ) +
  scale_color_gradient(low = "white", high = "red") +
  xlab("aa") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    legend.key = element_blank(),
    legend.position = "left",
    axis.title = element_text(size = 23),
    axis.text.x = element_text(
      angle = 0, hjust = 0.03, vjust = 0, family = "Times", size = 18
    ),
    axis.text.y = element_text(
      angle = 0, hjust = 0, vjust = 0.03, family = "Times", size = 18
    )
  )
图片保存
# Write the current plot `p` to a 15 x 10 inch PDF; colormodel is forwarded
# to the graphics device.
ggsave(
  filename = "heatmap.pdf",
  plot = p,
  width = 15,
  height = 10,
  dpi = 300,
  colormodel = "srgb"
)
image.png
热图美化

为了更好的可视化效果,需要对数据做些预处理,主要有 对数转换,Z-score 转换,抹去异常值,非线性颜色等方式。

# Log transform: log2(value + 1) compresses large values; the +1 keeps zeros
# finite.
data_log <- transform(data_long, value = log2(value + 1))

# Heatmap of the log-transformed values, blue (low) -> red (high).
p <- ggplot(data_log, aes(x = variable, y = ID)) +
  geom_tile(aes(fill = value)) +
  scale_fill_gradient(low = muted("blue"), high = "red") +
  xlab("aa") +
  ylab("pro") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "right",
    axis.text = element_text(family = "Times", size = 18),
    axis.title = element_text(family = "Times", size = 23)
  )

ggsave(
  filename = "heatmap.pdf",
  plot = p,
  width = 15,
  height = 10,
  dpi = 300,
  colormodel = "srgb"
)
image.png

Z-score 又称为标准分数,是一组数中的每个数减去这一组数的平均值再除以这一组数的标准差,代表的是原始分数距离原始平均值的距离,以标准差为单位。可以对不同分布的各原始分数进行比较,用来反映数据的相对变化趋势,而非绝对变化量。

# Z-score transform.
# The previous code only rounded the raw values to two significant digits and
# never standardised them. Here each row (ID) is standardised on its own:
# (value - row mean) / row standard deviation, so the colours show the
# relative change within a row rather than absolute magnitude.
# Rows whose values are all identical have sd == 0 and yield NaN, which
# ggplot2 draws with the scale's NA colour.
data_scale <- data_long
data_scale$value <- ave(
  data_scale$value,
  data_scale$ID,
  FUN = function(v) (v - mean(v, na.rm = TRUE)) / sd(v, na.rm = TRUE)
)

# Heatmap of the row-wise z-scores, green (low) -> red (high).
p <- ggplot(data_scale, aes(x = variable, y = ID)) +
  xlab("aa") +
  ylab("pro") +
  theme_bw() +
  theme(panel.grid.major = element_blank()) +
  theme(legend.position = "right") +
  theme(axis.text = element_text(family = "Times", size = 18)) +
  theme(axis.title = element_text(family = "Times", size = 23)) +
  geom_tile(aes(fill = value)) +
  scale_fill_gradient(low = muted("green"), high = "red")
ggsave(p, filename = "heatmap.pdf", width = 15,
       height = 10, dpi = 300, colormodel = "srgb")
image.png

正常来讲,颜色的赋予在最小值到最大值之间是均匀分布的。如果最小值到最大值之间用 100 个颜色区分,则其中每一个 bin,不论其大小、有没有值,都会被赋予一个颜色。非线性颜色则是对数据比较小但密集的地方赋予更多颜色,对数据大但分布稀疏的地方赋予更少颜色,这样既能加大区分度,又能最小程度地影响原始数值。通常可以根据数据模式,手动设置颜色区间。为了方便自动化处理,也可选择用四分位数的方式设置颜色区间。

# Non-linear colours.
# Split the value range into bins and replace every value by the upper edge
# of the bin it falls into, so dense low ranges get more colours than sparse
# high ranges.
summary_v <- summary(data_long$value)
summary_v

# summary() yields Min, Q1, Median, Mean, Q3, Max (positions 1-6).
# Break points: 6 between the lower edge and Q1, 6 between Q1 and the median,
# 5 between the median and Q3, 5 between Q3 and the maximum; unique() removes
# the edges shared by neighbouring segments.
# The lower edge is min * 0.95 for a positive minimum (as before); for a
# minimum <= 0 that product would not lie below the minimum, so the minimum
# itself is used and kept in the first bin via include.lowest below.
break_v <- unique(c(
  seq(min(summary_v[[1]] * 0.95, summary_v[[1]]), summary_v[[2]],
      length.out = 6),
  seq(summary_v[[2]], summary_v[[3]], length.out = 6),
  seq(summary_v[[3]], summary_v[[5]], length.out = 5),
  seq(summary_v[[5]], summary_v[[6]], length.out = 5)
))

# Bin the data; each bin is labelled with its upper edge.
# include.lowest = TRUE keeps values equal to the first break point, which
# would otherwise become NA because cut() intervals are left-open.
data2 <- data_long
data2$value <- cut(data2$value, breaks = break_v,
                   labels = break_v[-1], include.lowest = TRUE)
class(data2$value) # the column is now a factor

# One colour per bin, named by bin label so that scale_fill_manual() matches
# colours to bins by name even when some bins contain no data.
col_p <- c("blue", "yellow", "red")
col <- colorRampPalette(col_p)(nlevels(data2$value))
names(col) <- levels(data2$value)
col
# Plot the binned values: the fill is now a factor, so the colours come from
# the manual palette `col`.
p <- ggplot(data2, aes(x = variable, y = ID)) +
  geom_tile(aes(fill = value)) +
  scale_fill_manual(values = col) +
  xlab("aa") +
  ylab("pro") +
  theme_bw() +
  theme(
    panel.grid.major = element_blank(),
    legend.position = "right",
    axis.text = element_text(family = "Times", size = 18),
    axis.title = element_text(family = "Times", size = 23)
  )

ggsave(
  filename = "heatmap.pdf",
  plot = p,
  width = 15,
  height = 10,
  dpi = 300,
  colormodel = "srgb"
)
image.png
调整行或列的顺序

如果想保持图中每一行的顺序与输入的数据框一致,需要设置因子的水平。这也是 ggplot2 中调整图例或横纵轴字符顺序的常用方式。

# Fix the row order of the heatmap by making ID an ordered factor.
# The levels must be the values actually stored in the ID column (the letters
# assigned to data$ID), not rownames(data): levels that do not occur in the
# column would turn every ID into NA.
# The order is reversed because the first factor level is drawn at the bottom
# of the y axis, so reversing puts the first input row at the top.
data_rowname <- as.vector(data$ID)
data_rownames <- rev(data_rowname)
# Applied to the long-format log data (`data_log`); the plot has to be rebuilt
# from this data frame afterwards for the new order to take effect.
data_log$ID <- factor(data_log$ID, levels = data_rownames, ordered = TRUE)

使用 pheatmap 绘制热图

绘制热图除了使用 ggplot2,还可以有其它的包或函数,比如 pheatmap::pheatmap (pheatmap 包中的 pheatmap函数)、gplots::heatmap.2 等。

相比于 ggplot2 作 heatmap, pheatmap 会更为简单一些,一个函数设置不同的参数,可以完成行列聚类、行列注释、Z-score 计算、颜色自定义等。

# Heatmap with pheatmap.
# Install the package only when it is missing, so re-running the script does
# not reinstall it (and does not need network access) every time.
if (!requireNamespace("pheatmap", quietly = TRUE)) {
  install.packages("pheatmap")
}
library(pheatmap)

# Reload the table as a plain numeric data frame (no ID column): rows 1-22,
# columns 2-22 of the file, then the first 19 of those columns.
data <- read.csv("pros_with_name.csv", header = TRUE, row.names = 1)
data <- data[1:22, 2:22]
data <- data[, 1:19]
# Use short letter labels (a, b, c, ...) as row names.
rownames(data) <- letters[seq_len(nrow(data))]

# Default call: clusters both rows and columns.
pheatmap::pheatmap(data)
image.png
# Row-wise z-score: scale = "row" standardises each row before colouring.
pheatmap::pheatmap(
  data,
  scale = "row"
)
image.png
# Sometimes no clustering is wanted: switch it off for rows and columns so
# the matrix is shown in its original order.
pheatmap::pheatmap(
  data,
  scale = "row",
  cluster_cols = FALSE,
  cluster_rows = FALSE
)
image.png

给矩阵的行和列进行分组注释

# Group annotations for the rows and columns of the matrix.
# Rows: the labels "1".."5" cycled four times, followed by "3" and "4"
# (22 labels). `type` and `class` carry the same labels.
row_anno <- data.frame(
  type = c(rep(as.character(1:5), times = 4), "3", "4"),
  class = c(rep(as.character(1:5), times = 4), "3", "4"),
  row.names = rownames(data)
)
# Columns: the labels "5".."1" cycled to a length of 19.
col_anno <- data.frame(
  type = rep(as.character(5:1), length.out = 19),
  class = rep(as.character(5:1), length.out = 19),
  row.names = colnames(data)
)

# Row-scaled heatmap with both annotation tracks; rows keep their input order.
pheatmap::pheatmap(
  data,
  scale = "row",
  cluster_rows = FALSE,
  annotation_row = row_anno,
  annotation_col = col_anno,
  legend = TRUE,
  annotation_legend = TRUE
)

image.png
# Custom colours: a 50-step green -> yellow -> red palette.
pheatmap::pheatmap(
  data,
  scale = "row",
  cluster_rows = FALSE,
  annotation_row = row_anno,
  annotation_col = col_anno,
  color = colorRampPalette(c("green", "yellow", "red"), bias = 1)(50)
)
image.png

你可能感兴趣的:(【R】热图绘制)