这几年,我也陆陆续续在paper中看到一些环形的富集图,有时候也挺好的,今天我们也来学习一下画法,继续练习circlize的技巧。
这次,我找了一个经常做的GO富集的结果拿来做测试。
circlize包提供了一些专门的基因组绘图函数,让基因组分析更加简单方便,如:
circos.genomicTrack(): 添加轨迹和图形
circos.genomicPoints(): 添加点
circos.genomicLines(): 添加线条或线段
circos.genomicRect(): 添加矩形
circos.genomicText(): 添加文本
circos.genomicLink(): 添加连接
这样函数与基础的绘制函数是类似的,只是接受的输入数据格式不同,都是基于基础的circlize绘图函数实现的(如circos.track(),circos.points()等)。
library(grid)
library(graphics)
library(ComplexHeatmap)
data <- read.table("data.txt",header = T,sep="\t")
data_new <- data[,c(2,1,4,5,6)]
# 通路总基因数:min -- 0、 max -- 生物的总基因数、rich表示该通路中的基因数;
data_new$gene_num.min <- 0
# -log10 p值
data_new$"-log10Pvalue" <- -log10(data$pvalue)
#data_new$up.regulated <- round(data$GeneNum*data$UpRatio)
#data_new$down.regulated <- data_new$GeneNum-data_new$up.regulated
data_new$up.regulated <-round(data$UpRatio,2)
data_new$down.regulated <-round(data$DownRatio,2)
colnames(data_new)[c(1,2,3,4)] <- c("id", "category","gene_num.rich","gene_num.max")
id,富集GO的ID;
category,富集通路所属的高级分类;
gene_num.min和gene_num.max,gene_num.min均为0,gene_num.max为目标通路中所含基因(背景基因)总数目;
gene_num.rich,富集到目标通路的基因数量,也就是差异基因在这个GO上的数目;
-log10Pvalue,富集分析的p值,已做了-log10转换处理;
up.regulated和down.regulated,富集到该通路中的基因中,显著上调和下调基因的数量比例;
Ratio,各个通路的富集因子,或者富集得分,已标准化至[0,1]区间。
然后我们BP,MF,CC各取了top6的来显示。
dat<- bind_rows(
data_new %>%filter(category == 'BP') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'MF') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'CC') %>%arrange('-log10Pvalue') %>% head(6),
)
dat$id <- factor(dat$id, levels = dat$id) #变成因子,是可以按照我们先要的顺序来显示
rownames(dat)<-dat$id
首先开始绘制,第一个圈,也就是表征GO ID的圈,圈的大小有gene_num.max来决定。
# 第一个圈:绘制id
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max')]
ko_color <- c(rep('#F7CC13',6), rep('#954572',6), rep('#0796E0',6)) #各分类的颜色和数目
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
panel.fun = function(x, y) {
ylim = get.cell.meta.data('ycenter')
xlim = get.cell.meta.data('xcenter')
sector.name = get.cell.meta.data('sector.index')
circos.axis(h = 'top', labels.cex = 0.4, labels.niceFacing = FALSE)
circos.text(xlim, ylim, sector.name, cex = 0.6,col="white",niceFacing = FALSE)
} )
# 第二圈,绘制富集的基因和富集p值
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
plot_data$gene_num.rich[16] <- 901 # 为了显示的更清晰,手动改了3个值
plot_data$gene_num.rich[11] <- 1023
plot_data$gene_num.rich[3] <- 1201
label_data <- dat['gene_num.rich']
p_max <- round(max(dat$'-log10Pvalue')) + 1
colorsChoice <- colorRampPalette(c('white', 'blue'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE, #圈图的高度、颜色等设置
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...) #区块的长度反映了富集基因的数量,颜色与 p 值有关
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1] #ylim、xlim、sector.name 等用于指定文字标签(富集基因数量)添加的合适坐标
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE) #将文字标签添(富集基因数量)加在图中指定位置处
} )
# 第三圈,绘制上下调基因的比例
dat$up <- dat$up.regulated * dat$gene_num.max
plot_data_up <- dat[c('id', 'gene_num.min', 'up')]
names(plot_data_up) <- c('id', 'start', 'end')
plot_data_up$type <- 1
dat$down <- dat$down.regulated * dat$gene_num.max + dat$up
plot_data_down <- dat[c('id', 'up', 'down')]
names(plot_data_down) <- c('id', 'start', 'end')
plot_data_down$type <- 2
#选择作图数据集(作图用)、标签数据集(添加相应的文字标识用),并分别为上下调基因赋值不同颜色
plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c('up', 'down', 'up.regulated', 'down.regulated')]
color_assign <- colorRamp2(breaks = c(1, 2), col = c('red', 'blue'))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('cell.bottom.radius') - 0.5
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),3]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE) #将文字标签(上调基因比例)添加在图中指定位置处
xlim = (label_data[get.cell.meta.data('sector.index'),2]+label_data[get.cell.meta.data('sector.index'),1]) / 2
sector.name = label_data[get.cell.meta.data('sector.index'),4]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE) #将下调基因比例的标签也添加在图中
} )
# 第四圈,绘制富集因子
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max', 'Ratio')]
plot_data$Ratio <- plot_data$Ratio *10
label_data <- dat['category']
color_assign <- c('BP' = '#F7CC13', 'CC' = '#954572', 'MF' = '#0796E0')#各二级分类的名称和颜色
circos.genomicTrack(
plot_data, ylim = c(0, 1), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
panel.fun = function(region, value, ...) {
sector.name = get.cell.meta.data('sector.index') #sector.name 用于提取 GO id 名称,并添加在下一句中匹配 GO对应的高级分类,以分配颜色
circos.genomicRect(region, value, col = color_assign[label_data[sector.name,1]], border = NA, ytop.column = 1, ybottom = 0, ...) #绘制矩形区块,高度代表富集因子数值,颜色代表 GO的分类
circos.lines(c(0, max(region)), c(0.5, 0.5), col = 'gray', lwd = 0.3) #在富集因子等于 0.5 的位置处添加一个灰线
} )
下面我们来添加legend。但是,circlize包绘制的圈图是没有图例的。若有需要,您可以选择用AI、PS等工具手动绘制图例,也可以借助其它一些R包实现,例如ComplexHeatmap包。
是可以直接产生legend,可以AI添加,也可以直接添加到图片上。
category_legend <- Legend(
title="Category",
labels = c('BP', 'CC', 'MF'),#各二级分类的名称
type = 'points', pch = NA, background = c('#F7CC13', '#954572', '#0796E0'), #各二级分类的颜色
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
nr=1,
title_gp = gpar(fontsize = 9),title_position = 'topleft')
updown_legend <- Legend(
title="Differential expressed",
labels = c('Up-regulated', 'Down-regulated'),
type = 'points', pch = NA, background = c('red', 'blue'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
#nr=1,
title_gp = gpar(fontsize = 9),title_position = 'topleft')
pvalue_legend <- Legend(
title = '-Log10(Pvalue)',
col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
colorRampPalette(c('#FF906F', '#861D30'))(6)),
legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
direction = "horizontal",
title_gp = gpar(fontsize = 9), title_position = 'topleft')
lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend) #里面有很多参数可以调整几个图例的排版
#grid.draw(lgd_list_vertical)
draw(lgd_list_vertical)
总的代码如下:
library(circlize)
library(grid)
library(graphics)
library(ComplexHeatmap)
data <- read.table("data.txt",header = T,sep="\t")
data_new <- data[,c(2,1,4,5,6)]
# 通路总基因数:min -- 0、 max -- 生物的总基因数
data_new$gene_num.min <- 0
# -log10 p值
data_new$"-log10Pvalue" <- -log10(data$pvalue)
#data_new$up.regulated <- round(data$GeneNum*data$UpRatio)
#data_new$down.regulated <- data_new$GeneNum-data_new$up.regulated
data_new$up.regulated <-round(data$UpRatio,2)
data_new$down.regulated <-round(data$DownRatio,2)
colnames(data_new)[c(1,2,3,4)] <- c("id", "category","gene_num.rich","gene_num.max")
dat<- bind_rows(
data_new %>%filter(category == 'BP') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'MF') %>%arrange('-log10Pvalue') %>% head(6),
data_new %>%filter(category == 'CC') %>%arrange('-log10Pvalue') %>% head(6),
)
dat$id <- factor(dat$id, levels = dat$id)
rownames(dat)<-dat$id
pdf('circlize.pdf', width = 12, height = 15)
# 第一个圈:绘制id
circle_size = unit(1, 'snpc')
circos.par(gap.degree = 0.5, start.degree = 90)
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max')]
ko_color <- c(rep('#F7CC13',6), rep('#954572',6), rep('#0796E0',6)) #各二级分类的颜色和数目
circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)
circos.track(
ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
panel.fun = function(x, y) {
ylim = get.cell.meta.data('ycenter')
xlim = get.cell.meta.data('xcenter')
sector.name = get.cell.meta.data('sector.index')
circos.axis(h = 'top', labels.cex = 0.4, labels.niceFacing = FALSE)
circos.text(xlim, ylim, sector.name, cex = 0.6,col="white",niceFacing = FALSE)
} )
# 第二圈,绘制富集的基因和富集p值
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.rich', '-log10Pvalue')]
plot_data$gene_num.rich[16] <- 901 # 为了显示的更清晰,手动改了3个值
plot_data$gene_num.rich[11] <- 1023
plot_data$gene_num.rich[3] <- 1201
label_data <- dat['gene_num.rich']
p_max <- round(max(dat$'-log10Pvalue')) + 1
#colorsChoice <- colorRampPalette(c('#FF906F', '#861D30'))
colorsChoice <- colorRampPalette(c('white', 'blue'))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('ycenter')
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),1]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
} )
# 第三圈,绘制上下调基因
dat$up <- dat$up.regulated * dat$gene_num.max
plot_data_up <- dat[c('id', 'gene_num.min', 'up')]
names(plot_data_up) <- c('id', 'start', 'end')
plot_data_up$type <- 1
dat$down <- dat$down.regulated * dat$gene_num.max + dat$up
plot_data_down <- dat[c('id', 'up', 'down')]
names(plot_data_down) <- c('id', 'start', 'end')
plot_data_down$type <- 2
plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c('up', 'down', 'up.regulated', 'down.regulated')]
color_assign <- colorRamp2(breaks = c(1, 2), col = c('red', 'blue'))
circos.genomicTrackPlotRegion(
plot_data, track.height = 0.08, bg.border = NA, stack = TRUE,
panel.fun = function(region, value, ...) {
circos.genomicRect(region, value, col = color_assign(value[[1]]), border = NA, ...)
ylim = get.cell.meta.data('cell.bottom.radius') - 0.5
xlim = label_data[get.cell.meta.data('sector.index'),1] / 2
sector.name = label_data[get.cell.meta.data('sector.index'),3]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
xlim = (label_data[get.cell.meta.data('sector.index'),2]+label_data[get.cell.meta.data('sector.index'),1]) / 2
sector.name = label_data[get.cell.meta.data('sector.index'),4]
circos.text(xlim, ylim, sector.name, cex = 0.4, niceFacing = FALSE)
} )
# 第四圈,绘制富集因子
plot_data <- dat[c('id', 'gene_num.min', 'gene_num.max', 'Ratio')]
plot_data$Ratio <- plot_data$Ratio *10
label_data <- dat['category']
color_assign <- c('BP' = '#F7CC13', 'CC' = '#954572', 'MF' = '#0796E0')#各二级分类的名称和颜色
circos.genomicTrack(
plot_data, ylim = c(0, 1), track.height = 0.3, bg.col = 'gray95', bg.border = NA,
panel.fun = function(region, value, ...) {
sector.name = get.cell.meta.data('sector.index')
circos.genomicRect(region, value, col = color_assign[label_data[sector.name,1]], border = NA, ytop.column = 1, ybottom = 0, ...)
circos.lines(c(0, max(region)), c(0.5, 0.5), col = 'gray', lwd = 0.3)
} )
category_legend <- Legend(
title="Category",
labels = c('BP', 'CC', 'MF'),#各二级分类的名称
type = 'points', pch = NA, background = c('#F7CC13', '#954572', '#0796E0'), #各二级分类的颜色
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
nr=1,
title_gp = gpar(fontsize = 9),title_position = 'topleft')
updown_legend <- Legend(
title="Differential expressed",
labels = c('Up-regulated', 'Down-regulated'),
type = 'points', pch = NA, background = c('red', 'blue'),
labels_gp = gpar(fontsize = 8), grid_height = unit(0.5, 'cm'), grid_width = unit(0.5, 'cm'),
#nr=1,
title_gp = gpar(fontsize = 9),title_position = 'topleft')
pvalue_legend <- Legend(
title = '-Log10(Pvalue)',
col_fun = colorRamp2(round(seq(0, p_max, length.out = 6), 0),
colorRampPalette(c('#FF906F', '#861D30'))(6)),
legend_height = unit(3, 'cm'), labels_gp = gpar(fontsize = 8),
direction = "horizontal",
title_gp = gpar(fontsize = 9), title_position = 'topleft')
lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend)
#grid.draw(lgd_list_vertical)
draw(lgd_list_vertical)
dev.off()