【R画图学习11.3】富集圈图---circlize

这几年,我也陆陆续续在paper中看到一些环形的富集图,有时候也挺好的,今天我们也来学习一下画法,继续练习circlize的技巧。

这次,我找了一个经常做的GO富集的结果拿来做测试。

circlize包提供了一些专门的基因组绘图函数,让基因组分析更加简单方便,如:

circos.genomicTrack(): 添加轨迹和图形

circos.genomicPoints(): 添加点

circos.genomicLines(): 添加线条或线段

circos.genomicRect(): 添加矩形

circos.genomicText(): 添加文本

circos.genomicLink(): 添加连接

这些函数与基础的绘制函数类似,只是接受的输入数据格式不同;它们都是基于基础的circlize绘图函数(如circos.track()、circos.points()等)实现的。


library(circlize)
library(grid)
library(graphics)
library(ComplexHeatmap)

# Load the GO enrichment result (tab-separated text with a header row).
data <- read.table("data.txt", header = TRUE, sep = "\t")

# The code below addresses columns 1-6 by position and three columns by name;
# stop early with a clear failure if the table does not have them.
stopifnot(
  ncol(data) >= 6,
  all(c("pvalue", "UpRatio", "DownRatio") %in% names(data))
)

# Keep five columns, moving original column 2 (which becomes "id") to the front.
data_new <- data[, c(2, 1, 4, 5, 6)]

# Every term starts at 0; this column is used later as the start coordinate
# of each sector and of the bars drawn inside it.
data_new$gene_num.min <- 0

# Significance as -log10(p value).
data_new[["-log10Pvalue"]] <- -log10(data$pvalue)

# Share of up- / down-regulated genes, rounded to two decimals.
# Disabled alternative that stores gene counts instead of ratios:
# data_new$up.regulated <- round(data$GeneNum * data$UpRatio)
# data_new$down.regulated <- data_new$GeneNum - data_new$up.regulated
data_new$up.regulated <- round(data$UpRatio, 2)
data_new$down.regulated <- round(data$DownRatio, 2)

# Rename the first four columns; the fifth keeps its name from the input file.
colnames(data_new)[1:4] <- c("id", "category", "gene_num.rich", "gene_num.max")

id,富集GO的ID;

category,富集通路所属的高级分类;

gene_num.min和gene_num.max,gene_num.min均为0,gene_num.max为目标通路中所含基因(背景基因)总数目;

gene_num.rich,富集到目标通路的基因数量,也就是差异基因在这个GO上的数目;

-log10Pvalue,富集分析的p值,已做了-log10转换处理;

up.regulated和down.regulated,富集到该通路中的基因中,显著上调和下调基因的数量比例;

Ratio,各个通路的富集因子,或者富集得分,已标准化至[0,1]区间。

然后我们从BP、MF、CC三个分类中各取top 6的条目来显示。

# Return the `n` rows of `df` in `category` with the largest -log10(p value).
# The previous arrange('-log10Pvalue') sorted by a constant string, so the
# rows were never actually ranked before head() was applied.
top_terms <- function(df, category, n = 6) {
  in_category <- df[which(df$category == category), , drop = FALSE]
  ranking <- order(in_category[["-log10Pvalue"]], decreasing = TRUE)
  head(in_category[ranking, , drop = FALSE], n)
}

# Stack the top terms of BP, MF and CC in that order. The row order of `dat`
# matters: it fixes the sector order and later code addresses rows by position.
dat <- do.call(rbind, lapply(c("BP", "MF", "CC"), top_terms, df = data_new))

# Turn id into a factor whose levels follow the row order, so that sectors
# are drawn in exactly this order.
dat$id <- factor(dat$id, levels = dat$id)

# Row names equal to the ids allow lookups by sector name later on.
rownames(dat) <- as.character(dat$id)

首先开始绘制第一个圈,也就是表征GO ID的圈,每个扇区的大小由gene_num.max来决定。

# Ring 1: one sector per GO term, labelled with the term id.

# Drop any circlize state left over from a previous run; several circos.par()
# settings can only be changed before a layout has been initialised.
circos.clear()

circle_size <- unit(1, "snpc")  # not referenced again in the visible script
circos.par(gap.degree = 0.5, start.degree = 90)

# Each sector spans [gene_num.min, gene_num.max] for its term.
plot_data <- dat[c("id", "gene_num.min", "gene_num.max")]

# Sector background colour, looked up from each term's own category. The
# mapping is the one used by the enrichment-factor ring and by the category
# legend; a fixed rep(..., 6) vector gave MF and CC each other's colour and
# silently assumed exactly six terms per category.
category_colors <- c(BP = "#F7CC13", CC = "#954572", MF = "#0796E0")
ko_color <- unname(category_colors[as.character(dat$category)])

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)

circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
  panel.fun = function(x, y) {
    sector_id <- get.cell.meta.data("sector.index")
    # Axis on the outer edge of the cell, then the id centred in the cell.
    circos.axis(h = "top", labels.cex = 0.4, labels.niceFacing = FALSE)
    circos.text(
      get.cell.meta.data("xcenter"), get.cell.meta.data("ycenter"),
      sector_id,
      cex = 0.6, col = "white", niceFacing = FALSE
    )
  }
)


# Ring 2: bar length = number of enriched genes, fill colour = -log10(p value).
plot_data <- dat[c("id", "gene_num.min", "gene_num.rich", "-log10Pvalue")]

# Three bar lengths are overridden by hand to make the figure easier to read.
# The rows are addressed by position, so this depends on the row order of
# `dat`. The printed labels below still come from the unmodified `dat`.
plot_data$gene_num.rich[c(16, 11, 3)] <- c(901, 1023, 1201)

# Labels: the enriched-gene count of every term, looked up by row name.
label_data <- dat["gene_num.rich"]

# Colour scale from 0 to one above the rounded maximum -log10(p value).
p_max <- round(max(dat[["-log10Pvalue"]])) + 1
colorsChoice <- colorRampPalette(c("white", "blue"))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data,
  track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    # Bar coloured by the first value column, i.e. -log10(p value).
    circos.genomicRect(
      region, value,
      col = color_assign(value[[1]]), border = NA, ...
    )
    # Print the enriched-gene count at half of that count along the x axis,
    # vertically centred in the cell.
    enriched_n <- label_data[get.cell.meta.data("sector.index"), 1]
    circos.text(
      enriched_n / 2, get.cell.meta.data("ycenter"), enriched_n,
      cex = 0.4, niceFacing = FALSE
    )
  }
)

# Ring 3: share of up- and down-regulated genes, drawn as two adjacent bars.

# End coordinates: the "up" bar runs from 0 to `up`, the "down" bar continues
# from `up` to `down`. Both are ratios scaled by the sector width.
dat$up <- dat$up.regulated * dat$gene_num.max
dat$down <- dat$down.regulated * dat$gene_num.max + dat$up

region_columns <- c("id", "start", "end")

plot_data_up <- setNames(dat[c("id", "gene_num.min", "up")], region_columns)
plot_data_up$type <- 1

plot_data_down <- setNames(dat[c("id", "up", "down")], region_columns)
plot_data_down$type <- 2

# Drawing data (both bar types) and label data (positions and ratio labels).
plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c("up", "down", "up.regulated", "down.regulated")]

# type 1 (up) -> red, type 2 (down) -> blue.
color_assign <- colorRamp2(breaks = c(1, 2), col = c("red", "blue"))

circos.genomicTrackPlotRegion(
  plot_data,
  track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(
      region, value,
      col = color_assign(value[[1]]), border = NA, ...
    )

    sector_labels <- label_data[get.cell.meta.data("sector.index"), ]
    # NOTE(review): the label height is derived from the cell's bottom radius
    # rather than from the track's y range - presumably an empirical offset;
    # confirm the labels land where intended.
    label_y <- get.cell.meta.data("cell.bottom.radius") - 0.5

    # Up-regulated ratio, centred on the "up" bar.
    circos.text(
      sector_labels[[1]] / 2, label_y, sector_labels[[3]],
      cex = 0.4, niceFacing = FALSE
    )
    # Down-regulated ratio, centred on the "down" bar.
    circos.text(
      (sector_labels[[2]] + sector_labels[[1]]) / 2, label_y,
      sector_labels[[4]],
      cex = 0.4, niceFacing = FALSE
    )
  }
)

# Ring 4: enrichment factor as bar height, coloured by GO category.
plot_data <- dat[c("id", "gene_num.min", "gene_num.max", "Ratio")]

# Scale the ratio by 10 before plotting it on a y axis that runs from 0 to 1.
# NOTE(review): assumes Ratio * 10 stays within [0, 1] - confirm on the data.
plot_data$Ratio <- plot_data$Ratio * 10

# Category of every term (looked up by row name) and the colour of each one.
label_data <- dat["category"]
color_assign <- c("BP" = "#F7CC13", "CC" = "#954572", "MF" = "#0796E0")

circos.genomicTrack(
  plot_data,
  ylim = c(0, 1), track.height = 0.3, bg.col = "gray95", bg.border = NA,
  panel.fun = function(region, value, ...) {
    # The sector name is the GO id; use it to find the term's category.
    term_category <- label_data[get.cell.meta.data("sector.index"), 1]

    # Bar from y = 0 up to the scaled ratio, filled with the category colour.
    circos.genomicRect(
      region, value,
      col = color_assign[term_category], border = NA,
      ytop.column = 1, ybottom = 0, ...
    )

    # Thin grey reference line at y = 0.5 across the plotted region.
    circos.lines(c(0, max(region)), c(0.5, 0.5), col = "gray", lwd = 0.3)
  }
)

下面我们来添加legend。但是,circlize包绘制的圈图是没有图例的。若有需要,您可以选择用AI、PS等工具手动绘制图例,也可以借助其它一些R包实现,例如ComplexHeatmap包。

ComplexHeatmap可以直接生成legend:既可以单独导出后用AI添加,也可以直接绘制到图片上。

# Legends are built with ComplexHeatmap, since circlize draws none itself.

# GO category legend: coloured boxes produced by an empty point symbol on a
# filled background, laid out in a single row.
category_legend <- Legend(
  title = "Category",
  labels = c("BP", "CC", "MF"),
  type = "points", pch = NA,
  background = c("#F7CC13", "#954572", "#0796E0"),
  labels_gp = gpar(fontsize = 8),
  grid_height = unit(0.5, "cm"), grid_width = unit(0.5, "cm"),
  nrow = 1,
  title_gp = gpar(fontsize = 9), title_position = "topleft"
)

# Up / down legend: same colours as the up/down ratio ring (red, blue).
updown_legend <- Legend(
  title = "Differential expressed",
  labels = c("Up-regulated", "Down-regulated"),
  type = "points", pch = NA,
  background = c("red", "blue"),
  labels_gp = gpar(fontsize = 8),
  grid_height = unit(0.5, "cm"), grid_width = unit(0.5, "cm"),
  title_gp = gpar(fontsize = 9), title_position = "topleft"
)

# Continuous p-value legend. It has to use the same white-to-blue mapping
# (breaks 0..p_max) as the p-value ring; the former orange-to-red palette
# described colours that never appear in the plot, and its rounded breaks
# could collide when p_max is small.
pvalue_col_fun <- colorRamp2(
  breaks = 0:p_max,
  col = colorRampPalette(c("white", "blue"))(p_max + 1)
)
pvalue_legend <- Legend(
  title = "-Log10(Pvalue)",
  col_fun = pvalue_col_fun,
  # A horizontal colour bar is sized through legend_width.
  legend_width = unit(3, "cm"),
  labels_gp = gpar(fontsize = 8),
  direction = "horizontal",
  title_gp = gpar(fontsize = 9), title_position = "topleft"
)

# Stack the three legends and draw them on the current page.
lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend)
draw(lgd_list_vertical)

# The plot is finished: reset circlize so the next plot starts clean.
circos.clear()

总的代码如下:

library(circlize)

library(grid)

library(graphics)

library(ComplexHeatmap)

# Load the GO enrichment result (tab-separated text with a header row).
data <- read.table("data.txt", header = TRUE, sep = "\t")

# The code below addresses columns 1-6 by position and three columns by name;
# stop early with a clear failure if the table does not have them.
stopifnot(
  ncol(data) >= 6,
  all(c("pvalue", "UpRatio", "DownRatio") %in% names(data))
)

# Keep five columns, moving original column 2 (which becomes "id") to the front.
data_new <- data[, c(2, 1, 4, 5, 6)]

# Every term starts at 0; this column is used later as the start coordinate
# of each sector and of the bars drawn inside it.
data_new$gene_num.min <- 0

# Significance as -log10(p value).
data_new[["-log10Pvalue"]] <- -log10(data$pvalue)

# Share of up- / down-regulated genes, rounded to two decimals.
# Disabled alternative that stores gene counts instead of ratios:
# data_new$up.regulated <- round(data$GeneNum * data$UpRatio)
# data_new$down.regulated <- data_new$GeneNum - data_new$up.regulated
data_new$up.regulated <- round(data$UpRatio, 2)
data_new$down.regulated <- round(data$DownRatio, 2)

# Rename the first four columns; the fifth keeps its name from the input file.
colnames(data_new)[1:4] <- c("id", "category", "gene_num.rich", "gene_num.max")

# Return the `n` rows of `df` in `category` with the largest -log10(p value).
# The previous arrange('-log10Pvalue') sorted by a constant string, so the
# rows were never actually ranked before head() was applied.
top_terms <- function(df, category, n = 6) {
  in_category <- df[which(df$category == category), , drop = FALSE]
  ranking <- order(in_category[["-log10Pvalue"]], decreasing = TRUE)
  head(in_category[ranking, , drop = FALSE], n)
}

# Stack the top terms of BP, MF and CC in that order. The row order of `dat`
# matters: it fixes the sector order and later code addresses rows by position.
dat <- do.call(rbind, lapply(c("BP", "MF", "CC"), top_terms, df = data_new))

# Turn id into a factor whose levels follow the row order, so that sectors
# are drawn in exactly this order.
dat$id <- factor(dat$id, levels = dat$id)

# Row names equal to the ids allow lookups by sector name later on.
rownames(dat) <- as.character(dat$id)

# Everything drawn from here on goes into this PDF until the device is closed.
pdf("circlize.pdf", width = 12, height = 15)

# Ring 1: one sector per GO term, labelled with the term id.

# Drop any circlize state left over from a previous run; several circos.par()
# settings can only be changed before a layout has been initialised.
circos.clear()

circle_size <- unit(1, "snpc")  # not referenced again in the visible script
circos.par(gap.degree = 0.5, start.degree = 90)

# Each sector spans [gene_num.min, gene_num.max] for its term.
plot_data <- dat[c("id", "gene_num.min", "gene_num.max")]

# Sector background colour, looked up from each term's own category. The
# mapping is the one used by the enrichment-factor ring and by the category
# legend; a fixed rep(..., 6) vector gave MF and CC each other's colour and
# silently assumed exactly six terms per category.
category_colors <- c(BP = "#F7CC13", CC = "#954572", MF = "#0796E0")
ko_color <- unname(category_colors[as.character(dat$category)])

circos.genomicInitialize(plot_data, plotType = NULL, major.by = 1)

circos.track(
  ylim = c(0, 1), track.height = 0.05, bg.border = NA, bg.col = ko_color,
  panel.fun = function(x, y) {
    sector_id <- get.cell.meta.data("sector.index")
    # Axis on the outer edge of the cell, then the id centred in the cell.
    circos.axis(h = "top", labels.cex = 0.4, labels.niceFacing = FALSE)
    circos.text(
      get.cell.meta.data("xcenter"), get.cell.meta.data("ycenter"),
      sector_id,
      cex = 0.6, col = "white", niceFacing = FALSE
    )
  }
)

# Ring 2: bar length = number of enriched genes, fill colour = -log10(p value).
plot_data <- dat[c("id", "gene_num.min", "gene_num.rich", "-log10Pvalue")]

# Three bar lengths are overridden by hand to make the figure easier to read.
# The rows are addressed by position, so this depends on the row order of
# `dat`. The printed labels below still come from the unmodified `dat`.
plot_data$gene_num.rich[c(16, 11, 3)] <- c(901, 1023, 1201)

# Labels: the enriched-gene count of every term, looked up by row name.
label_data <- dat["gene_num.rich"]

# Colour scale from 0 to one above the rounded maximum -log10(p value).
p_max <- round(max(dat[["-log10Pvalue"]])) + 1
# Disabled alternative palette:
# colorsChoice <- colorRampPalette(c('#FF906F', '#861D30'))
colorsChoice <- colorRampPalette(c("white", "blue"))
color_assign <- colorRamp2(breaks = 0:p_max, col = colorsChoice(p_max + 1))

circos.genomicTrackPlotRegion(
  plot_data,
  track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    # Bar coloured by the first value column, i.e. -log10(p value).
    circos.genomicRect(
      region, value,
      col = color_assign(value[[1]]), border = NA, ...
    )
    # Print the enriched-gene count at half of that count along the x axis,
    # vertically centred in the cell.
    enriched_n <- label_data[get.cell.meta.data("sector.index"), 1]
    circos.text(
      enriched_n / 2, get.cell.meta.data("ycenter"), enriched_n,
      cex = 0.4, niceFacing = FALSE
    )
  }
)

# Ring 3: share of up- and down-regulated genes, drawn as two adjacent bars.

# End coordinates: the "up" bar runs from 0 to `up`, the "down" bar continues
# from `up` to `down`. Both are ratios scaled by the sector width.
dat$up <- dat$up.regulated * dat$gene_num.max
dat$down <- dat$down.regulated * dat$gene_num.max + dat$up

region_columns <- c("id", "start", "end")

plot_data_up <- setNames(dat[c("id", "gene_num.min", "up")], region_columns)
plot_data_up$type <- 1

plot_data_down <- setNames(dat[c("id", "up", "down")], region_columns)
plot_data_down$type <- 2

# Drawing data (both bar types) and label data (positions and ratio labels).
plot_data <- rbind(plot_data_up, plot_data_down)
label_data <- dat[c("up", "down", "up.regulated", "down.regulated")]

# type 1 (up) -> red, type 2 (down) -> blue.
color_assign <- colorRamp2(breaks = c(1, 2), col = c("red", "blue"))

circos.genomicTrackPlotRegion(
  plot_data,
  track.height = 0.08, bg.border = NA, stack = TRUE,
  panel.fun = function(region, value, ...) {
    circos.genomicRect(
      region, value,
      col = color_assign(value[[1]]), border = NA, ...
    )

    sector_labels <- label_data[get.cell.meta.data("sector.index"), ]
    # NOTE(review): the label height is derived from the cell's bottom radius
    # rather than from the track's y range - presumably an empirical offset;
    # confirm the labels land where intended.
    label_y <- get.cell.meta.data("cell.bottom.radius") - 0.5

    # Up-regulated ratio, centred on the "up" bar.
    circos.text(
      sector_labels[[1]] / 2, label_y, sector_labels[[3]],
      cex = 0.4, niceFacing = FALSE
    )
    # Down-regulated ratio, centred on the "down" bar.
    circos.text(
      (sector_labels[[2]] + sector_labels[[1]]) / 2, label_y,
      sector_labels[[4]],
      cex = 0.4, niceFacing = FALSE
    )
  }
)

# Ring 4: enrichment factor as bar height, coloured by GO category.
plot_data <- dat[c("id", "gene_num.min", "gene_num.max", "Ratio")]

# Scale the ratio by 10 before plotting it on a y axis that runs from 0 to 1.
# NOTE(review): assumes Ratio * 10 stays within [0, 1] - confirm on the data.
plot_data$Ratio <- plot_data$Ratio * 10

# Category of every term (looked up by row name) and the colour of each one.
label_data <- dat["category"]
color_assign <- c("BP" = "#F7CC13", "CC" = "#954572", "MF" = "#0796E0")

circos.genomicTrack(
  plot_data,
  ylim = c(0, 1), track.height = 0.3, bg.col = "gray95", bg.border = NA,
  panel.fun = function(region, value, ...) {
    # The sector name is the GO id; use it to find the term's category.
    term_category <- label_data[get.cell.meta.data("sector.index"), 1]

    # Bar from y = 0 up to the scaled ratio, filled with the category colour.
    circos.genomicRect(
      region, value,
      col = color_assign[term_category], border = NA,
      ytop.column = 1, ybottom = 0, ...
    )

    # Thin grey reference line at y = 0.5 across the plotted region.
    circos.lines(c(0, max(region)), c(0.5, 0.5), col = "gray", lwd = 0.3)
  }
)

# Legends are built with ComplexHeatmap, since circlize draws none itself.

# GO category legend: coloured boxes produced by an empty point symbol on a
# filled background, laid out in a single row.
category_legend <- Legend(
  title = "Category",
  labels = c("BP", "CC", "MF"),
  type = "points", pch = NA,
  background = c("#F7CC13", "#954572", "#0796E0"),
  labels_gp = gpar(fontsize = 8),
  grid_height = unit(0.5, "cm"), grid_width = unit(0.5, "cm"),
  nrow = 1,
  title_gp = gpar(fontsize = 9), title_position = "topleft"
)

# Up / down legend: same colours as the up/down ratio ring (red, blue).
updown_legend <- Legend(
  title = "Differential expressed",
  labels = c("Up-regulated", "Down-regulated"),
  type = "points", pch = NA,
  background = c("red", "blue"),
  labels_gp = gpar(fontsize = 8),
  grid_height = unit(0.5, "cm"), grid_width = unit(0.5, "cm"),
  title_gp = gpar(fontsize = 9), title_position = "topleft"
)

# Continuous p-value legend. It has to use the same white-to-blue mapping
# (breaks 0..p_max) as the p-value ring; the former orange-to-red palette
# described colours that never appear in the plot, and its rounded breaks
# could collide when p_max is small.
pvalue_col_fun <- colorRamp2(
  breaks = 0:p_max,
  col = colorRampPalette(c("white", "blue"))(p_max + 1)
)
pvalue_legend <- Legend(
  title = "-Log10(Pvalue)",
  col_fun = pvalue_col_fun,
  # A horizontal colour bar is sized through legend_width.
  legend_width = unit(3, "cm"),
  labels_gp = gpar(fontsize = 8),
  direction = "horizontal",
  title_gp = gpar(fontsize = 9), title_position = "topleft"
)

# Stack the three legends and draw them on the current page.
lgd_list_vertical <- packLegend(category_legend, updown_legend, pvalue_legend)
draw(lgd_list_vertical)

# The plot is finished: reset circlize so the next plot starts clean, then
# close the PDF device so the file is written out.
circos.clear()
dev.off()

你可能感兴趣的:(【R画图学习11.3】富集圈图---circlize)