『R画图脚本进阶』单个基因结构绘制


2020.11.10更新代码
emmm, 稍微想了下把那个辣眼睛的代码改的稍微不辣眼睛了一点...


果然需求是生产力驱动的第一要素.....以前没想过要整,现在又需求了,其实TBtools就可以很方便的绘制,不过还是手痒想用R实现一下。就用ggplot2撸出来了。国际惯例,上图

Gh_A08G039000.jpg

方法和原理

其实就是把gff文件可视化一下,看GFF文件的介绍:陈连福大佬的博客-GFF3格式介绍
我们发现GFF3包含了基因整体信息,所以就想**根据gff文件信息,提取UTR和cds信息用ggplot2geom_segment小片段一段一段的画出来。其实很简单。注意以下几个信息:

  • strand,方向性。
  • exon是否包含UTR。

所以鉴于这两点也是写了两个判断。直来直去的代码有点辣眼睛,不过没时间了,先解决问题,以后在慢慢美化ಥ_ಥ。

Script

更新后, 之前的不删除了,看最后能优化成什么样

drawGeneStructure = function(gff,size,lcolor){
  names(gff) = c("chr","source","type","start","end","score","strand","phase","attributes")
  chr = unique(gff$chr)
  strand = unique(gff$strand)
  a = filter(gff,type == "gene")$attributes
  desc = strsplit(a,split = ";")%>%unlist(.)
  tpm1 = filter(gff,type == "exon")
  tpm3 = filter(gff,type == "five_prime_UTR"| type == "three_prime_UTR")
  if (strand == "-"){
    tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                      end = tpm1$start,
                      start = tpm1$end,
                      y = 1)
    xend = tpm2$end[1]-100
  } else {
    tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                      start = tpm1$start,
                      end = tpm1$end,
                      y = 1)
    xend = tpm2$end[1]+100
  }
  p = ggplot(tpm2,aes(x = start,y = y))+
    geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                 color = lcolor)+
    geom_segment(data = tpm2,
                 mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                 size = size)+
    geom_segment(aes(x = tpm2$end[1],y = 0, xend = xend, yend = 0),
                 arrow = arrow(length = unit(0.2,"cm")),
                 color = "red")+
    xlab(chr)+
    theme(axis.ticks.y.left = element_blank(),
          axis.ticks.y = element_blank(),
          axis.line.y = element_blank(),
          axis.line.x.top = element_blank(),
          panel.grid = element_blank(),
          axis.text.y = element_blank(),
          plot.background = element_blank(),
          panel.background = element_blank(),
          axis.title.y = element_blank(),
          axis.line = element_line(colour = "black",size = 1),
          legend.position = "none")
  
  if(nrow(tpm3) != 0){
    p = p +
      geom_segment(data = tpm3,
                   mapping = aes(x = start,xend = end,y = 0, yend = 0),
                   size = size,
                   color = "grey")
  } else {
    p = p
  }
  return(p)
}

原始辣眼睛代码

drawGeneStructure = function(gff,size,lcolor){
  names(gff) = c("chr","source","type","start","end","score","strand","phase","attributes")
  chr = unique(gff$chr)
  strand = unique(gff$strand)
  a = filter(gff,type == "gene")$attributes
  desc = strsplit(a,split = ";")%>%unlist(.)
  tpm1 = filter(gff,type == "exon")
  tpm3 = filter(gff,type == "five_prime_UTR"| type == "three_prime_UTR")
  if (nrow(tpm3) != 0) {
    if (strand == "-") {
      tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                        end = tpm1$start,
                        start = tpm1$end,
                        y = 1)
      tpm3 = data.frame(type = paste(tpm3$type,seq(1:nrow(tpm3)),sep = ""),
                        end = tpm3$start,
                        start = tpm3$end)
      p = ggplot(tpm2,aes(x = start,y = y))+
        geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                     color = lcolor)+
        geom_segment(data = tpm2,
                     mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                     size = size)+
        geom_segment(data = tpm3,
                     mapping = aes(x = start,xend = end,y = 0, yend = 0),
                     size = size,
                     color = "grey")+
        geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]-100, yend = 0),
                     arrow = arrow(length = unit(0.2,"cm")),
                     color = "red")+
        xlab(chr)+
        theme(axis.ticks.y.left = element_blank(),
              axis.ticks.y = element_blank(),
              axis.line.y = element_blank(),
              axis.line.x.top = element_blank(),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              plot.background = element_blank(),
              panel.background = element_blank(),
              axis.title.y = element_blank(),
              axis.line = element_line(colour = "black",size = 1),
              legend.position = "none")
    } else {
      tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                        start = tpm1$start,
                        end = tpm1$end,
                        y = 1)
      p = ggplot(tpm2,aes(x = start,y = y))+
        geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                     color = lcolor)+
        geom_segment(data = tpm2,
                     mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                     size = size)+
        geom_segment(data = tpm3,
                     mapping = aes(x = start,xend = end,y = 0, yend = 0),
                     size = size,
                     color = "grey")+
        geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]+100, yend = 0),
                     arrow = arrow(length = unit(0.2,"cm")),
                     color = "red")+
        xlab(chr)+
        theme(axis.ticks.y.left = element_blank(),
              axis.ticks.y = element_blank(),
              axis.line.y = element_blank(),
              axis.line.x.top = element_blank(),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              plot.background = element_blank(),
              panel.background = element_blank(),
              axis.title.y = element_blank(),
              axis.line = element_line(colour = "black",size = 1),
              legend.position = "none")
    }
  } else{
    if (strand == "-") {
      tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                        end = tpm1$start,
                        start = tpm1$end,
                        y = 1)
      p = ggplot(tpm2,aes(x = start,y = y))+
        geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                     color = lcolor)+
        geom_segment(data = tpm2,
                     mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                     size = size)+
        geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]-100, yend = 0),
                     arrow = arrow(length = unit(0.2,"cm")),
                     color = "red")+
        xlab(chr)+
        theme(axis.ticks.y.left = element_blank(),
              axis.ticks.y = element_blank(),
              axis.line.y = element_blank(),
              axis.line.x.top = element_blank(),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              plot.background = element_blank(),
              panel.background = element_blank(),
              axis.title.y = element_blank(),
              axis.line = element_line(colour = "black",size = 1),
              legend.position = "none")
    } else {
      tpm2 = data.frame(type = paste(tpm1$type,seq(1:nrow(tpm1)),sep = ""),
                        start = tpm1$start,
                        end = tpm1$end,
                        y = 1)
      p = ggplot(tpm2,aes(x = start,y = y))+
        geom_segment(aes(x = tpm2$start[1],y = 0, xend = tpm2$end[nrow(tpm2)], yend = 0),
                     color = lcolor)+
        geom_segment(data = tpm2,
                     mapping = aes(x = start,xend = end,y = 0, yend = 0,color = type),
                     size = size)+
        geom_segment(aes(x = tpm2$end[1],y = 0, xend = tpm2$end[1]+100, yend = 0),
                     arrow = arrow(length = unit(0.2,"cm")),
                     color = "red")+
        xlab(chr)+
        theme(axis.ticks.y.left = element_blank(),
              axis.ticks.y = element_blank(),
              axis.line.y = element_blank(),
              axis.line.x.top = element_blank(),
              panel.grid = element_blank(),
              axis.text.y = element_blank(),
              plot.background = element_blank(),
              panel.background = element_blank(),
              axis.title.y = element_blank(),
              axis.line = element_line(colour = "black",size = 1),
              legend.position = "none")
    }
  }
  
return(p)
}

Usage

  • gff3文件提取你要画的基因的结构,如果没有gff3,按照下面的自己写一个df也行:
# 提取
grep "Gh_A08G039000" gh.criv2.gff3
# 提取结果:
A08     EVM     gene    4148620 4154669 .       -       .       ID=Gh_A08G039000;Name=RH8;description=DEAD-box ATP-dependent RNA helicase 8 ;
A08     EVM     mRNA    4148620 4154669 .       -       .       ID=Gh_A08G039000.1;Parent=Gh_A08G039000
A08     EVM     exon    4148620 4148965 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     three_prime_UTR 4148620 4148965 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     three_prime_UTR 4149091 4149130 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     exon    4149091 4149209 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4149131 4149209 .       -       1       Parent=Gh_A08G039000.1
A08     EVM     exon    4149308 4149381 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4149308 4149381 .       -       0       Parent=Gh_A08G039000.1
A08     EVM     exon    4149465 4149553 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4149465 4149553 .       -       2       Parent=Gh_A08G039000.1
A08     EVM     exon    4150111 4150291 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4150111 4150291 .       -       0       Parent=Gh_A08G039000.1
A08     EVM     exon    4150367 4150618 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4150367 4150618 .       -       0       Parent=Gh_A08G039000.1
A08     EVM     exon    4151917 4152155 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4151917 4152155 .       -       2       Parent=Gh_A08G039000.1
A08     EVM     exon    4152683 4152917 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4152683 4152917 .       -       0       Parent=Gh_A08G039000.1
A08     EVM     exon    4153404 4153464 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     CDS     4153404 4153464 .       -       1       Parent=Gh_A08G039000.1
A08     EVM     CDS     4154015 4154283 .       -       0       Parent=Gh_A08G039000.1
A08     EVM     exon    4154015 4154669 .       -       .       Parent=Gh_A08G039000.1
A08     EVM     five_prime_UTR  4154284 4154669 .       -       .       Parent=Gh_A08G039000.1
  • 画图
    参数解释:
    • gff: 就是上面你提取出来的基因gff信息
    • size: 就是图中exon的高度,size越大高度越大,建议size = 4
    • lcolor: 中间那条黑线的颜色,这个看你爱好
drawGeneStructure(gff = gff, lcolor = "black", size = 4)

完工...
为了保住小徽章我也是醉了....

你可能感兴趣的:(『R画图脚本进阶』单个基因结构绘制)