想去这样的实验室,图源水印
需要下载两个文件:
cytoband文件
gencode注释文件
用到的包:
if(!require(GenomicRanges)){
BiocManager::install("GenomicRanges")
library(GenomicRanges)
}
文件下载到本地后,简单粗暴读取
cytobandfile <- read.csv("cytoBand.txt.gz", sep = "\t",header = F)
gencodeinfo <- read.csv("gencode_v29_human_gene_info",sep = "\t",header = F)
先对 cytobandfile
做些文本处理
cytobandfile$band <- gsub("[[:alpha:]]","",cytobandfile$V4)
cytobandfile$arm <- gsub("\\d.+","",cytobandfile$V4)
colnames(cytobandfile) <- c("chr","start","end", "stain","band","arm")
cytobandfile <- cytobandfile[,-4] ##其实不把 p,q 和数字分开似乎更方便
cytobandfile$gene_name <- NA
gencodeinfo
输入正确列名
colnames(gencodeinfo) <- c("gene_name", "gene_type", "gene_id",
"seqnames", "start", "end")
一步get时间到:
Chrloc2Sym <- function(cytobandfile,gencodeinfo,arm,band) {
## 构建cytoband GRanges
cytobandgr <- GRanges(seqnames = Rle(cytobandfile$chr),
ranges = IRanges(start = cytobandfile$start,
end = cytobandfile$end),
stain = cytobandfile$stain,
arm = cytobandfile$arm,
band = cytobandfile$band,
symbol = cytobandfile$gene_name)
## 构建gencodeinfo GRanges
gencodegr <- GRanges(seqnames = Rle(gencodeinfo$seqnames),
ranges = IRanges(start = gencodeinfo$start,
end = gencodeinfo$end),
type = gencodeinfo$gene_type,
ensemble = gencodeinfo$gene_id,
symbol = gencodeinfo$gene_name)
## overlap subsetting
subs <- subsetByOverlaps(cytobandgr,gencodegr)
## get the index
ovlporder <- match(subs@ranges,cytobandgr@ranges)
## merge the symbol
subs@elementMetadata@listData[["symbol"]] <- gencodegr@elementMetadata@listData[["symbol"]][ovlporder]
## band&arm to symbol
subs[subs@elementMetadata@listData[["arm"]]==arm & subs@elementMetadata@listData[["band"]]==band,"symbol"]
}
试试水:
Chrloc2Sym(cytobandfile = cytobandfile, gencodeinfo = gencodeinfo,arm = "p", band = "36.21")
# GRanges object with 1 range and 1 metadata column:
# seqnames ranges strand | symbol
# |
# [1] chr1 12500000-15900000 * | OR4G11P
# -------
# seqinfo: 595 sequences from an unspecified genome; no seqlengths
完美┏ (゜ω゜)=☞
References
最后,向大家隆重推荐生信技能树的一系列干货!
- 生信技能树全球公益巡讲:https://mp.weixin.qq.com/s/E9ykuIbc-2Ja9HOY0bn_6g
- B站公益74小时生信工程师教学视频合辑:https://mp.weixin.qq.com/s/IyFK7l_WBAiUgqQi8O7Hxw
- 招学徒:https://mp.weixin.qq.com/s/KgbilzXnFjbKKunuw7NVfw