参考:
以水稻为例教你如何使用BSA方法进行遗传定位(下篇) - (jianshu.com)
使用QTLseqr进行BSA-seq分析 - (jianshu.com)
- 加上拟合线
文献中有拟合线(黑色的那条)。下面演示如何以1Mb为窗口、每次滑动10kb来计算窗口内的均值(注意:下面函数的默认步长 step_size 是100kb,若要与文献一致需传入 step_size = 10000)
以KY0DN1为例
#' Average a per-site statistic in sliding windows along one chromosome
#'
#' @param pos Numeric vector of site positions on a single chromosome.
#' @param value Numeric vector of the statistic at each site; same length
#'   as `pos`.
#' @param window_size Width of each window, in the same units as `pos`.
#' @param step_size Distance between the starts of consecutive windows.
#' @return A data.frame with columns `pos` (window midpoint) and `value`
#'   (mean of `value` over the sites inside the window). Windows that
#'   contain no site are dropped.
calcValueByWindow <- function(pos, value,
                              window_size = 1000000,
                              step_size = 100000) {
  stopifnot(
    length(pos) == length(value),
    window_size > 0,
    step_size > 0
  )

  # Nothing to summarise: return an empty frame with the usual columns
  if (length(pos) == 0) {
    return(data.frame(pos = numeric(0), value = numeric(0)))
  }

  # Windows start every `step_size` from 0 up to the last site; a window
  # starting beyond the last site would always be empty.
  window_start <- seq(0, max(pos), by = step_size)
  # Each window spans `window_size`. The width must not be `step_size`,
  # otherwise `window_size` has no effect and windows never overlap.
  window_end <- window_start + window_size

  # Half-open interval [start, end): a site sitting exactly on a window
  # boundary is counted instead of being silently dropped.
  mean_value <- vapply(
    seq_along(window_start),
    function(j) {
      in_window <- which(pos >= window_start[j] & pos < window_end[j])
      mean(value[in_window])
    },
    numeric(1)
  )

  # mean() of an empty selection is NaN; those windows carry no data
  keep <- !is.nan(mean_value)
  data.frame(
    pos = ((window_start + window_end) / 2)[keep],
    value = mean_value[keep]
  )
}
# Draw one panel per chromosome on a 3 x 4 grid
par(mfrow = c(3, 4))
for (i in sprintf("chr%02d", 1:12)) {
  # Keep the rows of freq2 whose row name matches this chromosome id
  freq_flt <- freq2[grepl(i, row.names(freq2)), ]
  # The position is whatever follows the first six characters of the row name
  pos <- as.numeric(substring(row.names(freq_flt), 7))
  # Delta SNP index: first column minus second column
  snp_index <- freq_flt[, 1] - freq_flt[, 2]
  # Windowed means used for the trend line
  df <- calcValueByWindow(pos = pos, value = snp_index)
  plot(
    pos, snp_index,
    xlab = i,
    ylab = expression(paste(Delta, " " ,"SNP index")),
    ylim = c(-1, 1),
    pch = 20,
    cex = 0.2
  )
  lines(df$pos, df$value, col = "red")
}
2.QTLseqr
devtools::install_github("bmansfeld/QTLseqr")
library(QTLseqr)
library(vcfR)
# Clearing the workspace before starting is recommended; leftover objects
# from earlier steps easily cause errors
rm(list = ls())
# Set the working directory
setwd("~/workspace/BSA/practice/")
# Load the filtered SNP calls and pull out the per-site fields
vcf <- read.vcfR("4.variants_filter/snps.vcf")
chrom <- getCHROM(vcf)
pos <- getPOS(vcf)
ref <- getREF(vcf)
alt <- getALT(vcf)
# AD holds comma-separated allele depths; record 1 is taken as the REF
# depth and record 2 as the ALT depth
ad <- extract.gt(vcf, "AD")
ref_split <- masplit(ad, record = 1, sort = 0)
alt_split <- masplit(ad, record = 2, sort = 0)
gt <- extract.gt(vcf, "GT")
# Build a data frame in the layout expected by QTLseqr::importFromTable()
# NOTE(review): columns 3 and 4 are assumed to be SRR6327817 and SRR6327818;
# confirm against colnames(ad)
df <- data.frame(
  CHROM = chrom,
  POS = pos,
  REF = ref,
  ALT = alt,
  AD_REF.SRR6327817 = ref_split[, 3],
  AD_ALT.SRR6327817 = alt_split[, 3],
  AD_REF.SRR6327818 = ref_split[, 4],
  AD_ALT.SRR6327818 = alt_split[, 4]
)
# Keep sites by genotype of SRR6327815 / SRR6327816.
# NOTE(review): this keeps sites where SRR6327815 is not "0/1" but SRR6327816
# IS "0/1". If the intent is to drop sites where either sample is
# heterozygous, the second test should be `!=` -- confirm before relying on it.
mask <- which(gt[, "SRR6327815"] != "0/1" & gt[, "SRR6327816"] == "0/1")
df <- df[mask, ]
# Spell out FALSE: the shorthand F is an ordinary variable and can be reassigned
write.table(df, file = "rice.tsv", sep = "\t", row.names = FALSE, quote = FALSE)
# Read the table back in through QTLseqr
df <- importFromTable(
  "rice.tsv",
  highBulk = "SRR6327817",
  lowBulk = "SRR6327818",
  chromList = sprintf("chr%02d", 1:12),
  sep = "\t"
)
# Drop sites whose SNP index is NA in either bulk
df <- df[!(is.na(df$SNPindex.LOW) | is.na(df$SNPindex.HIGH)), ]
# G' statistic
df <- runGprimeAnalysis(
  SNPset = df,
  windowSize = 1e6,
  outlierFilter = "deltaSNP"
)
# Confidence intervals for delta SNP index
df <- runQTLseqAnalysis(
  SNPset = df,
  windowSize = 1e6,
  popStruc = "RIL",
  bulkSize = c(20, 20)
)
# Plots
# G' along the genome, with the threshold line drawn for q = 0.01
plotQTLStats(SNPset = df, var = "Gprime", plotThreshold = TRUE, q = 0.01)
# Delta SNP index along the genome, with its intervals
plotQTLStats(SNPset = df, var = "deltaSNP", plotIntervals = TRUE)
- ggplot2绘图
跟着文献里的图画的,努力在还原了…
# ggplot2 has to be attached for ggplot(), the geoms, theme() and unit();
# the script above only attaches QTLseqr and vcfR
library(ggplot2)
ggplot(data = df, aes(x = POS, y = deltaSNP)) + # map the x and y axes
  geom_point(aes(color = as.factor(CHROM)), # colour points by chromosome
             alpha = 0.8, size = 0.8, position = "jitter") +
  # 12 columns, free x scale per panel, facet labels moved to the bottom
  facet_wrap(~CHROM, ncol = 12, scales = "free_x", strip.position = 'bottom') +
  # Fitted line; se = FALSE turns off the confidence band, i.e. removes the
  # shading around the line
  geom_smooth(method = 'gam', fullrange = TRUE,
              size = 0.7, color = "black",
              se = FALSE) +
  # y-axis range
  # NOTE(review): ylim() discards points outside 0..1 before the smooth is
  # fitted; confirm that dropping negative deltaSNP values is intended
  ylim(0, 1) +
  ylab(expression(paste(Delta, " " ,"SNP index"))) + # y-axis label
  theme(
    legend.position = "none",
    panel.border = element_blank(), # border of the plotting area
    panel.grid.major.x = element_blank(), # major grid lines
    panel.grid.minor.x = element_blank(), # minor grid lines
    panel.spacing.x = unit(0, "cm"), # horizontal gap between facets
    strip.placement = "outside", # facet labels outside the axes
    strip.background.x = element_rect(color = "white", fill = "white"), # white facet label background
    axis.text.x = element_blank() # hide the x-axis tick labels
  )
这里的拟合线我是直接用 geom_smooth 拟合的(注意上面代码中写的是 method = 'gam',而不是lm),但感觉还是第1部分里的那条线比较好。
但是1里是分了12条染色体进行绘制的,我再想想怎么样在这个图里加上1里的拟合线。
2022.5.17更新
对数据处理改了一下,上面那个是按照binmapr算出来的deltaSNP画的图
感觉结果与文献里不太符合,于是换了一下参数设置(即把 highBulk 和 lowBulk 对应的 SRR6327817 与 SRR6327818 互换),另外加了一条 y=0.5 的水平虚线
至于文章中的那条拟合曲线,还没想到怎么才能加上去……待我再思考思考。