以1000kb为窗口
bedtools makewindows -b hg19.txt -w 1000 > hg19.window.bed
awk '{print $1"\t"$2"\t"$3"\t0\t0\t+\n"$1"\t"$2"\t"$3"\t0\t0\t-"}' hg19.window.bed > hg19.window.bed.region
###需要修改!bam转换成bed 再进行下一步###
bedtools coverage -S –abam accepted_hits.uniq.bam –b hg19.window.bed.region > hg19.window.bed.region.cov
R:
data <- read.table("hg19.window.bed.region.cov ",sep="\t") #读取数据t$log <- 0for(i in 1:nrow(data)){ if(data[i,6]=="+"){ data$log[i] <- log2(data$V7+1)} else{ data$log[i] <- -log2(data$V7+1)}
} #为了降低数据的离散程度,画图之前一般会把count取个log。此处将负链count设为负数,正链为正数。+1是因为有些count为0,0是不能取对数的。这一步比较慢,建议使用perl或python进行处理。chr <-c("chr1","chr2","chr3","chr4","chr5","chr6","chr7","chr8","chr9","chr10","chr11","chr12","chr13","chr14","chr15","chr16","chr17","chr18","chr19","chr20","chr21","chr22","chrX","chrY") #因为人类染色体还有很多非常规染色体,如果全部画出来会不好看,所有此处只画出常规染色体。也可以在文件hg19.txt里面只输入常规染色体。data2<- data[data$V1 %in% chr,]
color <- c("+"="#008B00","-"="#FF8247") # 定义正负链颜色p<-ggplot(data2)+facet_grid(V1~.)+
geom_point(aes(V2/1000000,log,colour=V6),size=1) #根据染色体进行分面,最简单出图。p<-p+theme(
strip.text.y=element_text(angle=0,face="bold",hjust=0),
legend.position="none",panel.grid.minor=element_blank(),
panel.grid.major=element_blank(),
plot.title=element_text(size=20,face="bold"),
axis.text.y=element_text(size=10,angle=50,face="bold"),
strip.background=element_blank(),
panel.background=element_rect(fill="white"),
axis.line=element_line(linetype=1)
)+
scale_colour_manual(values=color)+
scale_y_continuous(breaks=c(-15,15))+
labs(title="Reads Density in Chromosomes")+
xlab("chromosome position(Mb)")+
ylab("reads density(log2)")