Download fastq file
# Method 1: SRA Toolkit. Prefetch every accession listed in srr.txt
# (one per line) into $wkd/sra.
# NOTE(review): $wkd is not set anywhere in this file; confirm it is exported
# before running.
while read accession
do
    prefetch -O $wkd/sra $accession
done < srr.txt
# Method 2: axel. Download every URL listed in link.txt (one per line);
# the links are looked up on EBI for the SRR accessions.
while read url
do
    axel -n 30 ${url}
done < link.txt
Define
# Number of worker threads handed to the tools below.
nThread=32
# Bowtie2 index prefix (hg38). Built from $HOME instead of a quoted '~':
# the shell does not expand a tilde inside quotes, nor one that comes out of a
# variable expansion, so the original value reached bowtie2 as a literal
# "~/..." path that does not exist.
bowtie2Index="$HOME/reference.and.annotations/Homo_sapiens/UCSC/hg38/Sequence/Bowtie2Index/hg38"
mkdir
# Create the output directories used by the steps below, skipping any that
# already exist.
for outDir in trim bam bw peak
do
    [ -d $outDir ] || mkdir -p $outDir
done
QC
#ls fastq/*.fq.gz | while read line
#do
# aa=$(basename $line .fq.gz)
# if [ ! -e fastqc/${aa}_fastqc.html ];then
# fastqc -o fastqc -t $nThread $line
# fi
#done
Loop
# Expected fastq naming: fastq/<sample>_1.fq.gz and fastq/<sample>_2.fq.gz.
# Stage 1 turns the fastq file list into unique sample names; stage 2 trims
# and aligns each sample, skipping steps whose output file already exists.
ls fastq/*.fq.gz | while read -r file
do
    # The suffix must be a separate basename argument; glued onto the path
    # (as in the original) it is not stripped at all.
    aa=$(basename "$file" .fq.gz)
    # Drop the trailing mate tag (_1 / _2) to get the sample name.
    bb=${aa%_*}
    echo "$bb"
done | sort | uniq | while read -r sample
do
    if [ ! -e "trim/${sample}_1_val_1.fq.gz" ];then
        trim_galore --paired "fastq/${sample}_1.fq.gz" "fastq/${sample}_2.fq.gz" --gzip -o trim
    fi
    if [ ! -e "bam/${sample}.bam" ];then
        # -I/-X bound the accepted paired-end fragment length (15-1000 bp).
        # samtools sort needs "$nThread"; the original passed the literal
        # word "nThread" to -@. "-" makes reading from the pipe explicit.
        bowtie2 -I 15 -X 1000 -x "$bowtie2Index" -p "$nThread" \
            -1 "trim/${sample}_1_val_1.fq.gz" -2 "trim/${sample}_2_val_2.fq.gz" \
            | samtools sort -@ "$nThread" -O bam -o "bam/${sample}.bam" -
    fi
done
一些可选操作
samtools merge
# Optional: merge replicate BAMs into one BAM per condition. The first file
# argument is the output, the remaining ones are the inputs.
# NOTE(review): 1.bam ... 4.bam look like placeholder names; substitute the
# real replicate files (and their directory) before running.
samtools merge WT.bam 1.bam 2.bam
samtools merge KO.bam 3.bam 4.bam
bam2bw
# Without input subtraction: index each BAM in bam/ and write its coverage
# track to bw/<id>.bw.
ls bam/*bam | while read -r file
do
    samtools index -@ "$nThread" "$file"
    id=$(basename "$file" .bam)
    bamCoverage -b "$file" -o "bw/${id}.bw" -p "$nThread"
done
# Compare the ChIP track against its input track. Both inputs are read from
# bw/, which is where the loop above writes its bigWig files; the original
# looked for them in the current directory.
# NOTE(review): the sample names here are specific to one experiment; adjust
# them to the tracks actually produced.
bigwigCompare -p "$nThread" -b1 bw/SMARCC1_arid1a-KO.bw -b2 bw/input_arid1a-KO.bw -o bw/SMARCC1_arid1a-KO-exInput.bw
# With input subtraction: index every BAM first, then compare the ChIP BAM
# against its input BAM and write the resulting track to bw/.
ls bam/*bam | while read bamFile; do
    samtools index -@ $nThread $bamFile
done
bamCompare --numberOfProcessors $nThread \
    --bamfile1 bam/SMARCA4_H.bam \
    --bamfile2 bam/input_H.bam \
    --outFileName bw/SMARCA4_H.bw
macs2 callpeak
# Call peaks on every BAM in bam/ (paired-end mode, human genome size).
# Results go to peak/; the stderr of each run is captured in
# <id>.masc2.log in the current directory.
# NOTE(review): "masc2" in the log name looks like a typo for "macs2"; kept
# unchanged so existing log paths stay valid.
ls bam/*.bam | while read bamFile; do
    sampleId=$(basename $bamFile .bam)
    macs2 callpeak --treatment $bamFile --mfold 10 30 --pvalue 1e-5 \
        --format BAMPE --gsize hs --name ${sampleId} \
        --outdir peak 2>${sampleId}.masc2.log
done
te
# Heatmaps and profiles of the bigWig signal in bw/ over the region sets in bed/.
mkdir -p pic    # -p: do not fail when pic/ already exists (re-runs)
nThread=16
# Flank size in bp, used on both sides of the regions / reference point.
size=5000
# Label for the TSS-centred outputs below; it also selects bed/<samp>.bed.
# The original left this assignment commented out, so ${samp} expanded to
# nothing: computeMatrix wrote ./pic/5000.mat.gz while plotHeatmap and
# plotProfile looked for ./pic/.5000.mat.gz.
samp="RNF2"
# One scaled-region matrix, heatmap and profile per BED file.
ls bed/*.bed | while read -r line
do
    id=$(basename "$line" .bed)
    computeMatrix scale-regions -p "$nThread" -S bw/*.bw -R "$line" -a "$size" -b "$size" -o "pic/${id}.computeMatrix"
    plotHeatmap --matrixFile "pic/${id}.computeMatrix" -o "pic/${id}.WT_KO.point.heatmap.pdf" --yAxisLabel "${id}.KO-WT.signal"
    plotProfile --dpi 720 -m "pic/${id}.computeMatrix" -out "pic/${id}.WT_KO.heatmap.merge.pdf" --plotTitle "${id}" --plotFileFormat pdf --perGroup --yAxisLabel "${id}.Seq.KO-WT.signal"
done
# Reference-point matrix and plots for the ${samp} region set; the matrix is
# written to the same path the two plot commands read.
computeMatrix reference-point -p "$nThread" --referencePoint TSS -b "$size" -a "$size" -S bw/*.bw -R "bed/${samp}.bed" --skipZeros -o "./pic/${samp}.${size}.mat.gz"
plotHeatmap -m "./pic/${samp}.${size}.mat.gz" -out "./pic/${samp}.${size}.heat.png" --colorList green,yellow,red
plotProfile -m "./pic/${samp}.${size}.mat.gz" -out "./pic/${samp}.${size}.profile.png" --perGroup
# H2A119ub signal (bw/Rif1*H2A119ub.bw) within 5 kb either side of the
# reference point of the regions in bed/Rif1.bed, followed by a heatmap and
# a per-group profile of the same matrix.
computeMatrix reference-point --numberOfProcessors 16 \
    --referencePoint TSS \
    --beforeRegionStartLength 5000 --afterRegionStartLength 5000 \
    --scoreFileName bw/Rif1*H2A119ub.bw \
    --regionsFileName bed/Rif1.bed \
    --skipZeros \
    --outFileName ./pic/H2A119ub.mat.gz
plotHeatmap --matrixFile ./pic/H2A119ub.mat.gz --outFileName ./pic/H2A119ub.heat.png
plotProfile --matrixFile ./pic/H2A119ub.mat.gz --outFileName ./pic/H2A119ub.profile.png --perGroup
deeptools 可视化TSS上下游5K,peaks位于TSS下游一点
# Signal of every track in bw/ within 5 kb either side of the TSS of the
# mm9 RefSeq annotation, plotted as heatmap and per-group profile
# (PNG first, then 720-dpi PDF).
computeMatrix reference-point --numberOfProcessors 16 \
    --referencePoint TSS \
    --beforeRegionStartLength 5000 --afterRegionStartLength 5000 \
    --scoreFileName bw/*.bw \
    --regionsFileName ~/maos/annotation/chipseq/mm9/ucsc.refseq.jimmy.bed \
    --skipZeros \
    --outFileName 5000.mat.gz
plotHeatmap --matrixFile 5000.mat.gz --outFileName 5000.heat.png
plotProfile --matrixFile 5000.mat.gz --outFileName 5000.profile.png --perGroup
plotHeatmap --dpi 720 --matrixFile 5000.mat.gz --outFileName 5000.heat.pdf --plotFileFormat pdf
plotProfile --dpi 720 --matrixFile 5000.mat.gz --outFileName 5000.profile.pdf --plotFileFormat pdf --perGroup
deeptools 可视化TSS上下游2K,peaks位于TSS下游一点
# Same plots as the 5 kb version, with a 2 kb window either side of the TSS
# (PNG first, then 720-dpi PDF).
computeMatrix reference-point --numberOfProcessors 16 \
    --referencePoint TSS \
    --beforeRegionStartLength 2000 --afterRegionStartLength 2000 \
    --scoreFileName bw/*.bw \
    --regionsFileName ~/maos/annotation/chipseq/mm9/ucsc.refseq.jimmy.bed \
    --skipZeros \
    --outFileName 2000.mat.gz
plotHeatmap --matrixFile 2000.mat.gz --outFileName 2000.heat.png
plotProfile --matrixFile 2000.mat.gz --outFileName 2000.profile.png --perGroup
plotHeatmap --dpi 720 --matrixFile 2000.mat.gz --outFileName 2000.heat.pdf --plotFileFormat pdf
plotProfile --dpi 720 --matrixFile 2000.mat.gz --outFileName 2000.profile.pdf --plotFileFormat pdf --perGroup
Chipseeker包注释与可视化
ChIPseeker的功能分为三类:
- 注释:提取peak附近最近的基因, 注释peak所在区域。
- 比较:估计ChIP peak数据集中重叠部分的显著性;整合GEO数据集,以便于将当前结果和已知结果比较。
- 可视化:peak的覆盖情况;TSS区域结合的peak的平均表达谱和热图;基因组注释;TSS距离;peak和基因的重叠。
download packages
# Install the Bioconductor packages used below.
# The biocLite.R installer script has been retired; BiocManager is the
# supported way to install Bioconductor packages on current R versions.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
BiocManager::install(c(
  "ChIPseeker",
  "org.Mm.eg.db",
  "TxDb.Mmusculus.UCSC.mm9.knownGene",
  "clusterProfiler",
  "ReactomePA",
  "DOSE"
))
loading packages
# Attach the peak-annotation and enrichment packages, then select the mm9
# knownGene transcript database used as the gene model by the steps below.
library(ChIPseeker)
library(org.Mm.eg.db)
library(clusterProfiler)
library(TxDb.Mmusculus.UCSC.mm9.knownGene)
txdb <- TxDb.Mmusculus.UCSC.mm9.knownGene
code body
# Load the narrowPeak files for the four samples
# (Rif1 KO / WT, RNF2 / H2A119ub).
rkr <- readPeakFile("./peak/Rif1-KO-RNF2_peaks.narrowPeak")
rwr <- readPeakFile("./peak/Rif1-WT-RNF2_peaks.narrowPeak")
rkh <- readPeakFile("./peak/Rif1-KO-H2A119ub_peaks.narrowPeak")
rwh <- readPeakFile("./peak/Rif1-WT-H2A119ub_peaks.narrowPeak")

# Plots for a single peak set (Rif1-KO RNF2).
# ChIP peak coverage over the whole genome.
covplot(rkr)
# ChIP peak coverage restricted to chr1-chr19, chrX and chrY.
covplot(rkr, chrs = paste0("chr", c(1:19, "X", "Y")))

# Heatmap of peak tags in a window of 2 kb either side of each promoter.
promoter <- getPromoters(TxDb = txdb, upstream = 2000, downstream = 2000)
tagMatrix <- getTagMatrix(rkr, windows = promoter)
tagHeatmap(tagMatrix, xlim = c(-2000, 2000), color = "red")
# Annotation
# Annotate each sample separately (single peak-set analysis), treating
# `size` bp either side of the TSS as the TSS region.
size <- 5000
peakAnnoRKR <- annotatePeak(
  rkr,
  tssRegion = c(-size, size), TxDb = txdb,
  annoDb = "org.Mm.eg.db", verbose = FALSE
)
# Annotate the WT peak set here; the original passed `rkr` again, so
# peakAnnoRWR was just a copy of the KO annotation.
peakAnnoRWR <- annotatePeak(
  rwr,
  tssRegion = c(-size, size), TxDb = txdb,
  annoDb = "org.Mm.eg.db", verbose = FALSE
)

# Summary plots for the KO RNF2 annotation.
plotAnnoBar(peakAnnoRKR)
vennpie(peakAnnoRKR)
# UpSet plots suit cases with more than five sets.
upsetplot(peakAnnoRKR)
plotDistToTSS(
  peakAnnoRKR,
  title = "Distribution of transcription factor-binding loci\nrelative to TSS"
)
# Comparison of several peak sets
# Heatmap / average profile of ChIP binding to TSS regions.
# Each list name matches its object: the original labelled `rkh` as "wkh",
# which mislabelled that sample in every plot and result built from `peaks`.
peaks <- list(rkr = rkr, rwr = rwr, rkh = rkh, rwh = rwh)
peaks[[1]]
size <- 2000
promoter <- getPromoters(TxDb = txdb, upstream = size, downstream = size)
tagMatrixList <- lapply(peaks, getTagMatrix, windows = promoter)
# NOTE: an earlier run recorded
# "Error: cannot allocate vector of size 818.0 Mb" around this step.
plotAvgProf(tagMatrixList, xlim = c(-size, size))
# Same profile with a 95% confidence band from 500 resamples, one row per
# sample.
plotAvgProf(
  tagMatrixList,
  xlim = c(-size, size),
  conf = 0.95, resample = 500, facet = "row"
)
tagHeatmap(tagMatrixList, xlim = c(-size, size), color = NULL)
# Annotation of all samples at once
size <- 5000
peakAnnoList <- lapply(
  peaks, annotatePeak,
  TxDb = txdb, tssRegion = c(-size, size), verbose = FALSE
)
plotAnnoBar(peakAnnoList)
plotDistToTSS(
  peakAnnoList,
  title = "Distribution of transcription factor-binding loci\nrelative to TSS"
)

# Overlap of peaks and annotated genes
genes <- lapply(peakAnnoList, function(i) as.data.frame(i)$geneId)
vennplot(genes)

# KEGG enrichment compared across samples
names(genes) <- sub("_", "\n", names(genes))
# The peaks are annotated against mouse (mm9 TxDb), so the KEGG organism must
# be "mmu"; enrichKEGG otherwise defaults to human ("hsa") and the mouse gene
# IDs are looked up in the wrong pathway set.
compKEGG <- compareCluster(
  geneClusters = genes,
  fun = "enrichKEGG",
  organism = "mmu",
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH"
)
Distribution of transcription factor-binding loci relative to TSS
Distribution of transcription factor-binding loci relative to TSS
Overlap of peaks and annotated genes
Overlap of peaks and annotated genes