# ChIPseq pipeline

# Build the bowtie2 index for the hg38.p13 reference FASTA.
# The index files are written next to the FASTA with the prefix "hg38".
conda activate bowtie2
cd /home/yc17628/RefData/hg38_p13
bowtie2-build \
  --threads 10 \
  -f hg38.p13.fa \
  hg38

# Align the cleaned paired-end reads against the hg38 bowtie2 index and write
# one sorted BAM per sample into the alignment directory.
conda activate bowtie2
cd /home/yc17628/Project/NSD3/ChIPseq/align

index=/home/yc17628/RefData/hg38_p13/hg38
outputdir=/home/yc17628/Project/NSD3/ChIPseq/align

# The first four "_"-separated fields of each fastq path are used as the
# per-sample prefix; sort -u collapses the two mates into a single entry.
ls /home/yc17628/Project/NSD3/ChIPseq/2.cleandata/*gz \
  | cut -d"_" -f 1,2,3,4 \
  | sort -u \
  | while read prefix; do
      name=${prefix##*/}
      mate1=${prefix}_1.clean.fq.gz
      mate2=${prefix}_2.clean.fq.gz

      # Listing both mates makes a missing file visible in the log.
      ls -lh $mate1 $mate2

      # bowtie2 streams its alignments to samtools sort, whose BAM output on
      # stdout is redirected into the per-sample file.
      bowtie2 -p 10 -x $index -1 $mate1 -2 $mate2 \
        | samtools sort -O bam -@ 6 -o - \
        > $outputdir/${name}.sorted.bam
    done

# BAM QC: index every BAM in the alignment directory and write a flagstat
# report (<name>.stat) for each of them.
cd /home/yc17628/Project/NSD3/ChIPseq/align

ls *.bam | xargs -I {} samtools index {}

# Each flagstat is started with nohup in a backgrounded subshell, so the loop
# does not wait for the reports to finish.
ls *.bam | while read bam; do
  (nohup samtools flagstat $bam > $(basename $bam ".bam").stat &)
done

# PCR duplicate removal with samtools:
#   sort -n  ->  fixmate -m  ->  sort  ->  markdup -r
# followed by indexing and flagstat of the de-duplicated BAMs.
conda activate bowtie2
cd /home/yc17628/Project/NSD3/ChIPseq/align
outputdir=/home/yc17628/Project/NSD3/ChIPseq/rmdup

# Step 1: name-sort every <sample>.sorted.bam into the rmdup directory.
ls *.bam | cut -d"." -f 1 | sort -u | while read prefix; do
  samtools sort -n -o $outputdir/${prefix}.namesort.bam ${prefix}.sorted.bam
done

cd /home/yc17628/Project/NSD3/ChIPseq/rmdup

# Step 2: fixmate -m on the name-sorted BAMs.
ls *.namesort.bam | while read bam; do
  samtools fixmate -m $bam $(basename $bam ".namesort.bam").fix.bam
done

# Step 3: sort the fixmate output again (default samtools sort order).
ls *.fix.bam | while read bam; do
  samtools sort -o $(basename $bam ".fix.bam").fix.pos.bam $bam
done

# Step 4: markdup -r writes <sample>.rmdup.bam.
ls *.fix.pos.bam | while read bam; do
  samtools markdup -r $bam $(basename $bam ".fix.pos.bam").rmdup.bam
done

cd /home/yc17628/Project/NSD3/ChIPseq/rmdup

# Index the final BAMs and collect their flagstat reports in the background.
ls *.rmdup.bam | xargs -I {} samtools index {}

ls *.rmdup.bam | while read bam; do
  (nohup samtools flagstat $bam > $(basename $bam ".bam").stat &)
done

# Convert every de-duplicated BAM into a BED12 file (<sample>.bed) in the
# rmdup directory.
#
# Only *.rmdup.bam is globbed: the directory also holds the namesort / fix /
# fix.pos intermediates of each sample, and listing all *.bam without
# de-duplicating the prefixes converted every sample several times.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup

for bam in *.rmdup.bam; do
  # An unmatched glob stays literal; skip it instead of calling bedtools on it.
  [[ -e "$bam" ]] || continue

  sample=${bam%.rmdup.bam}

  ls "$bam"
  echo "${sample}"

  bedtools bamtobed -i "$bam" -bed12 > "${sample}.bed"
done

# Convert each sample's de-duplicated BAM into a CPM-normalised bigWig with
# deepTools bamCoverage; the bigWigs are written into the rmdup directory.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/rmdup

# The text before the first "." of each BAM name is the sample prefix.
ls *.bam | cut -d"." -f 1 | sort -u | while read prefix; do
  name=${prefix##*/}
  echo ${name}
  bamCoverage \
    --normalizeUsing CPM \
    -b ${name}.rmdup.bam \
    -o $outputdir/${name}.bw
done

# Compare every de-duplicated BAM with the input BAM of its own group
# (sgControl or sgNSD3) using deepTools bamCompare and write the bigWigs
# into the bw directory.
#
# The bigWigs are written to $outputdir. Previously $outputdir was assigned
# but never used, so the files landed in the rmdup directory under the same
# names as the bamCoverage bigWigs and overwrote them.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/bw
mkdir -p "$outputdir"

for group in sgControl sgNSD3; do
  control=${group}_ChIP_input_R.rmdup.bam

  for bam in "${group}"*.rmdup.bam; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$bam" ]] || continue

    sample=${bam%.rmdup.bam}
    echo "${sample}"

    bamCompare -b1 "$bam" -b2 "$control" -o "$outputdir/${sample}.bw"
  done
done

# diffReps: differential analysis between pairs of replicate BED sets.
# Four comparisons are run, each writing one *.diff.txt into $outputdir.
cd /home/yc17628/Project/NSD3/ChIPseq/bed
outputdir=/home/yc17628/Project/NSD3/ChIPseq/diffReps_2

# H2AZ ChIP: sgNSD3 (treatment) vs sgControl (control).
diffReps.pl \
  -tr sgNSD3_H2AZ_ChIP_R1.bed sgNSD3_H2AZ_ChIP_R2.bed \
  -co sgControl_H2AZ_ChIP_R1.bed sgControl_H2AZ_ChIP_R2.bed \
  --chrlen /home/yc17628/RefData/hg38_p13/hg38.p13.chrom.sizes \
  --mode p -me nb \
  -re $outputdir/ChIP_H2AZ.NvsC.diff.txt \
  --noanno

# NSD3 ChIP: sgNSD3 (treatment) vs sgControl (control).
diffReps.pl \
  -tr sgNSD3_NSD3_ChIP_R1.bed sgNSD3_NSD3_ChIP_R2.bed \
  -co sgControl_NSD3_ChIP_R1.bed sgControl_NSD3_ChIP_R2.bed \
  --chrlen /home/yc17628/RefData/hg38_p13/hg38.p13.chrom.sizes \
  --mode p -me nb \
  -re $outputdir/ChIP_NSD3.NvsC.diff.txt \
  --noanno

# Within sgControl: H2AZ ChIP (treatment) vs NSD3 ChIP (control).
diffReps.pl \
  -tr sgControl_H2AZ_ChIP_R1.bed sgControl_H2AZ_ChIP_R2.bed \
  -co sgControl_NSD3_ChIP_R1.bed sgControl_NSD3_ChIP_R2.bed \
  --chrlen /home/yc17628/RefData/hg38_p13/hg38.p13.chrom.sizes \
  --mode p -me nb \
  -re $outputdir/c_H2AZvsNSD3.diff.txt \
  --noanno

# Within sgNSD3: H2AZ ChIP (treatment) vs NSD3 ChIP (control).
diffReps.pl \
  -tr sgNSD3_H2AZ_ChIP_R1.bed sgNSD3_H2AZ_ChIP_R2.bed \
  -co sgNSD3_NSD3_ChIP_R1.bed sgNSD3_NSD3_ChIP_R2.bed \
  --chrlen /home/yc17628/RefData/hg38_p13/hg38.p13.chrom.sizes \
  --mode p -me nb \
  -re $outputdir/n_H2AZvsNSD3.diff.txt \
  --noanno

# Annotate every broadPeak file with HOMER annotatePeaks.pl (hg38) and store
# the table as <prefix>.peak_related_genes.txt in the annotate directory.
conda activate chipseq
cd /home/yc17628/Project/NSD3/ChIPseq/broad_peaks
outputdir=/home/yc17628/Project/NSD3/ChIPseq/broad_peaks/annotate

# The text before the first "." of each file name is the peak-set prefix.
ls *broadPeak | cut -d"." -f 1 | sort -u | while read peakset; do
  ls ${peakset}.broadPeak
  annotatePeaks.pl ${peakset}.broadPeak hg38 \
    > $outputdir/${peakset}.peak_related_genes.txt
done

# phantompeakqualtools QC: run run_spp.R on each sample's de-duplicated BAM.
# The plot and the quality table go to qual/, the R output to logs/.
conda activate phantompeak
cd /home/yc17628/App/phantompeakqualtools
mkdir -p logs qual

ls /home/yc17628/Project/NSD3/ChIPseq/rmdup/*.bam \
  | cut -d"." -f 1 \
  | sort -u \
  | while read prefix; do
      name=${prefix##*/}

      ls -lh ${prefix}.rmdup.bam
      echo ${name}

      Rscript run_spp.R \
        -c=${prefix}.rmdup.bam \
        -savp=qual/${name}.png \
        -out=qual/${name}.qual \
        > logs/${name}.Rout
    done

# phantompeakqualtools peak calling: run run_spp.R for every replicate BAM
# against the input BAM of its group, one output directory per sample.
#
# Fixes relative to the earlier version:
#   * the glob contained a literal "#" ("R#"), which matches no file; it now
#     uses R[0-9] to select the numbered replicates
#     (NOTE(review): "R#" is read as "R + replicate number" - confirm);
#   * the sgNSD3 loop listed files relative to the tool directory instead of
#     $inputdir;
#   * mkdir -p so that a rerun does not fail on existing directories.
#
# NOTE(review): the control BAM names carry a "_BMRC..." suffix here, while
# other sections of this pipeline use <group>_ChIP_input_R.rmdup.bam -
# confirm which files actually exist.
conda activate phantompeak
cd /home/yc17628/App/phantompeakqualtools

inputdir=/home/yc17628/Project/NSD3/ChIPseq/rmdup

for bam in "$inputdir"/sgControl*R[0-9]*.rmdup.bam; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$bam" ]] || continue

  sample=$(basename "$bam" .rmdup.bam)
  mkdir -p "${sample}"

  Rscript run_spp.R \
    -c="$inputdir/${sample}.rmdup.bam" \
    -i="$inputdir/sgControl_ChIP_input_R_BMRC210005804-1A.rmdup.bam" \
    -fdr=0.05 \
    -odir="${sample}" \
    -savr -savp -savd -rf
done

for bam in "$inputdir"/sgNSD3*R[0-9]*.rmdup.bam; do
  [[ -e "$bam" ]] || continue

  sample=$(basename "$bam" .rmdup.bam)
  mkdir -p "${sample}"

  Rscript run_spp.R \
    -c="$inputdir/${sample}.rmdup.bam" \
    -i="$inputdir/sgNSD3_ChIP_input_R_BMRC210005807-1A.rmdup.bam" \
    -fdr=0.05 \
    -odir="${sample}" \
    -savr -savp -savd -rf
done

# TSS-centred signal heatmaps (deepTools): for every bigWig build a
# reference-point matrix spanning 2000 bp on each side of the TSS and plot it.
#
# Fixes relative to the earlier version:
#   * the loop was never closed with "done";
#   * every "\" continuation was followed by a blank line, which ends the
#     command, so the options were run as separate commands;
#   * "matrix_...gz\" had no space before the backslash and would have been
#     glued to the following option;
#   * --plotTitle was single-quoted, so the title was the literal text
#     ${sample} instead of the sample name.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdpeaks/TSS

inputdir=/home/yc17628/Project/NSD3/ChIPseq/rmdpeaks/bw

for bw in "$inputdir"/*.bw; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$bw" ]] || continue

  sample=$(basename "$bw" .bw)
  echo "${sample}"
  ls "$bw"

  computeMatrix reference-point \
    -S "$bw" \
    -R /home/yc17628/RefData/deeptools/ref.bed \
    --referencePoint TSS \
    -a 2000 -b 2000 \
    --skipZeros \
    -out "matrix_${sample}_TSS.gz" \
    --outFileSortedRegions "regions_${sample}_TSS_2K.bed"

  plotHeatmap \
    -m "matrix_${sample}_TSS.gz" \
    -out "${sample}_TSS.png" \
    --heatmapHeight 15 \
    --refPointLabel TSS \
    --regionsLabel genes \
    --plotTitle "${sample}"
done

# ChIP-seq signal heatmaps over scaled regions (deepTools): for every bigWig
# build a scale-regions matrix (3000 bp flanks, 5000 bp scaled body) and plot
# it with profile, heatmap and colorbar.
#
# The "\" continuations were each followed by a blank line, which ends the
# command early; the options are now real continuation lines.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdpeaks/Region

inputdir=/home/yc17628/Project/NSD3/ChIPseq/rmdpeaks/bw

for bw in "$inputdir"/*.bw; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$bw" ]] || continue

  sample=$(basename "$bw" .bw)
  echo "${sample}"
  ls "$bw"

  computeMatrix scale-regions \
    -S "$bw" \
    -R /home/yc17628/RefData/deeptools/ref.bed \
    --beforeRegionStartLength 3000 \
    --regionBodyLength 5000 \
    --afterRegionStartLength 3000 \
    --skipZeros \
    -o "matrix_${sample}_region.gz"

  plotHeatmap \
    -m "matrix_${sample}_region.gz" \
    -out "${sample}_region.png" \
    --whatToShow "plot, heatmap and colorbar"
done

# HOMER: motif discovery and peak annotation on each <prefix>_idr.bed file.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdpeaks_0.1/IDR

# The prefix is the first three "_"-separated fields of the name before the
# first ".".
ls *.bed | cut -d"." -f 1 | cut -d"_" -f 1-3 | sort -u | while read prefix; do
  name=${prefix##*/}
  peaks=${name}_idr.bed

  ls ${peaks}

  findMotifsGenome.pl ${peaks} hg38 ${name}_motifDir/ -size 200 -mask
  annotatePeaks.pl ${peaks} hg38 > ${name}.peak_related_genes.txt
done

# BETA: combine the sgControl H2AZ IDR peaks with the Limma differential
# expression list (basic and plus modes).
#
# The "basic" command had its option dashes mangled into "ñ" (ñe, ñk, ñg, ñn)
# and "--da500" fused into one token; they are restored to -e, -k, -g, -n and
# "--da 500", matching the option spelling of the "plus" command.
conda activate python2.7
cd /home/yc17628/Project/NSD3/ChIPseq/BETA

ref=/home/yc17628/RefData/Cellranger/refdata-cellranger-GRCh38/fasta

BETA basic \
  -p sgControl_H2AZ_ChIP_idr.bed \
  -e Limma_DEG_list.csv \
  -k LIM \
  -g hg38 \
  --da 500 \
  -n sgControl \
  --gname2

BETA plus \
  -p sgControl_H2AZ_ChIP_idr.bed \
  -e Limma_DEG_list.csv \
  -k LIM \
  -g hg38 \
  --gs "$ref/genome.fa" \
  --bl \
  -n sgControl \
  --gname2

# MACS3 broad peak calling (--broad, cutoff 0.1): every BAM prefix of a group
# is called against that group's input BAM.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/broad_peaks

# sgControl samples
ls sgControl*R*.bam | cut -d"." -f 1 | sort -u | while read prefix; do
  name=${prefix##*/}
  echo ${name}
  macs3 callpeak \
    -t ${name}.rmdup.bam \
    -c sgControl_ChIP_input_R.rmdup.bam \
    -n ${name} \
    -f BAM -g hs \
    --outdir $outputdir \
    --broad --broad-cutoff 0.1
done

cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/broad_peaks

# sgNSD3 samples
ls sgNSD3*R*.bam | cut -d"." -f 1 | sort -u | while read prefix; do
  name=${prefix##*/}
  echo ${name}
  macs3 callpeak \
    -t ${name}.rmdup.bam \
    -c sgNSD3_ChIP_input_R.rmdup.bam \
    -n ${name} \
    -f BAM -g hs \
    --outdir $outputdir \
    --broad --broad-cutoff 0.1
done

# MACS3 narrow ("TF") peak calling with -q 0.1 and bedGraph output (-B):
# every BAM of a group is called against that group's input BAM.
#
# Fixes relative to the earlier version:
#   * a second "-n test" followed "-n ${sample}", so every sample was written
#     under the same "test" prefix and overwrote the previous one;
#   * the sgNSD3 control file name was misspelled "ssgNSD3...".
#
# NOTE(review): the control BAM names carry a "_BMRC..." suffix here, while
# other sections of this pipeline use <group>_ChIP_input_R.rmdup.bam -
# confirm which files actually exist.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/TF_peaks_0.1
mkdir -p "$outputdir"

for bam in sgControl*R*.rmdup.bam; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$bam" ]] || continue

  sample=${bam%.rmdup.bam}
  echo "${sample}"

  macs3 callpeak \
    -t "$bam" \
    -c sgControl_ChIP_input_R_BMRC210005804-1A.rmdup.bam \
    -n "${sample}" \
    -f BAM -g hs \
    --outdir "$outputdir" \
    -B -q 0.1
done

for bam in sgNSD3*R*.rmdup.bam; do
  [[ -e "$bam" ]] || continue

  sample=${bam%.rmdup.bam}
  echo "${sample}"

  macs3 callpeak \
    -t "$bam" \
    -c sgNSD3_ChIP_input_R_BMRC210005807-1A.rmdup.bam \
    -n "${sample}" \
    -f BAM -g hs \
    --outdir "$outputdir" \
    -B -q 0.1
done

# Overlap the H2AZ and NSD3 differential regions (Down and Up separately),
# annotate the overlaps with HOMER and print line counts of the files involved.
bedtools intersect \
  -a ChIP_H2AZ.NvsC.Down.bed \
  -b ChIP_NSD3.NvsC.Down.bed \
  -wo > H2AZ_NSD3_Down_overlap.bed

bedtools intersect \
  -a ChIP_H2AZ.NvsC.Up.bed \
  -b ChIP_NSD3.NvsC.Up.bed \
  -wo > H2AZ_NSD3_Up_overlap.bed

annotatePeaks.pl H2AZ_NSD3_Down_overlap.bed hg38 \
  > H2AZ_NSD3_Down_overlap_related_genes.txt

annotatePeaks.pl H2AZ_NSD3_Up_overlap.bed hg38 \
  > H2AZ_NSD3_Up_overlap_related_genes.txt

# One wc call per file, so no "total" line is printed.
for counted in \
  ChIP_H2AZ.NvsC.Down.bed \
  ChIP_NSD3.NvsC.Down.bed \
  H2AZ_NSD3_Down_overlap.bed \
  ChIP_H2AZ.NvsC.diff.txt \
  ChIP_NSD3.NvsC.diff.txt \
  ChIP_H2AZ.NvsC.Up.bed \
  ChIP_NSD3.NvsC.Up.bed \
  H2AZ_NSD3_Up_overlap.bed; do
  wc -l $counted
done

# Turn each narrowPeak file into a merged BED (bedtools merge) and annotate
# the resulting *peaks.bed files with HOMER.
cd /home/yc17628/Project/NSD3/ChIPseq/TF_peaks_0.1

# <name>.narrowPeak -> <name>.bed, where <name> is the text before the first ".".
ls *.narrowPeak | while read peakfile; do
  name=$(basename $peakfile | cut -d"." -f 1)
  echo ${name}
  bedtools merge -i ${peakfile} > ${name}.bed
done

cd /home/yc17628/Project/NSD3/ChIPseq/TF_peaks_0.1
outputdir=/home/yc17628/Project/NSD3/ChIPseq/TF_peaks_0.1/annotate

ls *peaks.bed | cut -d"." -f 1 | sort -u | while read peakset; do
  ls ${peakset}.bed
  annotatePeaks.pl ${peakset}.bed hg38 > $outputdir/${peakset}_related_genes.txt
done

# deepTools genome-wide summaries in 500 bp bins, used as input for the
# correlation / PCA plots: one from the bigWigs, one from the de-duplicated BAMs.
#
# Fixes relative to the earlier version:
#   * every "\" continuation was followed by a blank line, which ends the
#     command, so the file lists and output options were run as separate
#     commands;
#   * the bigWig run wrote counts.tab into the working directory while the
#     BAM run wrote it into $outputdir; both now use $outputdir;
#   * $outputdir is created if it does not exist.

# --- bigWig summary ---------------------------------------------------------
cd /home/yc17628/Project/NSD3/ChIPseq/bw
outputdir=/home/yc17628/Project/NSD3/ChIPseq/bw/bs500
mkdir -p "$outputdir"

multiBigwigSummary bins -bs 500 \
  --labels sgControl_H2AZ_R1 sgControl_H2AZ_R2 sgControl_NSD3_R1 sgControl_NSD3_R2 sgControl_input \
           sgNSD3_H2AZ_R1 sgNSD3_H2AZ_R2 sgNSD3_NSD3_R1 sgNSD3_NSD3_R2 sgNSD3_input \
  -b sgControl_H2AZ_ChIP_R1.bw sgControl_H2AZ_ChIP_R2.bw sgControl_NSD3_ChIP_R1.bw sgControl_NSD3_ChIP_R2.bw sgControl_ChIP_input_R.bw \
     sgNSD3_H2AZ_ChIP_R1.bw sgNSD3_H2AZ_ChIP_R2.bw sgNSD3_NSD3_ChIP_R1.bw sgNSD3_NSD3_ChIP_R2.bw sgNSD3_ChIP_input_R.bw \
  -o "$outputdir/results.npz" \
  --outRawCounts "$outputdir/counts.tab"

# --- BAM summary ------------------------------------------------------------
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/rmdup/bs500
mkdir -p "$outputdir"

multiBamSummary bins -bs 500 \
  --labels sgControl_H2AZ_R1 sgControl_H2AZ_R2 sgControl_NSD3_R1 sgControl_NSD3_R2 sgControl_input \
           sgNSD3_H2AZ_R1 sgNSD3_H2AZ_R2 sgNSD3_NSD3_R1 sgNSD3_NSD3_R2 sgNSD3_input \
  -b sgControl_H2AZ_ChIP_R1.rmdup.bam sgControl_H2AZ_ChIP_R2.rmdup.bam sgControl_NSD3_ChIP_R1.rmdup.bam sgControl_NSD3_ChIP_R2.rmdup.bam sgControl_ChIP_input_R.rmdup.bam \
     sgNSD3_H2AZ_ChIP_R1.rmdup.bam sgNSD3_H2AZ_ChIP_R2.rmdup.bam sgNSD3_NSD3_ChIP_R1.rmdup.bam sgNSD3_NSD3_ChIP_R2.rmdup.bam sgNSD3_ChIP_input_R.rmdup.bam \
  -o "$outputdir/results.npz" \
  --outRawCounts "$outputdir/counts.tab"

# Correlation heatmaps and PCA plots (deepTools) from the results.npz found in
# each summary directory. The same Spearman heatmap + PCA pair is produced in
# three directories; the shell stays in the last one afterwards.
conda activate bowtie2

for npz_dir in \
  /home/yc17628/Project/NSD3/Tag/workdir/aligned/dedup/bs50 \
  /home/yc17628/Project/NSD3/Tag/workdir/aligned/dedup/bs500 \
  /home/yc17628/Project/NSD3/ChIPseq/rmdup/bs50; do
  cd $npz_dir

  plotCorrelation \
    -in results.npz \
    --corMethod spearman \
    --skipZeros \
    --plotTitle "Spearman Correlation of Read Counts" \
    --whatToPlot heatmap \
    --colorMap RdYlBu \
    --plotNumbers \
    -o heatmap_SpearmanCorr.pdf \
    --outFileCorMatrix SpearmanCorr_readCounts.tab

  plotPCA -in results.npz -o PCA_readCounts.png -T "PCA of read counts"
done

# bw score: Pearson correlation, run in whichever directory is current here.
# NOTE(review): the output is named scatterplot_* although --whatToPlot is
# heatmap - confirm which plot type was intended.
plotCorrelation \
  -in results.npz \
  --corMethod pearson \
  --skipZeros \
  --plotTitle "Pearson Correlation of Average Scores Per Transcript" \
  --whatToPlot heatmap \
  --colorMap RdYlBu \
  --plotNumbers \
  -o scatterplot_PearsonCorr.pdf \
  --outFileCorMatrix PearsonCorr_bigwigScores.tab

# Filter BED files for DANPOS: drop every record whose chromosome name
# contains "_" and write the rest to <sample>.filter.bed.
#
# Fixes relative to the earlier version:
#   * existing *.filter.bed files are skipped. On a rerun they matched *.bed
#     and were redirected onto themselves, which truncated them to empty files;
#   * the "_" test is applied to the chromosome column only, so an underscore
#     in another column (e.g. a read name) no longer removes a record.
for bed in *.bed; do
  # An unmatched glob stays literal; skip it.
  [[ -e "$bed" ]] || continue

  # Never use a previous output as input.
  [[ "$bed" == *.filter.bed ]] && continue

  sample=$(basename "$bed" | cut -d '.' -f 1)

  echo "${sample}"
  ls "$bed"

  awk '$1 !~ /_/' "$bed" > "${sample}.filter.bed"
done

# DANPOS on the H2AZ ChIP BED directories, sgNSD3 vs sgControl: the dpos,
# dpeak and dregion sub-commands are run with the same arguments and write to
# danpos_pos_*, danpos_peak_* and danpos_region_* respectively.
# The -b argument pairs each sample directory with a *.filter.bed input file.
conda activate danpos3
cd /home/yc17628/App/DANPOS3

workdir=/home/yc17628/Project/NSD3/ChIPseq/bed

for analysis in pos peak region; do
  python danpos.py d${analysis} \
    $workdir/sgNSD3_H2AZ/:$workdir/sgControl_H2AZ/ \
    -s 0 \
    -o $workdir/danpos_${analysis}_ChIP_H2AZ \
    -b $workdir/sgNSD3_H2AZ/:$workdir/sgNSD3_ChIP_input_R.filter.bed,$workdir/sgControl_H2AZ/:$workdir/sgControl_ChIP_input_R.filter.bed
done

# DANPOS "stat" on the pooled H2AZ results of the dpos, dpeak and dregion
# runs (positions / peaks / regions tables), sgNSD3 vs sgControl.
conda activate danpos3
cd /home/yc17628/App/DANPOS3

# Positions (dpos output)
workdir1=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_pos_ChIP_H2AZ/pooled
python danpos.py stat \
  $workdir1/home_yc17628_Project_NSD3_ChIPseq_bed_sgNSD3_H2AZ.bgsub.Fnor.smooth.positions.xls,$workdir1/home_yc17628_Project_NSD3_ChIPseq_bed_sgControl_H2AZ.bgsub.Fnor.smooth.positions.xls \
  sgNSD3,sgControl \
  --plot_colors red,blue \
  --name $workdir1/stat

# Peaks (dpeak output)
workdir2=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_peak_ChIP_H2AZ/pooled
python danpos.py stat \
  $workdir2/home_yc17628_Project_NSD3_ChIPseq_bed_sgNSD3_H2AZ.bgsub.Fnor.smooth.peaks.xls,$workdir2/home_yc17628_Project_NSD3_ChIPseq_bed_sgControl_H2AZ.bgsub.Fnor.smooth.peaks.xls \
  sgNSD3,sgControl \
  --plot_colors red,blue \
  --name $workdir2/stat

# Regions (dregion output)
workdir3=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_region_ChIP_H2AZ/pooled
python danpos.py stat \
  $workdir3/home_yc17628_Project_NSD3_ChIPseq_bed_sgNSD3_H2AZ.bgsub.Fnor.smooth.regions.xls,$workdir3/home_yc17628_Project_NSD3_ChIPseq_bed_sgControl_H2AZ.bgsub.Fnor.smooth.regions.xls \
  sgNSD3,sgControl \
  --plot_colors red,blue \
  --name $workdir3/stat

# DANPOS on the NSD3 ChIP BED directories, sgNSD3 vs sgControl: dpos, dpeak
# and dregion. The dregion run additionally passes -rd 5000 and writes to a
# directory with a "_5000kb" suffix.
# The -b argument pairs each sample directory with a *.filter.bed input file.
conda activate danpos3
cd /home/yc17628/App/DANPOS3

workdir=/home/yc17628/Project/NSD3/ChIPseq/bed

python danpos.py dpos \
  $workdir/sgNSD3_NSD3/:$workdir/sgControl_NSD3/ \
  -s 0 \
  -o $workdir/danpos_pos_ChIP_NSD3 \
  -b $workdir/sgNSD3_NSD3/:$workdir/sgNSD3_ChIP_input_R.filter.bed,$workdir/sgControl_NSD3/:$workdir/sgControl_ChIP_input_R.filter.bed

python danpos.py dpeak \
  $workdir/sgNSD3_NSD3/:$workdir/sgControl_NSD3/ \
  -s 0 \
  -o $workdir/danpos_peak_ChIP_NSD3 \
  -b $workdir/sgNSD3_NSD3/:$workdir/sgNSD3_ChIP_input_R.filter.bed,$workdir/sgControl_NSD3/:$workdir/sgControl_ChIP_input_R.filter.bed

python danpos.py dregion \
  $workdir/sgNSD3_NSD3/:$workdir/sgControl_NSD3/ \
  -s 0 \
  -o $workdir/danpos_region_ChIP_NSD3_5000kb \
  -rd 5000 \
  -b $workdir/sgNSD3_NSD3/:$workdir/sgNSD3_ChIP_input_R.filter.bed,$workdir/sgControl_NSD3/:$workdir/sgControl_ChIP_input_R.filter.bed

# DANPOS "stat" on the pooled NSD3 results (positions / peaks / regions
# tables), sgNSD3 vs sgControl.
conda activate danpos3
cd /home/yc17628/App/DANPOS3

# Positions (dpos output)
workdir1=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_pos_ChIP_NSD3/pooled
python danpos.py stat \
  $workdir1/home_yc17628_Project_NSD3_ChIPseq_bed_sgNSD3_NSD3.bgsub.Fnor.smooth.positions.xls,$workdir1/home_yc17628_Project_NSD3_ChIPseq_bed_sgControl_NSD3.bgsub.Fnor.smooth.positions.xls \
  sgNSD3,sgControl \
  --plot_colors red,blue \
  --name $workdir1/stat

# Peaks (dpeak output)
workdir2=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_peak_ChIP_NSD3/pooled
python danpos.py stat \
  $workdir2/home_yc17628_Project_NSD3_ChIPseq_bed_sgNSD3_NSD3.bgsub.Fnor.smooth.peaks.xls,$workdir2/home_yc17628_Project_NSD3_ChIPseq_bed_sgControl_NSD3.bgsub.Fnor.smooth.peaks.xls \
  sgNSD3,sgControl \
  --plot_colors red,blue \
  --name $workdir2/stat

# Regions
# NOTE(review): this reads danpos_region_ChIP_NSD3/pooled, whereas the NSD3
# dregion command in this file writes to danpos_region_ChIP_NSD3_5000kb -
# confirm which directory is intended.
workdir3=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_region_ChIP_NSD3/pooled
python danpos.py stat \
  $workdir3/home_yc17628_Project_NSD3_ChIPseq_bed_sgNSD3_NSD3.bgsub.Fnor.smooth.regions.xls,$workdir3/home_yc17628_Project_NSD3_ChIPseq_bed_sgControl_NSD3.bgsub.Fnor.smooth.regions.xls \
  sgNSD3,sgControl \
  --plot_colors red,blue \
  --name $workdir3/stat

# DANPOS "profile" with heatmaps for the pooled NSD3 wig files of the pos,
# peak and region result directories (sgNSD3 vs sgControl).
conda activate danpos3
cd /home/yc17628/App/DANPOS3

workdir1=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_pos_ChIP_NSD3/pooled
python danpos.py profile \
  $workdir1/sgNSD3_NSD3.wig,$workdir1/sgControl_NSD3.wig \
  --wigfile_aliases sgNSD3,sgControl \
  --genefile_paths gene \
  --heatmap 1 \
  --plot_colors red,blue \
  --name $workdir1/profile

workdir2=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_peak_ChIP_NSD3/pooled
python danpos.py profile \
  $workdir2/sgNSD3_NSD3.wig,$workdir2/sgControl_NSD3.wig \
  --wigfile_aliases sgNSD3,sgControl \
  --genefile_paths gene \
  --heatmap 1 \
  --plot_colors red,blue \
  --name $workdir2/profile

workdir3=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_region_ChIP_NSD3/pooled
python danpos.py profile \
  $workdir3/sgNSD3_NSD3.wig,$workdir3/sgControl_NSD3.wig \
  --wigfile_aliases sgNSD3,sgControl \
  --genefile_paths gene \
  --heatmap 1 \
  --plot_colors red,blue \
  --name $workdir3/profile

# DANPOS "profile" around TSS (--genomic_sites TSS) with heatmaps for the
# pooled H2AZ wig files of the peak and region result directories.
conda activate danpos3
cd /home/yc17628/App/DANPOS3

workdir2=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_peak_ChIP_H2AZ/pooled
python danpos.py profile \
  $workdir2/sgNSD3_H2AZ.wig,$workdir2/sgControl_H2AZ.wig \
  --wigfile_aliases sgNSD3,sgControl \
  --genomic_sites TSS \
  --genefile_paths gene \
  --heatmap 1 \
  --plot_colors red,blue \
  --name $workdir2/profile

workdir3=/home/yc17628/Project/NSD3/ChIPseq/bed/danpos_region_ChIP_H2AZ/pooled
python danpos.py profile \
  $workdir3/sgNSD3_H2AZ.wig,$workdir3/sgControl_H2AZ.wig \
  --wigfile_aliases sgNSD3,sgControl \
  --genomic_sites TSS \
  --genefile_paths gene \
  --heatmap 1 \
  --plot_colors red,blue \
  --name $workdir3/profile

# MACS3 peak calling with -q 0.1, bedGraph output (-B) and --min-length 500:
# every BAM of a group is called against that group's input BAM.
#
# Fixes relative to the earlier version:
#   * the sgNSD3 control file name was misspelled "ssgNSD3...", a file that
#     no other step of this pipeline produces;
#   * "-n ${sample}" was passed twice; it is now given once.
#
# NOTE(review): the section is titled "10K" and the directory is named
# Broad_peaks_10k, but the call uses --min-length 500 and no --broad. A later
# section also reads from "broad_peaks_10k" (lower-case b) - confirm the
# intended parameters and directory name.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/Broad_peaks_10k
mkdir -p "$outputdir"

for group in sgControl sgNSD3; do
  control=${group}_ChIP_input_R.rmdup.bam

  for bam in "${group}"*R*.rmdup.bam; do
    # An unmatched glob stays literal; skip it.
    [[ -e "$bam" ]] || continue

    sample=${bam%.rmdup.bam}
    echo "${sample}"

    macs3 callpeak \
      -t "$bam" \
      -c "$control" \
      -n "${sample}" \
      -f BAM -g hs \
      --outdir "$outputdir" \
      -B -q 0.1 \
      --min-length 500
  done
done

# MACS3 broad peak calling (--broad, cutoff 0.1) with --min-length 5000:
# every BAM prefix of a group is called against that group's input BAM.
cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/broad_peaks_5k

# sgControl samples
ls sgControl*R*.bam | cut -d"." -f 1 | sort -u | while read prefix; do
  name=${prefix##*/}
  echo ${name}
  macs3 callpeak \
    -t ${name}.rmdup.bam \
    -c sgControl_ChIP_input_R.rmdup.bam \
    -n ${name} \
    -f BAM -g hs \
    --outdir $outputdir \
    --broad --broad-cutoff 0.1 \
    --min-length 5000
done

cd /home/yc17628/Project/NSD3/ChIPseq/rmdup
outputdir=/home/yc17628/Project/NSD3/ChIPseq/broad_peaks_5k

# sgNSD3 samples
ls sgNSD3*R*.bam | cut -d"." -f 1 | sort -u | while read prefix; do
  name=${prefix##*/}
  echo ${name}
  macs3 callpeak \
    -t ${name}.rmdup.bam \
    -c sgNSD3_ChIP_input_R.rmdup.bam \
    -n ${name} \
    -f BAM -g hs \
    --outdir $outputdir \
    --broad --broad-cutoff 0.1 \
    --min-length 5000
done

#### Combining the replicates
# Concatenate the R1 and R2 narrowPeak files of each sample, then sort and
# merge the combined file into <sample>_merged.bed.

# The first three "_"-separated fields of a narrowPeak name are the sample prefix.
ls *.narrowPeak | cut -d"_" -f 1-3 | sort -u | while read prefix; do
  name=${prefix##*/}
  first=${name}_R1_peaks.narrowPeak
  second=${name}_R2_peaks.narrowPeak

  ls ${first}
  ls ${second}

  cat ${first} ${second} > ${name}_combined.narrowPeak
done

ls *_combined.narrowPeak | cut -d"_" -f 1-3 | sort -u | while read prefix; do
  name=${prefix##*/}
  sort -k1,1 -k2,2n ${name}_combined.narrowPeak \
    | bedtools merge -i - \
    > ${name}_merged.bed
done

#### Looking for differences in enrichment between sgControl and sgNSD3
# bedtools intersect -v keeps the records of -a that have no overlap in -b,
# which gives the peaks found in only one of the two groups.

# H2AZ
bedtools intersect \
  -a sgControl_H2AZ_ChIP_merged.bed \
  -b sgNSD3_H2AZ_ChIP_merged.bed \
  -v > sgControl_H2AZ_only_peaks.bed

bedtools intersect \
  -a sgNSD3_H2AZ_ChIP_merged.bed \
  -b sgControl_H2AZ_ChIP_merged.bed \
  -v > sgNSD3_H2AZ_only_peaks.bed

# NSD3
bedtools intersect \
  -a sgControl_NSD3_ChIP_merged.bed \
  -b sgNSD3_NSD3_ChIP_merged.bed \
  -v > sgControl_NSD3_only_peaks.bed

bedtools intersect \
  -a sgNSD3_NSD3_ChIP_merged.bed \
  -b sgControl_NSD3_ChIP_merged.bed \
  -v > sgNSD3_NSD3_only_peaks.bed

# One wc call per file, so no "total" line is printed.
for counted in \
  sgControl_H2AZ_only_peaks.bed \
  sgNSD3_H2AZ_only_peaks.bed \
  sgControl_NSD3_only_peaks.bed \
  sgNSD3_NSD3_only_peaks.bed; do
  wc -l $counted
done

# Combine the H2AZ peaks of the two groups into one sorted, merged peak set,
# then list the genes (allgene.bed) that overlap none of those peaks.
#
# The file names that used to stand alone on their own lines at the end of
# this section were notes, but the shell executed them as commands; they are
# now comments.
cat sgControl_H2AZ_ChIP_merged.bed sgNSD3_H2AZ_ChIP_merged.bed > H2AZ_peak.bed

sort -k1,1 -k2,2n H2AZ_peak.bed | bedtools merge -i - > H2AZ_peak_merged.bed

wc -l H2AZ_peak_merged.bed

# H2AZ non-enrichment region: records of allgene.bed without any H2AZ peak.
bedtools intersect -a allgene.bed -b H2AZ_peak_merged.bed -v > H2AZ_Non_enrich.bed

wc -l H2AZ_Non_enrich.bed

wc -l allgene.bed

# H2AZ enrich-region files:
#   sgControl_H2AZ_only_peaks.bed
#   sgNSD3_H2AZ_only_peaks.bed
#   H2AZ_peak_merged.bed

# Combine the NSD3 peaks of the two groups into one sorted, merged peak set,
# then list the genes (allgene.bed) that overlap none of those peaks.
#
# The file names that used to stand alone on their own lines at the end of
# this section were notes, but the shell executed them as commands; they are
# now comments.
cd /home/yc17628/Project/NSD3/ChIPseq/broad_peaks_10k/bed

cat sgControl_NSD3_ChIP_merged.bed sgNSD3_NSD3_ChIP_merged.bed > NSD3_peak.bed

sort -k1,1 -k2,2n NSD3_peak.bed | bedtools merge -i - > NSD3_peak_merged.bed

wc -l NSD3_peak_merged.bed

# NSD3 non-enrichment region: records of allgene.bed without any NSD3 peak.
bedtools intersect -a allgene.bed -b NSD3_peak_merged.bed -v > NSD3_Non_enrich.bed

wc -l NSD3_Non_enrich.bed

wc -l allgene.bed

# Summary of the region files:
#   H2AZ non-enrich region:
#     H2AZ_Non_enrich.bed
#   NSD3 enrich region:
#     NSD3_peak_merged.bed
#     sgControl_NSD3_only_peaks.bed
#     sgNSD3_NSD3_only_peaks.bed
#   NSD3 non-enrich region:
#     NSD3_Non_enrich.bed

# Annotate every BED file of this directory with HOMER annotatePeaks.pl (hg38)
# and write the table as <prefix>.txt into the annotate sub-directory.
cd /home/yc17628/Project/NSD3/ChIPseq/broad_peaks_10k/bed
outputdir=/home/yc17628/Project/NSD3/ChIPseq/broad_peaks_10k/bed/annotate

# The text before the first "." of each file name is the prefix.
ls *.bed | cut -d"." -f 1 | sort -u | while read regionset; do
  ls ${regionset}.bed
  annotatePeaks.pl ${regionset}.bed hg38 > $outputdir/${regionset}.txt
done

# You may also be interested in: (ChIPseq pipeline)