全基因组CNV分析

建立人基因组windows,计算GC含量

#!/bin/bash
mkdir bed
(
#子shell
cd bed
genome="hg38"
# fetch chromosome size
if [ ! -f ${genome}.chrom.sizes ]; then
    fetchChromSizes $genome > ${genome}.chrom.sizes
fi
binsize=200000
stepsize=40000
fasta="/home/reference/Hsapiens/UCSC/hg38/Sequence/WholeGenomeFasta/hg38.fa"
# make windows
bedtools makewindows -g ${genome}.chrom.sizes -w $binsize -s $stepsize >${genome}_binsize_${binsize}.bed
# calculate GC content
bedtools nuc -fi $fasta -bed ${genome}_binsize_${binsize}.bed | cut -f 1-3,5 > ${genome}_binsize_${binsize}_gc.bed
)

质控

DIR=/home/maosong/replication/paper/rawdata
sp="_"
suffix=".fastq.gz"
ls $DIR/*$suffix | while read line; do echo ${line%_*}; done | uniq >sample.txt

if [ ! -d trim ]; then
    mkdir trim
fi

for sample in $(cat sample.txt); do
    trim_galore --paired ${sample}${sp}1${suffix} ${sample}${sp}2${suffix} --gzip -j 8 -o trim
done

比对

if [ ! -d bam ]; then
    mkdir bam
fi
REF=/home/reference/Hsapiens/UCSC/hg38/Sequence/BwaIndex/hg38
for line in $(cat sample.txt); do
    sample=$(basename $line)
    bwa mem -t 16 $REF trim/${sample}_1_val_1.fq.gz trim/${sample}_2_val_2.fq.gz | samtools sort -@ 16 -o bam/${sample}.bam -
done

过滤掉比对不上的reads

for line in $(cat sample.txt); do
    sample=$(basename $line)
    samtools view -b -h -F 4 bam/${sample}.bam >bam/${sample}.mapped.bam
done

合并bam, 可选操作

(
cd bam
samtools merge G1.mapped.bam SRR6491823.mapped.bam SRR6491824.mapped.bam
samtools merge MidS.mapped.bam SRR6491825.mapped.bam SRR6491826.mapped.bam
)
for sample in G1 MidS; do
    bedtools coverage -a bed/${genome}_binsize_${binsize}_gc.bed -b bam/${sample}.mapped.bam > depth/${sample}.depth.txt
done

计算每个bin的counts数

for line in $(cat sample.txt); do
    sample=$(basename $line)
    bedtools coverage -a bed/${genome}_binsize_${binsize}_gc.bed -b bam/${sample}_mapped.bam > depth/${sample}.depth.txt
done

你可能感兴趣的:(全基因组CNV分析)