rmats 可变剪切分析记录

create conda env

# Create and enter a dedicated conda environment named "rmats".
conda create -n rmats
conda activate rmats
# rmats and rmats2sashimiplot are distributed through Bioconda. Naming the
# channels here keeps the install from depending on whatever channels happen
# to be configured in ~/.condarc (conda-forge first, as Bioconda recommends).
conda install -c conda-forge -c bioconda rmats
conda install -c conda-forge -c bioconda rmats2sashimiplot

quality control

# Directory holding the raw paired-end reads, expected to be named
# <sample>_1.fastq.gz / <sample>_2.fastq.gz. Placeholder: fill in before running.
DIR=""
# Write one line per sample to sample.txt: the read path with everything from
# the last "_" onward removed. The shell glob replaces parsing `ls` output, so
# names containing spaces or backslashes survive; `sort -u` collapses the two
# mates of a sample even if they are not adjacent in the listing.
for fq in "$DIR"/*.fastq.gz; do
    [ -e "$fq" ] || continue # an unmatched glob is left as the literal pattern
    printf '%s\n' "${fq%_*}"
done | sort -u >sample.txt

# -p: create the output directory only if it is missing.
mkdir -p trim
sp="_"
suffix=".fastq.gz"
# Read the sample list line by line on fd 3 (no word splitting), keeping stdin
# free so a command in the loop body cannot swallow the remaining samples.
while IFS= read -r sample <&3; do
    trim_galore --paired "${sample}${sp}1${suffix}" "${sample}${sp}2${suffix}" --gzip -j 8 -o trim
done 3<sample.txt

mapping

# Align the trimmed read pairs of every sample listed in sample.txt and write
# one coordinate-sorted BAM per sample to bam/<sample>.bam.
# NOTE(review): bwa mem is not a splice-aware aligner; splicing analyses are
# normally fed alignments from a spliced aligner (e.g. STAR, HISAT2) — confirm
# this choice.
mkdir -p bam
# fd 3 carries the sample list so stdin stays untouched for the tools below.
while IFS= read -r line <&3; do
    # sample.txt holds path prefixes; keep only the last path component.
    sample=${line##*/}
    bwa mem -t 16 /home/reference/Dmelanogaster/UCSC/dm6/Sequence/BWAindex/dm6 \
        "trim/${sample}_1_val_1.fq.gz" "trim/${sample}_2_val_2.fq.gz" |
        samtools sort -@ 16 -o "bam/${sample}.bam" -
done 3<sample.txt

run rmats to find splicing events

# rmats.py takes each group as a text file holding a comma-separated BAM list:
# b1.txt = the shDEK replicates, b2.txt = the shNT replicates.
# NOTE(review): these BAM paths are relative to the directory rmats.py is run
# from, while the mapping step writes its BAMs under bam/ — confirm the working
# directory or prefix the paths.
printf '%s\n' "shDEK1.bam,shDEK2.bam" >b1.txt
printf '%s\n' "shNT1.bam,shNT2.bam" >b2.txt
# Path to the annotation GTF. Placeholder: fill in before running.
GTF=""
readLength=150
# An empty, unquoted $GTF would vanish from the command line and make --gtf
# consume "--od" as its value, so refuse to run until the placeholder is set
# and always pass the path quoted.
if [ -z "$GTF" ]; then
    echo "GTF is empty: set it to the annotation GTF path before running rmats.py" >&2
else
    rmats.py --b1 b1.txt --b2 b2.txt --gtf "$GTF" --od rmatsOut --tmp rmatsTemp \
        -t paired --nthread 16 --readLength "$readLength" --novelSS
fi

filter splicing events

# Add gene symbols. The first file is a tab-separated lookup (column 1 -> column 2)
# loaded into memory. For every row of RI.MATS.JCEC.txt, double quotes are
# stripped from column 2; when the result is a key of the lookup, column 3 is
# overwritten with the mapped value. Rows are written back tab-separated.
# NOTE(review): the lookup table lives under an mm10 (mouse) directory while the
# mapping step uses a dm6 index — confirm the right annotation is used.
# NOTE(review): RI.MATS.JCEC.txt is read from the current directory; the rmats
# step passes --od rmatsOut — confirm where this is meant to be run.
awk '
    BEGIN { FS = OFS = "\t" }
    NR == FNR { id2symbol[$1] = $2; next }
    {
        gsub(/"/, "", $2)
        if ($2 in id2symbol) $3 = id2symbol[$2]
        print
    }
' /home/reference/Mmusculus/UCSC/mm10/Annotation/gene.refseq.to.symbol.txt RI.MATS.JCEC.txt >RI.MATS.JCEC.add.symbol.txt
# Keep the header line plus every row whose fourth-from-last field (the FDR
# column, per the original note) is below 0.05. Fields are split on awk's
# default whitespace separator here.
awk '
    NR == 1 { print }
    $(NF - 3) < 0.05 { print }
' RI.MATS.JCEC.add.symbol.txt >RI.MATS.JCEC.add.symbol.filtered.txt

visualization

# Plot the RI events listed in the events file for the two BAM groups, labelled
# shDEK and shNT, with output directed to RI_plot.
# NOTE(review): the filter step in these notes writes
# RI.MATS.JCEC.add.symbol.filtered.txt; nothing shown here creates
# rmatsOut/RI.MATS.JCEC.filtered.txt — confirm which events file is intended.
rmats2sashimiplot \
    --b1 shDEK1.bam,shDEK2.bam \
    --b2 shNT1.bam,shNT2.bam \
    -t RI \
    -e rmatsOut/RI.MATS.JCEC.filtered.txt \
    --l1 shDEK --l2 shNT \
    --exon_s 1 --intron_s 1 \
    -o RI_plot

# Leave the conda environment that was activated for this analysis.
conda deactivate

# NOTE(review): this second call looks like an accidental duplicate; only one
# `conda activate` appears in these notes, so it would step out of whatever
# environment was active before (e.g. base) — confirm it is wanted.
conda deactivate 

你可能感兴趣的:(rmats 可变剪切分析记录)