hisat2+stringtie+deseq2分析RNA-SEQ数据

hisat2+stringtie

for ((i=2064449;i<2064457;i++)); 

do 

hisat2 -p 2 --dta -x ~/RNASEQ/index/grch38_tran/genome_tran -U ~/ribosome/GSE69923/SRR${i}.rmadapt.fq -S SRR${i}.sam;

 samtools sort -@ 2 -o SRR${i}.bam SRR${i}.sam;

 samtools index -@ 2 SRR${i}.bam

stringtie -p 2 -G ~/RNASEQ/index/grch38_tran/Homo_sapiens.GRCh38.84.gtf -o SRR${i}.gtf -A SRR${i}.tab -B -e -l SRR${i} SRR${i}.bam

done


prepDE.py

生成DEseq2能够读取的read count 矩阵

python ~/Software/prepDE.py -i gtflist.txt -g countRes/gene_count.csv -t countRes/transcript.csv

附:gtflist.txt格式:

SRR3469478    ./SRR3469478.gtf

SRR3469479      ./SRR3469479.gtf

SRR4421540      ./SRR4421540.gtf

SRR4421541      ./SRR4421541.gtf


DEseq2差异分析的R代码


args<-commandArgs(TRUE)

library(DESeq2)

library(BiocParallel)

register(MulticoreParam(8))

database=read.csv("transcript_count_matrix.csv",header = T,row.names = 1)

condition <- factor(c(rep("control",args[1]),rep("treat",args[2])))

coldata <- data.frame(row.names = colnames(database), condition)

dds <- DESeqDataSetFromMatrix(countData=database, colData=coldata, design=~condition)

dds <- dds[ rowSums(counts(dds)) > 1, ]

nrow(dds)

dds <- DESeq(dds,parallel = T)

res <- results(dds)

summary(res)

count_r <- counts(dds, normalized=T)

table(res$padj<0.01)

res <- res[order(res$padj),]

resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)),by="row.names",sort=FALSE)

signresdata<-resdata[resdata$padj<0.01,]

write.csv(signresdata,file = "DE_results.csv")

write.csv(count_r,file = "read_counts.csv")

你可能感兴趣的:(hisat2+stringtie+deseq2分析RNA-SEQ数据)