hisat2+stringtie
for ((i=2064449;i<2064457;i++));
do
hisat2 -p 2 --dta -x ~/RNASEQ/index/grch38_tran/genome_tran -U ~/ribosome/GSE69923/SRR${i}.rmadapt.fq -S SRR${i}.sam;
samtools sort -@ 2 -o SRR${i}.bam SRR${i}.sam;
samtools index -@ 2 SRR${i}.bam
stringtie -p 2 -G ~/RNASEQ/index/grch38_tran/Homo_sapiens.GRCh38.84.gtf -o SRR${i}.gtf -A SRR${i}.tab -B -e -l SRR${i} SRR${i}.bam
done
prepDE.py
生成DEseq2能够读取的read count 矩阵
python ~/Software/prepDE.py -i gtflist.txt -g countRes/gene_count.csv -t countRes/transcript.csv
附:gtflist.txt格式:
SRR3469478 ./SRR3469478.gtf
SRR3469479 ./SRR3469479.gtf
SRR4421540 ./SRR4421540.gtf
SRR4421541 ./SRR4421541.gtf
DEseq2差异分析的R代码
args<-commandArgs(TRUE)
library(DESeq2)
library(BiocParallel)
register(MulticoreParam(8))
database=read.csv("transcript_count_matrix.csv",header = T,row.names = 1)
condition <- factor(c(rep("control",args[1]),rep("treat",args[2])))
coldata <- data.frame(row.names = colnames(database), condition)
dds <- DESeqDataSetFromMatrix(countData=database, colData=coldata, design=~condition)
dds <- dds[ rowSums(counts(dds)) > 1, ]
nrow(dds)
dds <- DESeq(dds,parallel = T)
res <- results(dds)
summary(res)
count_r <- counts(dds, normalized=T)
table(res$padj<0.01)
res <- res[order(res$padj),]
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)),by="row.names",sort=FALSE)
signresdata<-resdata[resdata$padj<0.01,]
write.csv(signresdata,file = "DE_results.csv")
write.csv(count_r,file = "read_counts.csv")