RNA-seq分析(上)DESeq&MA_src

##接featureCounts的基因表达水平的分析结果
suppressMessages(library(DESeq2))
setwd("F:/RNA_learning/")
rm(list=ls())
count_tab<-read.table("F:/RNA_learning/5.counts/counts",header=T)
##删除count_tab的1、2列
#count_tab<-count_tab[,-c(1,2)]
sampleNames <-c('SRR6791080','SRR6791081','SRR6791082',
                'SRR6791083','SRR6791084','SRR6791085')
#给7-12列修改列名
names(count_tab)[7:12]<-sampleNames
countMatrix<-as.matrix(count_tab[7:12])
rownames(countMatrix)<-count_tab$Geneid
head(countMatrix)
save(countMatrix,file='expr.Rdata')
#deal是处理后样本,control是野生型,基因表达应该是control比上deal,要把control放在前面
group_list <- c('control','control','control','deal','deal','deal')
colData<-data.frame(sampleNames,group_list)

colData$group_list = factor(colData$group_list,c('control','deal'))

##colData用于存放样本信息数据,design是实验设计,表示counts文件中每个基因与colData中变量的依赖关系。
dds<-DESeqDataSetFromMatrix(countData = countMatrix,
                            colData = colData,
                            design = ~ group_list)
#过滤掉那些 count 结果为0的数据,这些基因没有表达
dds <- dds[rowSums(counts(dds)) > 1,]
dds2<-DESeq(dds)
resultsNames(dds2) # lists the coefficients
res <- results(dds2, name="group_list_deal_vs_control")
res <- res[order(res$padj),]
resDF = as.data.frame(res)


##PCA
suppressMessages(library(ggplot2))
##PCA中归一化方法
vsd <- vst(dds, blind=FALSE)
pcaData <- plotPCA(vsd, intgroup=c("group_list"), returnData=TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
plotFig <- ggplot(pcaData, aes(PC1, PC2, color=group_list)) +
  geom_point(size=3) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
  coord_fixed()
##保存图片
ggsave(plotFig, filename = "yeast_DESeq2_PCA.pdf")
##MA
pdf("yeast_DESeq2_MAplot.pdf")
plotMA(res,ylim=c(-2,2))
dev.off

##筛选差异基因
##按行筛选非NA的数据
resDF_noNA <- resDF[complete.cases(resDF), ]
##筛选padj<=0.05且log2FoldChange>2或<-2的行
resDFfil = resDF_noNA[resDF_noNA$padj <= 0.05 & 
                        (resDF_noNA$log2FoldChange > 2 | resDF_noNA$log2FoldChange < -2)]

##热图pheatmap
nr_resDF=na.omit(resDF)
suppressMessages(library("pheatmap"))
##筛选差异基因
#第一种方法
select <- order(rowMeans(counts(dds2,normalized=TRUE)),decreasing=TRUE)[1:1000]
#第二种方法
select1 <- order(rowVars(counts(dds2,normalized=TRUE)),decreasing=TRUE)[1:1000]
#归一化
nt <- normTransform(dds2) # defaults to log2(x+1)
#提取归一化后的基因
log2.norm.counts <- assay(nt)[select,]
df <- as.data.frame(colData(dds2))
# pdf('heatmap1000.pdf',width = 6, height = 7)  
pheatmap(log2.norm.counts, cluster_rows=TRUE, show_rownames=FALSE,
         cluster_cols=TRUE, annotation_col=df)

你可能感兴趣的:(RNA-seq分析(上)DESeq&MA_src)