htseq-count提取gene count
# Count reads per gene for the three B replicates with htseq-count.
#   -r pos : the input alignments are declared as sorted by position
#   -f bam : the input files are in BAM format
# Each replicate's table is redirected to ./B-<i>.count.
# Only the B-* samples are handled by this loop; the S-*.count and T-*.count
# files that the merge step reads have to be produced separately.
for i in 1 2 3; do
    htseq-count -r pos -f bam "./B-${i}_sorted.bam" ./ref.gtf > "./B-${i}.count"
done
合并表达矩阵(合并count)
# Merge the per-sample htseq-count tables into a single raw count table
# (one row per gene, one column per sample) and save it as rawcount.csv.
setwd(dir = "/media/panda/Elements/mouse_IPF/3rnaseq/3htseq")

# Read one htseq-count output file: two tab-separated columns without a
# header (feature id, count). The count column is named after the sample.
# Quoting and comment handling are disabled so identifiers containing
# quote characters or '#' are read verbatim.
read_htseq_count <- function(file, sample) {
  read.table(
    file,
    sep = "\t",
    col.names = c("genename", sample),
    quote = "",
    comment.char = "",
    stringsAsFactors = FALSE
  )
}

B1 <- read_htseq_count("B-1.count", "B1")
B2 <- read_htseq_count("B-2.count", "B2")
B3 <- read_htseq_count("B-3.count", "B3")
S1 <- read_htseq_count("S-1.count", "S1")
S2 <- read_htseq_count("S-2.count", "S2")
S3 <- read_htseq_count("S-3.count", "S3")
T1 <- read_htseq_count("T-1.count", "T1")
T2 <- read_htseq_count("T-2.count", "T2")
T3 <- read_htseq_count("T-3.count", "T3")

# Inner-join all samples on the gene identifier. The list order fixes the
# column order of the result: S1..S3, B1..B3, T1..T3.
rawcount <- Reduce(
  function(left, right) merge(left, right, by = "genename"),
  list(S1, S2, S3, B1, B2, B3, T1, T2, T3)
)

# htseq-count appends summary counters (__no_feature, __ambiguous,
# __too_low_aQual, __not_aligned, __alignment_not_unique) after the gene
# rows. They are not genes and must not reach the expression matrix.
rawcount <- rawcount[!grepl("^__", rawcount$genename), , drop = FALSE]

# row.names = FALSE: do not write the data-frame row index as an extra
# unnamed first column; "genename" stays the identifier column.
write.csv(rawcount, file = "rawcount.csv", row.names = FALSE)
获得差异表达基因(DEGs)
library(DESeq2)
# Differential expression analysis, group T versus group B, with DESeq2.
work_dir <- "/media/panda/Elements/mouse_IPF/3rnaseq/4DESeq2/"

# Raw counts: rows are genes (taken from the "genename" column), columns are
# samples. check.names = FALSE keeps the sample names exactly as written.
# NOTE(review): rawcount.csv is written under .../3htseq by the merge step
# but read from work_dir here - presumably copied by hand; confirm.
countData <- as.matrix(read.csv(paste0(work_dir, "rawcount.csv"), sep = ",",
                                row.names = "genename", check.names = FALSE))

# Sample annotation: tab-separated, one row per sample, indexed by the
# "sample" column; the "Type" column holds the group label.
colData <- read.csv(paste0(work_dir, "sample_info.txt"), sep = "\t",
                    row.names = "sample")

# Keep only the B and T samples. %in% never yields NA rows, and
# drop = FALSE keeps a data frame even if "Type" is the only column left.
colData_1 <- colData[colData$Type %in% c("B", "T"), , drop = FALSE]

# Make the grouping variable an explicit two-level factor taken from the
# annotation itself, instead of a hand-written vector that assumes a fixed
# sample order.
colData_1$Type <- factor(colData_1$Type, levels = c("B", "T"))
Type <- colData_1$Type
Type

# Enforce (rather than just print) that every annotated sample has a count
# column and that counts and annotation are in the same order.
stopifnot(all(rownames(colData_1) %in% colnames(countData)))
countData_1 <- countData[, rownames(colData_1), drop = FALSE]
stopifnot(identical(rownames(colData_1), colnames(countData_1)))

# Drop htseq-count summary rows (__no_feature, __ambiguous, ...) in case the
# count table still contains them; they are not genes.
countData_1 <- countData_1[!grepl("^__", rownames(countData_1)), , drop = FALSE]

dds <- DESeqDataSetFromMatrix(countData = countData_1,
                              colData = colData_1,
                              design = ~ Type)
dds <- DESeq(dds)

# Explicit contrast: log2 fold changes are T relative to B.
res <- results(dds, contrast = c("Type", "T", "B"))

# Sort by adjusted p-value (order() places NA last); the outer parentheses
# print the sorted table.
(resOrdered <- res[order(res$padj), ])
plotMA(res, ylim = c(-5, 5))
write.csv(as.data.frame(resOrdered), file = paste0(work_dir, "deseq_T-B.csv"))
clusterProfiler GO_BP富集分析
library(clusterProfiler)
library(DOSE)
library(org.Mm.eg.db)
# GO biological-process (BP) enrichment for the gene list in DEG-T-B.csv.
setwd("/media/panda/Elements/mouse_IPF/3rnaseq/5enrichment/")

# The file is tab-separated despite its .csv extension; its first column is
# used as the gene identifiers (treated as gene symbols below).
sig.gene <- read.delim("DEG-T-B.csv")
head(sig.gene)
gene <- sig.gene[[1]]
head(gene)

# Map the symbols to Ensembl and Entrez identifiers with the mouse
# annotation database.
gene.df <- bitr(
  geneID = gene,
  fromType = "SYMBOL",
  toType = c("ENSEMBL", "ENTREZID"),
  OrgDb = org.Mm.eg.db
)
head(gene.df)

# Enrichment is run on the symbols that bitr() returned, with
# Benjamini-Hochberg adjustment and the cutoffs given below.
ego_bp <- enrichGO(
  gene = gene.df[["SYMBOL"]],
  OrgDb = org.Mm.eg.db,
  keyType = "SYMBOL",
  ont = "BP",
  pAdjustMethod = "BH",
  pvalueCutoff = 0.01,
  qvalueCutoff = 0.05
)

# Bar plot of up to 20 categories, then save the result table.
barplot(
  ego_bp,
  showCategory = 20,
  title = "The GO_BP enrichment analysis of TB-DEGs "
)
write.csv(ego_bp, file = "T-B_bp.csv")
clusterProfiler KEGG富集分析
library(clusterProfiler)
library(DOSE)
library(org.Mm.eg.db)
library(stringr)
# KEGG pathway enrichment for the gene list in DEG-T-S.csv.
setwd("/media/panda/Elements/mouse_IPF/3rnaseq/5enrichment/")

# The file is tab-separated despite its .csv extension; its first column is
# used as the gene identifiers (treated as gene symbols below).
sig.gene <- read.csv(file = "DEG-T-S.csv", sep = "\t")
head(sig.gene)
gene <- sig.gene[, 1]
head(gene)

# Map the symbols to Ensembl and Entrez identifiers with the mouse
# annotation database. One symbol can map to several Ensembl ids, so the
# same Entrez id may appear on more than one row of gene.df.
gene.df <- bitr(gene, fromType = "SYMBOL",
                toType = c("ENSEMBL", "ENTREZID"),
                OrgDb = org.Mm.eg.db)
head(gene.df)

# enrichKEGG() is given each Entrez id once; "mmu" is the KEGG organism
# code passed for this mouse data set.
kk <- enrichKEGG(gene = unique(gene.df$ENTREZID),
                 organism = "mmu",
                 pvalueCutoff = 0.05)
dotplot(kk, showCategory = 20, title = "The KEGG enrichment analysis of all TS-DEGs")

# Save the KEGG result. The original wrote ego_bp (the GO result object)
# into this file, so T-S_kegg.csv did not contain the KEGG table.
write.csv(as.data.frame(kk), file = "T-S_kegg.csv")