根据基因间相关性系数R进行GSEA富集分析

根据rawdata算出来FPKM,然后进行单个基因与其他基因之间相关性的计算

mutwt = read.csv("路径wtmut12-fpkm.csv", row.names=1,header=T)
#转置一下
mutwt_t <- t(mutwt)

转置后文件内容为:纵坐标是样本名称,横坐标是不同基因的Ensembl

#先写空向量
gene_name<-c()##也可用vector
cor_r<-c()
pvalue<-c()
#用循环来算相关性,
for (i in 1:ncol(mutwt_t)){
    g1=colnames(mutwt_t)[i]
    c_r=cor(mutwt_t$目标基因,mutwt_t[,i],method="pearson")
    p=cor.test(mutwt_t$目标基因,mutwt_t[,i],method ="pearson")[[3]]
    gene_name=c(gene_name,g1)
    cor_r=c(cor_r,c_r)
    pvalue=c(pvalue,p)
}

cor_result <- data.frame(gene_name, cor_r, pvalue)
write.csv(cor_result,"路径/cor_result_withsymbol.csv",quote=F)

这样就算好了基因间的相关性系数
接下来做GSEA分析

#GSEA分析
cor_result_rowname <- read.csv("路径/目标基因与其他基因相关性分析.csv", row.names=1, header = T)
row.names(cor_result_rowname) <- cor_result_rowname[,1]
#转换ID 需要ENTREZID作GSEA分析
geneList_tr <- bitr(row.names(cor_result_rowname), 
                    fromType = "ENSEMBL", 
                    toType = c("ENTREZID", "SYMBOL"), 
                    OrgDb = org.Hs.eg.db)
colnames(cor_result_rowname)[1] <- c("ENSEMBL")
cor_result_withsymbol <- merge(cor_result_rowname, geneList_tr)
#按相关性系数进行降序排序
sortlist = cor_result_withsymbol[order(cor_result_withsymbol$cor_r,decreasing = T),]

newlist <- sortlist$cor_r
names(newlist) <- cor_result_withsymbol$ENTREZID
#去掉Na,如果不去掉可能会导致下一步失败
newlist<-na.omit(newlist)


准备作图
library(stringr) 
library(clusterProfiler) 
library(org.Hs.eg.db)

go_result <- gseGO(newlist, 
                   ont = "BP", 
                   OrgDb = org.Hs.eg.db,
                   keyType = "ENTREZID",
                   pvalueCutoff = 0.05,
                   pAdjustMethod = "BH")#p值校正方法


kegg_result <- gseKEGG(
    newlist,
    organism = "hsa",
    keyType = "kegg",
    exponent = 1,
    minGSSize = 10,
    maxGSSize = 500,
    eps = 1e-10,
    pvalueCutoff = 0.05,
    pAdjustMethod = "BH",
    verbose = TRUE,
    use_internal_data = FALSE,
    seed = FALSE,
    by = "fgsea")
head(kegg_result)
dim(kegg_result)
#按照enrichment score从高到低排序,便于查看富集通路

write.table(kegg_result, "路径/GSEA富集_cor.txt",sep = "\t",quote = F,col.names = T,row.names = F)
可以在这个表中看具体的通路信息

开始画图
library(enrichplot)
#感兴趣的通路写在一个path里
paths <- c("hsa04724", "hsa04727","hsa04150","hsa04151")
gseaplot2(kegg_result, paths, pvalue_table = TRUE)
gseaplot2(
    kegg_result, #gseaResult object,即GSEA结果
    "hsa04727",#富集的ID编号
    #标题
    color = "green",#GSEA线条颜色
    base_size = 11,#基础字体大小
    rel_heights = c(1.5, 0.5, 1),#副图的相对高度
    subplots = 1:3, #要显示哪些副图 如subplots=c(1,3) #只要第一和第三个图,subplots=1#只要第一个图
    pvalue_table = FALSE, #是否添加 pvalue table
    ES_geom = "line") #running enrichment score用先还是用点ES_geom = "dot"


你可能感兴趣的:(根据基因间相关性系数R进行GSEA富集分析)