# Y-chromosome gene symbols used as the candidate gene list for the
# loss-of-Y (LOY) signature.
#
# The vector used to be seeded with readClipboard(), which only exists on
# Windows and whose result was overwritten immediately by the literal below;
# that dead, platform-specific call has been removed.
a <- c(
  "DDX3Y", "UTY", "KDM5D", "USP9Y",
  "ZFY", "RPS4Y1", "TMSB4Y", "EIF1AY", "NLGN4Y", "TBL1Y",
  "HSFY2", "PCDH11Y", "SRY", "HSFY1", "PRY2",
  "DAZ1", "DAZ3", "DAZ4", "DAZ2", "RPS4Y2",
  "RBMY1A1", "BPY2", "BPY2B", "AMELY",
  "TSPY3", "TSPY8", "RBMY1B", "RBMY1E", "BPY2C", "TGIF2LY",
  "TSPY2", "TSPY4", "TSPY1", "TSPY10",
  "VCY", "VCY1B", "CDY2B", "CDY2A", "CDY1B", "CDY1A"
)

# NOTE: despite the ".rds" extension this is written with save(), i.e. it is an
# RData-format file that has to be read back with load() (not readRDS()).
# The name and format are kept because the script later load()s a file called
# "loy_genes.rds".
save(a, file = "loy_genes.rds")
# Point R at the package libraries to search for this analysis.
.libPaths(
  c(
    "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2",
    "/home/data/t040413/R/yll/usr/local/lib/R/site-library",
    "/home/data/refdir/Rlib/",
    "/usr/local/lib/R/library"
  )
)
getwd()

# Restore the saved workspace objects; the code below reads `phe` and
# `exprSet` after this call.
load("/home/data/t040413/ARDS/R_Scripts/mars_ARDS/GSE65682_exprset_phe.RData")
head(phe)

# Is a log2 transformation needed?
# https://www.jianshu.com/p/7cdaf811d7d1
# exprSet_normolized=log2(exprSet+1)
# boxplot(exprSet_normolized[,1:68],las=2)  # looks well aligned, so no batch-effect removal
### Start GSEA
# ssGSEA gives every single sample its own score
library(GSVA)
library(limma)
library(GSEABase)
getwd()

# Load the saved gene list; the next line prints `a`, so the file is expected
# to provide an object of that name.
load("/home/data/t040413/ARDS/R_Scripts/loy_genes.rds")
a

# The full vector is assigned first, then replaced straight away by a named
# list holding a single four-gene set; only that list is used for scoring.
gene.set.list <- a
gene.set.list <- list(
  LOY_Gene_features = c("RPS4Y1", "EIF1AY", "DDX3Y", "KDM5D")
)
head(gene.set.list)
# Report whether phenotype rows and expression columns are already in the same
# order (printed only; the column indexing below realigns them by name anyway).
identical(rownames(phe), colnames(exprSet))
exprSet <- as.matrix(exprSet)

# Keep male samples only.
# `%in%` is used instead of `==` so that a missing gender yields FALSE rather
# than NA: indexing rows with NA would create all-NA rows named "NA", and the
# column lookup on exprSet below would then fail.
phe <- phe[phe$gender %in% "male", , drop = FALSE]

# Restrict the expression matrix to the retained samples, in phenotype order.
# drop = FALSE keeps a matrix (with column names) even if one sample remains.
exprSet <- exprSet[, rownames(phe), drop = FALSE]

# Per-sample annotation table, one row per expression column.
group_list <- data.frame(
  row.names = colnames(exprSet),
  gender = phe$gender,
  age = phe$age,
  stim = phe$icu_acquired_infection,
  grade = phe$endotype_class,
  diabete = phe$diabetes_mellitus,
  survival = phe$time_to_event_28days,
  thromb = phe$thrombocytopenia
)
group_list
# NOTE: `exp` masks the name of base::exp() in the global environment; the name
# is kept because a later gsva() call in this script reads `exp` again.
exp <- exprSet

# Score every sample against the gene sets in gene.set.list with ssGSEA.
# TRUE is spelled out (instead of T) because T is an ordinary variable that can
# be reassigned.
ssgsea.res <- gsva(
  as.matrix(exp),
  gene.set.list,
  method = "ssgsea",
  kcdf = "Gaussian",
  abs.ranking = TRUE
)

# View() needs an interactive session / data viewer; skip it in batch runs so
# the script does not stop here.
if (interactive()) {
  View(head(ssgsea.res))
}
length(ssgsea.res)

# # If the raw data is used instead, the resulting scores are the same!!
# ssgsea.res <-
# gsva(
# as.matrix(exprSet),
# gene.set.list,
# method = "ssgsea",
# kcdf = "Gaussian",
# abs.ranking = T
# )
########### https://www.jianshu.com/p/04b9c0f8b200
head(phe)

# Printed check that phenotype rows still line up with expression columns
# before the per-sample score is attached.
identical(rownames(phe), colnames(exprSet))

# Attach the first row of the ssGSEA result as a new phenotype column.
phe$LOY_Score <- ssgsea.res[1, ]
head(phe)
dim(phe)

library(ggplot2)
library(car)

# Score against age: once with car::scatterplot(), once with base plot().
scatterplot(phe$LOY_Score, phe$age)
plot(phe$LOY_Score, phe$age)
library(pheatmap)

# Quick look at the last rows and the dimensions of the expression matrix.
tail(exprSet)[, 1:5]
dim(exprSet)

# Work on a copy: go through a data frame so the ssGSEA scores can be appended
# as an extra row named "LOY_score", then convert back to a matrix.
exprSet1 <- as.data.frame(exprSet)
exprSet1["LOY_score", ] <- as.numeric(ssgsea.res)
exprSet1 <- as.matrix(exprSet1)
tail(exprSet1)[, 1:5]

# scale() centres and scales each column of the matrix.
# NOTE(review): pheatmap additionally scales by row below -- confirm that
# scaling twice is intended.
exprSet1 <- scale(exprSet1, center = TRUE)

# Heatmap of the four signature genes plus the score row, with sample
# annotations taken from group_list.
pheatmap::pheatmap(
  mat = exprSet1[c("RPS4Y1", "EIF1AY", "LOY_score", "DDX3Y", "KDM5D"), ],
  show_rownames = TRUE,
  annotation_col = group_list[, c("stim", "age", "grade", "survival")],
  clustering_method = "average",
  scale = "row"
)
#------------------------------------------------------------------------------------------------
# Reference: https://mp.weixin.qq.com/s/9WZNRtortMiuijdd4cASYg
# (This URL used to be a bare, uncommented line, which is a syntax error in R
# and prevented the whole file from being parsed by source().)
library(msigdbr)

# Fetch the MSigDB hallmark ("H") gene sets for human.
all_gene_sets <- msigdbr(
  species = "Homo sapiens", # Homo sapiens or Mus musculus
  category = "H"
)

# NOTE(review): this prints the number of distinct gene-set sizes, not the
# number of gene sets -- confirm which one was intended.
length(unique(table(all_gene_sets$gs_name)))

# NOTE(review): genes_to_check is not defined in the visible part of this
# script; presumably a named list of gene-symbol vectors created elsewhere --
# confirm.
genes_to_check

# Long-format table with one row per (gene set, gene symbol) pair.
# The genes are looked up in genes_to_check itself; the code used to index an
# undefined object `glist` while iterating over names(genes_to_check).
TERM2GENE <- do.call(rbind, lapply(names(genes_to_check), function(x) {
  data.frame(gs_name = x, gene_symbol = genes_to_check[[x]])
}))
# Turning the data frame / list from above into a gene-set object takes a few
# conversion steps:
library(GSVA) # BiocManager::install('GSVA')
library(GSEABase)

# One character vector of unique gene symbols per gene-set name.
gs <- split(all_gene_sets$gene_symbol, all_gene_sets$gs_name)
gs <- lapply(gs, unique)

# Wrap every vector in a GeneSet and bundle them into a GeneSetCollection.
# The identifiers come from the gene_symbol column, so they are declared as
# SymbolIdentifier(); they were previously mislabelled as EntrezIdentifier().
# NOTE(review): the sets are tagged as KEGGCollection although they were
# requested from category "H" above -- kept unchanged, confirm whether another
# collection type is more appropriate.
geneset <- GeneSetCollection(mapply(function(geneIds, keggId) {
  GeneSet(
    geneIds,
    geneIdType = SymbolIdentifier(),
    collectionType = KEGGCollection(keggId),
    setName = keggId
  )
}, gs, names(gs)))
# gsva() can score any expression matrix against the geneset object built
# above.
# NOTE(review): `sce` is not created in the visible part of this script; the
# slot access below assumes an object with @assays$RNA@counts -- confirm where
# it comes from.
es.max <- gsva(
  as.matrix(sce@assays$RNA@counts),
  geneset,
  mx.diff = FALSE,
  verbose = FALSE,
  parallel.sz = 8
)

# ssGSEA scores for every gene set in `gs` on the expression matrix `exp`.
# TRUE is spelled out (instead of T) because T is an ordinary variable that can
# be reassigned.
res.list <- gsva(
  as.matrix(exp),
  gset.idx.list = gs,
  method = "ssgsea",
  kcdf = "Gaussian",
  abs.ranking = TRUE
)