前言
韦恩图通常只适合集合数量较少的情况；一旦集合多了，图形就会变得很复杂，可读性比较差。这个时候就需要用到 UpSet 图：它虽然看起来没那么酷炫，但是可读性极佳。两者结合起来，就会显得很高大上。
图
代码
library(biomaRt)
# Open the "ensembl" BioMart and select the human gene dataset; `dataset` is
# the mart object handed to the getBM() queries further down.
ensembl <- useMart(biomart = "ensembl")
dataset <- useDataset(dataset = "hsapiens_gene_ensembl", mart = ensembl)
# Tables of the attributes and filters the dataset exposes, kept for looking
# up valid names (e.g. "hgnc_symbol", "uniprot_gn").
attributes <- listAttributes(mart = dataset)
filter <- listFilters(mart = dataset)
#读入数据
#install.packages("xlsx")
library(xlsx)
# Read the first worksheet of a protein workbook.
read_first_sheet <- function(path) {
  read.xlsx(file = path, sheetIndex = 1)
}

# Protein tables for the two IGG and the two flag workbooks.
igg_1 <- read_first_sheet("IGG-1-proteins.xlsx")
igg_2 <- read_first_sheet("IGG-2-proteins.xlsx")
flag1_1 <- read_first_sheet("flag-1-proteins.xlsx")
flag1_2 <- read_first_sheet("flag-2-proteins.xlsx")

# CRAPome matrix: tab-separated text with a header row.
crapome <- read.table(
  file = "E:\\WORK\\crapome\\crap_human_11.matrix.txt",
  header = TRUE,
  sep = "\t"
)

# Entries with an occurrence frequency above 0.05 are treated as contaminants.
# The frequency is column 3 divided by (number of columns - 5).
# NOTE(review): presumably column 3 holds the number of experiments in which
# the protein was seen and 5 columns are non-experiment metadata -- confirm
# against the matrix file.
crapome_frequency <- crapome[, 3] / (ncol(crapome) - 5)
crapome_gt10 <- crapome[crapome_frequency > 0.05, ]
#画UpsetR/venn
#install.packages("UpSetR")
library(UpSetR)
#安装VennDiagram包
#install.packages("VennDiagram")
#载入VennDiagram包
library(VennDiagram)
# Remove isoform suffixes ("-<number>") from accessions so that every isoform
# maps back to its canonical accession, e.g. "P12345-2" -> "P12345".
#
# The previous pattern "-." dropped the hyphen plus exactly ONE following
# character, so a two-digit isoform such as "P12345-10" became "P123450",
# which is a different (and wrong) accession. Matching "-" followed by one or
# more digits removes the whole suffix.
#
# accession: character vector (or factor) of accessions.
# Returns a character vector of the same length with the suffixes removed.
strip_isoform_suffix <- function(accession) {
  gsub(pattern = "-[0-9]+", replacement = "", x = accession)
}

igg_1$Accession <- strip_isoform_suffix(igg_1$Accession)
igg_2$Accession <- strip_isoform_suffix(igg_2$Accession)
flag1_1$Accession <- strip_isoform_suffix(flag1_1$Accession)
flag1_2$Accession <- strip_isoform_suffix(flag1_2$Accession)
# Query BioMart (through the global `dataset` mart) for the HGNC symbol of
# each accession, filtering on "uniprot_gn".
#
# accessions: vector of accessions used as the filter values.
# Returns the getBM() data frame with its two columns renamed to
# "Gene" and "Accession".
fetch_gene_map <- function(accessions) {
  gene_map <- getBM(
    attributes = c("hgnc_symbol", "uniprot_gn"),
    filters = "uniprot_gn",
    values = accessions,
    mart = dataset
  )
  colnames(gene_map) <- c("Gene", "Accession")
  gene_map
}

# One lookup table per sample.
A <- fetch_gene_map(igg_1$Accession)
B <- fetch_gene_map(igg_2$Accession)
C <- fetch_gene_map(flag1_1$Accession)
D <- fetch_gene_map(flag1_2$Accession)

# Attach the gene symbol to every protein table via the shared "Accession"
# column.
igg_1 <- merge(igg_1, A, by = "Accession")
igg_2 <- merge(igg_2, B, by = "Accession")
flag1_1 <- merge(flag1_1, C, by = "Accession")
flag1_2 <- merge(flag1_2, D, by = "Accession")
# Keep only the flag rows whose X..PSMs value exceeds the cutoff.
# NOTE(review): X..PSMs is presumably the "# PSMs" spreadsheet column after
# R's name mangling -- confirm against the workbook header.
psm_cutoff <- 8
keep_flag1_1 <- flag1_1$X..PSMs > psm_cutoff
keep_flag1_2 <- flag1_2$X..PSMs > psm_cutoff
flag1_1 <- flag1_1[keep_flag1_1, ]
flag1_2 <- flag1_2[keep_flag1_2, ]

# Named gene sets fed to the Venn and UpSet plots. The element order matters:
# zone codes such as "00011" refer to the sets by position.
upsetR_data <- list(
  IgG_Hs578T      = igg_1$Gene,
  IgG_MDA_MB_231  = igg_2$Gene,
  CRAPome         = as.character(crapome_gt10$Gene),
  FLAG_Hs578T     = flag1_1$Gene,
  FLAG_MDA_MB_231 = flag1_2$Gene
)
#install.packages("ggpubr")
library(ggpubr)
# Assemble the table of final candidate proteins: genes present in both FLAG
# sets, absent from the IgG set of the same sample, and absent from CRAPome.
flag_only_hs578t <- setdiff(upsetR_data$FLAG_Hs578T, upsetR_data$IgG_Hs578T)
flag_only_mda <- setdiff(
  upsetR_data$FLAG_MDA_MB_231,
  upsetR_data$IgG_MDA_MB_231
)
pro <- as.data.frame(
  setdiff(intersect(flag_only_hs578t, flag_only_mda), upsetR_data$CRAPome)
)
colnames(pro) <- "Gene"

# Restrict both FLAG tables to the candidate genes and join them side by side.
FLAG <- merge(
  merge(flag1_1, pro, by = "Gene"),
  merge(flag1_2, pro, by = "Gene"),
  by = "Gene"
)

# Keep five columns by position; they are renamed below.
# NOTE(review): positions 9 and 20 are presumably the PSM counts of the two
# FLAG samples after the merge -- confirm against the merged column layout.
FLAG_Lite <- FLAG[, c(1, 2, 3, 9, 20)]

# Cut the description at the "OS=" field so only the protein name remains.
# The previous pattern "OS.*" cut at the first "OS" anywhere in the text, so a
# name that itself contains "OS" (e.g. "Protein OS-9 OS=Homo sapiens") was
# truncated to "Protein ". Anchoring on "OS=" avoids that; the whitespace in
# front of the field is removed as well.
FLAG_Lite[, 3] <- sub("\\s*OS=.*$", "", FLAG_Lite[, 3])

# Column 6: sum of the two PSM columns, used for sorting.
FLAG_Lite[, 6] <- FLAG_Lite[, 4] + FLAG_Lite[, 5]
colnames(FLAG_Lite) <- c(
  "Gene",
  "Uniprot Accession",
  "Description",
  "PSMs_Hs578T",
  "PSMs_MDA-MB-231",
  "PSMs_sum"
)

# Sort rows by ascending PSM sum.
FLAG_Lite <- FLAG_Lite[order(FLAG_Lite$PSMs_sum), ]
#install.packages('venn')
library(venn)
# Render the text table, the Venn diagram and the UpSet plot into one PDF.
#
# ggtexttable() and upset() return plot objects that are only drawn when they
# are printed. Top-level auto-printing does not happen when the script is run
# through source(), so both are wrapped in print() to make the pages appear no
# matter how the script is executed. The drawing code sits inside tryCatch()
# so that the PDF device is closed even if one of the plotting calls fails.
pdf(file = "MS_upsetR_Vennplot_texttable.pdf", width = 12)
invisible(tryCatch(
  {
    # Page 1: text table of the candidate proteins.
    print(ggtexttable(
      FLAG_Lite,
      rows = NULL,
      theme = ttheme(
        colnames.style = colnames_style(color = "white", fill = "#8cc257")
      )
    ))

    # Page 2: Venn diagram of the five gene sets.
    venn(
      upsetR_data,
      zcolor = "style",
      opacity = 0.5,
      cexil = 1,
      cexsn = 1
    )
    # Select the zone coded "00011".
    # NOTE(review): with the sets in upsetR_data order this is presumably
    # "in both FLAG sets and in none of the others" -- confirm.
    zones <- getZones("00011")
    # Centroid of that zone, used to position the label.
    centroid <- getCentroid(zones)[[1]]
    # Recolor the zone.
    polygon(zones[[1]], col = "yellow2")
    # Overwrite the zone label with the number of candidate genes. This was a
    # hard-coded "6"; taking it from `pro` (the same set difference the zone
    # represents) keeps the label correct when the input data change.
    text(
      centroid[1],
      centroid[2],
      labels = as.character(nrow(pro)),
      cex = 2,
      col = "red",
      font = 2
    )

    # Page 3: UpSet plot.
    print(upset(
      fromList(upsetR_data),
      # Order the intersection bars by size.
      order.by = "freq",
      main.bar.color = "black",
      matrix.color = "skyblue",
      # Colors of the set-size bars in the lower-left panel.
      sets.bar.color = c("red", "blue", "skyblue", "forestgreen", "gold"),
      # Text scaling.
      text.scale = 1.3,
      # Rotation angle of the set-size axis labels.
      set_size.angles = -30,
      # Title of the main bar plot's y axis.
      mainbar.y.label = "Gene Intersections Size",
      # Intersections to display; sets with no overlap with any other set are
      # left out here.
      intersections = list(
        list("FLAG_Hs578T", "CRAPome"),
        list("FLAG_MDA_MB_231", "CRAPome"),
        list("FLAG_Hs578T", "FLAG_MDA_MB_231"),
        list("IgG_Hs578T", "CRAPome"),
        list("IgG_MDA_MB_231", "CRAPome"),
        list("IgG_Hs578T", "IgG_MDA_MB_231"),
        list("FLAG_Hs578T", "FLAG_MDA_MB_231", "CRAPome"),
        list("IgG_Hs578T", "IgG_MDA_MB_231", "CRAPome"),
        list("IgG_Hs578T", "FLAG_Hs578T", "FLAG_MDA_MB_231", "CRAPome"),
        list("IgG_MDA_MB_231", "FLAG_Hs578T", "FLAG_MDA_MB_231", "CRAPome"),
        list(
          "IgG_Hs578T",
          "IgG_MDA_MB_231",
          "FLAG_Hs578T",
          "FLAG_MDA_MB_231",
          "CRAPome"
        )
      ),
      # Intersection to highlight.
      queries = list(
        list(
          query = intersects,
          params = list("FLAG_Hs578T", "FLAG_MDA_MB_231"),
          color = "orange",
          active = TRUE
        )
      )
    ))
  },
  # Always close the PDF device, even after an error.
  finally = dev.off()
))
# Draw the text table on its own in a separate PDF.
# The table object is printed explicitly because top-level auto-printing does
# not happen under source(); tryCatch() guarantees the device is closed even
# if drawing fails.
pdf(file = "MS_texttable.pdf", width = 12)
invisible(tryCatch(
  print(ggtexttable(
    FLAG_Lite,
    rows = NULL,
    theme = ttheme(
      colnames.style = colnames_style(color = "white", fill = "#8cc257")
    )
  )),
  finally = dev.off()
))