目的
- 使用从文件中提取的四组数据绘制venn图,并保存各自独有的数据以及用来制作venn图的原始数据
- 分别提取各样本中fpkm>=阈值(脚本中的fpkm参数,当前为10)的行,保存数据并绘制venn图
- 保存venn中独有的部分
思路
- which进行判断比较啊~~
- venn图使用:VennDiagram这个包,照着示例代码走就行了;抄的时候要注意:
传给venn.diagram的list中,每个元素一定要注明名称(如x=xx,y=xxx),
否则会报错,我就在这报了好久的错。
#获取skin1-4各自符合条件的gene_id行,并使用四组数据绘制venn图
#通过修改fpkm阈值,产生不同的venn图,以及相关数据,其中alone可用于单独提取数据
#state:complete
#2019年1月12日16:11:42
#author:liyuan
# step1: initial
# step1_1: load main file
# NOTE: every relative output path below is resolved against this directory.
setwd("~/R_Workspace/DATA/1-11/4_特异表达venn/")
library(data.table)
input_table <- fread(
  "~/R_Workspace/DATA/1-11/gene.description.xls",
  header = TRUE, sep = "\t",
  fill = TRUE, quote = "", check.names = FALSE
)
# message() is used for status lines so they are also shown under source().
message("step1_1_load main file success!")
# step1_2: set params
# fpkm: expression threshold; a gene is kept for a sample when its value
# in that sample's column is >= fpkm.
fpkm <- 10
message("step_1_2_params_set_success!!")
# All outputs go to a threshold-specific sub-directory, e.g. "fpkm10/".
prefix <- paste0("fpkm", fpkm, "/exp.venn")
# The sub-directory changes with the threshold, so create it on demand;
# otherwise write.table()/venn.diagram() fail with "cannot open file".
dir.create(dirname(prefix), showWarnings = FALSE, recursive = TRUE)
out_file_skin1 <- paste0(prefix, ".skin1.fpkm", fpkm, ".xls")
out_file_skin2 <- paste0(prefix, ".skin2.fpkm", fpkm, ".xls")
out_file_skin3 <- paste0(prefix, ".skin3.fpkm", fpkm, ".xls")
out_file_skin4 <- paste0(prefix, ".skin4.fpkm", fpkm, ".xls")
out_file_venn <- paste0(prefix, ".skin1-4.fpkm", fpkm, ".tiff")
# step2: get data
# step2_1: identify the columns of interest and tidy them up
# Column positions in the input table (as recorded by the author):
#   gene id: 1, skin1: 56, skin2: 55, skin3: 67, skin4: 51, annotation: 176:185
# 1. annotate the genes
# 2. rename the sample columns
skin_name <- paste0("skin-", c("B", "C", "RW", "W"))
skin_col <- c(56, 55, 67, 51)
colnames(input_table)[skin_col] <- skin_name
message("2_1_1_colnames update success!!")
# Note: str_replace_all() returns a new vector and leaves its input untouched,
# so the cleaned text is stored in a separate Gene.Name column.
library(stringr)
input_table$Gene.Name <- str_replace_all(
  input_table$KEGG.AnnotInfo,
  pattern = "(^.*RefSeq\\) )|(\\;.*$)",
  replacement = ""
)
message("2_1_2_gene_anno success!!")
# step2_2: get data
# For each sample keep the rows whose value in that sample's column is >= fpkm.
# The row index is computed outside `[` and passed as a single symbol so that
# it is looked up in the calling scope and cannot clash with a column name.
skin_data <- lapply(skin_col, function(col_idx) {
  keep <- which(input_table[[col_idx]] >= fpkm)
  input_table[keep, ]
})
skin1_data <- skin_data[[1]]
skin2_data <- skin_data[[2]]
skin3_data <- skin_data[[3]]
skin4_data <- skin_data[[4]]
# Columns exported for every sample: gene id, the four sample columns and the
# annotation columns.
out_cols <- c(1, skin_col, 176:185)
out_files <- c(out_file_skin1, out_file_skin2, out_file_skin3, out_file_skin4)
for (k in seq_along(skin_data)) {
  # with = FALSE is required: fread() returns a data.table, and
  # DT[, c(1, skin_col, 176:185)] has a variable inside j, so data.table
  # would evaluate the expression and return the index vector itself
  # instead of selecting those columns.
  write.table(
    skin_data[[k]][, out_cols, with = FALSE],
    file = out_files[k],
    sep = "\t",
    row.names = FALSE
  )
}
# step3: draw venn
library(VennDiagram)
# Named list with one gene-id vector per sample; every element must be named.
venn_data <- setNames(
  list(
    skin1_data$Gene_id,
    skin2_data$Gene_id,
    skin3_data$Gene_id,
    skin4_data$Gene_id
  ),
  c("skin-B", "skin-C", "skin-RW", "skin-W")
)
# Four-set diagram saved to out_file_venn. fill and cat.col use the same four
# colours; a five-set diagram would need a fifth colour (e.g. "orchid3")
# in both vectors.
venn.plot <- venn.diagram(
  x = venn_data,
  filename = out_file_venn,
  margin = 0.05,
  alpha = 0.50,
  col = "black",
  fill = c("dodgerblue", "goldenrod1", "darkorange1", "seagreen3"),
  cat.col = c("dodgerblue", "goldenrod1", "darkorange1", "seagreen3"),
  cat.cex = 1.5,
  cat.fontface = "bold"
)
paste("step3_venn_draw_success!!")
# step4: save venn intersection data
area <- calculate.overlap(venn_data)
# typeof(area)
# summary(area)
# Regions holding the genes found in one sample only (author's mapping):
#   a9 -> skin-B, a14 -> skin-C, a1 -> skin-RW, a3 -> skin-W
# Each region is written next to the matching per-sample table, with the
# "_alone.xls" suffix appended to that table's file name.
alone_targets <- list(
  a9 = out_file_skin1,
  a14 = out_file_skin2,
  a1 = out_file_skin3,
  a3 = out_file_skin4
)
for (region in names(alone_targets)) {
  write.table(
    area[[region]],
    file = paste0(alone_targets[[region]], "_alone.xls"),
    sep = "\t",
    row.names = FALSE,
    col.names = FALSE
  )
}
paste("step4_table_of_alone_write_success!!")