trimws(x, which = c("both", "left", "right"), whitespace = "[ \t\r\n]")
自建函数切割sample
自建函数分离样本中某一列 按照某种要求某种标准 某种格式
??read_xls
pheno_data=openxlsx::read.xlsx("G:/raw_material for omics_chaojie/Sepsis/sepsis和ARDS队列-患者信息.xlsx",
sheet=7)
head(pheno_data)
#制作wgcna的表型信息
{
library(dplyr)
#自建函数从字符串提取想要的字符,如果存在,就变成1,如果不存在则变成0
strsplit("abdomen_sepsis/ pneumonia_abdomen_sepsis/ abdomen_sepsis_Sshock",split = "/")
#r语言去除字符串两端多余空格
trimws(c(" abdo " ,' n_sepsi ',"s/ pneumonia"), which = c("both", "left", "right"), whitespace = "[ \t\r\n]")
any(c(1,3,4)==5)
get_mydesired_string=function(x,strings){
#strings="abdomen_sepsis/ abdomen/ sepsis/ ab"
#x="ab"
elements=strsplit(strings,split = "/")[[1]]
#r语言去除字符串两端多余空格 两边空格
elements=trimws(elements, which = c("both", "left", "right"), whitespace = "[ \t\r\n]")
if(any(which(elements==x))==TRUE)
{ myelements=elements[which(elements==x)]
myelements=1} else { myelements=0}
return(myelements)
}
get_mydesired_string('sepsis',"abdom/en_/sepsis/ asepsis/ sepsis/ abdhock")
head(pheno_data)
pheno_data$'abdomen_sepsis'
str(pheno_data)
lapply(pheno_data$Group, get_mydesired_string,x="abdomen_sepsis")
sapply(pheno_data$Group, get_mydesired_string,x="abdomen_sepsis")
pheno_data$abdomen_sepsis=sapply(pheno_data$Group, get_mydesired_string,x="abdomen_sepsis")
pheno_data$pneumonia_abdomen_sepsis=sapply(pheno_data$Group, get_mydesired_string,x="pneumonia_abdomen_sepsis")
pheno_data$abdomen_sepsis_Sshock=sapply(pheno_data$Group, get_mydesired_string,x="abdomen_sepsis_Sshock")
pheno_data$abdomen_Sshock=sapply(pheno_data$Group, get_mydesired_string,x="abdomen_Sshock")
pheno_data$pneumonia_abdomen_Sshock=sapply(pheno_data$Group, get_mydesired_string,x="pneumonia_abdomen_Sshock")
pheno_data$pneumonia_sepsis=sapply(pheno_data$Group, get_mydesired_string,x="pneumonia_sepsis")
pheno_data$pneumonia_sepsis_Sshock=sapply(pheno_data$Group, get_mydesired_string,x="pneumonia_sepsis_Sshock")
pheno_data$ARDS_moderate=sapply(pheno_data$Group, get_mydesired_string,x="ARDS_moderate")
pheno_data$ARDS_severe=sapply(pheno_data$Group, get_mydesired_string,x="ARDS_severe")
pheno_data$pneumonia_Sshock=sapply(pheno_data$Group, get_mydesired_string,x="pneumonia_Sshock")
}
#pheno_data$pneumonia_abdomen_Sshock=lapply(pheno_data$Group, get_mydesired_string,x="pneumonia_abdomen_Sshock")
head(pheno_data)
str(pheno_data)
??read.xlsx
patient_sample_dia=openxlsx::read.xlsx("G:/raw_material for omics_chaojie/Sepsis/sepsis和ARDS队列-患者信息.xlsx",
sheet=2,rows=1:27,)
#制作patient sample信息
if(1==1){
head(patient_sample_dia)
dim(patient_sample_dia)
patient_sample_dia=patient_sample_dia[,3:27]
colnames(patient_sample_dia)[1:2]=c('patient','sample')
head(patient_sample_dia)
#以逗号为分隔取第几个元素
#自建函数
strsplit("15196、15197",split="、")
get_mydesired_element=function(location, strings){
#location=2
# strings="15196、15197"
elements=strsplit(strings,split="、")[[1]][location]
#去除空格
elements=elements=trimws(elements, which = c("both", "left", "right"), whitespace = "[ \t\r\n]")
return(elements)
}
lapply(patient_sample_dia$sample,
get_mydesired_element,location=1)
patient_sample_dia$sample_1=unlist(lapply(patient_sample_dia$sample,get_mydesired_element,location=1))
patient_sample_dia$sample_2=unlist(lapply(patient_sample_dia$sample,get_mydesired_element,location=2))
patient_sample_dia
patient_sample_dia=patient_sample_dia %>%select(c(patient,sample_1,sample_2),everything())
setdiff(pheno_data$Sample.Name,patient_sample_dia$sample_1) #[1]801 20056 11216
setdiff(patient_sample_dia$sample_1,pheno_data$Sample.Name) #"2201"
setdiff(patient_sample_dia$sample_2,pheno_data$Sample.Name)
str(patient_sample_dia)
}
#24个病人为何有26个样本 找到样本与病人的对应关系
head(pheno_data)
View(pheno_data)
setdiff()
#检查mrna和ciclerna测序的样本名 各26个
if(1==1){
library(readxl)
mrna_exp=readxl::read_xls("G:/raw_material for omics_chaojie/Sepsis/H1712290A Arraystar_Human_LncRNA_8x60k v4.0 1-color 20180206-zq lnca/Report/GeneralData/mRNA Expression Profiling/mRNA Expression Profiling Data.xls",
range = cell_rows(c(9:11)))
head(mrna_exp)
grep(mrna_exp[1,],pattern = '1121',value = T)#是否有2201样本
ciclerna_exp=readxl::read_xls("G:/raw_material for omics_chaojie/Sepsis/H1712290B AS-CR-005 Human Circular RNA Microarray v2 20180201 zq cias/File 2. Data/CircRNA Expression Profiling Data.xls",
range = cell_rows(c(21:23)))
head(mrna_exp)
grep(mrna_exp[1,],pattern = '200',value = T)#是否有2201样本
length(patient_sample_dia$sample_1)
}
getwd()
save(pheno_data,patient_sample_dia,
file ="G:/papers_for_ARDS/r_scripts_for_ARDS_From_zhongda/lncRNA_mRNA_cicleRNA/meta_infor_raw.RData" )
load("G:/papers_for_ARDS/r_scripts_for_ARDS_From_zhongda/lncRNA_mRNA_cicleRNA/meta_infor_raw.RData")
head(patient_sample_dia)