使用 R 包 ftmsRanalysis 处理傅立叶变换离子回旋共振质谱(FT-ICR MS)数据。本示例使用的数据是由软件 Formularity(Tolić et al., 2017, Anal. Chem.)生成的报告文件。
加载工具包
#devtools::install_github("EMSL-Computing/ftmsRanalysis")
#devtools::install_github("clauswilke/ggisoband")
# package found at https://github.com/EMSL-Computing/ftmsRanalysis
suppressPackageStartupMessages(require(ftmsRanalysis))
suppressPackageStartupMessages(require(reshape2))
suppressPackageStartupMessages(require(ggpubr))
suppressPackageStartupMessages(require(dplyr))
suppressPackageStartupMessages(require(RColorBrewer))
suppressPackageStartupMessages(require(scales))
suppressPackageStartupMessages(require(ggisoband))
将质谱峰的化合物预测信息与丰度信息拆分开
# Load the Formularity report (CSV) and preview its first rows.
report <- read.csv("../00_data/TXT/Report.csv")
head(report)

# Split the report into two tables that share the "Mass" key:
#   emeta - the formula / annotation columns listed below
#   edata - "Mass" plus every remaining column of the report
emeta <- report[, c(
  "Mass", "C", "H", "O", "N", "C13", "S", "P", "Na",
  "El_comp", "Class", "NeutralMass", "Error_ppm", "Candidates"
)]
edata <- report[, !(colnames(report) %in% names(emeta)[-1])]

# Sample grouping table (whitespace-delimited, first line is the header).
fdata <- read.table("./map.txt", header = TRUE)

# Bundle the three tables into a single ftmsRanalysis peakData object.
peak_icr <- as.peakData(
  e_data = edata,
  f_data = fdata,
  e_meta = emeta,
  edata_cname = "Mass",
  mass_cname = "Mass",
  fdata_cname = "SampleID",
  c_cname = "C",
  h_cname = "H",
  o_cname = "O",
  n_cname = "N",
  s_cname = "S",
  p_cname = "P"
)

# Add the calculated per-peak parameters.
peak_icr <- compound_calcs(peak_icr)

# Annotate compound classes with each boundary set in turn: bs1, bs2, bs3.
peak_icr <- Reduce(
  function(obj, boundary) assign_class(obj, boundary_set = boundary),
  c("bs1", "bs2", "bs3"),
  peak_icr
)

# Quality filter on mass, using the 150-900 window.
filter_obj <- mass_filter(peak_icr)
peak_icr <- applyFilt(filter_obj, peak_icr, min_mass = 150, max_mass = 900)

# Export the processed abundance table and the per-peak annotation table.
write.csv(
  peak_icr$e_data, "Processed_MS_Data.csv",
  quote = FALSE, row.names = FALSE
)
write.csv(
  peak_icr$e_meta, "Processed_MolInfor.csv",
  quote = FALSE, row.names = FALSE
)
初步可视化
# Custom colour scheme: concatenate every ColorBrewer palette of category
# "qual", each at its maximum number of colours, into one colour vector.
qual_col_pals <- subset(brewer.pal.info, category == "qual")
col_vector <- unlist(
  Map(brewer.pal, qual_col_pals$maxcolors, rownames(qual_col_pals))
)

# van Krevelen diagram: O:C on x, H:C on y, points coloured by bs2 class,
# with filled density bands drawn on top of the points.
vK_plot <- ggplot(
  data = peak_icr$e_meta,
  mapping = aes(x = OtoC_ratio, y = HtoC_ratio, color = bs2_class)
) +
  geom_point() +
  geom_density_bands(
    mapping = aes(fill = stat(density)),
    color = "NA",
    alpha = 0.7,
    size = 0.2
  ) +
  # The 4th palette colour is skipped for the class colours.
  scale_color_manual(values = col_vector[-4]) +
  scale_fill_gradientn(colours = c("white", "yellow", "red", "black")) +
  # Enlarge the legend keys independently of the plotted point size.
  guides(color = guide_legend(override.aes = list(size = 5))) +
  labs(x = "O:C", y = "H:C", color = "Classes") +
  theme_bw() +
  theme(
    axis.title = element_text(size = 20, color = "black"),
    axis.text.y = element_text(size = 15, color = "black"),
    axis.text.x = element_text(
      size = 15, color = "black", angle = 90, vjust = 0.5
    ),
    axis.ticks.length = unit(0.3, "cm"),
    legend.text = element_text(size = 20, color = "black"),
    legend.title = element_text(size = 20, color = "black"),
    strip.text = element_text(size = 20, color = "black"),
    panel.grid = element_blank()
  )
各参数概览
head(emeta)

# Take columns 16 to 24 of the processed e_meta table, drop duplicated rows,
# and reshape to long format (one row per variable / value pair).
# NOTE(review): the selection is positional; presumably these are the
# calculated parameters - confirm against colnames(peak_icr$e_meta).
emeta_sub <- peak_icr$e_meta[16:24] %>%
  unique.data.frame() %>%
  melt()

# One scaled density curve per variable, each in its own free-scale panel.
emeta_plot <- ggplot(
  data = emeta_sub,
  mapping = aes(x = value, y = ..scaled.., fill = variable)
) +
  geom_density(show.legend = FALSE) +
  facet_wrap(. ~ variable, scales = "free", nrow = 2) +
  theme_bw() +
  theme(
    axis.title = element_text(size = 20, color = "black"),
    axis.text.y = element_text(size = 15, color = "black"),
    axis.text.x = element_text(
      size = 15, color = "black", angle = 90, vjust = 0.5
    ),
    axis.ticks.length = unit(0.3, "cm"),
    strip.text = element_text(size = 20, color = "black"),
    panel.grid = element_blank()
  )
对质谱峰化合物信息进行丰度统计,生成类似OTU表和物种分类表
############### Summary generation ###############
# Builds three per-sample tables from the peaks detected in each sample
# (abundance > 0) and writes each of them to CSV:
#   classes         - number of detected peaks in every bs1 compound class
#   compostion      - number of detected peaks with every elemental composition
#   characteristics - mean / median / sd of eight calculated molecular indices
# `edata`, `emeta`, `classes`, `compostion`, `characteristics` and
# `uniq.composition` are left in the workspace; scratch variables are removed.
summary <- TRUE
if (isTRUE(summary)) {
  # Key both tables by peak mass, then drop the Mass column so that edata holds
  # sample columns only. drop = FALSE keeps a data frame even when a single
  # column is left.
  edata <- peak_icr$e_data
  emeta <- peak_icr$e_meta
  row.names(edata) <- edata[["Mass"]]
  row.names(emeta) <- emeta[["Mass"]]
  edata <- edata[, colnames(edata) != "Mass", drop = FALSE]
  emeta <- emeta[, colnames(emeta) != "Mass", drop = FALSE]

  # Output column prefix -> e_meta column that holds the index.
  stat_cols <- c(
    AI = "AI", AI_Mod = "AI_Mod", DBE = "DBE", DBE_O = "DBE_O",
    KenMass = "kmass", KenDef = "kdefect", NOSC = "NOSC", Gibbs = "GFE"
  )

  # Columns are addressed by exact name below. `$` would partially match
  # (e.g. `bs1` -> `bs1_class`) or quietly return NULL, so fail early instead.
  missing_cols <- setdiff(c("bs1_class", "El_comp", stat_cols), colnames(emeta))
  if (length(missing_cols) > 0) {
    stop(
      "e_meta is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }

  uniq.comp <- unique(emeta[["bs1_class"]])
  uniq.composition <- unique(emeta[["El_comp"]])
  samples <- colnames(edata)

  # For every row of edata, the row of emeta with the same mass key.
  meta_row <- match(row.names(edata), row.names(emeta))

  # Result containers: one row per sample.
  classes <- matrix(
    NA_integer_, nrow = length(samples), ncol = length(uniq.comp)
  )
  colnames(classes) <- uniq.comp
  row.names(classes) <- samples

  compostion <- matrix(
    NA_integer_, nrow = length(samples), ncol = length(uniq.composition)
  )
  colnames(compostion) <- uniq.composition
  row.names(compostion) <- samples

  # Columns come in (mean, median, sd) triplets, in the order of stat_cols.
  characteristics <- matrix(
    NA_real_, nrow = length(samples), ncol = 3 * length(stat_cols)
  )
  colnames(characteristics) <- paste(
    rep(names(stat_cols), each = 3), c("mean", "median", "sd"),
    sep = "."
  )
  row.names(characteristics) <- samples

  for (i in seq_along(samples)) {
    # Annotation rows of the peaks detected (abundance > 0) in sample i.
    temp <- emeta[meta_row[which(edata[[i]] > 0)], , drop = FALSE]

    # match() maps every peak to the index of its category and tabulate()
    # counts the indices. NA categories are counted as well, because match()
    # pairs NA with NA.
    classes[i, ] <- tabulate(
      match(temp[["bs1_class"]], uniq.comp),
      nbins = length(uniq.comp)
    )
    compostion[i, ] <- tabulate(
      match(temp[["El_comp"]], uniq.composition),
      nbins = length(uniq.composition)
    )

    # vapply() yields a 3 x 8 matrix; flattening it column-wise gives the
    # (mean, median, sd) triplets in the same order as the output columns.
    characteristics[i, ] <- as.vector(vapply(
      stat_cols,
      function(col) {
        values <- temp[[col]]
        c(
          mean(values, na.rm = TRUE),
          median(values, na.rm = TRUE),
          sd(values, na.rm = TRUE)
        )
      },
      numeric(3)
    ))
  }

  #### Compound class summary
  classes <- as.data.frame(classes)
  write.csv(classes, "Compound_Class_Summary.csv", quote = FALSE)

  #### Compound composition summary
  compostion <- as.data.frame(compostion)
  write.csv(compostion, "Compound_Compostion_Summary.csv", quote = FALSE)

  #### Characteristics summary
  characteristics <- as.data.frame(characteristics)
  write.csv(characteristics, "MolInfo_Summary.csv", quote = FALSE)

  # Remove scratch variables; only names that actually exist are passed to
  # rm(), so an empty sample set does not trigger "object not found" warnings.
  scratch <- c(
    "i", "j", "name.temp", "summary", "uniq.comp", "temp",
    "samples", "meta_row", "stat_cols", "missing_cols"
  )
  rm(list = c(intersect(scratch, ls()), "scratch"))
}
自定义函数绘制累积图
# Stacked bar chart of category abundances per sample, with thin segments
# joining the stack boundaries of neighbouring bars.
#
# Arguments:
#   dat             - table with samples in rows and categories in columns
#                     (e.g. the `classes` or `compostion` summary tables).
#   relative        - TRUE: every row is divided by its row sum and the y axis
#                     is labelled in percent; otherwise raw values are plotted.
#   legend.position - forwarded to theme(legend.position = ...).
#
# Returns the ggplot object.
#
# The fill colours are taken from the global `col_vector`.
cal_avg_abundance <- function(dat, relative, legend.position) {
  relative <- isTRUE(as.logical(relative))
  if (relative) {
    dat <- dat / rowSums(dat)
  }

  # Transpose to categories x samples. The sample names are taken from `dat`
  # itself, so the function no longer depends on the global `classes` table.
  df <- as.data.frame(t(dat))
  samples <- colnames(df)
  df$taxa <- rownames(df)
  df <- df[order(df$taxa, decreasing = FALSE), c("taxa", samples), drop = FALSE]

  bar.width <- 0.7
  n_samples <- length(samples)

  # Connecting segments: for every category, join the cumulative height in
  # sample k to the cumulative height in sample k + 1. Rows are ordered
  # category by category, with the sample pair index varying fastest.
  link_dat <- NULL
  if (n_samples >= 2) {
    cum <- do.call(cbind, lapply(df[samples], cumsum))
    left <- cum[, -n_samples, drop = FALSE]
    right <- cum[, -1, drop = FALSE]
    pair_idx <- seq_len(n_samples - 1)
    link_dat <- data.frame(
      y.1 = as.vector(t(left)),
      y.2 = as.vector(t(right)),
      x.1 = rep(pair_idx + bar.width / 2, times = nrow(cum)),
      x.2 = rep(pair_idx + (1 - bar.width / 2), times = nrow(cum))
    )
  }

  plot_df <- melt(df, id.vars = "taxa")
  # Reversed level order puts the alphabetically first category at the bottom
  # of each stack, matching the cumulative sums computed above.
  plot_df$taxa <- factor(
    plot_df$taxa,
    levels = rev(levels(factor(plot_df$taxa)))
  )

  # The relative and absolute variants differ only in the y axis.
  if (relative) {
    y_label <- "Relative Abundance"
    y_scale <- scale_y_continuous(labels = percent, expand = c(0, 0))
  } else {
    y_label <- "Absolute Abundance"
    y_scale <- scale_y_continuous(expand = c(0, 0))
  }

  p <- ggplot(data = plot_df, aes(x = variable, y = value, fill = taxa)) +
    theme_classic() +
    labs(x = NULL, y = y_label, fill = NULL) +
    guides(fill = guide_legend(nrow = 5)) +
    geom_bar(stat = "identity", width = bar.width, colour = "black")

  # With a single sample there is nothing to connect.
  if (!is.null(link_dat)) {
    p <- p + geom_segment(
      data = link_dat,
      aes(x = x.1, xend = x.2, y = y.1, yend = y.2),
      inherit.aes = FALSE
    )
  }

  transparent_rect <- element_rect(fill = "transparent", color = "transparent")
  p <- p +
    y_scale +
    scale_fill_manual(values = col_vector) +
    theme(
      axis.title = element_text(size = 20, color = "black"),
      axis.text.y = element_text(size = 15, color = "black"),
      axis.text.x = element_text(
        size = 15, color = "black", angle = 90, vjust = 0.5
      ),
      axis.ticks.length = unit(0.3, "cm"),
      legend.text = element_text(size = 20),
      legend.position = legend.position,
      panel.grid = element_blank(),
      panel.border = transparent_rect,
      plot.background = transparent_rect,
      panel.background = transparent_rect
    )

  p
}
预测化合物的累积图
# Save the compound-class stacked bars: relative first, then absolute.
ggsave(
  filename = "classes_relative.jpg",
  plot = cal_avg_abundance(classes, relative = TRUE, legend.position = "bottom"),
  width = 15,
  height = 10,
  dpi = 300
)
ggsave(
  filename = "classes_absolute.jpg",
  plot = cal_avg_abundance(classes, relative = FALSE, legend.position = "bottom"),
  width = 15,
  height = 10,
  dpi = 300
)
预测元素组成的累积图
# Save the elemental-composition stacked bars: relative first, then absolute.
ggsave(
  filename = "compostion_relative.jpg",
  plot = cal_avg_abundance(compostion, relative = TRUE, legend.position = "right"),
  width = 15,
  height = 10,
  dpi = 300
)
ggsave(
  filename = "compostion_absolute.jpg",
  plot = cal_avg_abundance(compostion, relative = FALSE, legend.position = "right"),
  width = 15,
  height = 10,
  dpi = 300
)