hdwgcna 实战教程

https://github.com/smorabit/hdWGCNA

Module customization • hdWGCNA



if(F){
  https://github.com/smorabit/hdWGCNA
  https://smorabit.github.io/hdWGCNA/articles/basic_tutorial.html
  
  
  
  getwd()
  file="G:/silicosis/hdgcna"
  dir.create(file)
  setwd(file)
  getwd()
  
  #wget https://swaruplab.bio.uci.edu/public_data/Zhou_2020.rds
  #r下载网上的内容
  download.file("https://swaruplab.bio.uci.edu/public_data/Zhou_2020.rds","Zhou_2020.rds")
  list.files()
  
  
  1.# single-cell analysis package
  library(Seurat)
  # plotting and data science packages
  library(tidyverse)
  library(cowplot)
  library(patchwork)
  # co-expression network analysis packages:
  library(WGCNA)
  #devtools::install_github('smorabit/hdWGCNA', ref='dev')
  library(hdWGCNA)
  # using the cowplot theme for ggplot
  theme_set(theme_cowplot())
  
  # set random seed for reproducibility
  set.seed(12345)
  2# load the Zhou et al snRNA-seq dataset
  seurat_obj <- readRDS('./hdgcna/Zhou_2020.rds')
  seurat_obj@misc
  head([email protected])
  
  #check
  p <- DimPlot(seurat_obj, group.by='cell_type', label=TRUE) +
    umap_theme() + ggtitle('Zhou et al Control Cortex') + NoLegend()
  
  p
  
  
  3#Set up Seurat object for WGCNA
  length(VariableFeatures(seurat_obj))
  names(seurat_obj)
  
  #In this example, we will select genes that are expressed in at least 5% of cells in this dataset, and we will name our hdWGCNA experiment “tutorial”.
  names(seurat_obj@misc)
   seurat_obj <- SetupForWGCNA(
    seurat_obj,
    gene_select = "fraction", # the gene selection approach
    fraction = 0.05, # fraction of cells that a gene needs to be expressed in order to be included
    wgcna_name = "tutorial" # the name of the hdWGCNA experiment
  )
  Assays(seurat_obj)
  names(seurat_obj@misc)
  
  seurat_obj@assays
  head([email protected])
  dim(seurat_obj)
  head(seurat_obj)[,1:116]
  seurat_obj@assays$RNA[,1:10]
  dim(seurat_obj@assays$RNA@counts)
  colnames([email protected])
  table(seurat_obj$cell_type)
  table(seurat_obj$Sample)
  head([email protected])
  
  4#metacell
  ## construct metacells  in each group
  seurat_obj <- MetacellsByGroups(
    seurat_obj = seurat_obj,
    group.by = c("cell_type", "Sample"), # specify the columns in [email protected] to group by
    k = 25, # nearest-neighbors parameter
    max_shared = 10, # maximum number of shared cells between two metacells
    ident.group = 'cell_type', # set the Idents of the metacell seurat object
    reduction = "harmony"#指定降维方法 不然会报错
  )
  # normalize metacell expression matrix:
  seurat_obj <- NormalizeMetacells(seurat_obj)
  
 
  4.1可选
  if(F){
    metacell_obj <- GetMetacellObject(seurat_obj)
    seurat_obj <- NormalizeMetacells(seurat_obj)
    seurat_obj<-FindVariableFeatures(seurat_obj)#我自己加的一个步骤本来没有的
    seurat_obj <- ScaleMetacells(seurat_obj, features=VariableFeatures(seurat_obj))
    seurat_obj <- RunPCAMetacells(seurat_obj, features=VariableFeatures(seurat_obj))
    seurat_obj <- RunHarmonyMetacells(seurat_obj, group.by.vars='Sample')
    seurat_obj <- RunUMAPMetacells(seurat_obj, reduction='harmony', dims=1:15)
    p1 <- DimPlotMetacells(seurat_obj, group.by='cell_type') + umap_theme() + ggtitle("Cell Type")
    p2 <- DimPlotMetacells(seurat_obj, group.by='Sample') + umap_theme() + ggtitle("Sample")
    
    p1 | p2
  
    ##########  
    seurat_obj <- SetDatExpr(
      seurat_obj,
      group_name = "INH", # the name of the group of interest in the group.by column
      group.by='cell_type', # the metadata column containing the cell type info. This same column should have also been used in MetacellsByGroups
      assay = 'RNA', # using RNA assay
      slot = 'data' # using normalized data
    )
   head(seurat_obj@misc$tutorial$datExpr) [,1:20]
    dim(seurat_obj@misc$tutorial$wgcna_metacell_obj)
    table(seurat_obj@misc$tutorial$wgcna_metacell_obj$cell_type)
    
  ########3
    # Test different soft powers:
    seurat_obj <- TestSoftPowers(
      seurat_obj,use_metacells = TRUE,
      networkType = 'signed' # you can also use "unsigned" or "signed hybrid"
    )
    # plot the results:
    plot_list <- PlotSoftPowers(seurat_obj)
    
    # assemble with patchwork
    wrap_plots(plot_list, ncol=2)
    power_table <- GetPowerTable(seurat_obj)
    head(power_table)
    
 ??ConstructNetwork   
    ######3
    # construct co-expression network:
    seurat_obj <- ConstructNetwork(
      seurat_obj, soft_power=8,
      setDatExpr=FALSE,use_metacells = TRUE,
      saveConsensusTOMs = FALSE
     # tom_name = 'INH' # name of the topoligical overlap matrix written to disk
    )
    
    PlotDendrogram(seurat_obj, main='INH hdWGCNA Dendrogram') 
   
###################3  
    memory.size() ### Checking your memory size
    memory.limit() ## Checking the set limit
    memory.limit(size=56000) ### 
     # need to run ScaleData first or else harmony throws an error:
    seurat_obj <- ScaleData(seurat_obj, features=VariableFeatures(seurat_obj))
    
    # compute all MEs in the full single-cell dataset
    seurat_obj <- ModuleEigengenes(
      seurat_obj,
      group.by.vars="Sample"
    )
    
    dim(seurat_obj)
    # harmonized module eigengenes:
    hMEs <- GetMEs(seurat_obj)
    
    # module eigengenes:
    MEs <- GetMEs(seurat_obj, harmonized=FALSE)
 
     ###################
    
    # compute eigengene-based connectivity (kME):
    seurat_obj <- ModuleConnectivity(
      seurat_obj,
      group.by = 'cell_type', group_name = 'INH'
    ) 
    
    
    # rename the modules
    seurat_obj <- ResetModuleNames(
      seurat_obj,
      new_name = "INH-M" )  
      
      
      # plot genes ranked by kME for each module
      p <- PlotKMEs(seurat_obj, ncol=5)
      
      p
   
      # get the module assignment table:
      modules <- GetModules(seurat_obj)
      
      # show the first 6 columns:
      head(modules[,1:6])  

      getwd()  
      save(seurat_obj,file = "G:/silicosis/hdgcna/hdWGCNA_object.rds")
  } 

  
  
  
  
  
 names(seurat_obj@misc)
   

  
  
  
  
  
  rm(ls())
  gc()
  
}

加载各种包
if(T){
  # single-cell analysis package
  library(Seurat)
  
  # plotting and data science packages
  library(tidyverse)
  library(cowplot)
  library(patchwork)
  
  # co-expression network analysis packages:
  library(WGCNA)
  library(hdWGCNA)
  
  # using the cowplot theme for ggplot
  theme_set(theme_cowplot())
  
  # set random seed for reproducibility
  set.seed(12345)

  
}


load("D:/ARDS_scripts_1012/ARDS/Step2_harmony_f200_R3/0805/cluster_merge/sepsis_cluster_merge.rds")##	改路径
table(All.merge$stim)
table(Idents(All.merge),All.merge$stim)



1创建对象,选择基因
length(VariableFeatures(All.merge))

seurat_obj <- SetupForWGCNA(
  All.merge,
  gene_select = "fraction", # the gene selection approach
  fraction = 0.05, #  use genes that are expressed in a certain fraction of cells for in the whole dataset or in each group of cells, specified by group.by fraction of cells that a gene needs to be expressed in order to be included
  #variable=
  wgcna_name = "tutorial" # the name of the hdWGCNA experiment
)

length(seurat_obj@misc$tutorial$wgcna_genes) #[1] 5735

table(seurat_obj$cell.type)

table(seurat_obj$stim)

seurat_obj$Sample=seurat_obj$stim
seurat_obj$cell_type=seurat_obj$cell.type

2# construct metacells  in each group
seurat_obj <- MetacellsByGroups(
  seurat_obj = seurat_obj,
  group.by = c("cell_type", "Sample"), # specify the columns in [email protected] to group by
  k = 20, # nearest-neighbors parameter
  reduction = "harmony", # 降维方法
  slot='counts',
  max_shared = 10, # maximum number of shared cells between two metacells
  ident.group = 'cell_type' # set the Idents of the metacell seurat object
)


3#标准化:NormalizeMetacells
#提取metacell:GetMetacellObject
# normalize metacell expression matrix:
seurat_obj <- NormalizeMetacells(seurat_obj)
#  get the metacell object from the hdWGCNA experiment
metacell_obj <- GetMetacellObject(seurat_obj)
metacell_obj
seurat_obj
table(seurat_obj$cell_type,seurat_obj$Sample)
table(metacell_obj$cell_type,metacell_obj$Sample)


3.1#可选,是否处理metacell 进行可视化
if(F){
  seurat_obj <- NormalizeMetacells(seurat_obj)
  seurat_obj <- ScaleMetacells(seurat_obj, features=VariableFeatures(seurat_obj))
  seurat_obj <- RunPCAMetacells(seurat_obj, features=VariableFeatures(seurat_obj))
  seurat_obj <- RunHarmonyMetacells(seurat_obj, group.by.vars='Sample')
  seurat_obj <- RunUMAPMetacells(seurat_obj, reduction='harmony', dims=1:15)
  
  
  p1 <- DimPlotMetacells(seurat_obj, group.by='cell_type') + umap_theme() + ggtitle("Cell Type")
  p2 <- DimPlotMetacells(seurat_obj, group.by='Sample') + umap_theme() + ggtitle("Sample")
  
  p1 | p2
  
}



4#共表达网络分析
table(seurat_obj$cell_type)
table(metacell_obj$cell_type)
4.1#提取感兴趣的细胞亚群 如果是多个亚群 group_name = c("INH", "EX")

seurat_obj <- SetDatExpr(
  seurat_obj,
  group_name = c("Monocyte","Macrophage","Dendritic cell","Neutrophil",
                 "T cell","Fibroblast","Endothelial cell",
                 "Epithelial cell","B cell",
                 "Mesenchymal progenitor cell"), # the name of the group of interest in the group.by column
  group.by='cell_type', # the metadata column containing the cell type info. This same column should have also been used in MetacellsByGroups
  assay = 'RNA', # using RNA assay
  use_metacells = TRUE, # use the metacells (TRUE) or the full expression matrix (FALSE)
  slot = 'data' # using normalized data
)



4.2#选取软阈值 软阈值可以自动函数选择,也可以人工指定selected_power = NULL进行绘图

# Test different soft powers:
seurat_obj <- TestSoftPowers(
  seurat_obj,
  setDatExpr=FALSE,
  powers = c(seq(1, 10, by = 1), seq(12, 30, by = 2))) # 选取soft powers(默认)
# networkType = 'signed' # 网络类型 "unsigned" or "signed hybrid"

# plot the results:
plot_list <- PlotSoftPowers(seurat_obj,
                            point_size = 5,
                            text_size = 3)
# assemble with patchwork
wrap_plots(plot_list, ncol=2) 
# plot the results:
plot_list <- PlotSoftPowers(seurat_obj)
# assemble with patchwork
wrap_plots(plot_list, ncol=2)

power_table <- GetPowerTable(seurat_obj)
head(power_table)




4.3#拓扑重叠矩阵(TOM) Construct co-expression network

seurat_obj <- ConstructNetwork(
  seurat_obj, 
  soft_power=7, # 因为上面一张图看上去7比较好
  setDatExpr=FALSE,
  corType = "pearson",
  networkType = "signed",
  TOMType = "signed",
  detectCutHeight = 0.995,
  minModuleSize = 50,
  mergeCutHeight = 0.2,
  tom_outdir = "TOM", # 输出文件夹
  tom_name = 'many_celltypes' # name of the topoligical overlap matrix written to disk
)
PlotDendrogram(seurat_obj, main='INH hdWGCNA Dendrogram')

4.3.1 可选
#TOM <- GetTOM(seurat_obj)

4.4#计算ME值
DefaultAssay(seurat_obj)
length(VariableFeatures(seurat_obj))

# need to run ScaleData first or else harmony throws an error:
seurat_obj <- ScaleData(seurat_obj, features=VariableFeatures(seurat_obj))

# compute all MEs in the full single-cell dataset
seurat_obj <- ModuleEigengenes(
  seurat_obj,
  scale.model.use = "linear", #  choices are "linear", "poisson", or "negbinom"
  assay = NULL, # 默认:DefaultAssay(seurat_obj)
  pc_dim = 1,
  group.by.vars="Sample" # 根据样本去批次化 harmonize
)

#得到模型基因
##########################################################################################
# harmonized module eigengenes:
hMEs <- GetMEs(seurat_obj)
# module eigengenes:
MEs <- GetMEs(seurat_obj, harmonized=FALSE)

colnames(seurat_obj@misc[["tutorial"]][["hMEs"]])
seurat_obj@misc[["tutorial"]][["MEs"]][1:6,1:6]
seurat_obj@misc[["tutorial"]][["hMEs"]][1:6,1:6]


??write.xlsx
xlsx::write.xlsx(MEs,row.names = TRUE,sheetName = "MEs",
                     file = "./hdgcna/data/model_or_hub_genes.xlsx")
xlsx::write.xlsx(hMEs,row.names = TRUE,sheetName = "hMEs",append = T,
                 file = "./hdgcna/data/model_or_hub_genes.xlsx")


4.5#模块与性状间的相关性
#其实本质上还是设置成二分类变量或者数字,这样得到的结果就具有生物学意义
library(ggplot2)
library(Seurat)

if(T){
  # convert sex to factor
  seurat_obj$msex <- as.factor(seurat_obj$stim)
  
  # convert age_death to numeric
  seurat_obj$age_death <- as.numeric(seurat_obj$seurat_clusters)
  # list of traits to correlate
  head([email protected])
  cur_traits <- c( 'msex', 'age_death',  
                   'nCount_RNA', 'nFeature_RNA', 'percent.mt')
  
  str([email protected][,cur_traits])
  # 使用去批次化后的hME
  seurat_obj <- ModuleTraitCorrelation(
    seurat_obj,
    traits = cur_traits,
    features = "hMEs", # Valid choices are hMEs, MEs, or scores
    cor_method = "pearson", # Valid choices are pearson, spearman, kendall.
    group.by='cell_type'
  )
  # get the mt-correlation results
  mt_cor <- GetModuleTraitCorrelation(seurat_obj)
  
  names(mt_cor)
  # "cor"  "pval" "fdr"
  p=PlotModuleTraitCorrelation(
    seurat_obj,
    label = 'fdr', # add p-val label in each cell of the heatmap
    label_symbol = 'stars', # labels as 'stars' or as 'numeric'
    text_size = 2,
    text_digits = 2,
    text_color = 'white',
    high_color = '#fc9272',
    mid_color = '#ffffbf',
    low_color = '#9ecae1',
    plot_max = 0.2,
    combine=T # 合并结果
  )
  getwd()
 ggsave(plot=p,filename = "G:/silicosis/hdgcna/modle_traits_cor.png",
        height = 20,width = 20) 
  
  PlotModuleTraitCorrelation(
    seurat_obj,
    label = 'fdr', # add p-val label in each cell of the heatmap
    label_symbol = 'stars', # labels as 'stars' or as 'numeric'
    text_size = 2,
    text_digits = 2,
    text_color = 'white',
    high_color = '#fc9272',
    mid_color = '#ffffbf',
    low_color = '#9ecae1',
    plot_max = 0.2,
    combine=F # 合并结果
  )
  
  
  
}


4.6 #hubgenes和功能评分 Connectivity

# compute eigengene-based connectivity (kME):
seurat_obj <- ModuleConnectivity(
  seurat_obj,
  group.by = 'cell_type', 
  corFnc = "bicor", # to obtain Pearson correlation
  corOptions = "use='p'", # to obtain Pearson correlation
  harmonized = TRUE,
  assay = NULL,
  slot = "data", # default to normalized 'data' slot
  group_name = c("Monocyte","Macrophage","Dendritic cell","Neutrophil",
                 "T cell","Fibroblast","Endothelial cell",
                 "Epithelial cell","B cell",
                 "Mesenchymal progenitor cell") # 感兴趣的细胞群
)
# rename the modules
# 改名后后模块赋予新的名称,以new_name为基础,后续附加个数字
seurat_obj <- ResetModuleNames( #https://smorabit.github.io/hdWGCNA/articles/customization.html
  seurat_obj,
  new_name = "NEWname_"  # the base name for the new modules
)

# print out the new module names
modules <- GetModules(seurat_obj)

print(levels(modules$module))
# show the first 6 columns:
head(modules[,1:6]) #hub基因
head(modules)
options(java.parameters = "-Xmx2048m")
xlsx::write.xlsx(modules,row.names = TRUE,sheetName = "modules",append = T,
                 file = "./hdgcna/data/model_or_hub_genes.xlsx")
#openxlsx::addWorksheet(wb="./hdgcna/data/model_or_hub_genes.xlsx",sheetName = "modules")
#rm(ls())
#gc()

# plot genes ranked by kME for each module
p <- PlotKMEs(seurat_obj, 
              ncol=5,
              n_hubs = 10, # number of hub genes to display
              text_size = 2,
              plot_widths = c(3, 2) # the relative width between the kME rank plot and the hub gene text
)
p

# get hub genes  #hub基因
hub_df <- GetHubGenes(seurat_obj, n_hubs = 100)

head(hub_df)

getwd()
dir.create("./data")
openxlsx::write.xlsx(hub_df,file = "G:/silicosis/hdgcna/data/hub_df.xlsx")
saveRDS(seurat_obj, file='data/hdWGCNA_object.rds')
seurat_obj=readRDS("G:/silicosis/hdgcna/data/hdWGCNA_object.rds")

head([email protected])
table(seurat_obj$msex)


4.6.2 #基因功能评分
library(dplyr)
# compute gene scoring for the top 25 hub genes by kME for each module
# with Seurat method
seurat_obj <- ModuleExprScore(
  seurat_obj,
  n_genes = 25,
  method='Seurat'
)

# compute gene scoring for the top 25 hub genes by kME for each module
# with UCell method
#library(UCell)
seurat_obj <- ModuleExprScore(
  seurat_obj,
  n_genes = 25,
  method='UCell'
)



4.7#可视化
# make a featureplot of hMEs for each module
#其实features可以展示四种分数(hMEs, MEs, scores, or average)
plot_list <-ModuleFeaturePlot(
  seurat_obj,
  reduction = "umap",
  features = "hMEs",
  #features='scores', # plot the hub gene scores
  order_points = TRUE, # order so the points with highest hMEs are on top
  restrict_range = TRUE,
  point_size = 0.5,
  alpha = 1,
  label_legend = FALSE,
  raster_dpi = 500,
  raster_scale = 1,
  plot_ratio = 1,
  title = TRUE
)
# stitch together with patchwork
wrap_plots(plot_list, ncol=6)

# make a featureplot of hub scores for each module
plot_list <- ModuleFeaturePlot(
  seurat_obj,
  features='scores', # plot the hub gene scores
  order='shuffle', # order so cells are shuffled
  ucell = TRUE # depending on Seurat vs UCell for gene scoring
)

# stitch together with patchwork
wrap_plots(plot_list, ncol=6)


library(hdWGCNA)
4.71 模块间相关性
# plot module correlagram
ModuleCorrelogram(seurat_obj,
                  exclude_grey = TRUE, # 默认删除灰色模块
                  features = "hMEs" # What to plot? Can select hMEs, MEs, scores, or average
)





#气泡图
# get hMEs from seurat object
MEs <- GetMEs(seurat_obj, harmonized=TRUE)
mods <- colnames(MEs); mods <- mods[mods != 'grey']

# add hMEs to Seurat meta-data:
[email protected] <- cbind([email protected], MEs)
# plot with Seurat's DotPlot function
p <- DotPlot(seurat_obj, features=mods, group.by = 'cell_type')

# flip the x/y axes, rotate the axis labels, and change color scheme:
p <- p +
  coord_flip() +
  RotatedAxis() +
  scale_color_gradient2(high='red', mid='grey95', low='blue')

# plot output
p


#气泡图
if(T){# Plot INH-M4 hME using Seurat VlnPlot function
  p <- VlnPlot(
    seurat_obj,
    features = 'NEWname_2',
    group.by = 'cell_type',
    pt.size = 0 # don't show actual data points
  )
  
  # add box-and-whisker plots on top:
  p <- p + geom_boxplot(width=.25, fill='white')
  
  # change axis labels and remove legend:
  p <- p + xlab('') + ylab('hME') + NoLegend()
  
  # plot output
  p
  }

#批量出图
plot_list <- lapply(mods, function(x) {
  print(x)
  p <- VlnPlot(
    seurat_obj,
    features = x,
    group.by = 'cell_type',
    pt.size = 0 # don't show actual data points
  )
  
  # add box-and-whisker plots on top:
  p <- p + geom_boxplot(width=.25, fill='white')
  
  # change axis labels and remove legend:
  p <- p + xlab('') + ylab('hME') + NoLegend()
  
  p
})
wrap_plots(plot_list, ncol = 5)



单个模块网络图
library(igraph)
# Visualizes the top hub genes for selected modules as a circular network plot
ModuleNetworkPlot(
  seurat_obj,
  mods = "all", # all modules are plotted.
  outdir = "ModuleNetworks", # The directory where the plots will be stored.
  plot_size = c(6, 6),
  label_center = FALSE,
  edge.alpha = 0.25,
  vertex.label.cex = 1, # 基因标签的字体大小
  vertex.size = 6 # 节点的大小
)



组合网络图

if(T){
  # hubgene network
  HubGeneNetworkPlot(
    seurat_obj,
    mods = "all", # all modules are plotted.
    n_hubs = 3, 
    n_other=6,
    edge_prop = 0.75,
  )
  
  g <- HubGeneNetworkPlot(seurat_obj,  return_graph=TRUE)
  
  
  #umap网络图
  这里使用另一种方法umap来可视化共表达网络中的所有基因,主要在topological overlap matrix (TOM)矩阵计算出umap坐标
  seurat_obj <- RunModuleUMAP(
    seurat_obj,
    n_hubs = 10, # number of hub genes to include for the UMAP embedding
    n_neighbors=15, # neighbors parameter for UMAP
    min_dist=0.1 # min distance between points in UMAP space
  )
  
  # get the hub gene UMAP table from the seurat object
  umap_df <- GetModuleUMAP(seurat_obj)
  
  # plot with ggplot
  ggplot(umap_df, aes(x=UMAP1, y=UMAP2)) +
    geom_point(
      color=umap_df$color, # color each point by WGCNA module
      size=umap_df$kME*2 # size of each point based on intramodular connectivity
    ) +
    umap_theme()
  
  ModuleUMAPPlot(
    seurat_obj,
    edge.alpha=0.25,
    sample_edges=TRUE,
    edge_prop=0.1, # proportion of edges to sample (20% here)
    label_hubs=2 ,# how many hub genes to plot per module?
    keep_grey_edges=FALSE
    
  )
  dev.off()
  
}

你可能感兴趣的:(HDWGCNA)