Seurat -- SCTransform

  • 官方教程的流程
    整个流程缺少QC部分,也没有检查细胞周期和线粒体基因是否对聚类有影响、并在有影响时将其regress out的步骤。
# Install glmGamPoi (Bioconductor), which speeds up the SCTransform
# computation. Each package is installed only when it is missing, so
# re-running the script does not trigger a reinstall every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("glmGamPoi", quietly = TRUE)) {
  BiocManager::install("glmGamPoi")
}
# Install sctransform from CRAN.
if (!requireNamespace("sctransform", quietly = TRUE)) {
  install.packages("sctransform")
}


# 加载其他包
library(Seurat)
library(SeuratData)
library(patchwork)
library(dplyr)
library(ggplot2)


# Fetch the dataset.
# The ifnb dataset is distributed through the SeuratData package.
InstallData("ifnb")
# Load the dataset and bind it to `ifnb` explicitly; the code below reads
# `ifnb`, so do not rely on the dataset package's lazy data being attached.
ifnb <- LoadData("ifnb")

# Split the dataset into a list of two Seurat objects, keyed by the value of
# the "stim" metadata column (CTRL and STIM).
ifnb.list <- SplitObject(ifnb, split.by = "stim")

ctrl <- ifnb.list[["CTRL"]]  # single-cell data of the control group
stim <- ifnb.list[["STIM"]]  # single-cell data of the interferon-stimulated group

# Control dataset: SCTransform (v2 flavor) normalization, then PCA, UMAP,
# neighbor graph construction and clustering, one step per statement.
ctrl <- SCTransform(ctrl, vst.flavor = "v2", verbose = FALSE)
ctrl <- RunPCA(ctrl, npcs = 30, verbose = FALSE)
ctrl <- RunUMAP(ctrl, reduction = "pca", dims = 1:30, verbose = FALSE)
ctrl <- FindNeighbors(ctrl, reduction = "pca", dims = 1:30, verbose = FALSE)
ctrl <- FindClusters(ctrl, resolution = 0.7, verbose = FALSE)

# Stimulated dataset: only normalization and PCA are run here.
stim <- SCTransform(stim, vst.flavor = "v2", verbose = FALSE)
stim <- RunPCA(stim, npcs = 30, verbose = FALSE)

# Integration on the Pearson residuals computed above: select a list of
# informative features with SelectIntegrationFeatures(), then prepare the
# objects with PrepSCTIntegration().
ifnb.list <- list(ctrl = ctrl, stim = stim)
features <- SelectIntegrationFeatures(
  object.list = ifnb.list,
  nfeatures = 3000
)
ifnb.list <- PrepSCTIntegration(
  object.list = ifnb.list,
  anchor.features = features
)

# Find the integration anchors with FindIntegrationAnchors() and use them to
# merge the two datasets with IntegrateData().
immune.anchors <- FindIntegrationAnchors(
  object.list = ifnb.list,
  normalization.method = "SCT",
  anchor.features = features
)
immune.combined.sct <- IntegrateData(
  anchorset = immune.anchors,
  normalization.method = "SCT"
)

# Integrated analysis: PCA, UMAP, neighbor graph and clustering.
immune.combined.sct <- immune.combined.sct %>%
  RunPCA(verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:30, verbose = FALSE) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.3)

# Identify differentially expressed genes across conditions.
# The FindMarkers() calls below compare the identities "B_STIM" and "B_CTRL".
# After FindClusters() the active identities are cluster numbers, so build a
# combined "<cell type>_<condition>" label from the seurat_annotations and
# stim metadata columns and make it the active identity first.
immune.combined.sct$celltype.stim <- paste(
  immune.combined.sct$seurat_annotations,
  immune.combined.sct$stim,
  sep = "_"
)
Idents(immune.combined.sct) <- "celltype.stim"

# Differential expression uses the corrected counts stored in the data slot
# of the SCT assay. Run PrepSCTFindMarkers() first so that they are set
# properly.
immune.combined.sct <- PrepSCTFindMarkers(immune.combined.sct)

# Then use FindMarkers(assay = "SCT") to find differentially expressed genes.
b.interferon.response <- FindMarkers(
  immune.combined.sct,
  assay = "SCT",
  ident.1 = "B_STIM",
  ident.2 = "B_CTRL",
  verbose = FALSE
)
head(b.interferon.response, n = 15)

# When working on a subset of an object that already went through
# PrepSCTFindMarkers(), call FindMarkers() with recorrect_umi = FALSE so the
# existing corrected counts are reused.
immune.combined.sct.subset <- subset(
  immune.combined.sct,
  idents = c("B_STIM", "B_CTRL")
)
b.interferon.response.subset <- FindMarkers(
  immune.combined.sct.subset,
  assay = "SCT",
  ident.1 = "B_STIM",
  ident.2 = "B_CTRL",
  verbose = FALSE,
  recorrect_umi = FALSE
)


# The corrected counts can be used for visualization as well: switch the
# active identity to the annotation column, make SCT the default assay, and
# plot three genes split by stimulation condition.
Idents(immune.combined.sct) <- "seurat_annotations"
DefaultAssay(immune.combined.sct) <- "SCT"
FeaturePlot(
  immune.combined.sct,
  features = c("CD3D", "GNLY", "IFI6"),
  split.by = "stim",
  max.cutoff = 3,
  cols = c("grey", "red")
)
  • 第一个变化是运行FindMarkers的流程
# Fetch the dataset.
# The ifnb dataset is distributed through the SeuratData package.
InstallData("ifnb")
# Load the dataset and bind it to `ifnb` explicitly; the code below reads
# `ifnb`, so do not rely on the dataset package's lazy data being attached.
ifnb <- LoadData("ifnb")

# Split the dataset into a list of two Seurat objects, keyed by the value of
# the "stim" metadata column (CTRL and STIM).
ifnb.list <- SplitObject(ifnb, split.by = "stim")

ctrl <- ifnb.list[["CTRL"]]  # single-cell data of the control group
stim <- ifnb.list[["STIM"]]  # single-cell data of the interferon-stimulated group

# Control dataset: SCTransform (v2 flavor) normalization, then PCA, UMAP,
# neighbor graph construction and clustering, one step per statement.
ctrl <- SCTransform(ctrl, vst.flavor = "v2", verbose = FALSE)
ctrl <- RunPCA(ctrl, npcs = 30, verbose = FALSE)
ctrl <- RunUMAP(ctrl, reduction = "pca", dims = 1:30, verbose = FALSE)
ctrl <- FindNeighbors(ctrl, reduction = "pca", dims = 1:30, verbose = FALSE)
ctrl <- FindClusters(ctrl, resolution = 0.7, verbose = FALSE)

# Once clustering is done, the usual next step is to find marker genes and
# annotate the cell clusters.
# Run PrepSCTFindMarkers() first so the corrected counts are set properly.
ctrl.sct <- PrepSCTFindMarkers(ctrl)
# Then pass the assay together with the two clusters to compare.
response <- FindMarkers(
  ctrl.sct,
  assay = "SCT",
  ident.1 = 0,
  ident.2 = 1,
  verbose = FALSE
)
head(response, n = 15)

# If PrepSCTFindMarkers() has already been run on an object, a later
# FindMarkers() call on a subset of that object should pass
# recorrect_umi = FALSE so the existing corrected counts are reused.
# NOTE(review): this assumes immune.combined.sct from the integration workflow
# is still in the session and has been through PrepSCTFindMarkers().

# The subset is selected by the identities "B_STIM" / "B_CTRL", so (re)build
# the combined "<cell type>_<condition>" label and make it the active
# identity; the identities may have been switched to another column earlier.
immune.combined.sct$celltype.stim <- paste(
  immune.combined.sct$seurat_annotations,
  immune.combined.sct$stim,
  sep = "_"
)
Idents(immune.combined.sct) <- "celltype.stim"

# Store the subset under its own name rather than `subset`, which would
# shadow the base function of the same name.
immune.b.subset <- subset(
  immune.combined.sct,
  idents = c("B_STIM", "B_CTRL")
)
response.subset <- FindMarkers(
  immune.b.subset,
  assay = "SCT",
  ident.1 = "B_STIM",
  ident.2 = "B_CTRL",
  verbose = FALSE,
  recorrect_umi = FALSE
)


# The corrected counts can be used for visualization as well: selecting the
# SCT assay makes the plot use the SCTransform-corrected data.
Idents(immune.combined.sct) <- "seurat_annotations"
DefaultAssay(immune.combined.sct) <- "SCT"
FeaturePlot(
  immune.combined.sct,
  features = c("CD3D", "GNLY", "IFI6"),
  split.by = "stim",
  max.cutoff = 3,
  cols = c("grey", "red")
)
  • 第二个变化是数据集整合的流程
# Fetch the dataset.
# The ifnb dataset is distributed through the SeuratData package.
InstallData("ifnb")
# Load the dataset and bind it to `ifnb` explicitly; the code below reads
# `ifnb`, so do not rely on the dataset package's lazy data being attached.
ifnb <- LoadData("ifnb")

# Split the dataset into a list of two Seurat objects, keyed by the value of
# the "stim" metadata column (CTRL and STIM).
ifnb.list <- SplitObject(ifnb, split.by = "stim")

ctrl <- ifnb.list[["CTRL"]]  # single-cell data of the control group
stim <- ifnb.list[["STIM"]]  # single-cell data of the interferon-stimulated group

# Normalize each dataset with SCTransform (v2 flavor) and run PCA on it,
# one step per statement.
ctrl <- SCTransform(ctrl, vst.flavor = "v2", verbose = FALSE)
ctrl <- RunPCA(ctrl, npcs = 30, verbose = FALSE)

stim <- SCTransform(stim, vst.flavor = "v2", verbose = FALSE)
stim <- RunPCA(stim, npcs = 30, verbose = FALSE)

# Integration step 1: decide which features are used for integration.
# Working on the Pearson residuals computed above, select a list of
# informative features with SelectIntegrationFeatures(), then prepare the
# objects with PrepSCTIntegration().
ifnb.list <- list(ctrl = ctrl, stim = stim)
features <- SelectIntegrationFeatures(
  object.list = ifnb.list,
  nfeatures = 3000
)
ifnb.list <- PrepSCTIntegration(
  object.list = ifnb.list,
  anchor.features = features
)

# Integration step 2: find the anchors, then transform.
# FindIntegrationAnchors() locates the anchors and IntegrateData() uses them
# to merge the two datasets.
immune.anchors <- FindIntegrationAnchors(
  object.list = ifnb.list,
  normalization.method = "SCT",
  anchor.features = features
)
immune.combined.sct <- IntegrateData(
  anchorset = immune.anchors,
  normalization.method = "SCT"
)

# Integration is complete; continue with the downstream integrated analysis:
# PCA, UMAP, neighbor graph and clustering.
immune.combined.sct <- immune.combined.sct %>%
  RunPCA(verbose = FALSE) %>%
  RunUMAP(reduction = "pca", dims = 1:30, verbose = FALSE) %>%
  FindNeighbors(reduction = "pca", dims = 1:30) %>%
  FindClusters(resolution = 0.3)

# Differential expression (DE) analysis.
# Identify differentially expressed genes across conditions.
# The FindMarkers() calls below compare the identities "B_STIM" and "B_CTRL".
# After FindClusters() the active identities are cluster numbers, so build a
# combined "<cell type>_<condition>" label from the seurat_annotations and
# stim metadata columns and make it the active identity first.
immune.combined.sct$celltype.stim <- paste(
  immune.combined.sct$seurat_annotations,
  immune.combined.sct$stim,
  sep = "_"
)
Idents(immune.combined.sct) <- "celltype.stim"

# Differential expression uses the corrected counts stored in the data slot
# of the SCT assay. Run PrepSCTFindMarkers() first so that they are set
# properly.
immune.combined.sct <- PrepSCTFindMarkers(immune.combined.sct)

# Then use FindMarkers(assay = "SCT") to find differentially expressed genes.
b.interferon.response <- FindMarkers(
  immune.combined.sct,
  assay = "SCT",
  ident.1 = "B_STIM",
  ident.2 = "B_CTRL",
  verbose = FALSE
)
head(b.interferon.response, n = 15)

# When working on a subset of an object that already went through
# PrepSCTFindMarkers(), call FindMarkers() with recorrect_umi = FALSE so the
# existing corrected counts are reused.
immune.combined.sct.subset <- subset(
  immune.combined.sct,
  idents = c("B_STIM", "B_CTRL")
)
b.interferon.response.subset <- FindMarkers(
  immune.combined.sct.subset,
  assay = "SCT",
  ident.1 = "B_STIM",
  ident.2 = "B_CTRL",
  verbose = FALSE,
  recorrect_umi = FALSE
)

你可能感兴趣的:(scRNAseq,R,bioinfo,r语言,scRNA-seq)