单细胞实战1:两样本合并

从读取表达矩阵到建立Seurat对象都在服务器上进行(文件太大)
建立Seurat对象后,save.image,之后转移到电脑上操作。
读取表达矩阵

> rm(list=ls())
> library(readr)
> control.matrix <- read_tsv("control_matrix.tsv")
── Column specification ───────────────────────────────────────────
cols(
  .default = col_double(),
  X1 = col_character()
)
ℹ Use `spec()` for the full column specifications.

|====================================================| 100% 1068 MB
Warning message:
Missing column names filled in: 'X1' [1] 

> F.matrix <- read_tsv("F_matrix.tsv")

── Column specification ───────────────────────────────────────────
cols(
  .default = col_double(),
  X1 = col_character()
)
ℹ Use `spec()` for the full column specifications.

|=====================================================| 100% 814 MB
Warning message:
Missing column names filled in: 'X1' [1] 
> class(control.matrix)
[1] "spec_tbl_df" "tbl_df"      "tbl"         "data.frame" 
> View(control.matrix)
control.matrix

将data.frame转化为matrix,遇到报错

> control.matrix <- as.matrix(control.matrix)
Error: vector memory exhausted (limit reached?)

原因:电脑内存不够
Mac上的解决方法:通过 .Renviron 文件提高R的向量内存上限(R_MAX_VSIZE)
步骤1:打开终端。

步骤2:在终端中依次执行以下命令:
cd ~
touch .Renviron
open .Renviron

步骤3:将以下内容保存为 .Renviron 的第一行:

R_MAX_VSIZE=100Gb

注意:此限制同时包括物理内存和虚拟内存;因此,在只有16Gb物理内存的计算机上设置 R_MAX_VSIZE=16Gb 可能无法避免此错误。您可能需要根据计算机的配置反复调整该参数的取值。
(参考:https://www.it1352.com/1587330.html)

> F.matrix <- as.matrix(F.matrix)
> class(F.matrix)
[1] "matrix" "array" 

构建行名为基因名,列名为细胞名的表达矩阵

#处理矩阵
> F.matrix[1:6,1:6]
      X1      AAACATCGAAACATCGCGACTGGA AAACATCGAAACATCGGCTAACGA
[1,] "Gnai3" "  1"                    "  1"                   
[2,] "Cdc45" "  0"                    "  0"                   
[3,] "H19"   "  0"                    "  0"                   
[4,] "Apoh"  "  0"                    "  0"                   
[5,] "Narf"  "  0"                    "  0"                   
[6,] "Cav2"  "  0"                    "  0"                   
     AAACATCGAAACATCGTAGGATGA AAACATCGAACAACCAGCCACATA AAACATCGAACCGAGAACTATGCA
[1,] "  1"                    "  0"                    "  1"                   
[2,] "  0"                    "  0"                    "  0"                   
[3,] "  0"                    "  0"                    "  0"                   
[4,] "  0"                    "  0"                    "  0"                   
[5,] "  0"                    "  0"                    "  0"                   
[6,] "  0"                    "  0"                    "  0"    
> rownames(F.matrix)=F.matrix[,1]
> F.matrix[1:6,1:6]
> F.matrix <- F.matrix[,2:ncol(F.matrix)]
> F.matrix[1:6,1:6]
AAACATCGAAACATCGCGACTGGA AAACATCGAAACATCGGCTAACGA AAACATCGAAACATCGTAGGATGA
Gnai3 "  1"                    "  1"                    "  1"                   
Cdc45 "  0"                    "  0"                    "  0"                   
H19   "  0"                    "  0"                    "  0"                   
Apoh  "  0"                    "  0"                    "  0"                   
Narf  "  0"                    "  0"                    "  0"                   
Cav2  "  0"                    "  0"                    "  0"                   
      AAACATCGAACAACCAGCCACATA AAACATCGAACCGAGAACTATGCA AAACATCGAACCGAGAGTCTGTCA
Gnai3 "  0"                    "  1"                    "  0"                   
Cdc45 "  0"                    "  0"                    "  0"                   
H19   "  0"                    "  0"                    "  0"                   
Apoh  "  0"                    "  0"                    "  0"                   
Narf  "  0"                    "  0"                    "  0"                   
Cav2  "  0"                    "  0"                    "  0"    
#处理control矩阵
> rownames(control.matrix)=control.matrix[,1]
> control.matrix <- control.matrix[,2:ncol(control.matrix)]

保留行名(基因名)和列名(样本名)信息

> dimnames=list(rownames(F.matrix),colnames(F.matrix))

矩阵中的数字为字符串,将其转换为数值。

>F.matrix=matrix(as.numeric(F.matrix),nrow = nrow(F.matrix),dimnames = dimnames)

在列名的barcode前 添加样本信息

colnames(control.matrix)=paste0('con_', colnames(control.matrix))
colnames(F.matrix)=paste0('F_', colnames(F.matrix))

保存矩阵

write.table(control.matrix,file="control.matrix.txt",sep="\t",row.names=T,quote=F)
write.table(F.matrix,file="F.matrix.txt",sep="\t",row.names=T,quote=F)

重新导入

control.matrix=read.table(file="control.matrix.txt",sep="\t")
F.matrix=read.table(file="F.matrix.txt",sep="\t")

方法1: 先将两个矩阵各自构建Seurat对象,再进行merge

con <- CreateSeuratObject(counts = control.matrix)
F <- CreateSeuratObject(counts = F.matrix)
all.matrix1 <- merge(con,F, add.cell.ids=c("con","F")) 
#add.cell.ids是为了防止数据集之间的barcodes重叠,给细胞添加ID
#这个方法得到的seurat对象的orig.ident是character,不是直接两个level。但active.ident是正常的两个level。WHY???(目前看没什么影响)
table(Idents(all.matrix1)) #查看每个样本有多少个细胞

  con    F
21963 17682 

方法2:先将两个矩阵合并再构建一个Seurat对象
按行合并矩阵,取并集

stringsAsFactors=F
all.matrix=merge(control.matrix, F.matrix, by = "row.names", all = TRUE)
#Replace NA's with zeros
all.matrix[is.na(all.matrix)] <- 0
#两个矩阵合并后得到了data.frame? 为啥? 原因:merge()会先把输入转换为数据框再合并,返回值始终是data.frame(此处用read.table重新导入的对象本身也已经是data.frame)
#将数据框转化为矩阵
all.matrix=as.matrix(all.matrix)
all.matrix[1:6,1:6]
    Row.names       con_AAACATCGAAACATCGAATGTTGC con_AAACATCGAAACATCGAGCAGGAA
[1,] "0610005C13Rik" "  0"                        "  0"                       
[2,] "0610006L08Rik" "  0"                        "  0"                       
[3,] "0610009B22Rik" "  0"                        "  0"                       
[4,] "0610009E02Rik" "  0"                        "  0"                       
[5,] "0610009L18Rik" "  0"                        "  0"                       
[6,] "0610009O20Rik" "  0"                        "  0"                       
    con_AAACATCGAAACATCGCAGCGTTA con_AAACATCGAACAACCAAAGAGATC
[1,] "  0"                        "  0"                       
[2,] "  0"                        "  0"                       
[3,] "  1"                        "  0"                       
[4,] "  0"                        "  0"                       
[5,] "  0"                        "  0"                       
[6,] "  0"                        "  0"                       
    con_AAACATCGAACCGAGAACAGCAGA
[1,] "  0"                       
[2,] "  0"                       
[3,] "  0"                       
[4,] "  0"                       
[5,] "  0"              
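#!!!为什么会这样:merge(by = "row.names")会把行名放进新增的第一列Row.names(字符型);as.matrix()遇到含有字符列的数据框时,会把整个矩阵都转换为字符型,所以数字变成了带空格的字符串,行名也丢失了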
#按上面处理单个矩阵的方法,处理all.matrix

#保存矩阵
write.table(all.matrix,file="all.matrix.txt",sep="\t",row.names=T,quote=F)

建立Seurat对象

scRNA1 <- CreateSeuratObject(counts = all.matrix)
save.image(file = "scRNA1.RData") #保存

使用filezilla传到电脑,使用Rstudio操作

你可能感兴趣的:(单细胞实战1:两样本合并)