基因表达趋势分析-1:TCseq

TCseq包可以应用于转录组,单细胞转录组,ATAC-seq,Chip-seq等的表观基因组时序型数据分析。该包主要用于不同时间点的差异分析,时间趋势分析及可视化作图。

官网:http://bioconductor.org/packages/release/bioc/html/TCseq.html

## 下载
if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")
BiocManager::install("TCseq")

## 查看网页版说明
browseVignettes("TCseq")

1. 数据准备,构建TCA对象

data("genomicIntervals") #基因的位置信息文件(染色体,起始及终止位点,基因id四列)
head(genomicIntervals)
#    chr    start      end    id
# 1 chr1  6453992  6454731 peak1
# 2 chr1  7823890  7824372 peak2
# 3 chr1  8029820  8030138 peak3
# 4 chr1  8030317  8030627 peak4
# 5 chr1 10880000 10880701 peak5
# 6 chr1 13154468 13154786 peak6

data("experiment_BAMfile") 
head(experiment_BAMfile)
#   sampleid timepoint group       bamfile
# 1       s1        0h     1   rep1_0h.bam
# 2       s2       24h     2  rep1_24h.bam
# 3       s3       40h     3  rep1_40h.bam
# 4       s4       56h     4  rep1_56h.bam
# 5       s5       72h     5  rep1_72h.bam
# 6       s6      120h     6 rep1_120h.bam

tca <- TCA(design = experiment_BAMfile, genomicFeature = genomicIntervals)
tca
data("experiment") #分组文件(包含样品,时间点,分组信息)
head(experiment)
#   sampleid timepoint group
# 1       s1        0h     1
# 2       s2       24h     2
# 3       s3       40h     3
# 4       s4       56h     4
# 5       s5       72h     5
# 6       s6      120h     6
data("countsTable")#基因表达矩阵文件(可以直接拿原始数据)
head(countsTable)
#        s1  s2  s3  s4  s5  s6  s7  s8  s9 s10 s11 s12
# peak1 344 243 169  70  57  20 298 199 135  63  54  34
# peak2  72 114  91  93 133 164  55  71  93 116 150 191
# peak3  28  50 109 115  94 109  60  89  75 129  85 101
# peak4  28  49 110 113 103 108  59  89  77 131  83 104
# peak5 464 344 280 154 108  33 444 255 259 155  97  32
# peak6 175 129  77  29  15   4 134  91  44  33  11   5
tca <- TCA(design = experiment, genomicFeature = genomicIntervals,counts = countsTable)
tca
counts(tca) <- countsTable
suppressWarnings(library(SummarizedExperiment))
se <- SummarizedExperiment(assays=list(counts = countsTable), colData = experiment)
tca <- TCAFromSummarizedExperiment(se = se, genomicFeature = genomicIntervals)
2. 差异分析

该包内置了我们常用的差异分析edgeR包,通过使用广义线性模型(GLM)方法去鉴定差异基因。

tca <- DBanalysis(tca)
tca <- DBanalysis(tca, filter.type = "raw", filter.value = 10, samplePassfilter = 2)
DBres <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"))
str(DBres, strict.width =  "cut")
head(DBres$`24hvs0h`)
DBres.sig <- DBresult(tca, group1 = "0h", group2 = c("24h","40h","72h"), top.sig = TRUE)
str(DBres.sig, strict.width =  "cut")

3. 时间趋势分析

tca <- timecourseTable(tca, value = "FC", norm.method = "rpkm", filter = TRUE)
tca <- timecourseTable(tca, value = "expression", norm.method = "rpkm", filter = TRUE)
t <- tcTable(tca)
head(t)
tca <- timeclust(tca, algo = "cm", k = 6, standardize = TRUE)
p <- timeclustplot(tca, value = "z-score(PRKM)", cols = 3)
print(p[[1]])

你可能感兴趣的:(基因表达趋势分析-1:TCseq)