首先,准备callpeak后的narrowpeak文件,和bam文件
利用bedtools multicov先计算每一个peak的count数。得到
然后R开始
# load packages ------
library(ChIPseeker)
library(org.Mm.eg.db)
library(gggenes)
library(ggplot2)
library(GenomeInfoDb)
library(GenomicFeatures)
library(diffloop)
library(GenomicRanges)
library(rio)
library(patchwork)
library(tidyverse)
library(venn)
library(ggsci)
library(scales)
library(edgeR)
library(stringi)
cols.use <- pal_d3("category10")(10)
show_col(cols.use)
options(scipen = 20)
options(ChIPseeker.ignore_1st_exon = T)
options(ChIPseeker.ignore_1st_intron = T)
options(ChIPseeker.ignore_downstream = T)
options(ChIPseeker.ignore_promoter_subcategory = F)
# prepera annotation files-------
mus102.anno <- makeTxDbFromGFF("C:/High-throughput sequencing/ensmble基因组/Mus_musculus.GRCm38.102.chr.gff3.gz",organism='Mus musculus')
seqlevels(mus102.anno)
promoter_ens <- getPromoters(TxDb=mus102.anno, upstream=3000, downstream=3000)
# load data-------
## chip reads counts
h3k4_0h_count <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov/H3K4me3_hCG_0h.peaks.reads.xls',header = F,sep = '\t')
h3k4_4h_count <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov//H3K4me3_hCG_4h.peaks.reads.xls',header = F,sep = '\t')
h3k27_0h_count <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov/H3K27ac_hCG_0h.peaks.reads.xls',header = F,sep = '\t')
h3k27_4h_count <- read.table('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/计算reads/bedtools_multicov/H3K27ac_hCG_4h.peaks.reads.xls',header = F,sep = '\t')
colnames(h3k4_0h_count) <- c('chr','start', 'end', 'peak_id', '-log10p', 'counts')
colnames(h3k4_4h_count) <- colnames(h3k4_0h_count)
colnames(h3k27_0h_count) <- colnames(h3k4_0h_count)
colnames(h3k27_4h_count) <- colnames(h3k4_0h_count)
## chip peaks
h3k4_0h_peaks <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K4ME3/H3K4me3_hCG_0h_peaks.narrowPeak')
h3k4_4h_peaks <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K4ME3/H3K4me3_hCG_4h_peaks.narrowPeak')
h3k27_0h_peaks <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K27ac/H3K27ac_hCG_0h_peaks.narrowPeak')
h3k27_4h_peaks <- readPeakFile('C:/High-throughput sequencing/!!!!GSE167940/ChIP-seq/H3K27ac/H3K27ac_hCG_4h_peaks.narrowPeak')
# peaks anno-------
# k4
k4_peaks <- list(h3k4_0h_peaks = h3k4_0h_peaks, h3k4_4h_peaks = h3k4_4h_peaks)
k4_peaks <- lapply(k4_peaks, rmchr)
seqlevels(k4_peaks[[1]])
k4_peakAnnoList <- lapply(k4_peaks, annotatePeak, TxDb=mus102.anno,tssRegion=c(-3000, 3000), verbose=FALSE,addFlankGeneInfo=TRUE, flankDistance=5000,annoDb="org.Mm.eg.db")
k4.anno.0h <- data.frame(k4_peakAnnoList$h3k4_0h_peaks)
k4.anno.4h <- data.frame(k4_peakAnnoList$h3k4_4h_peaks)
# k27
k27_peaks <- list(h3k27_0h_peaks = h3k27_0h_peaks, h3k27_4h_peaks = h3k27_4h_peaks)
k27_peaks <- lapply(k27_peaks, rmchr)
seqlevels(k27_peaks[[2]])
k27_peakAnnoList <- lapply(k27_peaks, annotatePeak, TxDb=mus102.anno,overlap='all',annoDb="org.Mm.eg.db")
k27.anno.0h <- data.frame(k27_peakAnnoList$h3k27_0h_peaks)
k27.anno.4h <- data.frame(k27_peakAnnoList$h3k27_4h_peaks)
# get cpm for chip------
h3k4_0h_count$cpm <- cpm(h3k4_0h_count$counts)
h3k4_4h_count$cpm <- cpm(h3k4_4h_count$counts)
h3k27_0h_count$cpm <- cpm(h3k27_0h_count$counts)
h3k27_4h_count$cpm <- cpm(h3k27_4h_count$counts)
# combine cpm with peaks's gene
colnames(k4.anno.0h)
h3k4_0h <- h3k4_0h_count %>% left_join( y = k4.anno.0h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))
h3k4_4h <- h3k4_4h_count %>% left_join( y = k4.anno.4h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))
h3k4_0h_gene.unique <- h3k4_0h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))
h3k4_4h_gene.unique <- h3k4_4h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))
h3k4.gene.cpm <- list(h3k4_0h_gene.unique = h3k4_0h_gene.unique, h3k4_4h_gene.unique = h3k4_4h_gene.unique)
export(h3k4.gene.cpm, file = 'h3k4.gene.cpm.corrected.xlsx')
h3k27_0h <- h3k27_0h_count %>% left_join( y = k27.anno.0h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))
h3k27_4h <- h3k27_4h_count %>% left_join( y = k27.anno.4h[,c(4,6,13,17,21,23)], by = c('peak_id' = 'V4'))
h3k27_0h_gene.unique <- h3k27_0h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))
h3k27_4h_gene.unique <- h3k27_4h %>% group_by(SYMBOL) %>% summarise(gene.cpm = sum(cpm))
h3k27.gene.cpm <- list(h3k27_0h_gene.unique = h3k27_0h_gene.unique, h3k27_4h_gene.unique = h3k27_4h_gene.unique)
export(h3k27.gene.cpm, file = 'h3k27.gene.cpm_2.xlsx')