技巧系列
截断
types<-c("protein_coding","lncRNA","processed_pseudogene","others")
p1<-ggboxplot(human_new_df, x = "type", y = "polya_length",
palette ="npg",fill = "type",legend = "none",
ylab = "Poly(A) Length")+coord_cartesian(ylim = c(0,160))+
theme(axis.text.x = element_text( vjust = 1, hjust = 1, angle = 45))
my_comparisons <- list(combn(types,2)[,1],combn(types,2)[,2],combn(types,2)[,3],combn(types,2)[,4],combn(types,2)[,5],combn(types,2)[,6] )
p2<-ggboxplot(human_new_df, x = "type", y = "polya_length",
palette ="npg",fill = "type")+
labs(x=NULL,y=NULL,fill=NULL) +
theme(axis.text.x = element_blank(),axis.ticks.x = element_blank(),axis.line.x= element_blank())+
coord_cartesian(ylim = c(600,800))+scale_y_continuous(breaks = c(600,800,100))
# p2<-p2+stat_compare_means(comparisons = my_comparisons,
# paired = F,method = "t.test",position = "identity",label = "p.signif")
ggarrange(p2,p1,heights=c(1/8, 7/8),ncol = 1, nrow = 2,common.legend = TRUE,legend="top",align = "v")
1、成对分析
常规用法
rm(list=ls())
suppressMessages(library(ggpubr))
suppressMessages(library(tidyverse))
suppressMessages(library(vcd))
my_comparisons <- list(c("control", "treatment"))
ggpaired(comb_fre_new, x = "xj", y ="percentage",ylab ="Gene 3' UTR Modification Frequence\n(% of Total Modificantion Events)",
line.color = "gray", line.size = 0.05,
palette = "npg"#c("#0A5EB9","#DF3D8C"
)
p<-p+stat_compare_means(comparisons = my_comparisons,
paired = T,method = "t.test",position = "identity",label = "p.signif")
做完是一根线,所以进阶改良(还是ggplot2 靠谱)
主要就是定义一个数xj 让它实现偏移
comb_fre_new$Group <- factor(comb_fre_new$Group, levels=unique(comb_fre_new$Group))
comb_fre_new$"x"<-c(rep(1,nrow(fed_fre)),rep(2,nrow(fasting_fre)))
comb_fre_new$xj <- jitter(comb_fre_new$x, amount=.04)
p<-ggplot(data=comb_fre_new, aes(y=percentage)) +
geom_boxplot(aes(x=Group, group=Group), width=0.2, outlier.shape = NA) +
geom_point(aes(x=xj,colour = factor(Group))) + scale_colour_manual(name="",values = c("control"="#0A5EB9", "treatment"="#DF3D8C"))+
geom_line(aes(x=xj, group=gene_name),size=0.05,color="gray") +theme_bw()
2、相关性分析
suppressMessages(library(ggpubr))
sp<-ggscatter(utr3_mean, x = "control", y = "treatment",
add.params = list(color = "black",linetype="dashed"), # Customize reg. line
add = "reg.line", # Add regressin line
conf.int = F,color ="#54C6DC",
ellipse.alpha=0.5,
xlab = "control",
ylab="treatment",
label = "Gene_symbol",
label.select = targets,
repel = T,
font.label = c(11, "bold", "#f16446")
)
sp<-sp + stat_cor(method = "pearson")
3、 boxplot
#例子1
new_data<-new_ccle_data %>% gather(key=Gene_name, value=expression_levels,-pick_levels)
colnames(new_data)<-c("Levels","Gene_name","Relative_expression_levels")
filename=paste0(data_dir,"/",j,"_expression_levels_in_",i,"_Median.pdf")
pdf(filename,width=3.5,height=5)
p <- ggboxplot(new_data, x = "Gene_name", y = "Relative_expression_levels",
color = "Levels", palette ="jco",
shape = "Levels",group="Levels")
p<-p + stat_compare_means(aes(group = Levels),label = "p.signif",hide.ns=F, paired=F,method = "t.test")
plot(p)
dev.off()
4、小提琴图
my_comparisons<-list(c("control","treatment"))
p<-ggviolin(human, x = "Group", y = "ELAVL1",
palette = c("#54C6DC","#F16446"),fill = "Group",
add = "boxplot", add.params = list(fill = "white"),
ylab = "Relative Expression Levels\n(TPM)")
p<-p + stat_compare_means(comparisons = my_comparisons,label = "p.signif",method = "t.test")
ggsave(p,filename ="~/Desktop/hur_kd/human_hur_kd_violin.pdf",width = 3,height = 5,units = "in" )
5、直方图
#例子1
p <- ggbarplot(data, x = "New_name", y = "Relative_Exp",
color = "Treatment", palette = c("#4DBBD5","#E64B35"),
ylab = "Relative Expression Levels",position = position_dodge(0.9),
fill = "Treatment",alpha=0.2,add = c("mean_se", "jitter"),add.params = list(size=0.5))
p<-p + stat_compare_means(aes(group = Treatment),label = "p.signif",hide.ns=F, paired=F,method = "t.test")
p<-p+theme(axis.text.x = element_text(size = 7, vjust = 0.7, hjust =0.7, angle = 45))
#例子2 横向
ggbarplot(df_top20, "pathway", "Frequence", orientation = "horiz",
fill = "Treatment", color = "Treatment", palette = c("#DF3D8C","#0A5EB9"),
label = TRUE,
position = position_dodge(0.95))+ scale_x_discrete(labels = wrap_format(30))
#例子3 横向占比
dis_rt$distribution <- factor(dis_rt$distribution, levels=unique(dis_rt$distribution))
ggbarplot(dis_rt, "donors", "Freq",
fill = "distribution", color = "black", palette = "npg",orientation = "horiz",
label = TRUE,lab.col = "white", lab.pos = "in",ylab = "Percentage (%)")
#例子4 计算各组比例
suppressMessages(library(tidyverse))
suppressMessages(library(ggpubr))
[email protected] %>%
group_by(seurat_clusters,orig.ident) %>%
count() %>%
group_by(seurat_clusters) %>%
mutate(percent=100*n/sum(n)) %>%
ungroup() %>%
ggbarplot(x="seurat_clusters",y="percent", fill="orig.ident",color = "black",palette = "jco")
#例子5
data<-degs %>% left_join(pcr,by="New_name")
data<-data[complete.cases(data$Relative_Exp),]
p <- ggbarplot(data, x = "New_name", y = "Relative_Exp",
color = "Treatment", palette = c("#4DBBD5","#E64B35"),
ylab = "Relative Expression Levels",position = position_dodge(0.9),
fill = "Treatment",alpha=0.2,add = c("mean_se", "jitter"),add.params = list(size=0.5))
p<-p + stat_compare_means(aes(group = Treatment),label = "p.signif",hide.ns=F, paired=F,method = "t.test")
p<-p+theme(axis.text.x = element_text(size = 7, vjust = 0.7, hjust =0.7, angle = 45))
#例子6
#带点的bar图
p<-ggbarplot(df, x = "group", y = "exp",add = c("mean_se", "dotplot"),
color = "group", palette = c("#0A5EB9","#EBB208"),
fill = "group",alpha=0.2,
ylab = "Plasma TG Levels (mg/dL)")+coord_cartesian(ylim = c(120,165))
my_comparisons<-list(c("V5","hAS1"))
p + stat_compare_means(comparisons = my_comparisons,label = "p.signif",method = "t.test")
6、散点图
#外圈为黑色的图
#fill 为固定颜色
library(ggplot2)
ggplot(data=new_data, aes(x =groups , y = Motif)) +
geom_point(aes(size = value),shape = 21,fill="#b2eb08", colour = "black")+theme_bw()+theme_classic()
#fill为变量
plot_pathway<-function(new_data){
suppressMessages(library(ggplot2))
suppressMessages(library(scales))
suppressMessages(library(gridExtra))
suppressMessages(library(ggthemes))
suppressMessages(library(stringr))
new_data$Group <- factor(new_data$Group, levels=unique(new_data$Group))
new_data$Description<-str_to_title(as.character(new_data$Description), locale = "")
p<-ggplot(data=new_data, # you can replace the numbers to the row number of pathway of your interest
aes(x =Group , y = Description)) +
geom_point(aes(size = Count,fill = -log10(pvalue)),shape = 21, colour = "black")+scale_fill_gradient2(low = "blue", high = "red",midpoint = 1.3,limit=c(min(new_data$'-log10(pvalue)'), max(new_data$'-log10(pvalue)')))+
theme_bw()+theme_classic()+
theme(axis.title.x = element_text(size=12,face="bold",colour = "black"),
axis.text.x = element_text(size=12,face="bold",colour = "black"),
axis.title.y = element_text(size=12,colour = "black",face = "bold"),
axis.text.y= element_text(size=12,face="bold",colour = "black"))+
theme(axis.text.x = element_text( face = "bold", vjust = 1, hjust = 1, angle = 45))+scale_y_discrete(labels = wrap_format(40))
return(p)
}
#火山图
rm(list = ls())
suppressMessages(library(ggpubr))
suppressMessages(library(tidyverse))
suppressMessages(library(ggrepel))
plot_degs<-function(degs_dir,fc,padj,top){
degs<-read.csv(degs_dir)
degs$sigORnot = as.factor(ifelse(degs$padj < padj & abs(degs$log2FoldChange) > fc,
ifelse(degs$log2FoldChange > fc ,'Increased','Decreased'),'Not Significant'))
degs_down<-degs %>% filter(sigORnot=="down")%>% arrange(log2FoldChange)
degs_down_list<-as.character(degs_down$Gene)[1:top]
degs_up<-degs %>% filter(sigORnot=="up")%>% arrange(desc(log2FoldChange))
degs_up_list<-as.character(degs_up$Gene)[1:top]
degs_not<-degs %>% filter(sigORnot=="Not Significant")
degs<-as.data.frame(rbind(degs_down,degs_not,degs_up))
degs$log_p<-(-log10(degs$padj))
leables<-c(degs_down_list,degs_up_list)
degs$sigORnot <- factor(degs$sigORnot, levels=unique(degs$sigORnot))
p<-ggscatter(degs,x="log2FoldChange",y="log_p",fill="sigORnot",
palette=c("#4DBBD5FF","grey70","#E64B35FF"),size=3,
ellipse.alpha=0.3,shape=21,color="black",
xlab="log2(FoldChange)",
ylab="-log10(Adjustedp-value)",
label="Gene",
label.select=leables,
repel=T,label.rectangle=T,
font.label=c(8,"bold","#3C5488FF")
)
p=p+geom_hline(yintercept=(-log10(padj)),linetype=3)+geom_vline(xintercept=c(-(fc),fc),linetype=3)
return(p)
}
#点图
ggplot(data=cv_data, # you can replace the numbers to the row number of pathway of your interest
aes(x =rank , y = cv)) +
geom_point(aes(fill = group,alpha=0.7),shape = 21,colour = "black" )+scale_fill_manual(values =c("#DF3D8C","#0A5EB9"))+
theme_bw()+theme_classic()
#点图 label
ggplot(data=df_pick,
aes(x =log2(tpm) , y = log2(n_c_ratio),label = treatment))+
geom_point(aes(fill = gene_name),shape = 21, colour = "black",size=2)+
scale_fill_manual(values =c("#F16446","#54C6DC"))+theme_bw()+theme_classic()+
xlim(c(-2,12))+ylim(c(-4,5.5))+
xlab("Log2(Whole Cell TPM)")+ylab("Log2(nucl. TPM/cyto. TPM)")+
geom_text(aes(colour = factor(gene_name)),hjust = 0, nudge_x = 0.2,size=3)
7、分布图
ggdensity(rt, x = "polya_length",
palette = c("#54C6DC","#F16446"), add = "mean", color = "doners",
ggtheme = theme_light(), legend = "top",xlab = "Poly(A) length")
8、画饼图
rm(list = ls())
suppressMessages(library(tidyverse))
suppressMessages(library(ggpubr))
suppressMessages(library(vcd))
distribution<-function(x){
if (x=="protein_coding"){
y="protein_coding"
}else if(x=="lncRNA"){
y="lncRNA"
}else if(x=="processed_pseudogene"){
y="processed_pseudogene"
} else {
y="others"
}
return(y)
}
data_1st$type<-unlist(lapply(as.character(data_1st$gene_type), FUN = distribution))
signature<-function(rt){
signature<-with(rt,table(type))
signature<-as.data.frame(prop.table(signature)*100)
signature$labs<-paste0(as.character(round(as.numeric(signature$Freq),2)),"%")
signature_rt=data.frame()
for (i in c("lncRNA","protein_coding","processed_pseudogene","others")){
signature_rrt<-signature[signature$type==i,]
signature_rt<-rbind(signature_rt,signature_rrt)
}
return(signature_rt)
}
data_1st_type<-signature(data_1st)
plot<-function(signature){signature$type <- factor(signature$type, levels=unique(signature$type))
p<-ggpie(signature,"Freq",
label = "labs",
lab.pos = "out", lab.font = "white",
fill = "type",
color = "black",
palette = "npg")
return(p)
}
data_1st_plot<-plot(data_1st_type)
data_1st_plot
点线图
ggdotchart(human_mouse_mean_rt, x = "human_name", y = "logFC",
color = "group", # Color by groups
palette = c("#4DBBD5FF","#E64B35FF"), # Custom color palette
sorting = "descending", # Sort value in descending order
add = "segments", # Add segments from y = 0 to dots
add.params = list(color = "lightgray", size = 0.2), # Change segment color and size
group = "group", # Order by groups
dot.size = 2.5,
shape = "group",ylab = "log2FoldChange",
ggtheme = theme_pubr(),
label="human_name",
label.select=human_mouse_combine_label,
label.rectangle=F
# ggplot2 theme
)+geom_hline(yintercept=0,linetype=2)
#折线分位图
gene_dist <- dist(df_zscore)
gene_hclust <- hclust(gene_dist, method = "complete")
plot(gene_hclust, labels = FALSE)
gene_cluster <- cutree(gene_hclust, k = 5) %>%
# turn the named vector into a tibble
enframe() %>%
# rename some of the columns
rename(gene = name, cluster = value)
df_zscore$gene<-row.names(df_zscore)
df_zscore_spread<-df_zscore %>% gather(key = "sample",value="frequence",-gene)
gene_cluster_zscore<-gene_cluster %>% left_join(df_zscore_spread,by="gene") %>%
mutate(group=unlist(lapply(as.character(sample), FUN = function(x) {return(strsplit(x, split = "_",fixed = T)[[1]][1])}))) %>%
mutate(treatment=unlist(lapply(as.character(sample), FUN = function(x) {return(strsplit(x, split = "_",fixed = T)[[1]][2])})))
gene_cluster_zscore$"group"<-factor(gene_cluster_zscore$group, levels=unique(gene_cluster_zscore$group))
gene_cluster_zscore$"treatment"<-factor(gene_cluster_zscore$treatment, levels=unique(gene_cluster_zscore$treatment))
gene_cluster_zscore$"cluster"<-factor(gene_cluster_zscore$cluster, levels=unique(gene_cluster_zscore$cluster))
gene_cluster_zscore %>%
ggplot(aes(group, frequence)) +
geom_line(aes(group = gene,colour = cluster),size=0.1) +
facet_grid(rows = vars(treatment), cols = vars(cluster)) +
scale_colour_manual(values =c("#E64B35FF", "#4DBBD5FF","#00A087FF","#3C5488FF","#F39B7FFF"))+
theme_bw()
9、相关性分析
10、韦恩图
11、个性化绘制Pathway
12 、Heatmap
13、sample distance heatmap
suppressMessages(library(DESeq2))
suppressMessages(library("gplots"))
sampleDists <- dist( t(tpm_zscore_tissues) )
sampleDistMatrix <- as.matrix( sampleDists )
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
hc <- hclust(sampleDists)
heatmap.2( sampleDistMatrix, Rowv=as.dendrogram(hc),
symm=TRUE, trace="none", col=colors,
margins=c(2,10), labCol=FALSE )
14、雷达密度图
ggplot(df, aes(x = log2(all_tpm_mean), y = log2(total_n_c))) +
stat_density2d(geom="density2d", aes(color = type,alpha = ..level..),contour=T,position = "identity")+
scale_color_manual(values =c("#54C6DC","#F16446"))+theme_bw()+theme_classic()+
geom_hline(yintercept=0,linetype=3)+xlim(c(-2,12))+ylim(c(-4,5.5))+
xlab("Log2(Whole Cell TPM)")+ylab("Log2(nucl. TPM/cyto. TPM)")
ggplot(df, aes(x=log2(al_n_c), y=log2(fast_n_c))) +
stat_density_2d(aes(fill = ..density..), geom = "raster", contour = FALSE) +
scale_fill_distiller(palette=4, direction=-1) +
scale_x_continuous(expand = c(0, 0)) +
scale_y_continuous(expand = c(0, 0)) +
theme(
legend.position='none'
)
ggplot(df, aes(x=log2(al_n_c), y=log2(fast_n_c)) ) +
geom_hex(bins = 60) +
scale_fill_continuous(type = "viridis") +
theme_bw()
ggplot(df, aes(x=log2(al_n_c), y=log2(fast_n_c)) ) +
stat_density_2d(aes(fill = ..level..), geom = "polygon", colour="white")
15、GSEA自定义做图