R语言绘图系列:
- R语言可视化及作图1--基础绘图(par函数,散点图,盒形图,条形图,直方图)
- R语言可视化及作图2--低级绘图函数
- R语言可视化及作图3--图形颜色选取
- R语言可视化及作图4--qplot和ggplot2美学函数
- R语言可视化及作图5--ggplot2基本要素和几何对象汇总
- R语言可视化及作图6--ggplot2之点图、条形图、盒形图、直方图、线图
- R语言可视化及作图7--ggplot2之标签、图例和标题绘制
- R语言可视化及作图8--坐标轴自定义和坐标系转换
- R语言可视化及作图9--主题函数
- R语言可视化及作图10--ggplot2的theme函数
- R语言可视化及作图11--图片分面函数和一页多图
venn图:展示不同的分类变量之间互相重叠的关系
火山图:展示基因表达差异
热图:展示不同基因表达聚类
1. venn图
1.1 limma包中的venn
library(limma)
# Simulate a 100 x 6 matrix of random values (illustration only, no real meaning).
Y <- matrix(rnorm(100 * 6), 100, 6)
# Shift rows 1-10 of columns 3-4 and rows 1-20 of columns 5-6 upwards by 3.
Y[1:10, 3:4] <- Y[1:10, 3:4] + 3
Y[1:20, 5:6] <- Y[1:20, 5:6] + 3
# Design matrix: a constant column plus 0/1 indicators for columns 3-4 and 5-6.
design <- cbind(1, c(0, 0, 1, 1, 0, 0), c(0, 0, 0, 0, 1, 1))
# Fit the linear model and pass it through eBayes(); `fit` feeds the Venn diagram.
fit <- eBayes(lmFit(Y, design))
# Classify each row / coefficient with decideTests().
results <- decideTests(fit)
a <- vennCounts(results)
print(a)
# par() returns the previous settings, so `op` is all that is needed to restore
# the layout afterwards.
# NOTE(review): a 1 x 2 layout is requested but only one diagram is drawn, so
# the second panel stays empty - confirm whether a second plot was intended.
op <- par(mfrow = c(1, 2))
vennDiagram(
  results,
  include = c("up", "down"),
  counts.col = c("red", "blue"),
  circle.col = c("red", "blue", "green3")
)
par(op)
1.2 venn.diagram()函数
library(VennDiagram)
# Build three sets of 200 SNP names each; the numeric suffixes are drawn
# without replacement from 1..1000.
SNP_pop_1 <- paste0("SNP_", sample(1:1000, 200, replace = FALSE))
SNP_pop_2 <- paste0("SNP_", sample(1:1000, 200, replace = FALSE))
SNP_pop_3 <- paste0("SNP_", sample(1:1000, 200, replace = FALSE))
venn.diagram(
  x = list(SNP_pop_1, SNP_pop_2, SNP_pop_3),
  category.names = c("SNP_pop_1", "SNP_pop_2", "SNP_pop_3"),
  # Output file; a full path is recommended.
  filename = "venn_diagramm.png",
  output = TRUE,
  # Image file settings.
  imagetype = "png",
  height = 800,
  width = 800,
  resolution = 300,
  compression = "lzw",
  # Circle outline and fill.
  lwd = 2,
  lty = "blank",
  fill = c("yellow", "purple", "green"),
  # Font settings for the numbers inside the diagram.
  cex = 1,
  fontface = "bold",
  fontfamily = "sans",
  # The cat.* arguments control the formatting of the category labels.
  cat.cex = 0.6,
  cat.fontface = "bold",
  cat.default.pos = "outer",
  cat.pos = c(-27, 27, 135),
  cat.dist = c(0.055, 0.055, 0.085),
  cat.fontfamily = "sans",
  rotation = 1
)
2. 火山图
2.1 limma包volcanoplot()
# Simulate 100 standard deviations, then a 100 x 6 matrix of normal values
# using them. The vector is named `gene_sd` so it does not mask stats::sd().
gene_sd <- 0.3 * sqrt(4 / rchisq(100, df = 4))
y <- matrix(rnorm(100 * 6, sd = gene_sd), 100, 6)
rownames(y) <- paste("Gene", 1:100)
# Shift the first two rows upwards by 2 in columns 4-6.
y[1:2, 4:6] <- y[1:2, 4:6] + 2
# Design matrix: constant column Grp1 and a 0/1 indicator Grp2vs1 for columns 4-6.
design <- cbind(Grp1 = 1, Grp2vs1 = c(0, 0, 0, 1, 1, 1))
# Print numbers with 3 significant digits. The option is spelled `digits`;
# a misspelled name only creates an unused option and changes nothing.
options(digits = 3)
fit <- lmFit(y, design)
fit <- eBayes(fit)
topTable(fit, coef = 2)
# Inspect the fitted object.
dim(fit)
colnames(fit)
rownames(fit)[1:10]
names(fit)
# Fold change thresholding
fit2 <- treat(fit, lfc = 0.1)
topTreat(fit2, coef = 2)
# Volcano plot for the second coefficient, highlighting 5 points.
volcanoplot(fit, coef = 2, highlight = 5)
2.2 ggplot2
# Take the coefficient matrix from the treat() fit and turn it into a data
# frame; the code below uses its Grp1 and Grp2vs1 columns.
data_df <- fit2$coefficients
class(data_df)
data_df <- as.data.frame(data_df)
# Two-level factor (0/1) flagging rows with a positive Grp1 coefficient;
# it drives the point colour.
data_df$fac <- as.factor(ifelse(data_df$Grp1 > 0, 1, 0))
class(data_df$fac)
library(ggplot2)
p <- ggplot() +
  geom_point(aes(Grp2vs1, Grp1, color = fac), data = data_df)
# Label the five rows with the largest Grp1 coefficient.
# NOTE(review): the ordering uses Grp1, not the Grp2vs1 contrast; if the intent
# is "most differentially expressed", Grp2vs1 may be the right column - confirm.
data_df$name <- paste("gene", seq_len(nrow(data_df)))
data_df <- data_df[order(data_df$Grp1, decreasing = TRUE), ]
# Blank every label except the first five (works for any number of rows).
data_df$name[-seq_len(5)] <- NA
# na.rm = TRUE drops the intentionally blank labels without a warning.
p +
  geom_text(
    aes(Grp2vs1, Grp1, label = name),
    data = data_df,
    nudge_y = -0.02,
    nudge_x = 0.05,
    na.rm = TRUE
  ) +
  theme_classic()
3. 热图
# Convert the built-in mtcars data frame to a matrix and preview its first rows.
data <- as.matrix(x = mtcars)
head(x = data)
# Heatmap with all default settings.
heatmap(x = data)
可以看到有些变量(列)的取值远高于其他变量,因此需要按列进行标准化:
# Heatmap with scale = "column".
heatmap(x = data, scale = "column")
# Same scaling, with Rowv and Colv set to NA.
heatmap(
  x = data,
  Rowv = NA,
  Colv = NA,
  scale = "column"
)
更改颜色:
1: native palette from R
# Column-scaled heatmap using 256 colours from cm.colors().
heatmap(x = data, col = cm.colors(n = 256), scale = "column")
# Column-scaled heatmap using 256 colours from terrain.colors().
heatmap(x = data, col = terrain.colors(n = 256), scale = "column")
2: RColorBrewer palette
library(RColorBrewer)
# Interpolate the 8-colour "PiYG" Brewer palette to 25 colours.
piyg_ramp <- colorRampPalette(brewer.pal(n = 8, name = "PiYG"))
coul <- piyg_ramp(25)
# Column-scaled heatmap drawn with the interpolated palette.
heatmap(x = data, col = coul, scale = "column")
Custom row and column labels: use cexRow and labRow for rows (cexCol and labCol for columns)
# Column-scaled heatmap with larger row labels, each prefixed with "new_",
# drawn with a 25-colour ramp built from the 8-colour "Blues" Brewer palette.
heatmap(
  x = data,
  scale = "column",
  cexRow = 1.5,
  labRow = paste0("new_", rownames(data)),
  col = colorRampPalette(brewer.pal(n = 8, name = "Blues"))(25)
)