#第11章中级绘图(与ggplot2包进行对比)
#散点图
#图1 plot()函数
attach(mtcars)
plot(wt,mpg,
main="基本散点图",
xlab="车重",
ylab="每加仑英里数",pch=15)
abline(lm(mpg~wt),col="red",lwd=2,lty=1)
lines(lowess(wt,mpg),col="blue",lwd=2,lty=2)
#专门针对R,lowess和loess差异很小。这里有一个非常详细的解释:
#https : //support.bioconductor.org/p/2323/#图2 car::scatterplot
library(car)
car::scatterplot(mpg~wt|cyl,data=mtcars,span=0.75,
main="增强散点图",
xlab="车重",
ylab="每加仑英里数",
legend.plot=TRUE,
boxplots="xy")
#ggplot2绘制图1
library(ggplot2)
ggplot(aes(x=wt,y=mpg),data = mtcars)+geom_point(shape=16)+
geom_smooth(method="lm",se=FALSE,col="red",linewidth=1,lty=1)+
geom_smooth(method="loess",se=FALSE,col="blue",linewidth=1,lty=2)+
xlab("车重")+ylab("每加仑英里数")#ggplot2绘制图2
mtcars$cyl <- as.factor(mtcars$cyl)#转化成因子,否则会成为连续性变量,无法分类别绘制
ggplot(aes(x=wt,y=mpg,col=cyl,shape=cyl),data = mtcars)+geom_point()+
geom_smooth(method="lm",se=FALSE,lty=1)+
geom_smooth(method="loess",se=FALSE,lty=2)+
xlab("车重")+ylab("每加仑英里数")#散点矩阵图
#图3 pairs()函数绘制
pairs(~mpg+disp+drat+wt,data = mtcars,
main="基本散点图矩阵")#图4 car::scatterplotMatrix()函数绘制
car::scatterplotMatrix(~mpg+disp+drat+wt,data = mtcars,
spread=FALSE,smooth.args=list(lty=2),
main="car包创建的散点图矩阵")
detach(mtcars)
#高密度散点图
set.seed(004)
n <- 10000
c1 <- matrix(rnorm(n,mean=0,sd=0.5),ncol=2)
c2 <- matrix(rnorm(n,mean=3,sd=2),ncol=2)
mydata <- rbind(c1,c2)
mydata <- as.data.frame(mydata)
names(mydata) <- c("x","y")
str(mydata)
with(mydata,
plot(x,y,pch=19,
main="10000 个观测点的散点图"))#如何处理数据点重叠导致识别x与y的关系变得异常困难
#方法1 smoothScatter()
with(mydata,smoothScatter(x,y,main="利用光平滑密度估计绘制的散点图",
colramp = colorRampPalette(c("white", "black"))))#方法2 hexbin包中的hexbin函数将二元变量的封箱放到六边形单元格中
library(hexbin)
with(mydata,
{bin <- hexbin(x,y,xbins=50)
plot(bin,main="用六边形封箱图展示的各点上\n覆盖观测点数目的散点图")
})
#ggplot2 扰动点图 箱线图 小提琴图都能解决图形重叠的问题library(ggplot2)
ggplot(aes(x,y),data = mydata)+geom_jitter()# 扰动点图
ggplot(aes(x,y),data = mydata)+geom_boxplot(col="red",fill="gold")# 箱线图
ggplot(aes(x,y),data = mydata)+geom_violin(fill="pink",col="black")# 小提琴图#三维散点图
#scatterplot3d 一次性对三个定量变量的交互关系进行可视化
library(scatterplot3d)
with(mtcars,
scatterplot3d(wt,disp,mpg,
pch = 16,
highlight.3d=TRUE,#颜色
type = "h",#添加连接点与水平面的垂线
col.axis = "blue",#3D图外框颜色
col.grid = "gold",#网格线颜色
main="基本3D scatter图"))#添加回归面
attach(mtcars)
s3d <- scatterplot3d(wt,disp,mpg,
pch = 16,
highlight.3d=TRUE, #颜色
type = "h", #添加连接点与水平面的垂线
main="基本3D scatter图")
fit <- lm(mpg~wt+disp)
s3d$plane3d(fit)# Hemisphere半球图
temp <- seq(-pi, 0, length = 50)
x <- c(rep(1, 50) %*% t(cos(temp)))
y <- c(cos(temp) %*% t(sin(temp)))
z <- c(sin(temp) %*% t(sin(temp)))
scatterplot3d(x, y, z, highlight.3d = TRUE, angle = 120,
col.axis = "blue", col.grid = "lightblue", cex.axis = 1.3,
cex.lab = 1.1, main = "Hemisphere", pch = 20)#旋转三维散点图
#1 使用rgl::plot3d(x,y,z)
library(rgl)
attach(mtcars)
plot3d(wt,disp,mpg,col = "red",size = 5)#2 使用car::scatter3d
attach(mtcars)
car::scatter3d(wt,disp,mpg) #默认添加线性平面#气泡图 先创建一个二维散点图 然后用气泡大小来表示第三个变量的值
attach(mtcars)
symbols(wt,mpg,circles = disp,
inches = 0.2,#设置最大圆圈的尺寸,默认为1
fg="black",bg="lightblue",#圆圈的填充色和边沿色
main = "气泡图")
text(wt,mpg,row.names(mtcars),cex=0.6)#为圆圈添加汽车的名称
detach(mtcars)
#折线图 采用ggplot2绘制
#使用数据集Orange
#三散点图
t1 <- subset(Orange,Tree==1)
str(t1)
p1 <- ggplot(aes(age,circumference),data = t1)+
geom_point(size=2,col="forestgreen")+
xlab("Age(days)")+ylab("circumference(mm)")+
title("Orange Tree 1 Growth")
p2 <- ggplot(aes(age,circumference),data = t1)+
geom_line(linewidth=1,col="forestgreen")+geom_point(size=2.5)+
xlab("Age(days)")+ylab("circumference(mm)")+
title("Orange Tree 1 Growth")
library(cowplot)
plot_grid(p1,p2,ncol=2)#用pplot2展示物种橘树的生长情况
ggplot(Orange,aes(age,circumference,col=Tree))+
geom_line(aes(linetype=Tree))+
geom_point(size=2.5)+
xlab("Age(days)")+ylab("circumference(mm)")+
theme(legend.position = c(0.1,0.7))+
scale_colour_manual(values = c("red","blue","black","grey","gold"))+
ggtitle("Orange Tree 1 Growth")+
theme_bw() #背景透明
#有关背景的代码
p <- ggplot(Orange,aes(age,circumference,col=Tree))
p=p+theme_bw()#背景透明
p=p+theme(panel.grid.major = element_blank())#去掉主刻度
p=p+theme(panel.grid.minor = element_blank())#去掉次刻度
p=p+theme(panel.border = element_blank())#去掉外框线
#一次性实现透明 去掉主次刻度和外框线
p=p+theme(panel.background = element_rect(fill = "transparent"))#相关图
options(digits = 2)
str(mtcars)
mtcars$cyl <- as.numeric(mtcars$cyl)
cor(mtcars)
library(corrgram)
#表达式 corrgram(x,order=,panel=,text.panel=,diag.panel=)
corrgram::corrgram(mtcars,
order = TRUE,#order = TRUE矩阵行和列通过主成分分析法进行重排序
lower.panel = panel.shade,
upper.panel = panel.pie,
text.panel = panel.txt,
main = "mtcars各变量相关系数图")
#颜色表示相关性 越深相关性越大
#左下角:蓝色和↗表示两个变量呈正相关 红色和↘表示两个变量呈负相关
#右上角:从12点方向开始 正相关性顺时针 负相关性逆时针
#panel=有不同选择 panel.ellipse panel.pts panel.confcorrgram::corrgram(mtcars,
order = TRUE,#order = TRUE矩阵行和列通过主成分分析法进行重排序
lower.panel = panel.pts,
upper.panel = panel.ellipse,
text.panel = panel.txt,
main = "mtcars各变量相关系数图")#控制颜色
cols <- colorRampPalette(c("gold","red","green","blue"))
corrgram::corrgram(mtcars,col.regions = cols,
order = TRUE,#order = TRUE矩阵行和列通过主成分分析法进行重排序
lower.panel = panel.pie,
upper.panel = NULL,
text.panel = panel.txt,
main = "mtcars各变量相关系数图")
#ggplot2给热图添加显著性标记
library(tidyverse)
library(magrittr)
library(Hmisc)
#定义函数
cors <- function(df) {
m <- Hmisc::rcorr(as.matrix(df))
mdf <- map(m, ~data.frame(.x))
return(mdf)
}df_cor <- function(df){
cors(df) %>%
map(~rownames_to_column(.x, var="ID1")) %>%
map(~pivot_longer(.x, -ID1,"ID2")) %>%
bind_rows(.id = "id") %>%
pivot_wider(names_from = id, values_from = value) %>%
mutate(p_if = ifelse(P <.05, P, NA), # P <0.05 返回P值
r_if = ifelse(P <.05, r, NA), # p <0.05 返回r值
p_signif=symnum(P, corr = FALSE, na = FALSE,
cutpoints = c(0, 0.01, 0.05, 0.1, 1),
symbols = c("**", "*", ".", " "))) # 将P值转化为*
}
#计算相关性
corda <- df_cor(mtcars) %>% select(1,2,3) %>%
pivot_wider(.,names_from = ID2,values_from = r) %>%
column_to_rownames(var="ID1")corda[upper.tri(corda)] <- NA # 取上部分
#导出相关性表格
corda %>% write.table(file="cor.xls",sep="\t",quote = F,col.names = NA)
#导出p_value
P_value <- df_cor(mtcars) %>% select(1,2,P) %>%
pivot_wider(.,names_from = ID2,values_from = P) %>%
column_to_rownames(var="ID1")P_value[upper.tri(P_value)] <- NA
P_value %>% write.table(file="P_value.xls",sep="\t",quote = F,col.names = NA)
#数据整合
p <- corda %>% rownames_to_column(var="ID1") %>%
pivot_longer(-ID1) %>% drop_na() %>%
set_colnames(c("ID1","ID2","Pearson's\nCorrclation")) %>%
left_join(.,df_cor(mtcars) %>% select(1,2,p_signif),by=c("ID1","ID2"))p$ID1 <- factor(p$ID1,levels = unique(p$ID1))
p$ID2 <- factor(p$ID2,levels = unique(p$ID2))
#数据可视化 绘图
p %>% ggplot(aes(ID1,ID2,color=`Pearson's\nCorrclation`,fill=`Pearson's\nCorrclation`)) +
geom_tile(col="grey", fill="white",size=0.2)+
geom_point(aes(size = abs(`Pearson's\nCorrclation`)),shape=21)+
geom_text(aes(label=p_signif),size=5,color="white",hjust=0.5,vjust=0.7)+
labs(x = NULL, y = NULL)+
theme(axis.text.y=element_text(color="black",face="bold",family="Tims"),
panel.grid = element_blank(),
axis.ticks = element_blank(),
panel.background = element_blank(),
axis.text.x=element_text(color="black",face="bold",family="Tims",angle = 90,vjust=0.5,hjust=1),
legend.title = element_text(color="black",family="Tims",face="bold",size=8),
legend.text=element_text(color="black",size=8))+
scale_color_gradientn(colours = RColorBrewer::brewer.pal(11,"RdBu"))+
scale_fill_gradientn(colours = RColorBrewer::brewer.pal(11,"RdBu"))+
scale_x_discrete(expand=c(0,0),position = "bottom") +
scale_y_discrete(expand=c(0,0),position="right")+
scale_size(range=c(1,8), guide=NULL)+
guides(color = guide_colorbar(direction = "vertical",reverse = F,barwidth = unit(.5, "cm"),
barheight = unit(7.2,"cm")))#马赛克图 以Titanic数据集为例
library(vcd)
ftable(Titanic)
vcd::mosaic(~Class+Sex+Age+Survived,data=Titanic,shade=TRUE,legend=TRUE)
#根据模拟模型的皮尔逊残差值对图形上色
#ggplot2给热图添加显著性标记代码来源
#https://mp.weixin.qq.com/s/omJcvlxfiXZzLbqKIp_3yA