参考资料:根据《R数据科学(中文完整版)》第一章内容总结。
1、散点图
library(pacman)
p_load(tidyverse)
# Read the class data set and convert it to a tibble, then print its structure.
# as_tibble() replaces tbl_df(), which is deprecated since dplyr 1.0.0.
df <- read.csv("./data_set/class.csv", header = TRUE) %>%
  as_tibble()
str(df)
## Classes 'tbl_df', 'tbl' and 'data.frame': 19 obs. of 5 variables:
## $ name : Factor w/ 19 levels "Alfred","Alice",..: 2 3 5 10 11 12 15 16 17 1 ...
## $ sex : Factor w/ 2 levels "F","M": 1 1 1 1 1 1 1 1 1 2 ...
## $ age : int 13 13 14 12 12 15 11 15 14 14 ...
## $ height: num 56.5 65.3 64.3 56.3 59.8 66.5 51.3 62.5 62.8 69 ...
## $ weight: num 84 98 90 77 84.5 ...
# Scatter plot of weight against height, coloured by sex (use `shape = sex`
# instead of `col = sex` to distinguish the groups by point shape).
# Tick labels are the break values multiplied by 2.54 (x) and 0.45 (y) to
# match the CM / KG axis titles; the plotted data themselves are untouched.
# NOTE(review): presumably the raw columns are in inches and pounds - confirm.
p1 <- ggplot(df, aes(height, weight, col = sex)) +
  geom_point() +
  theme_get() +
  labs(title = "", x = "身高(CM)", y = "体重(KG)") +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "none") +
  scale_x_continuous(
    breaks = seq(55, 70, 5),
    labels = seq(55, 70, 5) * 2.54
  ) +
  scale_y_continuous(
    breaks = seq(50, 150, 25),
    labels = round(seq(50, 150, 25) * 0.45, 0)
  ) +
  # LOESS smoother with a confidence band, fitted to the female rows only.
  geom_smooth(
    data = df %>% filter(sex == "F"),
    method = "loess", formula = y ~ x, se = TRUE
  )
p1
2、分面
# Height/weight scatter plot drawn as one panel per sex, two panels per row.
# Tick labels are rescaled the same way as the axis titles imply (x 2.54, y 0.45).
p2 <- ggplot(data = df) +
  geom_point(mapping = aes(x = height, y = weight)) +
  theme_get() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "", x = "身高(CM)", y = "体重(KG)") +
  scale_y_continuous(
    breaks = seq(50, 150, by = 25),
    labels = round(seq(50, 150, by = 25) * 0.45, 0)
  ) +
  scale_x_continuous(
    breaks = seq(55, 70, by = 5),
    labels = seq(55, 70, by = 5) * 2.54
  ) +
  # For a grid over two variables use facet_grid(), e.g. facet_grid(drv ~ cyl)
  # (drv and cyl are not columns of df; the example comes from another data set).
  facet_wrap(~ sex, ncol = 2)
p2
3、柱状图
# Horizontal bar chart of each student's weight, with the bars ordered by weight.
# Notes on the `position` argument of geom_bar():
#   "identity" draws every object exactly where it falls in the plot;
#   "fill" looks like stacking, but every stack of bars gets the same height;
#   "dodge" places the bars of each group side by side;
#   "jitter" adds a small random perturbation to every data point
#   (geom_jitter() does the same).
p3 <- ggplot(data = df) +
  geom_bar(
    mapping = aes(x = reorder(name, weight), y = weight),
    stat = "identity",
    fill = "dodgerblue"
  ) +
  theme_get() +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "", x = "", y = "体重") +
  coord_flip()
p3
4、箱线图
# Horizontal box plots of weight for each sex. The two fill colours are passed
# as a fixed vector (not mapped through aes()), one colour per box.
p4 <- ggplot(data = df) +
  geom_boxplot(
    mapping = aes(x = sex, y = weight),
    fill = c("violetred", "dodgerblue")
  ) +
  theme(plot.title = element_text(hjust = 0.5)) +
  labs(title = "", x = "", y = "") +
  coord_flip()
p4
5、饼图
# Count the rows per sex; the resulting data frame has the columns
# Var1 (the sex level) and Freq (its count).
df.sex <- table(df$sex) %>% as.data.frame()
# Slice labels: level name followed by its share of the total in percent,
# rounded to two decimals, e.g. "F(xx.xx%)".
label <- paste0(
  df.sex$Var1,
  "(", round(df.sex$Freq / sum(df.sex$Freq) * 100, 2), "%)"
)
# Blank theme: theme_minimal() with axis titles, tick marks, tick labels,
# panel border and grid lines removed; the plot title is bold, 14 pt, centred.
blank_theme <- theme_minimal() +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 14),
    axis.text = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    panel.grid = element_blank(),
    panel.border = element_blank()
  )
# Pie chart of the sex counts: one stacked bar drawn in polar coordinates.
p5 <- ggplot(df.sex, aes(x = "", y = Freq, fill = Var1)) +
  geom_bar(stat = "identity", width = 10) + # width >= 1 removes the artifact at the centre
  coord_polar(theta = "y", start = 0) +
  blank_theme +
  scale_fill_manual(values = c("violetred", "dodgerblue")) + # manual fill colours
  # Centre every label inside its own slice. position_stack() uses the same
  # stacking order as the bars; hand-computed cumulative sums assume the first
  # level sits at the bottom of the stack, which is not the order ggplot2
  # stacks in, so the labels ended up in the wrong slices.
  geom_text(
    aes(label = label),
    position = position_stack(vjust = 0.5),
    size = 5
  ) +
  theme(legend.position = "none") + # drop the legend
  labs(title = "", x = "", y = "") # blank out all titles
p5
6、统计变换
# Statistical transformation: for each sex draw the mean height as a point
# and a range bar from the minimum to the maximum height.
# fun / fun.min / fun.max replace fun.y / fun.ymin / fun.ymax, which are
# deprecated since ggplot2 3.3.0.
# The title and y-axis label name height, the variable that is plotted.
p6 <- ggplot(df) +
  stat_summary(
    aes(sex, height),
    fun = mean, fun.min = min, fun.max = max,
    na.rm = TRUE
  ) +
  theme_get() +
  labs(title = "性别身高分布图", x = "性别", y = "身高") +
  theme(plot.title = element_text(hjust = 0.5), legend.position = "top")
p6
7、拼图
# Combine the plots with patchwork: `|` puts plots side by side, `/` stacks them.
p_load(patchwork)
# p1 and p4 share the top row, p2 fills the bottom row.
(p1 | p4) / p2
# p4 and p5 next to each other in a single row.
p4 + p5 + plot_layout(nrow = 1)