在网上偶然间发现的一个R语言ggplot2做数据可视化的实例,提供数据和代码,今天的推文把代码拆解一下
实例数据下载链接
https://www.kaggle.com/berkeleyearth/climate-change-earth-surface-temperature-data?select=GlobalLandTemperaturesByCountry.csv
下载这个数据需要注册kaggle
代码链接
https://github.com/cnicault/30DayChartChallenge/blob/main/day12/day12_strips.Rmd
结果图
这个图展示的是法国1980年前后的温度差异,数据里提供很多个国家的数据,可以自己更改成其他国家的数据试试
首先是读取数据
这里接触了两个新的R包
vroom
here
climate <- vroom::vroom(here::here("GlobalLandTemperaturesByCountry.csv"))
关于lubridate包中的函数的一些用法
lubridate::year("1743-11-01")
lubridate::month("1743-11-01")
lubridate::month("1743-11-01",label = T)
lubridate::month("1743-11-01",label = F)
lubridate::day("1743-11-01")
构建作图的数据集
library(tidyverse)
monthly <- climate %>%
filter(Country == "France", !is.na(AverageTemperature)) %>%
mutate(year = lubridate::year(dt),
month = lubridate::month(dt, label = TRUE),
pos = lubridate::month(dt, label = FALSE),
color = ifelse(year > 1980, "Recent", "Past")) %>%
filter(year >=1900)
他这里先做了一个空白的热图
注释里写的是为了得到一个矩形的图例
library(ggplot2)
ggplot() +
# empty tile to get a legend with rectangle key
geom_tile(data = monthly,
aes(x = 0, y =0, width =0,
height = 0, fill = color))
接下来是添加线段
seg <- tibble(x = c(0, 0, 10, 0, 9, 3, 8, 5, 6),
xend = c(12.5, 3, 12.5, 5, 12.5, 6, 11, 10, 8),
y = c(0, 5, 5, 10, 10, 15, 15, 20, 25),
yend = c(0, 5, 5, 10, 10, 15, 15, 20, 25))
ggplot() +
# empty tile to get a legend with rectangle key
geom_tile(data = monthly,
aes(x = 0, y =0,
width =0,
height = 0,
fill = color)) +
# y-axis
geom_segment(data = seg,
aes(x = x, xend = xend,
y = y, yend = yend),
color = "red",
linetype = "12")
添加文本注释
seg_lab <- tibble(x = c(0, 0, 0, 3, 5, 6),
y = seq(0,25, 5))
ggplot() +
# empty tile to get a legend with rectangle key
geom_tile(data = monthly,
aes(x = 0, y =0,
width =0,
height = 0,
fill = color)) +
# y-axis
geom_segment(data = seg,
aes(x = x, xend = xend,
y = y, yend = yend),
color = "black", linetype = "12") +
geom_text(data = seg_lab, aes(x = x, y = y,
label = glue::glue("{y} °C")),
color = "black", nudge_y = 1,
family = "serif", hjust = 0)
添加抖动的散点
ggplot() +
# empty tile to get a legend with rectangle key
geom_tile(data = monthly,
aes(x = 0, y =0,
width =0,
height = 0,
fill = color)) +
# y-axis
geom_segment(data = seg,
aes(x = x, xend = xend,
y = y, yend = yend),
color = "white",
linetype = "12") +
geom_text(data = seg_lab,
aes(x = x, y = y,
label = glue::glue("{y} °C")),
color = "white", nudge_y = 1,
family = "serif", hjust = 0) +
# show.legend = FALSE to remove the shape of the point in the legend
geom_jitter(data = filter(monthly, color == "Recent"),
aes(x = pos+0.2, y = AverageTemperature,
fill = color), width = 0.15,
height =0, size = 3, shape = 21,
stroke = 0.3, color = "#FFDADC",
show.legend = FALSE) +
geom_jitter(data = filter(monthly, color == "Past"),
aes(x = pos-0.2, y = AverageTemperature,
fill = color), width = 0.15,
height =0, size = 2.5, shape = 21,
stroke = 0.3, color = "#93E2F5",
show.legend = FALSE)
接下来就是对细节的调整了
axis_labels <- tibble(month = lubridate::month(seq(1,12,1),
label = TRUE),
pos = seq(1,12,1))
txt_clr <- "white"
pal1 <- c("#105182", "#1a7bc5", "#42a2f1", "#E9F1F2", "#ff9193", "#f1434a", "#c91022", "#8d0613", "#4D030A")
monthly_plt <- ggplot() +
# empty tile to get a legend with rectangle key
geom_tile(data = monthly,
aes(x = 0, y =0,
width =0, height = 0,
fill = color)) +
# y-axis
geom_segment(data = seg,
aes(x = x, xend = xend,
y = y, yend = yend),
color = "white", linetype = "12") +
geom_text(data = seg_lab,
aes(x = x, y = y, label = glue::glue("{y} °C")),
color = "white", nudge_y = 1,
family = "serif", hjust = 0) +
# show.legend = FALSE to remove the shape of the point in the legend
geom_jitter(data = filter(monthly, color == "Recent"),
aes(x = pos+0.2, y = AverageTemperature, fill = color),
width = 0.15, height =0, size = 3,
shape = 21, stroke = 0.3, color = "#FFDADC", show.legend = FALSE) +
geom_jitter(data = filter(monthly, color == "Past"),
aes(x = pos-0.2, y = AverageTemperature, fill = color),
width = 0.15, height =0, size = 2.5,
shape = 21, stroke = 0.3, color = "#93E2F5",
show.legend = FALSE) +
# x-axis labels
geom_text(data = axis_labels,
aes(x = pos, y = -2, label = month),
color = "white", vjust = 0,
angle = 90, size = 5, family = "serif")+
# scales
scale_fill_manual(values = c("Recent" = "#f1434a", "Past" = "#1a7bc5"),
labels = c("Recent" = "> 1980", "Past" = "<= 1980")) +
scale_y_continuous(limits = c(-4,26),
breaks = seq(0,25,5)) +
labs(fill = "Observations") +
theme_void() +
guides(fill = guide_legend(label.position = "top",
title.hjust = 0.5,
keyheight = unit(1, "line"),
keywidth = unit(4, "line"),
nrow = 1),
color = FALSE) +
theme(plot.background = element_rect(fill = "grey40", color = NA),
legend.position = c(0.13, 0.85),
legend.text = element_text(face = "bold",
size = 12, color = txt_clr),
legend.title = element_text(face = "bold", size = 14, color = txt_clr))
monthly_plt
欢迎大家关注我的公众号
小明的数据分析笔记本
小明的数据分析笔记本 公众号 主要分享:1、R语言和python做数据分析和数据可视化的简单小例子;2、园艺植物相关转录组学、基因组学、群体遗传学文献阅读笔记;3、生物信息学入门学习资料及自己的学习笔记!