1、导入数据
> library(pacman)
> p_load(dplyr)
> covid <- readr::read_csv("./data_set/novel-corona-virus-2019-dataset/time_series_covid_19_confirmed.csv") %>%
+ rename(province=`Province/State`,region=`Country/Region`)
> names(covid)
## [1] "province" "region" "Lat" "Long" "1/22/20" "1/23/20"
## [7] "1/24/20" "1/25/20" "1/26/20" "1/27/20" "1/28/20" "1/29/20"
## [13] "1/30/20" "1/31/20" "2/1/20" "2/2/20" "2/3/20" "2/4/20"
## [19] "2/5/20" "2/6/20" "2/7/20" "2/8/20" "2/9/20" "2/10/20"
## [25] "2/11/20" "2/12/20" "2/13/20" "2/14/20" "2/15/20" "2/16/20"
## [31] "2/17/20" "2/18/20" "2/19/20" "2/20/20" "2/21/20" "2/22/20"
## [37] "2/23/20" "2/24/20" "2/25/20" "2/26/20" "2/27/20" "2/28/20"
## [43] "2/29/20" "3/1/20" "3/2/20" "3/3/20" "3/4/20" "3/5/20"
## [49] "3/6/20" "3/7/20" "3/8/20" "3/9/20" "3/10/20" "3/11/20"
## [55] "3/12/20" "3/13/20" "3/14/20" "3/15/20" "3/16/20" "3/17/20"
## [61] "3/18/20" "3/19/20" "3/20/20" "3/21/20" "3/22/20" "3/23/20"
## [67] "3/24/20" "3/25/20" "3/26/20" "3/27/20" "3/28/20" "3/29/20"
## [73] "3/30/20" "3/31/20" "4/1/20" "4/2/20" "4/3/20" "4/4/20"
## [79] "4/5/20" "4/6/20" "4/7/20" "4/8/20" "4/9/20" "4/10/20"
## [85] "4/11/20" "4/12/20" "4/13/20" "4/14/20" "4/15/20" "4/16/20"
## [91] "4/17/20" "4/18/20" "4/19/20" "4/20/20" "4/21/20" "4/22/20"
## [97] "4/23/20" "4/24/20" "4/25/20" "4/26/20"
2、数据适配
因为REmap包中获取的国家(region)名称和我们数据中的名称可能不一致,需要手动将其一一对应。比如美国在我们数据中为US,而在REmap中为United States of America。
> covid$region[which(covid$region=="Antigua and Barbuda")] <- "Bermuda"
> covid$region[which(covid$region=="Czechia")] <- "Czech Republic"
> covid$region[which(covid$region=="Congo (Kinshasa)")] <- "Democratic Republic of the Congo"
> covid$region[which(covid$region=="Timor-Leste")] <- "East Timor"
> covid$region[which(covid$region=="Guinea-Bissau")] <- "Guinea Bissau"
> covid$region[which(covid$region=="Cote d'Ivoire")] <- "Ivory Coast"
> covid$region[which(covid$region=="North Macedonia")] <- "Macedonia"
> covid$region[which(covid$region=="Serbia")] <- "Republic of Serbia"
> covid$region[which(covid$region=="Congo (Brazzaville)")] <- "Republic of the Congo"
> covid$region[which(covid$region=="Korea, South")] <- "South Korea"
> covid$region[which(covid$region=="Eswatini")] <- "Swaziland"
> covid$region[which(covid$region=="Bahamas")] <- "The Bahamas"
> covid$region[which(covid$region=="Tanzania")] <- "United Republic of Tanzania"
> covid$region[which(covid$region=="US")] <- "United States of America"
> covid$region[which(covid$region=="West Bank and Gaza")] <- "West Bank"
> # covid$region[which(covid$region=="china")] <- "xianggang"
3、汇总数据
> # 按行求和
> covid.sel <- covid %>%
+ reshape2::dcast(`province` + `region` + `Long` + `Lat` ~ .,
+ fun.aggregate = sum) %>%
+ # 更名
+ rename(vol=".")
> str(covid.sel)
## 'data.frame': 264 obs. of 5 variables:
## $ province: chr "Alberta" "Anguilla" "Anhui" "Aruba" ...
## $ region : chr "Canada" "United Kingdom" "China" "Netherlands" ...
## $ Long : num -116.6 -63.1 117.2 -70 149 ...
## $ Lat : num 53.9 18.2 31.8 12.5 -35.5 ...
## $ vol : num 4480 3 991 100 106 ...
4、画图
> p_load(REmap)
> # 获取REmap中的国家名
> country <- data.frame(region=mapNames("world"))
> # 根据名称对应数据
> mapdata <- left_join(country,covid.sel,by="region") %>%
+ select(region,vol)
> str(mapdata)
## 'data.frame': 257 obs. of 2 variables:
## $ region: chr "Afghanistan" "Angola" "Albania" "United Arab Emirates" ...
## $ vol : num 1531 26 726 10349 3892 ...
> covid.world <- remapC(mapdata,maptype = "world",
+ color = c('#FD0100','#FFB8B5'),
+ theme = get_theme("dark"),
+ title = "2019-nCoV全球分布图",
+ subtitle = "截止2020年4月26日",)
> covid.world
## Save img as: C:\Users\Admin\AppData\Local\Temp\RtmpKs931s/ID_20200504184333_2814895.html
使用REmap包画的图会通过浏览器展示出来,鼠标放在某一区域,会自动显示相应数值。
灰色地带为region名称没有对应上的区域。
5、按时间序列画动态图
> p_load(animation,sp,maptools,ggplot2)
> # 读取中国地图的多边形数据
> china <- readShapePoly("./data_set/china_basic_map/bou2_4p.shp")
> # 提取省级名称
> province.name <- china$NAME %>% as.character()
> # 转换为数据框
> china.sel <- fortify(china) %>% select(long,lat,group,id)
> # 添加province列
> china.sel$province[!duplicated(china.sel$id)] <- province.name
> # 填充其他列
> china.sel <- tidyr::fill_(china.sel,fill_cols="province",.direction="down")
## 'data.frame': 89912 obs. of 5 variables:
## $ long : num 121 121 122 122 122 ...
## $ lat : num 53.3 53.3 53.3 53.3 53.3 ...
## $ group : Factor w/ 925 levels "0.1","1.1","2.1",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ id : chr "0" "0" "0" "0" ...
## $ province: chr "黑龙江省" "黑龙江省" "黑龙江省" "黑龙江省" ...
> # 抽取中国的疫情数据
> china.data <- covid %>% filter(region=="China" | region=="Taiwan*") %>%
+ select(-"region")
>
> china.data$province <-
+ c("安徽省","北京市","重庆市","福建省","甘肃省","广东省",
+ "广西壮族自治区","贵州省","海南省","河北省","黑龙江省","河南省",
+ "香港特别行政区","湖北省","湖南省","内蒙古自治区","江苏省",
+ "江西省","吉林省","辽宁省","澳门特别行政区","宁夏回族自治区","青海省",
+ "陕西省","山东省","上海市","山西省","四川省","天津市",
+ "西藏自治区","新疆维吾尔自治区","云南省","浙江省","台湾省")
>
> # 合并数据
> china.data <- china.data[,-c(2:3)] %>%
+ right_join(china.sel,by="province")
> china.data <- select(china.data,-c("id"))
> str(china.data)
> # 设置播放速度及图片大小,转换器路径
> ani.options(interval=0.5,
+ convert=shQuote("C:/Program Files/ImageMagick-7.0.10-Q16/convert.exe"),
+ ani.width=800,ani.height=800)
>
> saveGIF(
+ for(i in seq(2,97,by=11)) {
+ data <- china.data[,c(1,98:100,i)] %>% as.data.frame()
+ names(data)[5] <- "vol"
+ p <- ggplot(data,aes(long,lat,group=group,fill=vol)) +
+ geom_polygon(col="gray60") +
+ scale_fill_gradient(low="white",high="red") +
+ labs(title = names(china.data)[i],x="",y="") +
+ theme(panel.grid = element_blank(),
+ panel.background = element_blank(),
+ axis.text = element_blank(),
+ axis.ticks = element_blank(),
+ legend.position = "none")
+ print(p)
+ }
+ )
因为湖北的数据太大,导致其他区域的填充颜色太浅。
这个地方需要注意的有两点:1、数据一定要转换为data.frame,因为目前还不支持tibble;2、图片的长宽比例需要手动调整,不然图片会很难看。
6、动态点图
> # 画一个空白的中国地图
> p <- ggplot() +
+ geom_polygon(data=china,aes(long,lat,group=group),col="gray40",fill="white") +
+ labs(title = "疫情地图",x="",y="") +
+ theme(panel.grid = element_blank(),
+ panel.background = element_blank(),
+ axis.text = element_blank(),
+ axis.ticks = element_blank(),
+ legend.position = "none")
>
> china.data2 <- covid %>% filter(region=="China" | region=="Taiwan*") %>%
+ select(-c("region","province"))
> saveGIF(
+ for(i in seq(3,98,by=11)) {
+ data <- china.data2[,c(1,2,i)] %>% as.data.frame()
+ names(data)[3] <- "vol"
+ pic <- p + geom_jitter(aes(Long,Lat,size=vol),
+ data=data,shape=19,col="red") +
+ scale_size(range = c(1.2,10)) +
+ labs(title = names(china.data2)[i],x="",y="")
+ print(pic)
+ })