71-R语言新冠疫情地图可视化

1、导入数据

> # Load the package manager, then attach dplyr (used for %>% and rename()).
> library(pacman)
> p_load(dplyr)
> # Read the confirmed-case table, which has one column per date.
> covid <- readr::read_csv("./data_set/novel-corona-virus-2019-dataset/time_series_covid_19_confirmed.csv")
> # Give the two location columns short names that need no backticks.
> covid <- rename(covid,
+                 province=`Province/State`,
+                 region=`Country/Region`)
> names(covid)
##   [1] "province" "region"   "Lat"      "Long"     "1/22/20"  "1/23/20" 
##   [7] "1/24/20"  "1/25/20"  "1/26/20"  "1/27/20"  "1/28/20"  "1/29/20" 
##  [13] "1/30/20"  "1/31/20"  "2/1/20"   "2/2/20"   "2/3/20"   "2/4/20"  
##  [19] "2/5/20"   "2/6/20"   "2/7/20"   "2/8/20"   "2/9/20"   "2/10/20" 
##  [25] "2/11/20"  "2/12/20"  "2/13/20"  "2/14/20"  "2/15/20"  "2/16/20" 
##  [31] "2/17/20"  "2/18/20"  "2/19/20"  "2/20/20"  "2/21/20"  "2/22/20" 
##  [37] "2/23/20"  "2/24/20"  "2/25/20"  "2/26/20"  "2/27/20"  "2/28/20" 
##  [43] "2/29/20"  "3/1/20"   "3/2/20"   "3/3/20"   "3/4/20"   "3/5/20"  
##  [49] "3/6/20"   "3/7/20"   "3/8/20"   "3/9/20"   "3/10/20"  "3/11/20" 
##  [55] "3/12/20"  "3/13/20"  "3/14/20"  "3/15/20"  "3/16/20"  "3/17/20" 
##  [61] "3/18/20"  "3/19/20"  "3/20/20"  "3/21/20"  "3/22/20"  "3/23/20" 
##  [67] "3/24/20"  "3/25/20"  "3/26/20"  "3/27/20"  "3/28/20"  "3/29/20" 
##  [73] "3/30/20"  "3/31/20"  "4/1/20"   "4/2/20"   "4/3/20"   "4/4/20"  
##  [79] "4/5/20"   "4/6/20"   "4/7/20"   "4/8/20"   "4/9/20"   "4/10/20" 
##  [85] "4/11/20"  "4/12/20"  "4/13/20"  "4/14/20"  "4/15/20"  "4/16/20" 
##  [91] "4/17/20"  "4/18/20"  "4/19/20"  "4/20/20"  "4/21/20"  "4/22/20" 
##  [97] "4/23/20"  "4/24/20"  "4/25/20"  "4/26/20"

2、数据适配

因为REmap包中获取的国家(region)名称和我们数据中的名称可能不一致,需要手动将其一一对应。比如美国在我们数据中为US,而在REmap中为United States of America。

> # Recode country names: name used in the data (left) -> name used by REmap
> # (right). One lookup table replaces a copy-pasted assignment per country.
> # The former "Antigua and Barbuda" -> "Bermuda" entry was removed: these are
> # different places, so it would attribute Antigua and Barbuda's cases to
> # whatever map region is named Bermuda.
> region.map <- c(
+   "Czechia"             = "Czech Republic",
+   "Congo (Kinshasa)"    = "Democratic Republic of the Congo",
+   "Timor-Leste"         = "East Timor",
+   "Guinea-Bissau"       = "Guinea Bissau",
+   "Cote d'Ivoire"       = "Ivory Coast",
+   "North Macedonia"     = "Macedonia",
+   "Serbia"              = "Republic of Serbia",
+   "Congo (Brazzaville)" = "Republic of the Congo",
+   "Korea, South"        = "South Korea",
+   "Eswatini"            = "Swaziland",
+   "Bahamas"             = "The Bahamas",
+   "Tanzania"            = "United Republic of Tanzania",
+   "US"                  = "United States of America",
+   "West Bank and Gaza"  = "West Bank"
+ )
> # %in% never yields NA, so rows with a missing region are simply skipped.
> to.fix <- covid$region %in% names(region.map)
> covid$region[to.fix] <- unname(region.map[covid$region[to.fix]])
> # covid$region[which(covid$region=="china")] <- "xianggang"

3、汇总数据

> # Keep one row per location, holding the value of the last date column.
> # The table is wide (one column per date). When value.var is not given,
> # dcast() guesses the last column, which is where `vol` came from before.
> # Naming the column explicitly gives the same result without the guess.
> latest.date <- tail(names(covid), 1)
> covid.sel <- covid %>% 
+   reshape2::dcast(`province` + `region` + `Long` + `Lat` ~ .,
+                   value.var = latest.date,
+                   fun.aggregate = sum) %>%
+   # Rename the "." column produced by the `~ .` formula.
+   rename(vol=".")
> str(covid.sel)
## 'data.frame':    264 obs. of  5 variables:
##  $ province: chr  "Alberta" "Anguilla" "Anhui" "Aruba" ...
##  $ region  : chr  "Canada" "United Kingdom" "China" "Netherlands" ...
##  $ Long    : num  -116.6 -63.1 117.2 -70 149 ...
##  $ Lat     : num  53.9 18.2 31.8 12.5 -35.5 ...
##  $ vol     : num  4480 3 991 100 106 ...

4、画图

> p_load(REmap)
> # Country names as spelled by REmap's world map.
> country <- data.frame(region=mapNames("world"))
> # Attach the counts to those names and keep only the name and the count.
> mapdata <- country %>%
+   left_join(covid.sel,by="region") %>%
+   select(region,vol)
> str(mapdata)
## 'data.frame':    257 obs. of  2 variables:
##  $ region: chr  "Afghanistan" "Angola" "Albania" "United Arab Emirates" ...
##  $ vol   : num  1531 26 726 10349 3892 ...
> # covid.sel has one row per province, so a country reported by province
> # appears several times and the map never receives its country-wide total.
> # Sum per country first, then attach the totals to the map names again.
> # Names without a match keep vol = NA and stay grey on the map.
> country.total <- covid.sel %>%
+   group_by(region) %>%
+   summarise(vol = sum(vol))
> mapdata <- left_join(country,country.total,by="region")
> # The stray trailing comma after `subtitle` was removed.
> covid.world <- remapC(mapdata,maptype = "world",
+                       color = c('#FD0100','#FFB8B5'),
+                       theme = get_theme("dark"),
+                       title = "2019-nCoV全球分布图",
+                       subtitle = "截止2020年4月26日")
> covid.world
## Save img as: C:\Users\Admin\AppData\Local\Temp\RtmpKs931s/ID_20200504184333_2814895.html
REmap地图

使用REmap包画的图会通过浏览器展示出来,鼠标放在某一区域,会自动显示相应数值。
灰色地带为region名称没有对应上的区域。

5、按时间序列画动态图

> p_load(animation,sp,maptools,ggplot2)
> # Read the polygon data for the map of China.
> # NOTE(review): readShapePoly() is deprecated in maptools; it is kept here
> # because no replacement reader is loaded in this session.
> china <- readShapePoly("./data_set/china_basic_map/bou2_4p.shp")
> # Province-level name of each polygon.
> province.name <- china$NAME %>% as.character()
> # Flatten the polygons into a data frame of vertices.
> china.sel <- fortify(china) %>% select(long,lat,group,id)
> # Put each polygon's name on the first row of that polygon ...
> china.sel$province[!duplicated(china.sel$id)] <- province.name
> # ... then carry it down to the remaining rows. fill() replaces the
> # deprecated fill_().
> china.sel <- tidyr::fill(china.sel,province,.direction="down")
> # Inspect the result (this call produces the output shown below).
> str(china.sel)
## 'data.frame':    89912 obs. of  5 variables:
##  $ long    : num  121 121 122 122 122 ...
##  $ lat     : num  53.3 53.3 53.3 53.3 53.3 ...
##  $ group   : Factor w/ 925 levels "0.1","1.1","2.1",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ id      : chr  "0" "0" "0" "0" ...
##  $ province: chr  "黑龙江省" "黑龙江省" "黑龙江省" "黑龙江省" ...
> # Keep the rows whose region is China or Taiwan*; the region column itself
> # is not needed afterwards.
> china.data <- covid %>%
+   filter(region %in% c("China","Taiwan*")) %>%
+   select(-"region")
> 
> # Replace the province names with the Chinese names used by the map data.
> # NOTE(review): the vector is assumed to be in the same order as the rows
> # selected above - verify if the input file changes.
> china.data$province <-
+   c("安徽省","北京市","重庆市","福建省","甘肃省","广东省",
+   "广西壮族自治区","贵州省","海南省","河北省","黑龙江省","河南省",
+   "香港特别行政区","湖北省","湖南省","内蒙古自治区","江苏省",
+   "江西省","吉林省","辽宁省","澳门特别行政区","宁夏回族自治区","青海省",
+   "陕西省","山东省","上海市","山西省","四川省","天津市",
+   "西藏自治区","新疆维吾尔自治区","云南省","浙江省","台湾省")
> 
> # Drop columns 2-3 (Lat, Long), attach the counts to every polygon row by
> # province name, then discard the polygon id.
> china.data <- china.data[,-c(2:3)] %>% 
+   right_join(china.sel,by="province") %>%
+   select(-c("id"))
> str(china.data)
> # Playback speed, image size and path of the converter program.
> ani.options(interval=0.5,
+             convert=shQuote("C:/Program Files/ImageMagick-7.0.10-Q16/convert.exe"),
+             ani.width=800,ani.height=800)
> 
> # Select the frames by column name instead of fixed positions (2:97 and
> # 98:100), so the loop still works when the input has more date columns.
> # Every 11th date column becomes one frame, as before.
> date.cols <- setdiff(names(china.data),c("province","long","lat","group"))
> stopifnot(length(date.cols) > 0)
> frame.dates <- date.cols[seq(1,length(date.cols),by=11)]
> saveGIF(
+   for(day in frame.dates) {
+     # Plain data.frame with that day's counts renamed to `vol`.
+     frame <- china.data[,c("province","long","lat","group",day)] %>%
+       as.data.frame()
+     names(frame)[5] <- "vol"
+     # NOTE: the fill scale is re-fitted for every frame, so colours are
+     # not comparable between frames.
+     p <- ggplot(frame,aes(long,lat,group=group,fill=vol)) +
+          geom_polygon(col="gray60") +
+          scale_fill_gradient(low="white",high="red") +
+          labs(title = day,x="",y="") +
+          theme(panel.grid = element_blank(),
+                panel.background = element_blank(),
+                axis.text = element_blank(),
+                axis.ticks = element_blank(),
+                legend.position = "none")
+     print(p)
+   }
+ )
疫情时间序列图

因为湖北的数据太大,导致其他区域的填充颜色太浅。
这里需要注意两点:1、数据一定要先转换为 data.frame,因为目前还不支持 tibble;2、图片的长宽比例需要手动调整(即 ani.options 中的 ani.width 和 ani.height),否则图片会很难看。

6、动态点图

> # Base layer: outline of China with white fill, no axes and no legend.
> p <- ggplot() +
+   geom_polygon(aes(long,lat,group=group),data=china,
+                fill="white",col="gray40") +
+   labs(title = "疫情地图",x="",y="") +
+   theme(legend.position = "none",
+         axis.ticks = element_blank(),
+         axis.text = element_blank(),
+         panel.background = element_blank(),
+         panel.grid = element_blank())
>
> # Rows for China and Taiwan*, keeping only Lat, Long and the date columns.
> china.data2 <- covid %>%
+   filter(region %in% c("China","Taiwan*")) %>%
+   select(-c("region","province"))

> # One frame for every 11th date column. Columns are selected by name
> # rather than by fixed positions (3:98).
> date.cols2 <- setdiff(names(china.data2),c("Lat","Long"))
> stopifnot(length(date.cols2) > 0)
> saveGIF(
+   for(day in date.cols2[seq(1,length(date.cols2),by=11)]) {
+     # Plain data.frame with that day's counts renamed to `vol`.
+     pts <- china.data2[,c("Lat","Long",day)] %>% as.data.frame()
+     names(pts)[3] <- "vol"
+     # geom_point() instead of geom_jitter(): jitter moves every point by a
+     # random offset, so map locations would shift and differ between runs.
+     pic <- p + geom_point(aes(Long,Lat,size=vol),
+                           data=pts,shape=19,col="red") +
+       scale_size(range = c(1.2,10)) +
+       labs(title = day,x="",y="")
+     print(pic)
+   })
疫情时间序列点图

你可能感兴趣的:(71-R语言新冠疫情地图可视化)