基于R语言制作2019-nCov新型冠状病毒可视化报表

数据源:丁香园

* 基于丁香园官网数据,制作此次新型冠状病毒(2019-nCoV)疫情的感染情况可视化图表

* 应用到的R语言包主要有:rvest&stringr(爬取数据并规整)、REmap(绘制地理密度图&迁徙图)、echarts4r(制作交互式图表,也可用ggplot2做可视化)

* 本文仅供技术交流,如有意见或建议,欢迎评论或私信告知; 如有侵权,请私信联系删除

* 未完待续。。。

基础数据爬取

library(REmap)
library(baidumap)
require(rvest)
require(magrittr)
library(dplyr)
library(tidyr)
library(stringr)
library(knitr)
library(ggplot2)
library(ggthemes)
library(echarts4r)
library(data.table)
library(downloader)

# Source page: the DXY real-time pneumonia dashboard.
url <- 'https://3g.dxy.cn/newh5/view/pneumonia'

# Download and parse the page once, so that the regional and the nationwide
# extracts below come from the same snapshot and the site is requested only
# a single time.
dxy_page <- read_html(url)

# Province/city data: text of the node with id "getAreaStat".
# NOTE: the name `c` is the same as base::c(); calls such as c(...) still
# resolve to the function, and the name is kept because later sections read it.
c <- dxy_page %>%
  html_nodes("#getAreaStat") %>%
  html_text()

# Nationwide totals: text of the node with id "getStatisticsService".
total_data <- dxy_page %>%
  html_nodes("#getStatisticsService") %>%
  html_text()

# Nationwide totals: capture the text between `confirmedCount":` and `"virus`.
total_info <- str_extract_all(total_data, '(?<=confirmedCount\\"\\:).+?(?=\\"virus)')

# str_extract_all() returns a list with one element per input string, so the
# matches are taken with [[1]] (as the province and city sections do) instead
# of letting gsub() coerce the whole list to text.
# Every non-digit is blanked out, the remaining digit runs are split into the
# four counters in the order they appear, and the result is reshaped to long
# format (columns: id, variable, value) with numeric values.
# pivot_longer() (tidyr) is used because melt() resolves to data.table here,
# which only has a method for data.tables, not plain data frames.
total_data <- data.frame(x = gsub('[^0-9]', ' ', total_info[[1]])) %>%
  separate(x, c('confirmedCount','suspectedCount','curedCount','deadCount')) %>%
  mutate(id = seq_len(nrow(.))) %>%
  pivot_longer(-id, names_to = 'variable', values_to = 'value') %>%
  mutate(value = as.numeric(value)) %>%
  as.data.frame()

# Province-level records: one match per province, running from
# `provinceShortName":` up to (not including) `,"cities`.
province_info <- str_extract_all(c,'(?=provinceShortName\\"\\:).+?(?=\\,\\"cities)')

# Short province names, one per record.
province_name_info <- str_extract_all(province_info[[1]],'(?<=provinceShortName\\"\\:\\").+?(?=\\"\\,)') %>% unlist()

# Comment text, one per record. The capture starts right after `comment":`,
# so when the value is a quoted string it still begins with the opening
# quote; that quote is stripped here.
province_comment_info <- str_extract_all(province_info[[1]],'(?<=comment\\"\\:).+?(?=\\")') %>%
  unlist() %>%
  sub('^"', '', .)

# Blank out every non-digit and split what is left into columns. The
# `provincename` and `comment` pieces produced by separate() are only
# placeholders: both columns are overwritten with the text extracted above.
# All four counters are converted to numeric so that later sorting and
# summing work on numbers rather than on text.
province_other_info <- data.frame(x = gsub("[^0-9]"," ",province_info[[1]])) %>%
  separate(x, c('provincename','confirmedCount','suspectedCount','curedCount','deadCount','comment')) %>% 
  mutate(provincename = as.character(province_name_info),
         comment = as.character(province_comment_info),
         confirmedCount = as.numeric(confirmedCount),
         suspectedCount = as.numeric(suspectedCount),
         curedCount = as.numeric(curedCount),
         deadCount = as.numeric(deadCount),
         record_date = Sys.Date())

# Preview the first rows.
province_other_info %>% head()

# City-level records: one match per city, running from `cityName":` up to
# (not including) the next `},`.
cities_all_info <- str_extract_all(c,'(?=cityName\\"\\:).+?(?=\\}\\,)')

# City names, one per record.
city_name_info <- str_extract_all(cities_all_info[[1]],'(?<=cityName\\"\\:\\").+?(?=\\"\\,)') %>% unlist()

# Blank out every non-digit and split what is left into columns. The
# `cityname` piece produced by separate() is only a placeholder and is
# overwritten with the names extracted above.
# All four counters are converted to numeric (not just confirmedCount) so
# that sorting and summing them behaves numerically.
city_other_info <- data.frame(x = gsub("[^0-9]"," ",cities_all_info[[1]])) %>%
  separate(x, c('cityname','confirmedCount','suspectedCount','curedCount','deadCount')) %>% 
  mutate(cityname = as.character(city_name_info),
         confirmedCount = as.numeric(confirmedCount),
         suspectedCount = as.numeric(suspectedCount),
         curedCount = as.numeric(curedCount),
         deadCount = as.numeric(deadCount),
         record_date = Sys.Date())

# Preview the first rows.
city_other_info %>% head()

设定日期

 # Today's date as a "YYYY-MM-DD" string; used in chart titles and file names.
 cur_date <- format(Sys.Date(), "%Y-%m-%d")

可视化模块

REmap包-地图可视化


# Province density map with migration lines. The map is rendered in a new
# HTML page; to show it inside R Markdown, convert it to a GIF and embed that.

province_REmap <- province_other_info %>% 
  # Sort on the numeric value: the scraped counts may still be stored as
  # text, and sorting text would place "10" before "9".
  arrange(as.numeric(suspectedCount)) %>% 
  # Constant origin column for the migration lines.
  mutate(city_wuhan = '湖北') %>% 
  as.data.frame() 

# If the generated map is positioned incorrectly, run this API-key setting again.
options(remap.ak = 'your baidumapde API KEY')

REmap_feiyan <- remapC(
  # First two columns of the province table are used as the map data.
  province_other_info[, 1:2],
  maptype = "china",
  title = paste0("2019-nCov_map_", cur_date),
  theme = get_theme("Bright"),
  color = c("#CD0000", "#FFEC8B"),
  # Migration lines for five entries: rows 2 to 6 of the sorted table,
  # each drawn from the constant origin column to the province.
  markLineData = province_REmap[2:6, c('city_wuhan', 'provincename')],
  markLineTheme = markLineControl(
    color = "black",
    lineWidth = 2,
    lineType = "dashed"
  ),
  # Marker points for the same five rows.
  markPointData = province_REmap[2:6, 'provincename'],
  markPointTheme = markPointControl(
    symbolSize = 13,
    effect = TRUE,
    effectType = "scale",
    color = "red"
  )
)
options(remap.js.web = TRUE)   # save the map as a dynamic web page
plot(REmap_feiyan)             # saving also opens a browser window automatically

基于R语言制作2019-nCov新型冠状病毒可视化报表_第1张图片

echarts4r 图表可视化


# Long-format province table: one row per province and counter, with columns
# provincename, variable and value (numeric).
# Only the four counters are reshaped. Mixing them with the comment and date
# columns would force everything to text and produce NA values on conversion.
# pivot_longer() (tidyr) is used because melt() resolves to data.table here,
# which only has a method for data.tables, not plain data frames.
province_other_info_melt <- province_other_info %>%
  select(provincename, confirmedCount, suspectedCount, curedCount, deadCount) %>%
  mutate(confirmedCount = as.numeric(confirmedCount),
         suspectedCount = as.numeric(suspectedCount),
         curedCount = as.numeric(curedCount),
         deadCount = as.numeric(deadCount)) %>%
  pivot_longer(-provincename, names_to = 'variable', values_to = 'value') %>%
  as.data.frame()

# Nationwide summary: one bar per counter, with value labels and a tooltip.
e_charts(total_data, variable) %>%
  e_bar(value) %>%
  e_title(paste0("2019-nCov_summary_", cur_date)) %>%
  e_labels() %>%
  e_tooltip()

# Per-province summary: total of each counter by province, drawn as a bar
# chart with one timeline step per counter (the data is grouped by
# `variable` before being handed to e_charts with timeline = TRUE).
# NOTE(review): the title below says "city" although the chart is by
# province; confirm whether that is intended.
province_other_info_melt %>% 
  filter(variable %in% c('confirmedCount','deadCount','curedCount','suspectedCount')) %>% 
  group_by(provincename,variable) %>% 
  summarise(sum_count = sum(value)) %>% 
  ungroup() %>% 
  group_by(variable) %>% 
  # Largest totals first.
  arrange(desc(sum_count)) %>% 
  e_charts(provincename, timeline = TRUE) %>% 
  e_bar(sum_count) %>%
  e_title(paste0('2019-nCov_city_',cur_date)) %>% 
  e_labels() %>% 
  # Show every axis label, rotated so the province names do not overlap.
  e_x_axis(axisLabel = list(interval = 0, rotate = 45)) %>% 
  e_tooltip() %>% 
  e_timeline_opts(autoPlay = TRUE)

基于R语言制作2019-nCov新型冠状病毒可视化报表_第2张图片
基于R语言制作2019-nCov新型冠状病毒可视化报表_第3张图片

地理热图 & 时间线走势

#这部分由于无累积数据,因此直接调取网页图表

# Geographic heat map: download the image published on the source site.
# The target directory is created first (no-op if it already exists), since
# the download cannot write into a missing folder.
dir.create('picture', showWarnings = FALSE)
download('https://img1.dxycdn.com/2020/0201/450/3394153392393266839-135.png',paste0('picture/',cur_date,'_map.jpg'), mode = "wb")
#导入Rmarkdown
![](picture/2020-02-02_map.jpg)

# Timeline trend: download the image published on the source site.
# The target directory is created first (no-op if it already exists), since
# the download cannot write into a missing folder.
dir.create('picture', showWarnings = FALSE)
download('https://img1.dxycdn.com/2020/0202/725/3394327332126027029-135.png',paste0('picture/',cur_date,'_sjx.jpg'), mode = "wb")

![](picture/2020-02-02_sjx.jpg)

基于R语言制作2019-nCov新型冠状病毒可视化报表_第4张图片
基于R语言制作2019-nCov新型冠状病毒可视化报表_第5张图片

你可能感兴趣的:(基于R语言制作2019-nCov新型冠状病毒可视化报表)