# 数据源:丁香园 (data source: DXY / Dingxiangyuan)
library(REmap)
library(baidumap)
require(rvest)
require(magrittr)
library(dplyr)
library(tidyr)
library(stringr)
library(knitr)
library(ggplot2)
library(ggthemes)
library(echarts4r)
library(data.table)
library(downloader)
# URL of the data source page.
url <- 'https://3g.dxy.cn/newh5/view/pneumonia'

# Download and parse the page once. Both extractions below reuse this parsed
# document, so they describe the same snapshot of the page and the site is
# requested only a single time.
page <- read_html(url)

# City & province data: text of the node with id "getAreaStat".
# The variable keeps the name `c` because later code reads it under that name;
# calls such as c(...) still resolve to the base function.
c <- page %>%
  html_nodes("#getAreaStat") %>%
  html_text()

# Overall totals: text of the node with id "getStatisticsService".
total_data <- page %>%
  html_nodes("#getStatisticsService") %>%
  html_text()
# Overall totals, parsed with a regular expression.
# `total_info` is the span of text that follows `confirmedCount":` and runs up
# to (not including) `"virus`.
total_info <- str_extract_all(total_data, '(?<=confirmedCount\\"\\:).+?(?=\\"virus)')

# Column names assigned, in order, to the runs of digits found in that span.
# NOTE(review): this is positional -- it assumes the span contains exactly these
# four counts in this order; separate() warns if the number of pieces differs.
total_count_cols <- c('confirmedCount', 'suspectedCount', 'curedCount', 'deadCount')

total_data <- data.frame(
  # str_extract_all() returns a list, so flatten it explicitly instead of
  # relying on implicit list-to-character coercion. Non-digits become spaces;
  # trimws() removes the trailing blank that would otherwise yield an extra,
  # empty piece.
  x = trimws(gsub('[^0-9]', ' ', unlist(total_info))),
  stringsAsFactors = FALSE
) %>%
  separate(x, total_count_cols) %>%
  # separate() yields character columns; the bar chart needs numbers.
  mutate(across(all_of(total_count_cols), as.numeric)) %>%
  # row_number() is safe for zero rows (1:nrow(.) would give c(1, 0)).
  mutate(id = row_number()) %>%
  # Long format with columns id / variable / value. pivot_longer() is used so
  # the step does not depend on data.table's melt() accepting a data.frame.
  pivot_longer(all_of(total_count_cols), names_to = "variable", values_to = "value") %>%
  # Keep the columns in their original order rather than alphabetical order.
  mutate(variable = factor(variable, levels = total_count_cols)) %>%
  arrange(variable, id) %>%
  as.data.frame()
# Province-level figures, parsed with regular expressions.
# Each element of `province_info[[1]]` is the text of one province record: it
# starts at `provinceShortName":` and stops just before `,"cities`.
province_info <- str_extract_all(c, '(?=provinceShortName\\"\\:).+?(?=\\,\\"cities)')
province_chunks <- if (length(province_info) > 0) province_info[[1]] else character(0)

# str_extract() (first match only) returns exactly one value per record, or NA
# when the field is missing, so every column below has one entry per province.
province_name_info <- str_extract(province_chunks, '(?<=provinceShortName\\"\\:\\").+?(?=\\"\\,)')
# The opening quote is part of the look-behind, so the comment text comes back
# without a leading `"`; an empty comment yields "".
province_comment_info <- str_extract(province_chunks, '(?<=comment\\"\\:\\")[^\\"]*')

# Read every count by its own quoted key instead of by its position among the
# digits of the record, so digits inside a name/comment or additional numeric
# fields cannot shift the columns. A key that is absent gives NA.
province_count_keys <- c('confirmedCount', 'suspectedCount', 'curedCount', 'deadCount')
province_counts <- lapply(province_count_keys, function(key) {
  as.numeric(str_extract(province_chunks, paste0('(?<=\\"', key, '\\"\\:)\\d+')))
})
names(province_counts) <- province_count_keys

# One row per province; all four counts are numeric. Column order:
# provincename, the four counts, comment, record_date.
province_other_info <- data.frame(
  provincename = as.character(province_name_info),
  as.data.frame(province_counts, stringsAsFactors = FALSE),
  comment = as.character(province_comment_info),
  # rep() keeps the frame valid when no province record was found.
  record_date = rep(Sys.Date(), length(province_chunks)),
  stringsAsFactors = FALSE
)
head(province_other_info)
# City-level figures, parsed with regular expressions.
# Each element of `cities_all_info[[1]]` is the text of one city record: it
# starts at `cityName":` and stops just before the next `},`.
cities_all_info <- str_extract_all(c, '(?=cityName\\"\\:).+?(?=\\}\\,)')
city_chunks <- if (length(cities_all_info) > 0) cities_all_info[[1]] else character(0)

# str_extract() (first match only) returns exactly one value per record, or NA
# when the field is missing, so the name vector always lines up with the rows.
city_name_info <- str_extract(city_chunks, '(?<=cityName\\"\\:\\").+?(?=\\"\\,)')

# Read every count by its own quoted key instead of by its position among the
# digits of the record, so digits inside a city name or additional numeric
# fields cannot shift the columns. A key that is absent gives NA.
city_count_keys <- c('confirmedCount', 'suspectedCount', 'curedCount', 'deadCount')
city_counts <- lapply(city_count_keys, function(key) {
  as.numeric(str_extract(city_chunks, paste0('(?<=\\"', key, '\\"\\:)\\d+')))
})
names(city_counts) <- city_count_keys

# One row per city; all four counts are numeric. Column order:
# cityname, the four counts, record_date.
city_other_info <- data.frame(
  cityname = as.character(city_name_info),
  as.data.frame(city_counts, stringsAsFactors = FALSE),
  # rep() keeps the frame valid when no city record was found.
  record_date = rep(Sys.Date(), length(city_chunks)),
  stringsAsFactors = FALSE
)
head(city_other_info)
# Today's date as text; used in chart titles and output file names.
cur_date <- as.character(Sys.Date())

# Province density map / migration map. Original author's note: the map is
# shown directly in a new HTML page; to show it inside Rmarkdown it can be
# turned into a gif and embedded.
#
# Table used for the migration lines: provinces ordered by suspectedCount, plus
# a constant origin column `city_wuhan`.
# as.numeric() makes the ordering numeric even when suspectedCount is stored as
# text (a character sort would place "10" before "9"); it is a no-op when the
# column is already numeric.
# NOTE(review): the map below uses rows 2-6 of this table -- confirm that this
# ordering puts the intended provinces in those rows.
province_REmap <- province_other_info %>%
  arrange(as.numeric(suspectedCount)) %>%
  mutate(city_wuhan = '湖北') %>%
  as.data.frame()
# Baidu Map API key read by REmap. Original author's note: if the generated map
# is positioned incorrectly, run this API setting again.
options(remap.ak = 'your baidumapde API KEY')

# Choropleth of the first two columns of province_other_info (province name and
# confirmed count), with migration lines and highlighted points on top.
REmap_feiyan <- remapC(
  province_other_info[, 1:2],
  maptype = "china",
  title = paste0("2019-nCov_map_", cur_date),
  theme = get_theme("Bright"),
  color = c("#CD0000", "#FFEC8B"),
  # Migration lines for five rows (rows 2-6 of province_REmap), each drawn from
  # the `city_wuhan` column to the `provincename` column.
  markLineData = province_REmap[2:6, c('city_wuhan', 'provincename')],
  markLineTheme = markLineControl(
    color = "black",
    lineWidth = 2,
    lineType = "dashed"
  ),
  # Highlighted points for the same five rows.
  markPointData = province_REmap[2:6, 'provincename'],
  markPointTheme = markPointControl(
    symbolSize = 13,
    # TRUE rather than T: T is an ordinary variable that can be reassigned.
    effect = TRUE,
    effectType = "scale",
    color = "red"
  )
)

# Original author's notes: this option is the "save as dynamic web page"
# setting, and plotting saves the page and opens it in a browser window.
options(remap.js.web = TRUE)
plot(REmap_feiyan)
# Long-format province table with columns provincename / variable / value.
# Only the four count columns are reshaped: stacking the comment and date
# columns together with the counts forces everything to text and then produces
# NA values (with coercion warnings) when converted back to numbers.
province_count_cols <- c('confirmedCount', 'suspectedCount', 'curedCount', 'deadCount')
province_other_info_melt <- province_other_info %>%
  select(provincename, all_of(province_count_cols)) %>%
  # Counts may still be stored as text; make every one numeric before stacking.
  mutate(across(all_of(province_count_cols), as.numeric)) %>%
  # pivot_longer() is used so the step does not depend on data.table's melt()
  # accepting a plain data.frame.
  pivot_longer(all_of(province_count_cols), names_to = "variable", values_to = "value") %>%
  # Keep the counts in column order (not alphabetical) and group the rows by
  # variable, preserving the province order within each variable.
  mutate(variable = factor(variable, levels = province_count_cols)) %>%
  arrange(variable) %>%
  as.data.frame()
# Overall summary: one bar per count type in total_data, labelled with its
# value, with a tooltip.
total_data %>%
  # The reshaped totals can hold their values as text; convert them so the bars
  # are drawn from numbers, as the province chart does. This is a no-op when
  # the column is already numeric.
  mutate(value = as.numeric(value)) %>%
  e_charts(variable) %>%
  e_bar(value) %>%
  e_title(paste0('2019-nCov_summary_', cur_date)) %>%
  e_labels() %>%
  e_tooltip()
# Summary by province: bar chart of the per-province sum of each count type,
# with one timeline step per count type.
# NOTE(review): the chart title says "city" although the data is grouped by
# province -- confirm which wording is intended before changing the string.
province_other_info_melt %>%
  filter(variable %in% c('confirmedCount', 'deadCount', 'curedCount', 'suspectedCount')) %>%
  group_by(provincename, variable) %>%
  summarise(sum_count = sum(value)) %>%
  ungroup() %>%
  # Grouping by variable is what gives the timeline one step per count type.
  group_by(variable) %>%
  arrange(desc(sum_count)) %>%
  # TRUE rather than T: T is an ordinary variable that can be reassigned.
  e_charts(provincename, timeline = TRUE) %>%
  e_bar(sum_count) %>%
  e_title(paste0('2019-nCov_city_', cur_date)) %>%
  e_labels() %>%
  # Show every province label on the x axis, rotated by 45 degrees.
  e_x_axis(axisLabel = list(interval = 0, rotate = 45)) %>%
  e_tooltip() %>%
  e_timeline_opts(autoPlay = TRUE)
# Original author's note: there is no cumulative data for this part, so the
# chart images are taken directly from the web page.
# Geographic heat map: download the image into the picture/ folder.
# Create the folder first; the download fails if it does not exist, and
# dir.create() is silent when the folder is already there.
# NOTE(review): the source URL is a .png but the file is saved with a .jpg
# extension; the name is kept because the Rmarkdown image link uses it.
dir.create('picture', showWarnings = FALSE)
download('https://img1.dxycdn.com/2020/0201/450/3394153392393266839-135.png', paste0('picture/', cur_date, '_map.jpg'), mode = "wb")
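# Embed in Rmarkdown. The next line is Markdown image syntax, not R code: put it in the Rmd body text, with the date in the file name matching the file saved by the download above.
# ![](picture/2020-02-02_map.jpg)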
# Timeline trend chart: download the image into the picture/ folder.
# Create the folder first; the download fails if it does not exist, and
# dir.create() is silent when the folder is already there.
# NOTE(review): the source URL is a .png but the file is saved with a .jpg
# extension; the name is kept because the Rmarkdown image link uses it.
dir.create('picture', showWarnings = FALSE)
download('https://img1.dxycdn.com/2020/0202/725/3394327332126027029-135.png', paste0('picture/', cur_date, '_sjx.jpg'), mode = "wb")
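# Markdown image syntax (not R code) -- put it in the Rmd body text, with the date in the file name matching the file saved by the download above:
# ![](picture/2020-02-02_sjx.jpg)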