本文采用爬虫、AR模型以及pyecharts技术,实现从腾讯新闻接口爬取疫情数据,并将数据用于训练模型,最后将模型的预测结果可视化展示,代码如下:
from random import random
import pandas as pd
import requests
import numpy as np
import pyecharts.options as opts
from pyecharts.globals import ThemeType
from pyecharts.commons.utils import JsCode
from pyecharts.charts import Timeline, Grid, Bar, Map, Pie, Line
import datetime
from typing import List
from datetime import date
from requests.models import codes
from lxml import etree
from urllib import parse
from statsmodels.tsa.arima.model import ARIMA
# Tencent News epidemic endpoint. Only the keys of the `provinceCompare`
# module (the province names) are used further down.
url = "https://api.inews.qq.com/newsqa/v1/query/inner/publish/modules/list?modules=chinaDayList,chinaDayAddList,nowConfirmStatis,provinceCompare"
headers = {
    "Referer": "https://news.qq.com/",
    "Host": "api.inews.qq.com",
    "Origin": "https://news.qq.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/88.0.4324.182 Safari/537.36"
}
# A timeout keeps the script from hanging forever on a stalled connection.
response = requests.post(url, headers=headers, timeout=30)
# Decode the body as JSON instead of eval(): eval() would execute whatever
# text the remote server returns and raises NameError on the JSON literals
# true / false / null.
datas = response.json()
datas = datas['data']['provinceCompare']
key = list(datas.keys())

# Maps province name -> list of daily newly confirmed case counts.
provinceAdd = {}
# Per-province daily history endpoint; the province name is URL-quoted into
# the `province` query parameter.
urls = "https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?province=%s&"
for i in key:
    URL = urls % (parse.quote(i))
    # [-30:-1] keeps at most 29 entries and leaves out the final one
    # (presumably because the latest day is still incomplete - TODO confirm).
    responseProvince = requests.post(
        URL, headers=headers, timeout=30).json()['data'][-30:-1]
    provinceAdd[i] = [int(j['confirm_add']) for j in responseProvince]
"""时间序列模型"""
def AR(data, steps=7):
    """Forecast the next ``steps`` values of a series with a rolling ARIMA fit.

    For every step an ``ARIMA`` model of order (2, 0, 1) is fitted on the
    current window, the one-step-ahead prediction is rounded to an integer,
    appended to the window, and the oldest observation is dropped so the
    window length stays constant.

    Args:
        data: Sequence of historical observations. It is copied, so the
            caller's sequence is left unmodified.
        steps: Number of future points to predict. Defaults to 7.

    Returns:
        A list with ``steps`` rounded predictions, oldest first.
    """
    # Work on a private copy: sliding the window in place would silently
    # rewrite the caller's history with predicted values.
    window = list(data)
    result = []
    for _ in range(steps):
        model_fit = ARIMA(window, order=(2, 0, 1)).fit()
        # start == end == len(window) asks for exactly one out-of-sample point.
        yhat = round(model_fit.predict(len(window), len(window))[0])
        window.append(yhat)
        del window[0]
        result.append(yhat)
    return result
# Forecast for every province, keyed by province name.
result = {}
for i in key:
    data = provinceAdd[i]
    result[i] = AR(data)

# Read "today" once so that the labels stored in DATA and in time_list are
# always identical, even if the script runs across midnight.
today = datetime.date.today()
forecast_days = [today + datetime.timedelta(days=d) for d in range(7)]
# Date labels of the seven forecast days; they double as timeline points.
time_list = ["%s年%s月%s日" % (d.year, d.month, d.day) for d in forecast_days]

# total_num[i] is the sum of all provincial predictions for day i.
total_num = []
# DATA[i] is {"time": <label>, "data": [{"name": province,
# "value": [prediction, share of the daily total, province]}, ...]}.
DATA = []
for i, label in enumerate(time_list):
    num = sum(result[j][i] for j in key)
    total_num.append(num)
    temp = {"time": label, "data": []}
    for j in key:
        predicted = result[j][i]
        # Avoid ZeroDivisionError when every province is predicted at 0.
        share = round(predicted / num, 4) if num else 0.0
        temp["data"].append({"name": j, "value": [predicted, share, j]})
    DATA.append(temp)

# Fixed bounds shared by the visual-map colour scale and the bar axis.
maxNum = 300
minNum = 0
def get_year_chart(year: str):
    """Build the combined map / line / bar / pie chart for one forecast day.

    Reads the module-level ``DATA``, ``time_list``, ``total_num``, ``minNum``
    and ``maxNum``.

    Args:
        year: Date label of the day to render. It must equal the ``"time"``
            value of an entry in ``DATA``. The parameter name is kept for
            compatibility with existing callers.

    Returns:
        A pyecharts ``Grid`` containing the bar, line, pie and map charts.

    Raises:
        IndexError: If no entry of ``DATA`` carries the label ``year``.
    """
    # [[province, [prediction, share, province]], ...] for the requested day.
    map_data = [
        [[x["name"], x["value"]] for x in d["data"]] for d in DATA if d["time"] == year
    ][0]
    min_data, max_data = (minNum, maxNum)

    # Second line series: empty everywhere except on the requested day, so
    # the mark point attached to it highlights that day on the total curve.
    data_mark: List = [
        total_num[i] if x == year else "" for i, x in enumerate(time_list)
    ]

    map_chart = (
        Map()
        .add(
            series_name="",
            data_pair=map_data,
            zoom=1,
            center=[119.5, 34.5],
            is_map_symbol_show=False,
            itemstyle_opts={
                "normal": {"areaColor": "#323c48", "borderColor": "#404a59"},
                "emphasis": {
                    # The original passed the Timeline class here; a boolean
                    # is what the option expects.
                    "label": {"show": True},
                    "areaColor": "rgba(255,255,255, 0.5)",
                },
            },
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="预测" + str(year) + "全国分地区新冠患病人数",
                subtitle="",
                pos_left="center",
                pos_top="top",
                title_textstyle_opts=opts.TextStyleOpts(
                    font_size=25, color="rgba(255,255,255, 0.9)"
                ),
            ),
            tooltip_opts=opts.TooltipOpts(
                is_show=True,
                formatter=JsCode(
                    """function(params) {
if ('value' in params.data) {
return params.data.value[2] + ': ' + params.data.value[0];
}
}"""
                ),
            ),
            visualmap_opts=opts.VisualMapOpts(
                is_calculable=True,
                dimension=0,
                pos_left="30",
                pos_top="center",
                range_text=["High", "Low"],
                range_color=["lightskyblue", "yellow", "orangered"],
                textstyle_opts=opts.TextStyleOpts(color="#ddd"),
                min_=min_data,
                max_=max_data,
            ),
        )
    )

    # The title shows the first and last forecast dates. The original
    # formatted the bare timedelta, which rendered as "6 days, 0:00:00".
    forecast_start = datetime.date.today()
    forecast_end = forecast_start + datetime.timedelta(days=6)
    line_chart = (
        Line()
        .add_xaxis(time_list)
        .add_yaxis("", total_num)
        .add_yaxis(
            "",
            data_mark,
            markpoint_opts=opts.MarkPointOpts(
                data=[opts.MarkPointItem(type_="max")]),
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title="预测" + str(forecast_start) + "~" + str(forecast_end) + "号全国新冠患者人数",
                pos_left="72%",
                pos_top="5%",
            )
        )
    )

    # Horizontal bar chart of the per-province predictions.
    bar_x_data = [x[0] for x in map_data]
    bar_y_data = [{"name": x[0], "value": x[1][0]} for x in map_data]
    bar = (
        Bar()
        .add_xaxis(xaxis_data=bar_x_data)
        .add_yaxis(
            series_name="",
            y_axis=bar_y_data,
            label_opts=opts.LabelOpts(
                is_show=True, position="right", formatter="{b} : {c}"
            ),
        )
        .reversal_axis()
        .set_global_opts(
            xaxis_opts=opts.AxisOpts(
                max_=maxNum, axislabel_opts=opts.LabelOpts(is_show=False)
            ),
            yaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(is_show=False)),
            tooltip_opts=opts.TooltipOpts(is_show=False),
            visualmap_opts=opts.VisualMapOpts(
                is_calculable=True,
                dimension=0,
                pos_left="10",
                pos_top="top",
                range_text=["High", "Low"],
                range_color=["lightskyblue", "yellow", "orangered"],
                textstyle_opts=opts.TextStyleOpts(color="#ddd"),
                min_=min_data,
                max_=max_data,
            ),
        )
    )

    # Ring chart of each province's share of the day's total.
    pie_data = [[x[0], x[1][0]] for x in map_data]
    pie = (
        Pie()
        .add(
            series_name="",
            data_pair=pie_data,
            radius=["15%", "35%"],
            center=["80%", "82%"],
            itemstyle_opts=opts.ItemStyleOpts(
                border_width=1, border_color="rgba(0,0,0,0.3)"
            ),
        )
        .set_global_opts(
            tooltip_opts=opts.TooltipOpts(is_show=True, formatter="{b} {d}%"),
            legend_opts=opts.LegendOpts(is_show=False),
        )
    )

    # Lay the four charts out on a single canvas.
    grid_chart = (
        Grid()
        .add(
            bar,
            grid_opts=opts.GridOpts(
                pos_left="10", pos_right="45%", pos_top="50%", pos_bottom="5"
            ),
        )
        .add(
            line_chart,
            grid_opts=opts.GridOpts(
                pos_left="65%", pos_right="80", pos_top="10%", pos_bottom="50%"
            ),
        )
        .add(pie, grid_opts=opts.GridOpts(pos_left="45%", pos_top="60%"))
        .add(map_chart, grid_opts=opts.GridOpts())
    )
    return grid_chart
if __name__ == "__main__":
    # Script entry point: one timeline frame per forecast day, rendered to
    # a single HTML page.
    canvas_options = opts.InitOpts(
        width="1600px", height="900px", theme=ThemeType.DARK
    )
    timeline = Timeline(init_opts=canvas_options)

    for day_label in time_list:
        timeline.add(get_year_chart(year=day_label), time_point=str(day_label))

    # Vertical, auto-playing timeline control, 5000 ms per frame.
    schema_options = dict(
        orient="vertical",
        is_auto_play=True,
        is_inverse=True,
        play_interval=5000,
        pos_left="null",
        pos_right="5",
        pos_top="20",
        pos_bottom="20",
        width="60",
        label_opts=opts.LabelOpts(is_show=True, color="#fff"),
    )
    timeline.add_schema(**schema_options)

    timeline.render("2018.html")
最后生成的是一个html文件,展示结果如下: