该项目参考B站视频:https://www.bilibili.com/video/BV177411j7qJ
基于Python+Flask+Echarts打造一个疫情监控系统,涉及到的技术:
数据获取(爬虫)>>数据持久化(MySQL)>>flask搭建Web后台>>数据可视化(html5+Echarts)
爬虫,就是给网站发起请求,并从响应中提取需要的数据
1、发起请求,获取响应
2、解析内容
3、保存数据
可以到各大平台爬取最终数据,例如腾讯新闻:https://news.qq.com/zt2020/page/feiyan.htm
import pymysql
import time
import json
import traceback #追踪异常
import requests
def get_tencent_data():
    """Download and reshape the Tencent epidemic feeds.

    Two endpoints are queried: ``disease_h5`` supplies the per-city snapshot
    and ``disease_other`` supplies the day-by-day national series (the
    history figures were moved there by Tencent).

    :return: ``(history, details)`` where ``history`` maps a ``YYYY-MM-DD``
        date string to a dict with the keys ``confirm``, ``suspect``,
        ``heal``, ``dead`` and, when available, the matching ``*_add`` keys;
        ``details`` is a list of
        ``[update_time, province, city, confirm, confirm_add, heal, dead]``.
    :raises requests.RequestException: if a request fails or times out.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    url_his = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other'
    # Minimal anti-scraping measure: present a browser User-Agent.
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36',
    }

    def fetch(endpoint):
        # ``headers`` must be passed by keyword: the second positional
        # argument of requests.get is ``params`` (the query string).
        response = requests.get(endpoint, headers=headers, timeout=10)
        # The payload is JSON whose "data" field is itself a JSON string.
        return json.loads(json.loads(response.text)['data'])

    def to_iso_date(month_day):
        # The feed only carries "MM.DD"; the year is fixed to 2020 here.
        # Reformatted so the value can be stored in a datetime column.
        parsed = time.strptime("2020." + month_day, "%Y.%m.%d")
        return time.strftime("%Y-%m-%d", parsed)

    data_all = fetch(url)
    data_his = fetch(url_his)

    history = {}  # cumulative and daily-increment figures keyed by date
    for day in data_his["chinaDayList"]:
        history[to_iso_date(day["date"])] = {
            "confirm": day["confirm"],
            "suspect": day["suspect"],
            "heal": day["heal"],
            "dead": day["dead"],
        }
    for day in data_his["chinaDayAddList"]:
        # setdefault: a date present only in the increment list must not
        # abort the whole scrape with a KeyError.
        history.setdefault(to_iso_date(day["date"]), {}).update({
            "confirm_add": day["confirm"],
            "suspect_add": day["suspect"],
            "heal_add": day["heal"],
            "dead_add": day["dead"],
        })

    details = []  # one row per city for the latest snapshot
    update_time = data_all["lastUpdateTime"]
    # The first entry of areaTree is used as China; its children are provinces.
    data_province = data_all["areaTree"][0]["children"]
    for pro_infos in data_province:
        province = pro_infos["name"]
        for city_infos in pro_infos["children"]:
            total = city_infos["total"]
            details.append([
                update_time,
                province,
                city_infos["name"],
                total["confirm"],
                city_infos["today"]["confirm"],
                total["heal"],
                total["dead"],
            ])
    return history, details
创建数据库cov,然后建两张表
-- Daily nationwide figures, one row per calendar day (keyed by ds).
-- Column comments must be delimited with plain ASCII quotes; typographic
-- quotes are not valid string delimiters in MySQL.
CREATE TABLE history (
    ds datetime NOT NULL COMMENT '日期',
    confirm int(11) DEFAULT NULL COMMENT '累计确诊',
    confirm_add int(11) DEFAULT NULL COMMENT '当日新增确诊',
    suspect int(11) DEFAULT NULL COMMENT '剩余疑似',
    suspect_add int(11) DEFAULT NULL COMMENT '当日新增疑似',
    heal int(11) DEFAULT NULL COMMENT '累计治愈',
    heal_add int(11) DEFAULT NULL COMMENT '当日新增治愈',
    dead int(11) DEFAULT NULL COMMENT '累计死亡',
    dead_add int(11) DEFAULT NULL COMMENT '当日新增死亡',
    PRIMARY KEY (ds) USING BTREE
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
-- Per-city snapshot rows; each scrape appends a batch sharing one update_time.
-- Column comments must be delimited with plain ASCII quotes; typographic
-- quotes are not valid string delimiters in MySQL.
CREATE TABLE details (
    id int(11) NOT NULL AUTO_INCREMENT,
    update_time datetime DEFAULT NULL COMMENT '数据最后更新时间',
    province varchar(50) DEFAULT NULL COMMENT '省',
    city varchar(50) DEFAULT NULL COMMENT '市',
    confirm int(11) DEFAULT NULL COMMENT '累计确诊',
    confirm_add int(11) DEFAULT NULL COMMENT '新增确诊',
    heal int(11) DEFAULT NULL COMMENT '累计治愈',
    dead int(11) DEFAULT NULL COMMENT '累计死亡',
    PRIMARY KEY (id)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4;
def get_conn():
    """Open a connection to the local ``cov`` MySQL database.

    :return: ``(connection, cursor)``; the cursor is the default pymysql
        cursor, which yields result rows as tuples.
    """
    connection = pymysql.connect(
        host="localhost",
        user="root",
        password="123456",
        db="cov",
        charset="utf8",
    )
    return connection, connection.cursor()
def close_conn(conn, cursor):
    """Close a cursor and its connection, tolerating missing handles.

    :param conn: connection object exposing ``close()``, or a falsy value.
    :param cursor: cursor object exposing ``close()``, or a falsy value.
    :raises Exception: whatever ``close()`` raises is propagated, but the
        connection is still closed when closing the cursor fails.
    """
    try:
        if cursor:
            cursor.close()
    finally:
        # Runs even if cursor.close() raised, so the connection never leaks.
        if conn:
            conn.close()
def update_details():
    """Append the latest per-city snapshot to the ``details`` table.

    The scraped rows are inserted only when their timestamp differs from
    the ``update_time`` of the most recently inserted row, so running this
    repeatedly does not duplicate a snapshot. Errors are printed as a
    traceback instead of being raised.

    :return: None
    """
    cursor = None
    conn = None
    try:
        li = get_tencent_data()[1]  # index 0: history dict, index 1: detail rows
        if not li:
            # Nothing scraped: avoid an IndexError on li[0][0] below.
            print(f"{time.asctime()}未获取到详细数据")
            return
        conn, cursor = get_conn()
        sql = "insert into details(update_time,province,city,confirm,confirm_add,heal,dead) values(%s,%s,%s,%s,%s,%s,%s)"
        # Evaluates to 1 when the newest stored timestamp equals the scraped
        # one, 0 when it differs, and NULL (None) when the table is empty.
        sql_query = 'select %s=(select update_time from details order by id desc limit 1)'
        cursor.execute(sql_query, (li[0][0],))
        if not cursor.fetchone()[0]:
            print(f"{time.asctime()}开始更新最新数据")
            cursor.executemany(sql, li)
            conn.commit()  # make the inserts permanent
            print(f"{time.asctime()}更新最新数据完毕")
        else:
            print(f"{time.asctime()}已是最新数据!")
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must still propagate.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
def insert_history():
    """Bulk-load every scraped day into the ``history`` table.

    Intended for the initial load: each date is inserted unconditionally,
    so a date that already exists violates the primary key and the error is
    printed as a traceback. Errors are never raised to the caller.

    :return: None
    """
    cursor = None
    conn = None
    try:
        dic = get_tencent_data()[0]  # index 0: history dict, index 1: detail rows
        print(f"{time.asctime()}开始插入历史数据")
        conn, cursor = get_conn()
        sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        # Each value looks like {'confirm': 41, 'suspect': 0, 'heal': 0, 'dead': 1, ...};
        # .get() yields None (SQL NULL) for figures missing on a given day.
        rows = [
            [k, v.get("confirm"), v.get("confirm_add"), v.get("suspect"),
             v.get("suspect_add"), v.get("heal"), v.get("heal_add"),
             v.get("dead"), v.get("dead_add")]
            for k, v in dic.items()
        ]
        cursor.executemany(sql, rows)
        conn.commit()  # make the inserts permanent
        print(f"{time.asctime()}插入历史数据完毕")
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must still propagate.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
def update_history():
    """Insert only the scraped days that ``history`` does not yet contain.

    Each date is looked up first and inserted when the lookup matches no
    row; existing rows are left untouched. Errors are printed as a
    traceback instead of being raised.

    :return: None
    """
    cursor = None
    conn = None
    try:
        dic = get_tencent_data()[0]  # index 0: history dict, index 1: detail rows
        print(f"{time.asctime()}开始更新历史数据")
        conn, cursor = get_conn()
        sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        sql_query = "select confirm from history where ds=%s"
        for k, v in dic.items():
            # Each value looks like {'confirm': 41, 'suspect': 0, 'heal': 0, 'dead': 1, ...}.
            # execute() returns the number of matched rows; 0 means a new day.
            if cursor.execute(sql_query, (k,)):
                continue
            cursor.execute(sql, [k, v.get("confirm"), v.get("confirm_add"), v.get("suspect"),
                                 v.get("suspect_add"), v.get("heal"), v.get("heal_add"),
                                 v.get("dead"), v.get("dead_add")])
        conn.commit()  # make the inserts permanent
        print(f"{time.asctime()}历史数据更新完毕")
    except Exception:
        # Not a bare except: KeyboardInterrupt/SystemExit must still propagate.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)
# Example run: load the scraped history, then append the latest detail rows.
insert_history()
update_details()
Flask是一个使用Python编写的轻量级Web应用框架。
安装:pip install flask
基本格式:
// Template for a jQuery ajax call. Comments use "//" because "#" is not a
// JavaScript comment marker.
$.ajax({
    type: "post",                    // HTTP request method
    url: "/目标路由",                 // request URL (placeholder for the target route)
    data: {
        "k1": "v1", "k2": "v2"       // payload sent with the request
    },
    dataType: "json",                // expected response type (option name is camelCase)
    success: function (datas) {
        // Called when the request succeeds; datas is the returned data.
    },
    error: function () {
        // Called when the request fails.
    }
})
ECharts,缩写来自 Enterprise Charts,商业级数据图表,是百度的一个开源的数据可视化工具,提供了丰富的图表库,能够在 PC 端和移动设备上流畅运行
官方网站:https://echarts.apache.org/zh/index.html
def get_c1_data():
    """Return the headline totals for the dashboard element ``div id=c1``.

    The query sums confirm/heal/dead over the ``details`` rows that carry
    the newest ``update_time`` (the table is refreshed many times) and takes
    ``suspect`` from the newest ``history`` row.

    :return: 4-tuple of strings: (confirm, suspect, heal, dead).
    """
    sql = (
        "select sum(confirm),"
        "(select suspect from history order by ds desc limit 1),"
        "sum(heal),"
        "sum(dead) "
        "from details "
        "where update_time=(select update_time from details order by update_time desc limit 1) "
    )
    first_row = query(sql)[0]
    return tuple(str(value) for value in first_row)
1、复制中国地图option,导入china.js
2、获取数据
def get_c2_data():
    """Return confirmed totals per province for the newest snapshot.

    Only ``details`` rows with the newest ``update_time`` are considered,
    because the table is refreshed many times.

    :return: the result of ``query`` for (province, sum(confirm)) rows.
    """
    latest_snapshot = (
        "where update_time=(select update_time from details "
        "order by update_time desc limit 1) "
    )
    sql = (
        "select province,sum(confirm) from details "
        + latest_snapshot
        + "group by province"
    )
    return query(sql)
1、复制折线图option
2、获取数据
def get_l1_data():
    """Return the cumulative daily figures stored in ``history``.

    :return: the result of ``query`` for (ds, confirm, suspect, heal, dead) rows.
    """
    return query("select ds,confirm,suspect,heal,dead from history")
1、复制折线图option
2、获取数据
def get_l2_data():
    """Return the daily new confirmed and suspected counts from ``history``.

    :return: the result of ``query`` for (ds, confirm_add, suspect_add) rows.
    """
    return query("select ds,confirm_add,suspect_add from history")
1、复制柱状图option
2、获取数据
def get_r1_data():
    """Return the five places outside Hubei with the most confirmed cases.

    Cities are taken from the newest snapshot, excluding Hubei and the four
    municipalities; Beijing, Shanghai, Tianjin and Chongqing are each summed
    into a single entry named after the province.

    :return: the result of ``query`` for up to five (city, confirm) rows,
        ordered by confirm descending.
    """
    sql = (
        'SELECT city,confirm FROM '
        '(select city,confirm from details '
        'where update_time=(select update_time from details order by update_time desc limit 1) '
        'and province not in ("湖北","北京","上海","天津","重庆") '
        'union all '
        'select province as city,sum(confirm) as confirm from details '
        'where update_time=(select update_time from details order by update_time desc limit 1) '
        'and province in ("北京","上海","天津","重庆") group by province) as a '
        'ORDER BY confirm DESC LIMIT 5'
    )
    return query(sql)
1、复制词云图option,导入wordcloud.js,还要pip install jieba
2、获取数据
def get_r2_data():
    """Return the 20 most recently stored hot-search entries.

    :return: the result of ``query``: one-column rows of ``content``, e.g.
        (('民警抗疫一线奋战16天牺牲1037364',), ('四川再派两批医疗队1537382',), ...).
    """
    return query('select content from hotsearch order by id desc limit 20')