利用爬虫实现全国疫情实时监控(手机版)

文中分为六个部分,介绍如何用python等工具实现定时爬虫,完成全国疫情实时监控项目。主要用到工具:PyCharm (代码编写),python(版本为3.7.6,后台代码结构设计),Hbuilder X(2.5.1)(网页前端设计),MySQL(8.0)/MySQL workbench(数据库建设),jupyter notebook(实时代码测试),secure CRT(云服务器连接),WinSCP(远程连接云服务器,传输数据)。

一 获取数据

主要获取腾讯数据中历史数据以及详细城市数据,并将之储存在MySQL中,代码如下。

# -*- coding:utf-8 -*-
"""
引入所需的库,这些均需要自行安装
"""
import pymysql
import time
import json
import traceback
import requests


def get_tencent_data():
    """Fetch the Tencent epidemic feed and reshape it for the database.

    Returns:
        tuple: ``(history, details)``.

        ``history`` maps the feed's ``lastUpdateTime`` to a dict holding the
        nationwide totals ``confirm``, ``heal``, ``dead``, ``nowConfirm``,
        ``suspect`` and ``nowSevere``.

        ``details`` is a list with one row per city:
        ``[update_time, province, city, confirm, confirm_add, heal, dead]``.

    Raises:
        requests.RequestException: the request failed, timed out or returned
            an error status.
        KeyError, ValueError: the payload does not have the expected layout.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/80.0.3987.116 Safari/537.36'}
    # ``headers`` has to be passed by keyword: the second positional argument
    # of requests.get() is ``params``, which would put the dict into the query
    # string instead of sending it as HTTP headers.  A timeout keeps a
    # scheduled run from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    res = json.loads(r.text)
    # The 'data' field is itself a JSON document encoded as a string.
    data_all = json.loads(res['data'])

    update_time = data_all['lastUpdateTime']

    china_total = data_all['chinaTotal']
    total_keys = ('confirm', 'heal', 'dead', 'nowConfirm', 'suspect', 'nowSevere')
    history = {update_time: {key: china_total[key] for key in total_keys}}

    details = []
    # areaTree[0] is the country node; its children are the provinces.
    for pro_infos in data_all['areaTree'][0]['children']:
        province = pro_infos['name']
        for city_infos in pro_infos.get('children', []):
            total = city_infos['total']
            details.append([update_time, province, city_infos['name'], total['confirm'],
                            city_infos['today']['confirm'], total['heal'], total['dead']])
    return history, details


def get_conn():
    """Open a MySQL connection and create a cursor on it.

    The connection settings can be overridden with the environment variables
    ``COV_DB_HOST``, ``COV_DB_USER``, ``COV_DB_PASSWORD`` and ``COV_DB_NAME``
    so that real credentials do not need to live in the source file.  The
    literals below are only fallbacks kept for backward compatibility.

    Returns:
        tuple: ``(conn, cursor)``; release both with ``close_conn``.
    """
    import os  # local import so this block does not depend on the file header

    conn = pymysql.connect(host=os.environ.get('COV_DB_HOST', '212.64.83.45'),
                           user=os.environ.get('COV_DB_USER', 'root'),
                           password=os.environ.get('COV_DB_PASSWORD', 'cb322468437'),
                           db=os.environ.get('COV_DB_NAME', 'cov'),
                           charset='utf8')
    cursor = conn.cursor()
    return conn, cursor


def close_conn(conn, cursor):
    """Close the cursor first and then the connection, skipping falsy ones."""
    for resource in (cursor, conn):
        if resource:
            resource.close()


def updata_details():
    """Store the latest per-city rows in ``details`` unless already present.

    The feed's update time is compared with the ``update_time`` of the most
    recently inserted row (highest ``id``).  Rows are inserted only when the
    two differ, or when the table is empty.  Errors are printed as a
    traceback and not re-raised, so a scheduled run never aborts with an
    exception.
    """
    cursor = None
    conn = None
    try:
        li = get_tencent_data()[1]
        if not li:
            # Nothing to compare or insert; avoids an IndexError on li[0].
            print(f'{time.asctime()}未获取到详情数据')
            return
        conn, cursor = get_conn()
        sql = "insert into details(update_time,province,city,confirm,confirm_add,heal,dead) values(%s,%s,%s,%s,%s,%s,%s)"
        # Yields 1 when the newest stored timestamp equals the feed's, 0 when
        # it differs and NULL when the table is empty.
        sql_query = "select %s=(select update_time from details order by id desc limit 1)"
        cursor.execute(sql_query, li[0][0])
        if not cursor.fetchone()[0]:
            print(f'{time.asctime()}开始更新最新数据')
            cursor.executemany(sql, li)
            conn.commit()
            print(f'{time.asctime()}更新最新数据完毕')
        else:
            print(f'{time.asctime()}已是最新数据')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still stop the script.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


def insert_history():
    """Insert every snapshot returned by the feed into ``history``.

    Unlike ``update_history`` this does not check whether a row for the same
    timestamp already exists.  Errors are printed as a traceback and not
    re-raised.
    """
    cursor = None
    conn = None
    try:
        dic = get_tencent_data()[0]
        print(f"{time.asctime()}开始插入历史数据")
        conn, cursor = get_conn()
        sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s)"
        for k, v in dic.items():
            cursor.execute(sql,
                           [k, v.get("confirm"), v.get("heal"), v.get("dead"), v.get("nowConfirm"), v.get("suspect"),
                            v.get("nowSevere")])
        conn.commit()
        print(f"{time.asctime()}插入历史数据完毕")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still stop the script.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


def update_history():
    """Insert the feed's nationwide snapshot into ``history`` if it is new.

    A snapshot is written only when no row with the same ``ds`` exists yet.
    Errors are printed as a traceback and not re-raised.
    """
    cursor = None
    conn = None
    try:
        dic = get_tencent_data()[0]
        print(f'{time.asctime()}开始更新历史数据')
        conn, cursor = get_conn()
        sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s)"
        sql_query = "select confirm from history where ds=%s"
        for k, v in dic.items():
            # execute() reports how many rows the SELECT matched; zero means
            # this timestamp has not been stored yet.
            if not cursor.execute(sql_query, k):
                cursor.execute(sql, [k, v.get("confirm"), v.get("heal"), v.get("dead"), v.get("nowConfirm"),
                                     v.get("suspect"), v.get("nowSevere")])
        conn.commit()
        print(f'{time.asctime()}历史数据更新完毕')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still stop the script.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


# Refresh both tables when the script runs: nationwide totals first, then the
# per-city breakdown.
for _refresh in (update_history, updata_details):
    _refresh()


其中 get_conn() 及 close_conn(conn, cursor) 两个函数后面还会多次用到,之后不再赘述。

二 可视化布局

利用爬虫实现全国疫情实时监控(手机版)_第1张图片
图示为最终布局情况,HTML代码及相应css文件如下,使用python中Flask项目。
项目结构如下:
利用爬虫实现全国疫情实时监控(手机版)_第2张图片
cov.html代码:

<!DOCTYPE html>
<html>
	<head>
		<meta charset="utf-8">
		<title>疫情监控追踪</title>
		<!-- Libraries: jQuery for the AJAX polling, ECharts plus the China map data for the charts -->
		<script src="../static/js/jquery-3.4.1.min.js"></script>
		<script src="../static/js/echarts.min.js"></script>
		<script src="../static/js/china.js"></script>
		<link href="../static/css/main.css" rel="stylesheet"/>
	</head>
	<body>
		<!-- Page header and clock (the clock is filled in by controller.js) -->
		<div id="title">全国疫情实时追踪</div>
		<div id="tim"></div>

		<!-- Chart containers; the placeholder text is replaced once ECharts renders into them -->
		<div id="l1">我是左一</div>
		<div id="l2">我是左二</div>

		<!-- Six headline figures (filled in by controller.js) followed by their six captions -->
		<div id="c1">
			<div class="num"><h1></h1></div>
			<div class="num"><h1></h1></div>
			<div class="num"><h1></h1></div>
			<div class="num"><h1></h1></div>
			<div class="num"><h1></h1></div>
			<div class="num"><h1></h1></div>
			<div class="txt"><h2>累计确诊</h2></div>
			<div class="txt"><h2>累计治愈</h2></div>
			<div class="txt"><h2>累计死亡</h2></div>
			<div class="txt"><h2>现存确诊</h2></div>
			<div class="txt"><h2>现存疑似</h2></div>
			<div class="txt"><h2>现存重症</h2></div>
		</div>

		<div id="c2">我是中二</div>
		<div id="c3">C O P Y R I G H T @ C H  E N  G B I N 2 0 2 0</div>
		<div id="r1">我是右一</div>
		<div id="r2">我是右二</div>

		<!-- Chart definitions must load before controller.js, which pushes data into them -->
		<script src="../static/js/ec_center.js"></script>
		<script src="../static/js/ec_left1.js"></script>
		<script src="../static/js/ec_left2.js"></script>
		<script src="../static/js/ec_right1.js"></script>
		<script src="../static/js/ec_right2.js"></script>
		<script src="../static/js/controller.js"></script>
	</body>
</html>

main.css代码(背景图片 images/1.jpg 需自行准备):

/* Page background (the image file has to be supplied separately). */
body {
	background-image: url(images/1.jpg);
}

/* Header banner across the top of the page. */
#title {
	position: absolute;
	width: 100%;
	height: 20%;
	top: 0;
	left: 0;
	color: white;
	font-size: 5rem;
	display: flex;
	align-items: center;
	justify-content: center;
}

/* Clock in the top-right corner. */
#tim {
	position: absolute;
	height: 10%;
	top: 3%;
	right: 2%;
	color: #FFFFFF;
	font-size: 1.6rem;
}

/* Strip holding the six headline figures and their captions. */
#c1 {
	position: absolute;
	width: 100%;
	height: 16%;
	top: 15%;
	left: 0;
}

/* One headline figure: six per row, centred. */
.num {
	width: 16.6666%;
	float: left;
	display: flex;
	align-items: center;
	justify-content: center;
	color: gold;
	font-size: 1.5rem;
}

/* Caption under a headline figure. */
.txt {
	width: 16.6666%;
	float: left;
	font-family: "幼圆";
	font-size: 14px;
	display: flex;
	align-items: center;
	justify-content: center;
	color: white;
}

.txt h2 {
	margin: 0;
}

/* Chart panels are full width and stacked one below another via "top". */
#c2 {
	position: absolute;
	width: 100%;
	height: 40%;
	top: 24.9999%;
	left: 0;
}

/* Copyright line, placed below the last chart panel. */
#c3 {
	position: absolute;
	width: 100%;
	height: 3%;
	top: 219%;
	left: 0;
	color: white;
	font-size: 0.6rem;
	display: flex;
	align-items: center;
	justify-content: center;
}

#l1 {
	position: absolute;
	width: 100%;
	height: 40%;
	top: 64.999%;
	left: 0%;
}

#l2 {
	position: absolute;
	width: 100%;
	height: 40%;
	top: 102%;
	left: 0%;
}

#r1 {
	position: absolute;
	width: 100%;
	height: 40%;
	top: 141%;
	left: 0;
}

#r2 {
	position: absolute;
	width: 100%;
	height: 40%;
	top: 180%;
	left: 0;
}

三 关键数字、时间更新及Echarts设计

1/时间的更新
通过前端/后台的调用,实现时间更新(右上角,数据来自html本身)。
利用爬虫实现全国疫情实时监控(手机版)_第3张图片
2/总体概要数据更新(数据来自第一部分获取数据history)
利用爬虫实现全国疫情实时监控(手机版)_第4张图片
3/全国地图及现存确诊病例更新(数据来自第一部分获取数据details)
利用爬虫实现全国疫情实时监控(手机版)_第5张图片
用到china.js文件,自行在echarts官网下载。

4/饼图设计(数据来自第一部分获取数据history)
利用爬虫实现全国疫情实时监控(手机版)_第6张图片
图像设计皆用到Echarts。

5/贵州省现存病例设计(数据来自第一部分details)
利用爬虫实现全国疫情实时监控(手机版)_第7张图片
6/现存病例及疑似病例趋势设计(数据来自国家卫健委,手动)
利用爬虫实现全国疫情实时监控(手机版)_第8张图片
7/累计确诊(非湖北)TOP5城市设计(数据来自第一部分details)
利用爬虫实现全国疫情实时监控(手机版)_第9张图片
8/最终布局(已经启动服务器更新数据)

js代码如下:

(1)controller.js(控制器,负责定时请求后台数据并刷新页面)

function gettime(){
    // Ask the backend for the formatted server time and show it in #tim.
    var request = {
        url: "/time",
        timeout: 10000, // give up after 10 seconds
        success: function(data){
            $("#tim").html(data)
        },
        error: function(xhr, type, errorThrown){
            // left empty: a failed poll is skipped without any message
        }
    };
    $.ajax(request);
}

function get_c1_data(){
    // Fetch the six nationwide totals from /c1 and write them, in this order,
    // into the six ".num h1" cells.
    var fields = ["confirm", "heal", "dead", "nowConfirm", "suspect", "nowSevere"];
    $.ajax({
        url: "/c1",
        timeout: 10000, // give up after 10 seconds
        success: function(data){
            var cells = $(".num h1");
            for (var i = 0; i < fields.length; i++) {
                cells.eq(i).text(data[fields[i]]);
            }
        },
        error: function(xhr, type, errorThrown){
            // left empty: a failed poll is skipped without any message
        }
    });
}

function get_c2_data(){
    // Load the per-province values from /c2 into the map chart and redraw it.
    var onSuccess = function(data){
        var mapSeries = ec_center_option.series[0];
        mapSeries.data = data.data;
        ec_center.setOption(ec_center_option);
    };
    $.ajax({
        url: "/c2",
        timeout: 10000, // give up after 10 seconds
        success: onSuccess,
        error: function(xhr, type, errorThrown){
            // left empty: a failed poll is skipped without any message
        }
    });
}
function get_l1_data(){
    // Load the pie slices from /l1 into the first chart and redraw it.
    var onSuccess = function(data){
        var pieSeries = ec_left1_option.series[0];
        pieSeries.data = data.data;
        ec_left1.setOption(ec_left1_option);
    };
    $.ajax({
        url: "/l1",
        timeout: 10000, // give up after 10 seconds
        success: onSuccess,
        error: function(xhr, type, errorThrown){
            // left empty: a failed poll is skipped without any message
        }
    });
}
function get_l2_data(){
    // Load the per-city values from /l2 into the bar chart and redraw it.
    $.ajax({
        url: "/l2",
        timeout: 10000, // give up after 10 seconds
        success: function(data){
            // /l2 returns `city` and `confirm` as parallel arrays, in whatever
            // order the database yields the rows. Use the returned city list as
            // the category axis so every bar sits next to its own label instead
            // of relying on the hard-coded order in ec_left2_option.
            if (Array.isArray(data.city)) {
                ec_left2_option.yAxis.data = data.city
            }
            ec_left2_option.series[0].data = data.confirm
            ec_left2.setOption(ec_left2_option)
        },
        error: function(xhr, type, errorThrown){
            // left empty: a failed poll is skipped without any message
        }
    });
}
// Load trend data from /r1 into the ec_right1 chart.
// NOTE(review): both call sites of this function are commented out, and it
// does not match the rest of the code as shown:
//   - ec_right1_option.xAxis is declared as a plain object, so xAxis[0] is
//     undefined and the first assignment would throw;
//   - the /r1 endpoint returns the dates under the key `day`, not `dtime`;
//   - ec_right1_option defines only two series, so series[2] and series[3]
//     are undefined.
// Reconcile the chart option and the endpoint before enabling it.
function get_r1_data(){
    $.ajax({
	url:"/r1",
	timeout:10000,// give up after 10 seconds
	success:function(data){
		ec_right1_option.xAxis[0].data=data.dtime
		ec_right1_option.series[0].data=data.confirm
		ec_right1_option.series[1].data=data.heal
		ec_right1_option.series[2].data=data.dead
		ec_right1_option.series[3].data=data.nowConfirm
		ec_right1.setOption(ec_right1_option)
	},
	error:function(xhr,type,errorThrown){

	},
});
}
function get_r2_data(){
    // Load the top-5 cities and their counts from /r2 into the bar chart.
    var onSuccess = function(data){
        ec_right2_option.xAxis.data = data.city;
        var barSeries = ec_right2_option.series[0];
        barSeries.data = data.confirm;
        ec_right2.setOption(ec_right2_option);
    };
    $.ajax({
        url: "/r2",
        timeout: 10000, // give up after 10 seconds
        success: onSuccess,
        error: function(xhr, type, errorThrown){
            // left empty: a failed poll is skipped without any message
        }
    });
}

// Initial load: fill the clock, the headline figures and every chart once.
gettime()
get_c1_data()
get_c2_data()
get_l1_data()
get_l2_data()
// get_r1_data()   // disabled: the trend chart keeps the static data defined in ec_right1.js
get_r2_data()

// Periodic refresh: the clock every second, everything else every 100 seconds.
setInterval(gettime,1000)
setInterval(get_c1_data,100000)
setInterval(get_c2_data,100000)
setInterval(get_l1_data,100000)
setInterval(get_l2_data,100000)
// setInterval(get_r1_data,1000)   // disabled together with the initial call above
setInterval(get_r2_data,100000)

(2)ec_center.js

// China map showing a value per province; rendered into #c2.
var ec_center = echarts.init(document.getElementById('c2'))

// Sample data showing the shape expected in series[0].data.
var mydata = [{'name':'上海', 'value': 318}, {'name':'云南', 'value': 162}]

var ec_center_option = {
    title: {
        text: '各省现存病例情况',
        left: 'center',
        textStyle: {
            color: 'white',
            fontSize: 30,
        },
    },
    tooltip: {
        trigger: 'item'
    },
    // Piecewise colour legend: each range maps to one colour.
    visualMap: {
        show: true,
        x: 'left',
        y: 'bottom',
        textStyle: {
            fontSize: 15,
            color: 'white',
        },
        splitList: [{start:0, end:0},
            {start:1, end:49},
            {start:50, end:99},
            {start:100, end:499},
            {start:500, end:999},
            {start:1000, end:9999},
            {start:10000}],
        color: ['#1b0202',  '#4d1a1a', '#6b2f2f', '#874949', '#a56b6b', '#d7b7b7',  '#ffffff']
    },
    series: [{
        name: '现存确诊人数',
        type: 'map',
        mapType: 'china',
        roam: false,
        itemStyle: {
            normal: {
                // was "borderwidth": option names are case-sensitive, so the
                // misspelled key was ignored
                borderWidth: .5,
                borderColor: '#009fe8',
                areaColor: '#ffefd5',
            },
            emphasis: {
                borderWidth: .5,
                borderColor: '#4b0082',
                areaColor: '#fff',
            }
        },
        // was "lable": the misspelled key was ignored, so the province
        // names were never drawn
        label: {
            normal: {
                show: true,
                fontSize: 8,
            },
            emphasis: {
                show: true,
                fontSize: 8,
            }
        },
        data: []   // filled by get_c2_data() in controller.js; see mydata for the shape
    }]
};
ec_center.setOption(ec_center_option)

(3)ec_left1.js

// Pie chart of the nationwide figures; rendered into #l1.
var ec_left1 = echarts.init(document.getElementById('l1'));

// Sample data showing the shape expected in series[0].data.
var mydata = [{value: 1, name: '累计治愈'},  {value: 2, name: '累计死亡'},  {value: 3, name: '现存确诊'},
                {value: 4, name: '现存疑似'}, {value: 5, name: '现存重症'}]

var ec_left1_option = {
    title: {
        text: '当日全国病例情况',
        textStyle: {
            color: 'white',
            fontSize: 30,
        },
        subtext: '数据来自国家卫健委',
        left: 'center',
    },
    tooltip: {
        trigger: 'item',
        // {a} series name, {b} slice name, {c} value, {d} percentage.
        // The line break must be written as <br/> inside the string; a raw
        // newline in a quoted string literal is a syntax error.
        formatter: '{a} <br/>{b} : {c} ({d}%)'
    },
    legend: {
        orient: 'vertical',
        textStyle: {
            fontSize: 15,
            color: 'white',
        },
        left: 'right',
        data: ['累计治愈', '累计死亡', '现存确诊', '现存疑似', '现存重症']
    },
    series: [
        {
            name: '现存情况',
            type: 'pie',
            radius: '65%',
            center: ['50%', '50%'],
            data: [],   // filled by get_l1_data() in controller.js; see mydata for the shape
            emphasis: {
                itemStyle: {
                    shadowBlur: 10,
                    shadowOffsetX: 0,
                    shadowColor: 'rgba(0, 0, 0, 0.5)'
                }
            }
        }
    ]
};

ec_left1.setOption(ec_left1_option)

(4)ec_left2.js

// Horizontal bar chart of the Guizhou prefectures; rendered into #l2.
var ec_left2 = echarts.init(document.getElementById('l2'));

var ec_left2_option = {
    title: {
        text: '贵州省各地州市现存病例',
        textStyle: {
            color: 'white',
            fontSize: 30,
        },
        subtext: '数据来自贵州卫健委',
        left: "center"
    },

    tooltip: {
        trigger: 'axis',
        axisPointer: {
            type: 'shadow'
        }
    },
    xAxis: {
        type: 'value',
        boundaryGap: [0, 0.01],
        axisLine: {
            lineStyle: {
                color: '#ffffff',
            }
        },
        // was "axisLable": the misspelled key was ignored by ECharts
        axisLabel: {
            interval: 0,
            rotate: -30,
        },
    },
    yAxis: {
        type: 'category',
        data: ["贵阳", "遵义", "毕节", "黔南州", "铜仁", "黔东南州", "六盘水", "安顺", "黔西南州"],
        // was "axisLable": without interval 0, labels may be skipped when
        // they do not all fit
        axisLabel: {
            interval: 0,
            rotate: -30,
        },
        axisLine: {
            lineStyle: {
                color: '#ffffff',
            }
        }
    },
    series: [
        {
            type: 'bar',
            data: []   // filled by get_l2_data() in controller.js
        }
    ]
};


ec_left2.setOption(ec_left2_option)

(5)ec_right1.js

// Line chart of the nationwide trend, using manually entered data; rendered
// into #r1.
var ec_right1 = echarts.init(document.getElementById('r1'));

var ec_right1_option = {
    title: {
        text: '全国病例趋势图',
        textStyle: {
            fontSize: 30,
            color: 'white',
        },
        left: "center"
    },
    tooltip: {
        trigger: 'axis',
        axisPointer: {
            type: 'line',
            lineStyle: {
                color: '#7171c6'
            }
        },
    },
    legend: {
        data: ['现有疑似', '现存确诊'],
        left: 'right',
        textStyle: {
            fontSize: 15,
            color: 'white',
        },
    },
    grid: {
        left: '3%',
        right: '4%',
        bottom: '3%',
        containLabel: true
    },
    xAxis: {
        type: 'category',
        boundaryGap: false,
        axisLine: {
            lineStyle: {
                color: '#ffffff',
            }
        },
        data: ['1月20日','1月21日','1月22日','1月23日','1月24日','1月25日','1月26日','1月27日','1月28日','1月29日','1月30日','1月31日','2月1日','2月2日','2月3日','2月4日','2月5日','2月6日','2月7日','2月8日','2月9日','2月10日','2月11日','2月12日','2月13日', '2月14日','2月15日','2月16日','2月17日','2月18日','2月19日','2月20日','2月21日','2月22日','2月23日','2月24日','2月25日','2月26日','2月27日','2月28日','2月29日']
    },
    yAxis: {
        type: 'value',
        axisLine: {
            lineStyle: {
                color: '#ffffff',
            }
        }
    },
    // The two series are independent quantities, so they are NOT stacked:
    // with a shared `stack` the second line would be drawn at the sum of both
    // series instead of at its own value.
    series: [
        {
            name: '现有疑似',
            type: 'line',
            data: [54,37,393,1072,1965,2684,5794,6973,9239,12167,15238,17988,19544,21558,23214,23260,24702,26359,27657,28942,23589,21675,16067,13435,10109,8969,8228,7264,6242,5248,4922,5206,5365,4148,3434,2824,2491,2358,2788,1418,851]
        },
        {
            name: '现存确诊',
            type: 'line',
            data: [291,431,554,771,1208,1870,2613,4349,5739,7417,9308,11289,13748,16369,19381,22942,26302,28985,31774,33738,35982,37626,38800,52526,55748,56873,57416,57934,58016,57805,56303,54965,53284,51606,49824,47672,45604,43258,39919,37414,35329]
        },
    ]
};

ec_right1.setOption(ec_right1_option)

(6)ec_right2.js

// Bar chart of the five cities with the most confirmed cases outside Hubei;
// rendered into #r2.
var ec_right2 = echarts.init(document.getElementById('r2'));

var ec_right2_option = {
    title: {
        text: '非湖北地区城市确诊TOP5',
        textStyle: {
            color: 'white',
            fontSize: 30,
        },
        left: 'center',
    },
    color: ['#3398DB'],
    tooltip: {
        trigger: 'axis',
        axisPointer: {
            type: 'shadow'
        }
    },
    xAxis: {
        type: 'category',
        data: [],   // city names, filled by get_r2_data(); e.g. ['Mon', 'Tue', 'Wed', 'Thu', 'Fri']
        axisLine: {
            lineStyle: {
                color: '#ffffff',
            }
        }
    },
    yAxis: {
        type: 'value',
        show: true,
        axisLine: {
            lineStyle: {
                color: '#ffffff',
            }
        },
    },
    series: [{
        data: [],   // counts, filled by get_r2_data(); e.g. [120, 200, 150, 80, 70]
        type: 'bar',
        barMaxWidth: "50%"
    }]
};

ec_right2.setOption(ec_right2_option)

2/app代码

from flask import Flask
from flask import render_template
from flask import jsonify
import time
import pymysql


app = Flask(__name__)


def get_conn():
    """Open a MySQL connection and create a cursor on it.

    The connection settings can be overridden with the environment variables
    ``COV_DB_HOST``, ``COV_DB_USER``, ``COV_DB_PASSWORD`` and ``COV_DB_NAME``
    so that real credentials do not need to live in the source file.  The
    literals below are only fallbacks kept for backward compatibility.

    Returns:
        tuple: ``(conn, cursor)``; release both with ``close_conn``.
    """
    import os  # local import so this block does not depend on the file header

    conn = pymysql.connect(host=os.environ.get('COV_DB_HOST', '212.64.83.45'),
                           user=os.environ.get('COV_DB_USER', 'root'),
                           password=os.environ.get('COV_DB_PASSWORD', 'cb322468437'),
                           db=os.environ.get('COV_DB_NAME', 'cov'),
                           charset='utf8')
    cursor = conn.cursor()
    return conn, cursor


def close_conn(conn, cursor):
    """Close the cursor first and then the connection, skipping falsy ones."""
    for resource in (cursor, conn):
        if resource:
            resource.close()


def query(sql, *args):
    """Run a SELECT statement and return all rows.

    Args:
        sql: SQL text, with ``%s`` placeholders for any parameters.
        *args: values bound to the placeholders.

    Returns:
        The rows produced by ``cursor.fetchall()``.
    """
    conn = None
    cursor = None
    try:
        conn, cursor = get_conn()
        cursor.execute(sql, args)
        return cursor.fetchall()
    finally:
        # Runs on errors too, so a failing query no longer leaks the connection.
        close_conn(conn, cursor)


@app.route('/')
def hello_world():
    """Serve the dashboard page."""
    page = render_template("cov.html")
    return page


@app.route('/time')
def get_time():
    """Return the server's local time as text for the page clock.

    The Chinese unit characters are inserted with ``str.format`` after
    ``strftime`` has run, so the format string given to ``strftime`` stays
    ASCII-only.
    """
    units = ("年", "月", "日")
    template = time.strftime("%Y{}%m{}%d{} %X")
    return template.format(*units)


@app.route('/c1')
def get_c1_data():
    """Return the nationwide totals of the newest ``history`` row as JSON.

    The response is an object with the integer fields ``confirm``, ``heal``,
    ``dead``, ``nowConfirm``, ``suspect`` and ``nowSevere``.
    """
    sql = "select confirm, " \
          "heal, " \
          "dead," \
          "nowConfirm," \
          "suspect," \
          "nowSevere " \
          "from history " \
          "where ds=(select ds from history order by ds desc limit 1) "
    res = query(sql)
    data = res[0]
    keys = ("confirm", "heal", "dead", "nowConfirm", "suspect", "nowSevere")
    # jsonify() matches the other endpoints and also works on Flask versions
    # that do not convert a returned dict into a JSON response by themselves.
    return jsonify({key: int(value) for key, value in zip(keys, data)})


@app.route('/c2')
def get_c2_data():
    """Return one ``{"name": province, "value": n}`` entry per province.

    ``n`` is computed from the newest ``details`` snapshot as
    ``sum(confirm) - sum(heal) - sum(dead)`` over the province's rows.
    """
    sql = "select province, sum(confirm), sum(heal), sum(dead)  from details where update_time=(select update_time from details order by update_time desc limit 1) group by province"
    res = query(sql)
    # The per-row debug print was dropped; it wrote to stdout on every request.
    res1 = [{"name": province, "value": int(confirm - heal - dead)}
            for province, confirm, heal, dead in res]
    return jsonify({"data": res1})

@app.route('/l1')
def get_l1_data():
    """Return the pie-chart slices built from the newest ``history`` row."""
    sql = ("select heal, "
           "dead,"
           "nowConfirm,"
           "suspect,"
           "nowSevere "
           "from history "
           "where ds=(select ds from history order by ds desc limit 1) ")
    latest = query(sql)[0]
    # Slice names, in the same order as the selected columns.
    labels = ('累计治愈', '累计死亡', '现存确诊', '现存疑似', '现存重症')
    slices = [{"value": int(latest[i]), "name": labels[i]} for i in range(len(labels))]
    return jsonify({"data": slices})


@app.route('/l2')
def get_l2_data():
    """Return the Guizhou prefectures and their values as parallel lists.

    For each listed city in the newest ``details`` snapshot the value is
    ``confirm - heal - dead``.
    """
    sql = 'select city, confirm, heal, dead from details where update_time=(select update_time  from details order by update_time desc limit 1) and city in ("贵阳","遵义","毕节","黔南州","铜仁","黔东南州","六盘水","安顺","黔西南州")'
    rows = query(sql)
    city = [name for name, _, _, _ in rows]
    confirm = [int(total) - int(healed) - int(died) for _, total, healed, died in rows]
    return jsonify({"city": city, "confirm": confirm})


@app.route('/r1')
def get_r1_data():
    """Return every ``history`` row as parallel lists, one list per column.

    ``day`` holds the ``ds`` value formatted with ``"%m-%d, %X"``; the other
    lists hold the integer columns.
    """
    sql = 'select ds, confirm, heal, dead, nowConfirm from history'
    rows = query(sql)
    payload = {"day": [], "confirm": [], "heal": [], "dead": [], "nowConfirm": []}
    for ds, confirm, heal, dead, now_confirm in rows:
        payload["day"].append(ds.strftime("%m-%d, %X"))
        payload["confirm"].append(int(confirm))
        payload["heal"].append(int(heal))
        payload["dead"].append(int(dead))
        payload["nowConfirm"].append(int(now_confirm))
    return jsonify(payload)


@app.route('/r2')
def get_r2_data():
    """Return the five cities with the most confirmed cases outside Hubei.

    Beijing, Shanghai, Tianjin and Chongqing are each summed and treated as a
    single city; all other provinces except Hubei contribute their individual
    cities.  Only the newest ``details`` snapshot is used.
    """
    sql = ('SELECT city, confirm FROM '
           '(select city, confirm from details '
           'where update_time=(select update_time  from details order by update_time desc limit 1)'
           'and province not in ("湖北","北京","上海","天津","重庆")'
           'union all '
           'select province as city, sum(confirm) as confirm from details '
           'where update_time=(select update_time from details order by update_time desc limit 1)'
           'and province in ("北京","上海","天津","重庆")group by province) as a '
           'ORDER BY confirm DESC LIMIT 5')
    rows = query(sql)
    city = [name for name, _ in rows]
    confirm = [int(count) for _, count in rows]
    return jsonify({"city": city, "confirm": confirm})


if __name__ == "__main__":
    # Direct launch: listen on every network interface, port 5000.
    app.run(port=5000, host="0.0.0.0")

注:代码为测试所用,内部有很多测试代码,已经注释掉。
所用python为3.7.6。

四 开发模式部署

将之前设计的代码按项目结构部署并上传服务器(我使用的是腾讯云服务器,Linux系统主机,MySQL也部署在这台主机上,需提前安装好;用secure CRT连接云服务器,用WinSCP向服务器传输文件)。安装python3.7.6后运行app.py(所需的第三方库自行用pip安装),发现已经可以访问,但此时一旦关闭远程连接,服务就停止了。

五 生产模式部署

安装Nginx:yum install nginx
安装gunicorn:pip install gunicorn
启动gunicorn:gunicorn -b 127.0.0.1:8080 -D app:app
编辑Nginx配置文件:vim /etc/nginx/nginx.conf
启动Nginx:/usr/sbin/nginx
利用爬虫实现全国疫情实时监控(手机版)_第10张图片利用爬虫实现全国疫情实时监控(手机版)_第11张图片
让python后台运行:

# Option 1: run in the foreground (the service stops when the remote session is closed)
cd /root/cov
python3 app.py

# Option 2: keep running after logout; the trailing "&" puts the process in the
# background so the shell prompt returns
cd /root/cov
nohup python3 app.py &
六 使用crontab定时爬虫

编写spider代码(对第一部分更新):

import pymysql
import time
import json
import traceback
import requests
import sys


def get_tencent_data():
    """Fetch the Tencent epidemic feed and reshape it for the database.

    Returns:
        tuple: ``(history, details)``.

        ``history`` maps the feed's ``lastUpdateTime`` to a dict holding the
        nationwide totals ``confirm``, ``heal``, ``dead``, ``nowConfirm``,
        ``suspect`` and ``nowSevere``.

        ``details`` is a list with one row per city:
        ``[update_time, province, city, confirm, confirm_add, heal, dead]``.

    Raises:
        requests.RequestException: the request failed, timed out or returned
            an error status.
        KeyError, ValueError: the payload does not have the expected layout.
    """
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/80.0.3987.116 Safari/537.36'}
    # ``headers`` has to be passed by keyword: the second positional argument
    # of requests.get() is ``params``, which would put the dict into the query
    # string instead of sending it as HTTP headers.  A timeout keeps a
    # cron-driven run from hanging forever on a stalled connection.
    r = requests.get(url, headers=headers, timeout=10)
    r.raise_for_status()
    res = json.loads(r.text)
    # The 'data' field is itself a JSON document encoded as a string.
    data_all = json.loads(res['data'])

    update_time = data_all['lastUpdateTime']

    china_total = data_all['chinaTotal']
    total_keys = ('confirm', 'heal', 'dead', 'nowConfirm', 'suspect', 'nowSevere')
    history = {update_time: {key: china_total[key] for key in total_keys}}

    details = []
    # areaTree[0] is the country node; its children are the provinces.
    for pro_infos in data_all['areaTree'][0]['children']:
        province = pro_infos['name']
        for city_infos in pro_infos.get('children', []):
            total = city_infos['total']
            details.append([update_time, province, city_infos['name'], total['confirm'],
                            city_infos['today']['confirm'], total['heal'], total['dead']])
    return history, details


def get_conn():
    """Open a MySQL connection and create a cursor on it.

    The connection settings can be overridden with the environment variables
    ``COV_DB_HOST``, ``COV_DB_USER``, ``COV_DB_PASSWORD`` and ``COV_DB_NAME``
    so that real credentials do not need to live in the source file.  The
    literals below are only fallbacks kept for backward compatibility.

    Returns:
        tuple: ``(conn, cursor)``; release both with ``close_conn``.
    """
    import os  # local import so this block does not depend on the file header

    conn = pymysql.connect(host=os.environ.get('COV_DB_HOST', '212.64.83.45'),
                           user=os.environ.get('COV_DB_USER', 'root'),
                           password=os.environ.get('COV_DB_PASSWORD', 'cb322468437'),
                           db=os.environ.get('COV_DB_NAME', 'cov'),
                           charset='utf8')
    cursor = conn.cursor()
    return conn, cursor


def close_conn(conn, cursor):
    """Close the cursor first and then the connection, skipping falsy ones."""
    for resource in (cursor, conn):
        if resource:
            resource.close()


def updata_details():
    """Store the latest per-city rows in ``details`` unless already present.

    The feed's update time is compared with the ``update_time`` of the most
    recently inserted row (highest ``id``).  Rows are inserted only when the
    two differ, or when the table is empty.  Errors are printed as a
    traceback and not re-raised, so a scheduled run never aborts with an
    exception.
    """
    cursor = None
    conn = None
    try:
        li = get_tencent_data()[1]
        if not li:
            # Nothing to compare or insert; avoids an IndexError on li[0].
            print(f'{time.asctime()}未获取到详情数据')
            return
        conn, cursor = get_conn()
        sql = "insert into details(update_time,province,city,confirm,confirm_add,heal,dead) values(%s,%s,%s,%s,%s,%s,%s)"
        # Yields 1 when the newest stored timestamp equals the feed's, 0 when
        # it differs and NULL when the table is empty.
        sql_query = "select %s=(select update_time from details order by id desc limit 1)"
        cursor.execute(sql_query, li[0][0])
        if not cursor.fetchone()[0]:
            print(f'{time.asctime()}开始更新最新数据')
            cursor.executemany(sql, li)
            conn.commit()
            print(f'{time.asctime()}更新最新数据完毕')
        else:
            print(f'{time.asctime()}已是最新数据')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still stop the script.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


def insert_history():
    """Insert every snapshot returned by the feed into ``history``.

    Unlike ``update_history`` this does not check whether a row for the same
    timestamp already exists.  Errors are printed as a traceback and not
    re-raised.
    """
    cursor = None
    conn = None
    try:
        dic = get_tencent_data()[0]
        print(f"{time.asctime()}开始插入历史数据")
        conn, cursor = get_conn()
        sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s)"
        for k, v in dic.items():
            cursor.execute(sql,
                           [k, v.get("confirm"), v.get("heal"), v.get("dead"), v.get("nowConfirm"), v.get("suspect"),
                            v.get("nowSevere")])
        conn.commit()
        print(f"{time.asctime()}插入历史数据完毕")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still stop the script.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


def update_history():
    """Insert the feed's nationwide snapshot into ``history`` if it is new.

    A snapshot is written only when no row with the same ``ds`` exists yet.
    Errors are printed as a traceback and not re-raised.
    """
    cursor = None
    conn = None
    try:
        dic = get_tencent_data()[0]
        print(f'{time.asctime()}开始更新历史数据')
        conn, cursor = get_conn()
        sql = "insert into history values(%s,%s,%s,%s,%s,%s,%s)"
        sql_query = "select confirm from history where ds=%s"
        for k, v in dic.items():
            # execute() reports how many rows the SELECT matched; zero means
            # this timestamp has not been stored yet.
            if not cursor.execute(sql_query, k):
                cursor.execute(sql, [k, v.get("confirm"), v.get("heal"), v.get("dead"), v.get("nowConfirm"),
                                     v.get("suspect"), v.get("nowSevere")])
        conn.commit()
        print(f'{time.asctime()}历史数据更新完毕')
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C and
        # SystemExit still stop the script.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)


if __name__ == "__main__":
    # Command-line entry point used by crontab:
    #   up_his -> update_history(), up_det -> updata_details()
    usage = """
        请输入参数:
        参数说明:
        up_his 更新历史数据
        up_det 更新详情数据
        """
    commands = {"up_his": update_history, "up_det": updata_details}
    handler = commands.get(sys.argv[1]) if len(sys.argv) > 1 else None
    if handler is None:
        # Shown when the argument is missing and also when it is unknown, so a
        # typo in the crontab entry no longer passes silently.
        print(usage)
    else:
        handler()

上传服务器,放在cov文件夹。
crontab定时调度:
在这里插入图片描述
最终结果:
利用爬虫实现全国疫情实时监控(手机版)_第12张图片
http://212.64.83.45:5000/

你可能感兴趣的:(利用爬虫实现全国疫情实时监控(手机版))