Python爬取2019Cov及基于Flask框架的数据可视化

Python爬取2019Cov及基于Flask框架的数据可视化

  • 项目概述
    • 获取腾讯疫情数据
      • 初步认识腾讯数据的结构
      • API数据结构化
      • 获取数据
        • 基于工具库
        • 代码
    • 爬取百度疫情热搜
      • 基于selenium工具爬取
      • 代码
    • 存储数据到MySQL
      • 导入工具库
      • 存储百度热搜数据
      • 存储腾讯数据
    • 搭建Flask框架的web服务
      • 搭建Flask基本框架
      • 搭建一个简单的Flask
        • app.py代码
    • Echarts可视化工具
    • 后台响应函数
      • 代码
  • 项目效果
      • 运行项目
      • 项目效果

项目概述

本博客是学习分享
1、基于腾讯提供的API爬取数据,部分涉及百度热搜爬取
2、项目基于Flask框架开发
3、运用百度的Echarts,进行可视化
4、数据存储到MySQL

获取腾讯疫情数据

初步认识腾讯数据的结构

首先了解一下腾讯疫情网站的相关数据结构:
腾讯疫情网站:https://news.qq.com/zt2020/page/feiyan.htm
API数据接口:

Python爬取2019Cov及基于Flask框架的数据可视化_第1张图片
1、当前数据接口:https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5
2、历史数据接口:https://view.inews.qq.com/g2/getOnsInfo?name=disease_other
3、外国数据接口:https://view.inews.qq.com/g2/getOnsInfo?name=disease_foreign
本项目只做国内数据,国外数据可视化同样道理

API数据结构化

先看历史数据结构
	data_history->
				data_history["chinaDayList"] #中国每日数据
								->confirm  确诊人数
								->suspect  疑似人数
								->dead    死亡人数
								->heal    治愈人数
								->nowConfirm   现有确诊
								->nowSevere    现有重症
								->importedCase  境外输入
								->deadRate    死亡率  
								->healRate    治愈率
								->date       日期
				data_history["chinaDayAddList"]#每日新增数据
								->confirm  新增确诊人数
								->suspect新增疑似人数
								->dead新增死亡人数
								->heal新增治愈人数
								->importedCase新增境外输入人数
								->deadRate    新增死亡率  
								->healRate    新增治愈率
								->date       日期
当前数据结构
		data_all["lastUpdateTime"] #数据时间
    	data_all["chinaTotal"]  #中国总数据  ,字典
    	data_all["chinaAdd"]  #对比上日, , 字典
    	data_all["areaTree"][0]["total"] #全国现有的数据  ,字典
    	data_all["areaTree"][0]["children"]  #省级		

由于字段太多,这里就不一一展开了

获取数据

基于工具库

import requests
import json

代码

初始化类

class tencent_data():
    """Snapshot of the Tencent epidemic API payloads (domestic data only).

    Creating an instance performs two HTTP GET requests -- current data and
    historical data -- and stores the decoded sections as attributes that the
    ``get_*`` methods read.
    """

    def __init__(self):
        """Download and decode the current and the historical data sets.

        Both endpoints answer with a JSON envelope whose ``data`` field is
        itself a JSON-encoded string, hence two ``json.loads`` calls per
        response.

        Raises:
            KeyError: a response lacks one of the expected sections.
            Exception: whatever ``requests`` / ``json`` raise on network or
                decoding failures (not handled here).
        """
        url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5'  # current domestic data
        url2 = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other'  # domestic historical data
        headers = {
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
        }
        # ``headers`` must be passed by keyword: the second positional
        # parameter of requests.get() is ``params``, so the positional form
        # appended the user agent to the query string instead of sending it
        # as a request header.
        r = requests.get(url, headers=headers)
        res = json.loads(r.text)  # outer JSON envelope -> dict
        data_all = json.loads(res['data'])  # inner JSON string -> dict

        his = requests.get(url2, headers=headers)
        res_history = json.loads(his.text)
        data_history = json.loads(res_history['data'])  # historical data

        self.update_time = data_all["lastUpdateTime"]
        self.data_Total = data_all["chinaTotal"]  # national totals (dict)
        self.data_Add = data_all["chinaAdd"]  # change versus the previous day (dict)
        self.data_Now = data_all["areaTree"][0]["total"]  # national current figures (dict)
        self.data_province = data_all["areaTree"][0]["children"]  # per-province entries

        # Historical series
        self.chinaDayList = data_history["chinaDayList"]  # cumulative figures per day
        self.chinaDayAddList = data_history["chinaDayAddList"]  # newly added figures per day

历史数据:返回字典

    def get_history(self):
        history={}
        for daylist in self.chinaDayList:
            ds="2020."+daylist["date"]
            tup=time.strptime(ds,"%Y.%m.%d")
            ds=time.strftime("%Y-%m-%d",tup)#改变时间格式
            
            confirm=daylist["confirm"]
            suspect=daylist["suspect"]
            dead=daylist["dead"]
            heal=daylist["heal"]
            nowConfirm=daylist["nowConfirm"]
            nowSevere=daylist["nowSevere"]
            importedCase=daylist["importedCase"]
            deadRate=daylist["deadRate"]
            healRate=daylist["healRate"]
            
            history[ds]={"confirm":confirm,"suspect":suspect,"dead":dead,"heal":heal,"nowConfirm":nowConfirm,
                         "nowSevere":nowSevere,"importedCase":importedCase,"deadRate":deadRate,"healRate":healRate}
        return history

每日新增历史数据:返回字典

    def get_historyAdd(self):
        historyAdd={}
        for daylist in self.chinaDayAddList:
            ds="2020."+daylist["date"]
            tup=time.strptime(ds,"%Y.%m.%d")
            ds=time.strftime("%Y-%m-%d",tup)#改变时间格式
            
            confirm=daylist["confirm"]
            suspect=daylist["suspect"]
            dead=daylist["dead"]
            heal=daylist["heal"]
            importedCase=daylist["importedCase"]
            deadRate=daylist["deadRate"]
            healRate=daylist["healRate"]
            
            historyAdd[ds]={"confirm":confirm,"suspect":suspect,"dead":dead,"heal":heal,
                         "importedCase":importedCase,"deadRate":deadRate,"healRate":healRate}
        return historyAdd

各省及市数据整合(整合没啥用-下面有分开)

    def get_details(self):
        details = []
        for pro_infos in self.data_province:
            province = pro_infos["name"] #省名
            pro_total = pro_infos["total"] #省级累计总数数据
            pro_nowConfirm = pro_total["nowConfirm"]#现有确诊数
            pro_confirm = pro_total["confirm"]  #累计确诊数
            pro_dead = pro_total["dead"] #累计死亡数
            pro_deadRate = pro_total["deadRate"] #死亡率
            pro_heal = pro_total["heal"] #累计治愈
            pro_healRate = pro_total["healRate"] #治愈率

            for city_infos in pro_infos["children"]:  #市级
                city = city_infos["name"] #市名
                city_total = city_infos["total"] #市级累计总数数据
                city_nowConfirm = city_total["nowConfirm"]#现有确诊数
                city_confirm = city_total["confirm"]  #累计确诊数
                city_dead = city_total["dead"] #累计死亡数
                city_deadRate = city_total["deadRate"] #死亡率
                city_healRate = city_total["healRate"] #治愈率

                details.append([self.update_time,province,pro_nowConfirm,pro_confirm,pro_dead,
                               pro_deadRate,pro_heal,pro_healRate,city,city_nowConfirm,
                               city_confirm,city_dead,city_deadRate,city_healRate])
        return detail

省和市分开:

    def get_cityList(self):
        #各市数据
        details = []
        for pro_infos in self.data_province:
            province = pro_infos["name"] #省名
            for city_infos in pro_infos["children"]:  #市级
                city = city_infos["name"] #市名
                city_total = city_infos["total"] #市级累计总数数据
                city_nowConfirm = city_total["nowConfirm"]#现有确诊数
                city_confirm = city_total["confirm"]  #累计确诊数
                city_dead = city_total["dead"] #累计死亡数
                city_deadRate = city_total["deadRate"] #死亡率
                city_heal = city_total["heal"] #累计治愈
                city_healRate = city_total["healRate"] #治愈率

                details.append([self.update_time,province,city,city_nowConfirm,
                               city_confirm,city_dead,city_deadRate,city_heal,city_healRate])
        return details
    
    def get_provinceList(self):
        #各省数据
        details = []
        for pro_infos in self.data_province:
            province = pro_infos["name"] #省名
            pro_total = pro_infos["total"] #省级累计总数数据
            pro_nowConfirm = pro_total["nowConfirm"]#现有确诊数
            pro_confirm = pro_total["confirm"]  #累计确诊数
            pro_dead = pro_total["dead"] #累计死亡数
            pro_deadRate = pro_total["deadRate"] #死亡率
            pro_heal = pro_total["heal"] #累计治愈
            pro_healRate = pro_total["healRate"] #治愈率
            details.append([self.update_time,province,pro_nowConfirm,pro_confirm,pro_dead,
                               pro_deadRate,pro_heal,pro_healRate])
        return details

中国当前总数据,对比上日等(就是腾讯疫情网站的数据,看下表)
Python爬取2019Cov及基于Flask框架的数据可视化_第2张图片

    def get_data_Total(self):  #中国当前总数据
        Totallist = []
        confirm=self.data_Total["confirm"]  #累计确诊
        heal=self.data_Total["heal"]        #累计治愈
        dead=self.data_Total["dead"]        #累计死亡
        nowConfirm=self.data_Total["nowConfirm"]  #现有确诊
        nowSevere=self.data_Total["nowSevere"]    #现有重症
        suspect=self.data_Total["suspect"]     #现有疑似
        importedCase=self.data_Total["importedCase"]  #境外输入
        Totallist.extend([self.update_time,confirm,heal,dead,nowConfirm,nowSevere,suspect,importedCase])
        
        return Totallist
    
    def get_data_Add(self):#对比上日
        addlist=[]
        confirm=self.data_Add["confirm"]  #新增确诊
        heal=self.data_Add["heal"]    #新增治愈
        dead=self.data_Add["dead"]    #新增死亡
        nowConfirm=self.data_Add["nowConfirm"]  #新增现有确诊
        suspect=self.data_Add["suspect"]        #新增疑似
        nowSevere=self.data_Add["nowSevere"]    #新增现有重症
        importedCase=self.data_Add["importedCase"]  #新增境外输入
        #addlist.extend([self.update_time,confirm,heal,dead,nowConfirm,suspect,nowSevere,importedCase])
        addlist.extend([self.update_time,confirm,suspect,dead,heal,nowConfirm,nowSevere,importedCase])
        return addlist
    
    def get_data_Now(self):#全国现有的数据
        data_Now_list=[]
        nowConfirm=self.data_Now["nowConfirm"]  #现有确诊
        confirm=self.data_Now["confirm"]#累计确诊
        suspect=self.data_Now["suspect"]#现有疑似
        dead=self.data_Now["dead"]#累计死亡
        deadRate=self.data_Now["deadRate"]#死亡率
        heal=self.data_Now["heal"]#累计治愈
        healRate=self.data_Now["healRate"]#治愈率
        data_Now_list.extend([self.update_time,nowConfirm,confirm,suspect,dead,deadRate,heal,healRate])
        return data_Now_list

爬取百度疫情热搜

基于selenium工具爬取

		下载谷歌浏览器的版本驱动
		[下载地址](http://npm.taobao.org/mirrors/chromedriver/)

代码

from  selenium.webdriver  import Chrome,ChromeOptions
import time
# Scrape the Baidu epidemic hot-search list with headless Chrome and print
# every entry.
url = "https://voice.baidu.com/act/virussearch/virussearch?from=osari_map&tab=0&infomore=1"
option = ChromeOptions()
option.add_argument("--headless")  # run Chrome without a visible window
option.add_argument("--no-sandbox")  # required on Linux

browser = Chrome(options=option)
try:
    browser.get(url)
    # NOTE(review): the find_element_by_* helpers were removed in Selenium 4.3;
    # with a newer Selenium use find_element(By.CSS_SELECTOR, ...) /
    # find_elements(By.XPATH, ...) instead.
    but = browser.find_element_by_css_selector('#ptab-0 > div > div.VirusHot_1-5-5_32AY4F.VirusHot_1-5-5_2RnRvg > section > div')
    but.click()  # expand the full list
    time.sleep(1)  # wait one second for the expanded entries
    c = browser.find_elements_by_xpath('//*[@id="ptab-0"]/div/div[2]/section/a/div/span[2]')
    for i in c:
        print(i.text)
finally:
    # Always release the browser, even when a lookup above fails. quit() ends
    # the whole driver session, whereas close() only closes the current window.
    browser.quit()

存储数据到MySQL

导入工具库

import pymysql
import time
import json
import traceback  #追踪异常

存储百度热搜数据

建表步骤省略

def get_conn():
    """Open a connection to the local ``19cov`` MySQL database.

    Returns:
        tuple: ``(conn, cursor)`` -- the connection and a cursor created from
        it (pymysql's default cursor yields rows as tuples).
    """
    settings = {
        "host": "127.0.0.1",
        "user": "root",
        "password": "123456",
        "db": "19cov",
    }
    conn = pymysql.connect(**settings)
    return conn, conn.cursor()

def close_conn(conn,cursor):
    """Close the cursor and then the connection, skipping falsy arguments."""
    for resource in (cursor, conn):
        if resource:
            resource.close()
def update_hotsearch():
    """Insert the current hot-search entries into the ``hotsearch`` table.

    Every entry is stored with the same timestamp. Errors are printed with
    their traceback instead of being raised, and the connection is always
    closed.

    :return: None
    """
    cursor = None
    conn = None
    try:
        # NOTE(review): get_baidu_hot() is not defined in the visible code; it
        # presumably returns the list of hot-search strings -- confirm.
        context = get_baidu_hot()
        print(f"{time.asctime()}开始更新数据")
        conn, cursor = get_conn()
        sql = "insert into hotsearch(dt,content) values(%s,%s)"
        ts = time.strftime("%Y-%m-%d %X")
        # One batched call instead of one execute() per entry.
        cursor.executemany(sql, [(ts, i) for i in context])
        conn.commit()
        print(f"{time.asctime()}数据更新完毕")
    except Exception:
        # ``Exception`` rather than a bare ``except`` so that Ctrl-C
        # (KeyboardInterrupt) and SystemExit are not swallowed.
        traceback.print_exc()
    finally:
        close_conn(conn, cursor)

存储腾讯数据

首次使用时,先运行插入类函数(insert_* / inster_*)来填充数据库;
之后只需要运行 update_* 类函数即可

import re
import datetime
import time

class dataMySQL():
    """Persist the scraped Tencent epidemic figures in the ``19cov`` database.

    ``insert_*`` / ``inster_*`` methods write unconditionally (initial fill);
    ``update_*`` methods first check whether the data is already stored.
    Every public method opens its own connection, prints the traceback of any
    error instead of raising it, and always closes the connection.
    """

    # Value keys of one history entry, in the column order used by the
    # ``history`` / ``historyAdd`` inserts (the date key comes first).
    _HISTORY_FIELDS = ("confirm", "suspect", "dead", "heal", "nowConfirm",
                       "nowSevere", "importedCase", "deadRate", "healRate")
    _HISTORY_ADD_FIELDS = ("confirm", "suspect", "dead", "heal",
                           "importedCase", "deadRate", "healRate")

    def get_conn(self):
        """Open a connection to the local ``19cov`` database.

        Returns:
            tuple: ``(conn, cursor)``; the default cursor yields tuples.
        """
        conn = pymysql.connect(host="127.0.0.1",
                               user="root",
                               password="123456",
                               db="19cov")
        cursor = conn.cursor()
        return conn, cursor

    def close_conn(self, conn, cursor):
        """Close the cursor and then the connection, skipping falsy arguments."""
        if cursor:
            cursor.close()
        if conn:
            conn.close()

    # ------------------------------------------------------------------
    # private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _date_part(stamp):
        """Return the leading ``YYYY-MM-DD`` part of a timestamp string.

        Raises:
            ValueError: ``stamp`` does not start with a date.
        """
        # Any four-digit year is accepted; the pattern used to hard-code 2020.
        match = re.match(r"\d{4}-\d+-\d+", stamp)
        if match is None:
            raise ValueError(f"unexpected timestamp format: {stamp!r}")
        return match[0]

    @staticmethod
    def _is_latest(cursor, table, stamp):
        """Return True when ``stamp`` equals the newest ``update_time`` in ``table``.

        ``table`` is interpolated into the SQL text and must therefore be one
        of this class's own table names, never user input. The comparison
        yields NULL for an empty table, which counts as "not latest".
        """
        cursor.execute(
            f"select %s=(select update_time from {table} order by update_time desc limit 1)",
            stamp,
        )
        return bool(cursor.fetchone()[0])

    def _insert_rows(self, sql, rows):
        """Execute ``sql`` once per row and commit them as one transaction."""
        cursor = None
        conn = None
        try:
            conn, cursor = self.get_conn()
            for row in rows:
                cursor.execute(sql, row)
            # A single commit after the loop keeps a snapshot all-or-nothing.
            conn.commit()
        except Exception:
            traceback.print_exc()
        finally:
            self.close_conn(conn, cursor)

    def _insert_if_newer(self, table, sql, rows, show_stamp=False):
        """Insert ``rows`` unless their timestamp is already the newest stored.

        The timestamp is the first element of the first row. ``show_stamp``
        appends it to the "already up to date" message.
        """
        cursor = None
        conn = None
        try:
            conn, cursor = self.get_conn()
            stamp = rows[0][0]
            if self._is_latest(cursor, table, stamp):
                suffix = f"最新数据时间为:{stamp}" if show_stamp else ""
                print(f"{time.asctime()}已经是最新{table}数据!{suffix}")
                return
            print(f"{time.asctime()}开始更新{table}数据")
            for row in rows:
                cursor.execute(sql, row)
            conn.commit()
            print(f"{time.asctime()}更新{table}数据完毕")
        except Exception:
            traceback.print_exc()
        finally:
            self.close_conn(conn, cursor)

    def _store_history(self, table, fields, dic, label, verb, skip_existing):
        """Insert one row per date of ``dic`` into ``table``.

        With ``skip_existing`` a date is only inserted when no row with that
        ``update_time`` exists yet. ``label`` / ``verb`` only shape the
        progress messages.
        """
        cursor = None
        conn = None
        try:
            print(f"{time.asctime()}开始{verb}{label}")
            conn, cursor = self.get_conn()
            placeholders = ",".join(["%s"] * (len(fields) + 1))
            sql = f"insert into {table} values({placeholders})"
            sql_query = f"select confirm from {table} where update_time=%s"
            for day, figures in dic.items():
                # execute() returns the number of matching rows for a select.
                if skip_existing and cursor.execute(sql_query, day):
                    continue
                cursor.execute(sql, [day] + [figures.get(name) for name in fields])
            conn.commit()
            print(f"{time.asctime()}{label}{verb}完毕")
        except Exception:
            traceback.print_exc()
        finally:
            self.close_conn(conn, cursor)

    # ------------------------------------------------------------------
    # national totals (table data_Total)
    # ------------------------------------------------------------------
    def update_data_Total(self, dataall):
        """Store the national totals, keeping at most one row per day.

        Nothing happens when ``dataall[0]`` already is the newest stored
        timestamp. Otherwise the newest stored row is replaced if it belongs
        to the same day as ``dataall[0]``, and kept if it is older. The delete
        and the insert are committed together so a failed insert cannot lose
        the previous row. An empty table is handled by simply inserting.

        Args:
            dataall: ``[update_time, confirm, heal, dead, nowConfirm,
                nowSevere, suspect, importedCase]`` with ``update_time`` as a
                string starting with ``YYYY-MM-DD``.
        """
        cursor = None
        conn = None
        try:
            li = dataall
            conn, cursor = self.get_conn()
            sql = "insert into data_Total(update_time,confirm,heal,dead,nowConfirm,nowSevere,suspect,importedCase) values(%s,%s,%s,%s,%s,%s,%s,%s)"
            if self._is_latest(cursor, "data_Total", li[0]):
                print(f"{time.asctime()}已经是最新data_Total数据!")
                return
            print(f"{time.asctime()}开始更新data_Total数据")
            new_time = self._date_part(li[0])  # e.g. "2020-03-27"
            cursor.execute('select update_time from data_Total order by update_time desc limit 1')
            result = cursor.fetchone()  # None for an empty table, else (datetime,)
            if result is None:
                pass  # empty table: nothing to compare against, just insert
            elif result[0].strftime("%Y-%m-%d") == new_time:
                # Newest stored row is from the same day: replace it.
                print("最后一条为今天数据,删除")
                cursor.execute("delete from data_Total where update_time = %s", result[0])
                print("删除完毕")
            else:
                print("最后一条为昨天数据,保留")
            cursor.execute(sql, li)
            conn.commit()  # commits the delete (if any) and the insert together
            print(f"{time.asctime()}更新data_Total数据完毕")
        except Exception:
            traceback.print_exc()
        finally:
            self.close_conn(conn, cursor)

    def insert_data_Total(self, dataall):
        """Insert one row of national totals unconditionally."""
        sql = "insert into data_Total(update_time,confirm,heal,dead,nowConfirm,nowSevere,suspect,importedCase) values(%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_rows(sql, [dataall])

    # ------------------------------------------------------------------
    # daily list (table datelist)
    # ------------------------------------------------------------------
    def update_datelist(self, dataall):
        """Store the first six values of ``dataall`` in ``datelist`` per date.

        This is the logic of the first of two methods that were both named
        ``update_data_Add``; the second definition replaced it, making it
        unreachable. It is exposed under its own name and no longer modifies
        the caller's list.

        Args:
            dataall: sequence whose first element is a timestamp string; only
                its ``YYYY-MM-DD`` part is stored.
        """
        cursor = None
        conn = None
        try:
            mylist = list(dataall[0:6])
            mylist[0] = self._date_part(mylist[0])  # e.g. "2020-03-27"
            print(mylist)
            conn, cursor = self.get_conn()
            # NOTE(review): the sixth column is ``importedCase`` but the sixth
            # element of get_data_Add() output is ``nowConfirm`` -- confirm
            # which value the caller intends to store here.
            sql = "insert into datelist(update_time,confirm,suspect,dead,heal,importedCase) values(%s,%s,%s,%s,%s,%s)"
            if self._is_latest(cursor, "datelist", mylist[0]):
                print(f"{time.asctime()}已经是最新datelist数据!")
                return
            print(f"{time.asctime()}开始更新datelist数据")
            cursor.execute(sql, mylist)
            conn.commit()
            print(f"{time.asctime()}更新datelist数据完毕")
        except Exception:
            traceback.print_exc()
        finally:
            self.close_conn(conn, cursor)

    # ------------------------------------------------------------------
    # provinces (table provinceList)
    # ------------------------------------------------------------------
    def insert_provinceList(self, dataall):
        """Insert every province row of ``dataall`` unconditionally."""
        sql = "insert into provinceList(update_time,province,pro_nowConfirm,pro_confirm,pro_dead,pro_deadRate,pro_heal,pro_healRate) values(%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_rows(sql, dataall)

    def update_provinceList(self, dataall):
        """Insert the province rows unless their timestamp is already stored."""
        sql = "insert into provinceList(update_time,province,pro_nowConfirm,pro_confirm,pro_dead,pro_deadRate,pro_heal,pro_healRate) values(%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_if_newer("provinceList", sql, dataall)

    # ------------------------------------------------------------------
    # cities (table cityList)
    # ------------------------------------------------------------------
    def insert_cityList(self, dataall):
        """Insert every city row of ``dataall`` unconditionally."""
        sql = "insert into cityList(update_time,province,city,city_nowConfirm,city_confirm,city_dead,city_deadRate,city_heal,city_healRate) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_rows(sql, dataall)

    def update_cityList(self, dataall):
        """Insert the city rows unless their timestamp is already stored."""
        sql = "insert into cityList(update_time,province,city,city_nowConfirm,city_confirm,city_dead,city_deadRate,city_heal,city_healRate) values(%s,%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_if_newer("cityList", sql, dataall, show_stamp=True)

    # ------------------------------------------------------------------
    # change versus previous day (table data_Add)
    # ------------------------------------------------------------------
    def insert_data_Add(self, dataall):
        """Insert one row of day-over-day changes unconditionally."""
        sql = "insert into data_Add(update_time,confirm,suspect,dead,heal,nowConfirm,nowSevere,importedCase) values(%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_rows(sql, [dataall])

    def update_data_Add(self, dataall):
        """Insert the day-over-day changes unless their timestamp is already stored."""
        sql = "insert into data_Add(update_time,confirm,suspect,dead,heal,nowConfirm,nowSevere,importedCase) values(%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_if_newer("data_Add", sql, [dataall], show_stamp=True)

    # ------------------------------------------------------------------
    # national current figures (table data_Now)
    # ------------------------------------------------------------------
    def insert_data_Now(self, dataall):
        """Insert one row of current national figures unconditionally."""
        sql = "insert into data_Now(update_time,nowConfirm,confirm,suspect,dead,deadRate,heal,healRate) values(%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_rows(sql, [dataall])

    def update_data_Now(self, dataall):
        """Insert the current national figures unless their timestamp is already stored."""
        sql = "insert into data_Now(update_time,nowConfirm,confirm,suspect,dead,deadRate,heal,healRate) values(%s,%s,%s,%s,%s,%s,%s,%s)"
        self._insert_if_newer("data_Now", sql, [dataall], show_stamp=True)

    # ------------------------------------------------------------------
    # history (tables history / historyAdd)
    # ------------------------------------------------------------------
    def inster_history(self, dataall):
        """Insert every date of the cumulative history dict unconditionally."""
        self._store_history("history", self._HISTORY_FIELDS, dataall,
                            "历史数据", "插入", skip_existing=False)

    def update_history(self, dataall):
        """Insert the dates of the cumulative history dict that are not stored yet."""
        self._store_history("history", self._HISTORY_FIELDS, dataall,
                            "历史数据", "更新", skip_existing=True)

    def inster_historyAdd(self, dataall):
        """Insert every date of the daily-increase history dict unconditionally."""
        self._store_history("historyAdd", self._HISTORY_ADD_FIELDS, dataall,
                            "每日新增历史数据", "插入", skip_existing=False)

    def update_historyAdd(self, dataall):
        """Insert the dates of the daily-increase history dict that are not stored yet."""
        self._store_history("historyAdd", self._HISTORY_ADD_FIELDS, dataall,
                            "每日新增历史数据", "更新", skip_existing=True)
        
        

当运行成功会出现这样

Python爬取2019Cov及基于Flask框架的数据可视化_第3张图片

搭建Flask框架的web服务

首先,确认有没有Flask工具库,没有 pip install flask

搭建Flask基本框架

因为我穷,所以只能用pycharm的社区版,所以只能自己建文件夹咯
static存放的是CSS,js文件
templates 存放的是html模板
app.py 运行文件
utils.py 存放函数
Python爬取2019Cov及基于Flask框架的数据可视化_第4张图片

搭建一个简单的Flask

在app.py文件中 导入flask的基本库,就可以运行了

from flask import Flask
from flask import request
from flask import render_template
from flask import jsonify
from time import strftime
from jieba.analyse import extract_tags  #提取关键字的库
import string
import utils
app = Flask(__name__)


@app.route("/")
def hello():
    """Serve the page rendered from the ``main.html`` template."""
    page = render_template("main.html")
    return page

app.py代码

from flask import Flask
from flask import request
from flask import render_template
from flask import jsonify
from time import strftime
from jieba.analyse import extract_tags  #提取关键字的库
import string
import utils
app = Flask(__name__)


@app.route("/")
def hello():
    """Serve the page rendered from the ``main.html`` template."""
    page = render_template("main.html")
    return page



@app.route("/c1")
def get_c1_data():
    """Return confirm / heal / dead / importedCase from ``utils.get_c1_data`` as JSON integers."""
    row = utils.get_c1_data()
    keys = ("confirm", "heal", "dead", "importedCase")
    payload = {key: int(row[position]) for position, key in enumerate(keys)}
    return jsonify(payload)

@app.route("/c2")
def get_c2_data():
    """Return ``{"data": [{"name": ..., "value": int}, ...]}`` built from ``utils.get_c2_data``."""
    entries = [
        {"name": row[0], "value": int(row[1])}
        for row in utils.get_c2_data()
    ]
    return jsonify({"data": entries})

@app.route("/l1")
def get_l1_data():
    """Return the history series (day, confirm, suspect, heal, dead) as JSON.

    The first seven rows returned by ``utils.get_l1_data`` are skipped; days
    are formatted as ``MM-DD``.
    """
    series = {"day": [], "confirm": [], "suspect": [], "heal": [], "dead": []}
    rows = utils.get_l1_data()
    for stamp, confirmed, suspected, healed, died in rows[7:]:
        series["day"].append(stamp.strftime("%m-%d"))
        series["confirm"].append(confirmed)
        series["suspect"].append(suspected)
        series["heal"].append(healed)
        series["dead"].append(died)
    return jsonify(series)

@app.route("/l2")
def get_l2_data():
    """Return the daily-increase series (day, confirm, suspect) as JSON.

    The first seven rows returned by ``utils.get_l2_data`` are skipped; days
    are formatted as ``MM-DD``.
    """
    series = {"day": [], "confirm": [], "suspect": []}
    rows = utils.get_l2_data()
    for stamp, confirmed, suspected in rows[7:]:
        series["day"].append(stamp.strftime("%m-%d"))
        series["confirm"].append(confirmed)
        series["suspect"].append(suspected)
    return jsonify(series)

@app.route("/r1")
def get_r1_data():
    """Return parallel ``city`` / ``confirm`` lists built from ``utils.get_r1_data``."""
    pairs = utils.get_r1_data()
    names = []
    counts = []
    for name, count in pairs:
        names.append(name)
        counts.append(int(count))
    payload = {"city": names, "confirm": counts}
    return jsonify(payload)


@app.route("/r2")
def get_r2_data():
    """Return keyword entries extracted from the stored hot-search strings.

    Each stored string is a headline followed by trailing digits. The digits
    become the ``value`` of every non-numeric keyword that jieba extracts from
    the headline.
    """
    keywords = []
    for row in utils.get_r2_data():  # rows look like ((string,), (string,), ...)
        text = row[0]
        headline = text.rstrip(string.digits)  # headline without the trailing number
        heat = text[len(headline):]  # the trailing number, kept as a string
        keywords.extend(
            {'name': word, 'value': heat}
            for word in extract_tags(headline)
            if not word.isdigit()
        )
    return jsonify({'kws': keywords})

@app.route("/time")
def gettime():
    """Return the formatted current time produced by ``utils.get_time``."""
    now_text = utils.get_time()
    return now_text

@app.route("/ajax",methods=["get","post"])
def hello_world4():
    """Print the ``name`` and ``score`` request values and answer ``'10000'``."""
    params = request.values
    name = params.get("name")
    score = params.get("score")
    print(f"name:{name},score:{score}")
    return '10000'
# Start the Flask server only when this file is executed directly.
if __name__ == "__main__":
    app.run()

Echarts可视化工具

echarts网址
这里涉及到js的运用,其实也就是熟悉数据的类型使用
其实也可以使用 pyecharts 库,它是基于 echarts、面向 Python 二次开发的可视化库。

后台响应函数

在utils.py中存放调用的后台处理函数,主要是返回给前端

代码

import time
import pymysql

def get_time():
    """Return the current local time as ``YYYY年MM月DD日 <time>``.

    The Chinese unit characters are filled in after ``strftime`` has run,
    through ``{}`` placeholders left in the format string.
    """
    template = time.strftime("%Y{}%m{}%d{} %X")
    return template.format(*"年月日")

def get_conn():
    """Open a connection to the local ``19cov`` MySQL database.

    Returns:
        tuple: ``(conn, cursor)`` -- the connection and a cursor created from
        it (pymysql's default cursor yields rows as tuples).
    """
    settings = {
        "host": "127.0.0.1",
        "user": "root",
        "password": "123456",
        "db": "19cov",
    }
    conn = pymysql.connect(**settings)
    return conn, conn.cursor()

def close_conn(conn, cursor):
    """Close the cursor and then the connection, skipping falsy arguments."""
    for resource in (cursor, conn):
        if resource:
            resource.close()

def query(sql,*args):
    """Run one query on a fresh connection and return all result rows.

    :param sql: SQL text, optionally containing ``%s`` placeholders
    :param args: values bound to the placeholders
    :return: the rows returned by ``cursor.fetchall()``
    :raises Exception: database errors propagate to the caller; the
        connection is closed first
    """
    conn = None
    cursor = None
    try:
        conn, cursor = get_conn()
        cursor.execute(sql, args)
        return cursor.fetchall()
    finally:
        # Close even when execute()/fetchall() raises; the previous version
        # leaked the connection in that case.
        close_conn(conn, cursor)

def get_c1_data():
    """Return the newest ``data_total`` row.

    :return: tuple ``(confirm, heal, dead, importedCase)`` of the row with the
        latest ``update_time``
    """
    sql = (
        "select confirm,heal,dead,importedCase "
        "from data_total "
        "where update_time=(select update_time from data_total order by update_time desc limit 1)"
    )
    rows = query(sql)
    return rows[0]

def get_c2_data():
    """Return the per-province confirmed counts of the newest snapshot.

    :return: rows of ``(province, pro_confirm)``
    """
    # The table holds several snapshots; only the rows carrying the latest
    # update_time are selected.
    sql = (
        "select province,pro_confirm from provincelist "
        "where update_time=(select update_time from provincelist "
        "order by update_time desc limit 1) "
        "group by province"
    )
    return query(sql)

def get_l1_data():
    """Return every ``history`` row as ``(update_time, confirm, suspect, heal, dead)``."""
    return query("select update_time,confirm,suspect,heal,dead from history")

def get_l2_data():
    """Return every ``historyadd`` row as ``(update_time, confirm, suspect)``."""
    return query("select update_time,confirm,suspect from historyadd")

def get_r1_data():
    """Return up to five ``(city, city_confirm)`` rows with the highest counts.

    Only the newest snapshot is used. Cities in Hubei are excluded, and the
    four municipalities (Beijing, Shanghai, Tianjin, Chongqing) are summed
    into one row each. The query fetches six rows so that the placeholder
    entry '地区待确认' can be dropped and five rows still remain.

    :return: tuple of at most five ``(city, city_confirm)`` rows
    """
    sql = 'SELECT city,city_confirm FROM ' \
          '(select city,city_confirm from citylist ' \
          'where update_time=(select update_time from citylist order by update_time desc limit 1) ' \
          'and province not in ("湖北","北京","上海","天津","重庆") ' \
          'union all ' \
          'select province as city,sum(city_confirm) as city_confirm from citylist ' \
          'where update_time=(select update_time from citylist order by update_time desc limit 1) ' \
          'and province in ("北京","上海","天津","重庆") group by province) as a ' \
          'ORDER BY city_confirm DESC LIMIT 6'

    res = query(sql)

    mylist = []
    for i in res:
        if i[0] != '地区待确认':
            mylist.append(i)
        else:
            print("地区待确认存在")
    # Keep the top five. The previous ``if len != 5: pop()`` dropped a valid
    # row whenever fewer than five remained and raised IndexError on an empty
    # result.
    return tuple(mylist[:5])

def get_r2_data():
    """Return the 20 most recently stored hot-search entries.

    :return: tuple of one-element tuples, e.g.
        ``(('克里姆林宫工作人员感染病毒173682',), ('马克布鲁姆感染新冠去世183621',))``
    """
    newest_first = 'select content from hotsearch order by id desc limit 20'
    return query(newest_first)
	
# Manual smoke test: print the stored hot-search rows when run directly.
if __name__ == "__main__":
    print(get_r2_data())


项目效果

运行项目

Python爬取2019Cov及基于Flask框架的数据可视化_第5张图片

项目效果

Python爬取2019Cov及基于Flask框架的数据可视化_第6张图片

就这样项目基本完成了

你可能感兴趣的:(Python爬取2019Cov及基于Flask框架的数据可视化)