Python爬取2020新冠肺炎疫情数据及Tableau可视化分析

当前新冠病毒肆虐中国,全国上下统一部署全力防控疫情扩散。我们可以从多个渠道获取疫情发展的最新数据,网上也有不少程序爬取相关数据,并做可视化的案例。今天我也来小试一下。
目标:
1、爬取腾讯网新冠肺炎疫情数据;
2、Tableau可视化分析。
话不多说,直接上代码及效果图。
(附:由于网站结构调整,原来的代码已无法成功爬取,3月14日更新了代码,现在应该可以正常爬取了。以下为更新后的代码。)

import requests
import json
import time
import csv

# Parse China's day-by-day epidemic figures
def getdata_chinadaily(resp, path='d:/cov_china_report2020.csv'):
    """Write China's daily epidemic figures to a CSV file.

    Args:
        resp: Parsed payload. ``resp['chinaDayList']`` items must provide
            'date', 'confirm', 'dead', 'heal', 'nowConfirm' and 'deadRate';
            ``resp['dailyNewAddHistory']`` items must provide 'date' and
            'country' (the nationwide number of newly confirmed cases).
        path: Destination CSV file. Defaults to the location the script
            has always written to.

    Raises:
        KeyError: If one of the keys listed above is absent from the payload.
        OSError: If ``path`` cannot be opened for writing.

    A date that appears in only one of the two lists is still written; the
    columns it has no value for are filled with 'NA'.
    """
    # CSV column -> key in each chinaDayList item.
    cumulative_fields = (
        ('acc_confirm', 'confirm'),
        ('acc_dead', 'dead'),
        ('acc_heal', 'heal'),
        ('now_confirm', 'nowConfirm'),
        ('dead_rate', 'deadRate'),
    )
    value_columns = [col for col, _ in cumulative_fields] + ['dailyadd_confirm']

    # The data set is small, so an in-memory dict keyed by date is enough.
    records = {}
    for item in resp['chinaDayList']:
        records[item['date']] = {col: item[key] for col, key in cumulative_fields}

    # setdefault: a date missing from chinaDayList must not abort the export.
    for item in resp['dailyNewAddHistory']:
        records.setdefault(item['date'], {})['dailyadd_confirm'] = item['country']

    with open(path, 'w', newline='', encoding='utf-8') as f:
        writer = csv.writer(f)
        writer.writerow(['date'] + value_columns)
        for date, values in records.items():
            # Dates are assumed to arrive as 'MM.DD'; the year is fixed to 2020.
            row = ['2020-' + date.replace('.', '-')]
            row.extend(values.get(col, 'NA') for col in value_columns)
            writer.writerow(row)

# Parse the per-country infection figures
def getdata_worlddistribution(resp, path='d:/coronavirus2020worlddistribution.csv'):
    """Write per-country case counts and a computed death rate to a CSV file.

    Args:
        resp: Parsed payload. ``resp['foreignList']`` items must provide
            'name', 'confirm', 'suspect' and 'dead'.
        path: Destination CSV file. Defaults to the location the script
            has always written to.

    Raises:
        KeyError: If one of the keys listed above is absent from the payload.
        OSError: If ``path`` cannot be opened for writing.

    The 'deadrate' column is dead / confirm rounded to two decimals. It is
    written as 'NA' when a country reports zero confirmed cases, because the
    ratio is undefined there.
    """
    # utf-8-sig: names are non-ASCII, so do not depend on the locale encoding;
    # the BOM lets spreadsheet/BI tools detect UTF-8.
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['country', 'confirm', 'suspect', 'dead', 'deadrate'])
        for entry in resp['foreignList']:
            confirm = entry['confirm']
            dead = entry['dead']
            # Guard against division by zero for countries with no confirmed cases.
            dead_rate = round(dead / confirm, 2) if confirm else 'NA'
            writer.writerow([entry['name'], confirm, entry['suspect'], dead, dead_rate])

# Parse the per-province infection figures for China
def getdata_chinadistrubution(resp, path='d:/coronavirus2020chinadistribution.csv'):
    """Write per-province case totals to a CSV file.

    Args:
        resp: Parsed payload. The provinces are read from
            ``resp['areaTree'][0]['children']``; each item must provide
            'name' and a 'total' dict with 'confirm', 'suspect', 'dead'
            and 'deadRate'.
        path: Destination CSV file. Defaults to the location the script
            has always written to.

    Raises:
        KeyError, IndexError: If the payload does not have the layout above.
        OSError: If ``path`` cannot be opened for writing.
    """
    # utf-8-sig: names are non-ASCII, so do not depend on the locale encoding;
    # the BOM lets spreadsheet/BI tools detect UTF-8.
    with open(path, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.writer(f)
        writer.writerow(['province', 'confirm', 'suspect', 'dead', 'deadrate'])
        for province in resp['areaTree'][0]['children']:
            total = province['total']
            writer.writerow([
                province['name'],
                total['confirm'],
                total['suspect'],
                total['dead'],
                total['deadRate'],
            ])

def _fetch_payload(name, headers, timeout=10):
    """Download one ``getOnsInfo`` data set and return its decoded payload.

    Args:
        name: Value of the ``name`` query parameter (e.g. 'disease_h5').
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
    """
    # The current Unix time is appended as the `_` query parameter.
    url = 'https://view.inews.qq.com/g2/getOnsInfo?name=%s&callback=&_=%d' % (name, int(time.time()))
    response = requests.get(url, headers=headers, timeout=timeout)
    # Fail with a clear HTTP error instead of a confusing JSON/Key error later.
    response.raise_for_status()
    # The 'data' field of the response is itself a JSON-encoded string.
    return json.loads(response.json()['data'])


if __name__ == '__main__':
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36',
        'Referer': 'https://news.qq.com/zt2020/page/feiyan.htm'}
    # 'disease_h5': per-province data for China.
    area_payload = _fetch_payload('disease_h5', headers)
    # 'disease_other': China's daily history and the worldwide data.
    other_payload = _fetch_payload('disease_other', headers)
    getdata_chinadistrubution(area_payload)
    getdata_chinadaily(other_payload)
    getdata_worlddistribution(other_payload)

技术关键点:
1、通过开发者工具找到进行请求的url地址。
2、请求返回的数据为json格式。其中data字段本身又是一个json字符串,需要再用json.loads解析一次。解析后的数据有多重字典和列表的嵌套,需要小心处理。

可视化工具我选择了Tableau,虽然用python的matplotlib等包也可以实现,并且有更多的自定义功能。不过从效率来看,商业化的Tableau可谓非常智能,非常高效,不愧是BI界的No.1了。以下为Tableau做的仪表盘效果图。只要简单的拖放操作,即可实现。

Python爬取2020新冠肺炎疫情数据及Tableau可视化分析_第1张图片

你可能感兴趣的:(python,数据分析,BI)