利用python获取每日疫情数据并数据可视化

利用python获取每日疫情数据并数据可视化

河北疫情严重,被封闭居家隔离(廊坊固安),闲得无聊,就编写了一个处理每日疫情数据的代码,并加上数据可视化,用于每日监控疫情情况,盼望早日解除隔离;

代码gitee地址:链接

数据来源

主要使用网易的每日疫情数据API进行数据的收集,利用requests库进行数据的爬取,并返回json数据包;

import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests

def received_data():
    """Download the daily epidemic data and keep a local copy of it.

    Sends a GET request to the NetEase epidemic API, extracts the ``data``
    member of the JSON response, writes it (pretty-printed, UTF-8) to
    ``E:\\guan_quarantine\\initial_data.txt`` and returns it.

    Returns:
        The value stored under the ``data`` key of the JSON response.

    Raises:
        requests.exceptions.RequestException: if the request fails, times
            out, or the server answers with an HTTP error status.
        KeyError: if the response has no ``data`` member.
        OSError: if the backup file cannot be written.
    """
    urls = "https://c.m.163.com/ug/api/wuhan/app/data/list-total"

    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

    # A timeout keeps the script from hanging forever on a stalled connection.
    initial_data = requests.get(url=urls, headers=headers, timeout=10)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    initial_data.raise_for_status()

    # Keep only the payload of the response envelope.
    json_data = initial_data.json()['data']

    # Store a copy of the raw payload. The raw string keeps the backslashes
    # of the Windows path literal ("\g" and "\i" are not valid escapes).
    with open(r"E:\guan_quarantine\initial_data.txt", 'w', encoding="utf-8") as f:
        # indent/separators pretty-print the file; ensure_ascii=False keeps
        # Chinese text readable instead of \uXXXX escapes.
        json.dump(json_data, f,
                  indent=4,
                  separators=(',', ': '),
                  ensure_ascii=False)

    return json_data

数据处理

数据处理主要是简单地对json数据进行重新格式化,并返回一个字典形式的数据包(PS:里面有些重复代码块还没优化,后期单独创建一个类),主要用到了pandas;

def cleaned_data(json_data):
    """Extract the series needed for plotting from the raw API payload.

    Reads ``json_data['chinaDayList']`` (one entry per day) and
    ``json_data['areaTree']`` (one entry per country; the entry named
    '中国' carries provinces under ``children``, which in turn carry
    cities under ``children``).

    Args:
        json_data: mapping with at least the keys ``chinaDayList`` and
            ``areaTree``.

    Returns:
        A dict of parallel lists:
        ``daily_data`` / ``daily_increase`` / ``daily_total`` hold the
        per-day ``date``, ``today.confirm`` and ``today.storeConfirm``
        values; ``name_of_*`` / ``daily_of_*`` / ``total_of_*`` hold, for
        the Hebei cities, the Heilongjiang cities and all countries, the
        ``name``, ``today.confirm`` and ``total.confirm`` values.

    Raises:
        KeyError: if an entry lacks one of the fields read above.
    """
    china_day_lists = json_data['chinaDayList']
    tree_lists = json_data['areaTree']

    # Nationwide figures, one value per day.
    daily_data = [day['date'] for day in china_day_lists]
    daily_increase = [day['today']['confirm'] for day in china_day_lists]
    daily_total = [day['today']['storeConfirm'] for day in china_day_lists]

    # Per-city figures for the two provinces of interest, keyed by the
    # province name used in the payload: (names, daily, total).
    province_series = {
        '河北': ([], [], []),
        '黑龙江': ([], [], []),
    }
    found_provinces = set()

    for country in tree_lists:
        if country['name'] != '中国':
            continue
        for province in country['children']:
            series = province_series.get(province['name'])
            if series is not None:
                names, daily, total = series
                for city in province['children']:
                    names.append(city['name'])
                    daily.append(city['today']['confirm'])
                    total.append(city['total']['confirm'])
                found_provinces.add(province['name'])
            elif len(found_provinces) == len(province_series):
                # Both provinces collected; no need to scan the rest.
                break
        # Only the first entry named '中国' is considered.
        break

    # Worldwide figures, one value per country.
    name_of_countries = [country['name'] for country in tree_lists]
    daily_of_countries = [country['today']['confirm'] for country in tree_lists]
    total_of_countries = [country['total']['confirm'] for country in tree_lists]

    name_of_hebei_province, daily_of_hebei_province, total_of_hebei_province = province_series['河北']
    (name_of_heilongjiang_province,
     daily_of_heilongjiang_province,
     total_of_heilongjiang_province) = province_series['黑龙江']

    return {
        'daily_data': daily_data,
        'daily_increase': daily_increase,
        'daily_total': daily_total,
        'name_of_hebei_province': name_of_hebei_province,
        'daily_of_hebei_province': daily_of_hebei_province,
        'total_of_hebei_province': total_of_hebei_province,
        'name_of_heilongjiang_province': name_of_heilongjiang_province,
        'daily_of_heilongjiang_province': daily_of_heilongjiang_province,
        'total_of_heilongjiang_province': total_of_heilongjiang_province,
        'name_of_countries': name_of_countries,
        'daily_of_countries': daily_of_countries,
        'total_of_countries': total_of_countries,
    }

def process_data(data_pack):
    """Turn the cleaned data pack into three sorted pandas tables.

    For Hebei cities, Heilongjiang cities and countries, a DataFrame with
    the name / daily / total columns is built from ``data_pack``, sorted by
    the total column in descending order, and NaN values are replaced by 0.
    The country table is truncated to its first 15 rows after sorting.

    Args:
        data_pack: dict of lists as returned by ``cleaned_data``.

    Returns:
        Tuple ``(hebei, heilongjiang, countries)`` of DataFrames.
    """

    def build_table(columns, sort_column, limit=None):
        # Only the listed keys of data_pack become columns of the table.
        table = pd.DataFrame(data_pack, columns=columns)
        table = table.sort_values(sort_column, ascending=False)
        if limit is not None:
            table = table.head(limit)
        # Missing figures are shown as 0.
        return table.replace(np.nan, 0)

    hebei_table = build_table(
        ['name_of_hebei_province', 'daily_of_hebei_province', 'total_of_hebei_province'],
        'total_of_hebei_province',
    )
    heilongjiang_table = build_table(
        ['name_of_heilongjiang_province', 'daily_of_heilongjiang_province', 'total_of_heilongjiang_province'],
        'total_of_heilongjiang_province',
    )
    # The world table is large, so only the 15 highest totals are kept.
    top_countries = build_table(
        ['name_of_countries', 'daily_of_countries', 'total_of_countries'],
        'total_of_countries',
        limit=15,
    )

    return hebei_table, heilongjiang_table, top_countries

数据可视化

将处理后的数据使用条形图和折线图进行数据可视化处理(PS:该部分同样存在重复代码块,可做优化),主要用到了Matplotlib;

def autolabel(rects, ax=None):
    """Attach a text label above each bar in *rects*, displaying its height.

    Adapted from the matplotlib documentation example for labelling bars.

    Args:
        rects: iterable of bar artists (e.g. the container returned by
            ``bar``); each must provide ``get_height``, ``get_x`` and
            ``get_width``.
        ax: axes to annotate. When omitted, each bar is annotated on the
            axes it belongs to (``rect.axes``), so the labels do not depend
            on which subplot is "current" at call time.
    """
    for rect in rects:
        target = rect.axes if ax is None else ax
        height = rect.get_height()
        target.annotate('{}'.format(height),
                        xy=(rect.get_x() + rect.get_width() / 2, height),
                        xytext=(0, 3),  # 3 points vertical offset
                        textcoords="offset points",
                        ha='center', va='bottom')


def _draw_grouped_bars(position, title, frame, name_column, total_column, daily_column, xlabel):
    """Draw one 'total vs. daily increase' grouped bar chart in cell *position* of the 2x2 grid."""
    plt.subplot(2, 2, position)
    plt.title(title)
    x = np.arange(len(frame[name_column]))
    width = 0.35
    # Two bars per category, placed side by side around each tick.
    total_bars = plt.bar(x - width / 2, frame[total_column], width, label='total')
    daily_bars = plt.bar(x + width / 2, frame[daily_column], width, label='increase')
    plt.ylabel('人数')
    plt.xticks(x, frame[name_column], rotation=45)
    plt.xlabel(xlabel)
    # Without a legend the 'total' / 'increase' bar labels are never shown.
    plt.legend()
    # Print the value above every bar.
    autolabel(total_bars)
    autolabel(daily_bars)


def show_image(data_pack, w=1920, h=1080):
    """Plot the processed data as a 2x2 grid of charts and show the window.

    Cell 1 is a line chart of the nationwide daily increase; cells 2-4 are
    grouped bar charts (total vs. daily increase) for the Hebei cities, the
    Heilongjiang cities and the countries table returned by
    ``process_data``.

    Args:
        data_pack: dict of lists as returned by ``cleaned_data``.
        w: figure width in pixels; divided by 100 to obtain inches
            (matplotlib's default resolution is 100 dpi).
        h: figure height in pixels, converted the same way.
    """
    data_pack_hebei, data_pack_heilongjiang, data_pack_countries = process_data(data_pack)

    plt.figure(figsize=(w / 100, h / 100))

    # Nationwide daily increase over time.
    plt.subplot(2, 2, 1)
    plt.plot(data_pack["daily_data"], data_pack['daily_increase'])
    plt.xticks(rotation=30, size='x-small')
    plt.title('中国新冠疫情推移图')

    # Hebei: total and daily increase per city.
    _draw_grouped_bars(2, '河北新冠疫情推移图', data_pack_hebei,
                       'name_of_hebei_province',
                       'total_of_hebei_province',
                       'daily_of_hebei_province',
                       '城市')

    # Heilongjiang: total and daily increase per city.
    _draw_grouped_bars(3, '黑龙江新冠疫情推移图', data_pack_heilongjiang,
                       'name_of_heilongjiang_province',
                       'total_of_heilongjiang_province',
                       'daily_of_heilongjiang_province',
                       '城市')

    # World: total and daily increase per country.
    _draw_grouped_bars(4, '世界疫情情况', data_pack_countries,
                       'name_of_countries',
                       'total_of_countries',
                       'daily_of_countries',
                       '国家')

    plt.show()

你可能感兴趣的:(笔记,python,数据分析,数据可视化)