河北疫情严重,被封闭居家隔离(廊坊固安),闲得无聊,就编写了一个处理每日疫情数据的代码,并加上数据可视化,每日监控疫情情况,盼望早日解除隔离;
代码gitee地址:链接
主要使用网易的关于每日疫情数据的API进行数据的收集,利用requests库进行数据的爬取,并返回json数据包;
import requests
import json
def received_data(save_path=r"E:\guan_quarantine\initial_data.txt", timeout=10):
    """Fetch the daily epidemic dataset and keep a raw copy on disk.

    Sends a GET request to the NetEase (163.com) epidemic API, extracts the
    ``'data'`` member of the JSON response, writes it to *save_path* as
    pretty-printed UTF-8 JSON, and returns it.

    Args:
        save_path: File that receives the raw copy of the payload. The
            default is the original hard-coded Windows location, written as
            a raw string so the backslashes are not treated as escapes.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` may block indefinitely.

    Returns:
        The value stored under ``'data'`` in the response JSON.

    Raises:
        requests.RequestException: On network failure, timeout, or a
            non-2xx HTTP status.
        KeyError: If the response JSON has no ``'data'`` member.
        OSError: If *save_path* cannot be opened for writing.
    """
    url = "https://c.m.163.com/ug/api/wuhan/app/data/list-total"
    headers = {'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36'}

    response = requests.get(url=url, headers=headers, timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    response.raise_for_status()

    # Keep only the payload; the rest of the envelope is not used.
    json_data = response.json()['data']

    # Store one untouched copy of the payload. indent/separators make the
    # file human-readable; ensure_ascii=False keeps Chinese text legible.
    with open(save_path, 'w', encoding="utf-8") as f:
        json.dump(json_data, f,
                  indent=4,
                  separators=(',', ': '),
                  ensure_ascii=False)
    return json_data
数据处理主要是简单地对json数据重新进行了格式化,并返回一个字典的数据包(PS:里面有些重复代码块还没优化,后期单独创建一个类),主要用到了pandas;
def _collect_confirm_stats(areas):
    """Return parallel lists (names, today's confirmed, total confirmed) for *areas*."""
    names, today, total = [], [], []
    for area in areas:
        names.append(area['name'])
        today.append(area['today']['confirm'])
        total.append(area['total']['confirm'])
    return names, today, total


def cleaned_data(json_data):
    """Flatten the raw API payload into a dictionary of parallel lists.

    Args:
        json_data: Mapping with at least ``'chinaDayList'`` (a list of
            per-day records carrying ``'date'`` and
            ``'today' -> 'confirm' / 'storeConfirm'``) and ``'areaTree'``
            (a list of country nodes carrying ``'name'``,
            ``'today' -> 'confirm'``, ``'total' -> 'confirm'`` and,
            for the '中国' node, a ``'children'`` list of provinces whose
            own ``'children'`` are cities).

    Returns:
        A dict with these keys, each mapping to a list:
        ``daily_data`` / ``daily_increase`` / ``daily_total`` (one entry per
        day), ``name_of_hebei_province`` / ``daily_of_hebei_province`` /
        ``total_of_hebei_province`` (one entry per Hebei city), the same
        three for ``heilongjiang``, and ``name_of_countries`` /
        ``daily_of_countries`` / ``total_of_countries`` (one entry per
        ``areaTree`` node). Province lists are empty when the country or
        province is not present.

    Raises:
        KeyError: If a required key is missing from the payload.
    """
    china_day_lists = json_data['chinaDayList']
    tree_lists = json_data['areaTree']

    # Per-day national figures, in the order the API supplies them.
    daily_data = [day['date'] for day in china_day_lists]
    daily_increase = [day['today']['confirm'] for day in china_day_lists]
    daily_total = [day['today']['storeConfirm'] for day in china_day_lists]

    # City-level figures for the two provinces of interest. Look the
    # country node up by name so its position in the tree does not matter.
    cities_by_province = {'河北': ([], [], []), '黑龙江': ([], [], [])}
    china = next((node for node in tree_lists if node['name'] == '中国'), None)
    if china is not None:
        pending = set(cities_by_province)
        for province in china['children']:
            province_name = province['name']
            if province_name not in pending:
                continue
            cities_by_province[province_name] = _collect_confirm_stats(province['children'])
            pending.discard(province_name)
            if not pending:
                # Both provinces found; no need to scan the rest.
                break

    (name_of_hebei_province,
     daily_of_hebei_province,
     total_of_hebei_province) = cities_by_province['河北']
    (name_of_heilongjiang_province,
     daily_of_heilongjiang_province,
     total_of_heilongjiang_province) = cities_by_province['黑龙江']

    # Country-level figures for every node of the area tree.
    name_of_countries, daily_of_countries, total_of_countries = _collect_confirm_stats(tree_lists)

    return {
        'daily_data': daily_data,
        'daily_increase': daily_increase,
        'daily_total': daily_total,
        'name_of_hebei_province': name_of_hebei_province,
        'daily_of_hebei_province': daily_of_hebei_province,
        'total_of_hebei_province': total_of_hebei_province,
        'name_of_heilongjiang_province': name_of_heilongjiang_province,
        'daily_of_heilongjiang_province': daily_of_heilongjiang_province,
        'total_of_heilongjiang_province': total_of_heilongjiang_province,
        'name_of_countries': name_of_countries,
        'daily_of_countries': daily_of_countries,
        'total_of_countries': total_of_countries,
    }
def _ranked_frame(data_pack, columns, sort_by, limit=None):
    """Build a DataFrame from *columns* of *data_pack*, sorted descending by *sort_by*."""
    # Select only the needed keys: the lists in data_pack have different
    # lengths, so they cannot all live in one frame.
    frame = pd.DataFrame({column: data_pack[column] for column in columns},
                         columns=columns)
    frame = frame.sort_values(sort_by, ascending=False)
    if limit is not None:
        frame = frame.head(limit)
    # Missing figures are shown as 0 in the charts.
    return frame.fillna(0)


def process_data(data_pack):
    """Turn the cleaned data pack into three ranked DataFrames.

    Args:
        data_pack: Dictionary of parallel lists containing the
            ``name_of_* / daily_of_* / total_of_*`` keys for
            ``hebei_province``, ``heilongjiang_province`` and ``countries``.

    Returns:
        A tuple ``(hebei, heilongjiang, countries)`` of DataFrames, each
        sorted by its ``total_of_*`` column in descending order with missing
        values replaced by 0. The countries frame is limited to the top 15
        rows because the full list is too long to plot.

    Raises:
        KeyError: If one of the required keys is absent from *data_pack*.
    """
    # Hebei cities, largest cumulative count first.
    data_pack_hebei = _ranked_frame(
        data_pack,
        ['name_of_hebei_province', 'daily_of_hebei_province', 'total_of_hebei_province'],
        'total_of_hebei_province',
    )
    # Heilongjiang cities, largest cumulative count first.
    data_pack_heilongjiang = _ranked_frame(
        data_pack,
        ['name_of_heilongjiang_province', 'daily_of_heilongjiang_province', 'total_of_heilongjiang_province'],
        'total_of_heilongjiang_province',
    )
    # World data: keep only the 15 countries with the highest totals.
    data_pack_countries = _ranked_frame(
        data_pack,
        ['name_of_countries', 'daily_of_countries', 'total_of_countries'],
        'total_of_countries',
        limit=15,
    )
    return data_pack_hebei, data_pack_heilongjiang, data_pack_countries
将处理后的数据使用条形图和折线图进行数据可视化处理(PS:该部分同样存在重复代码块,可做优化),主要用到了Matplotlib;
def autolabel(rects):
    """Write each bar's height as a text label just above the bar.

    Adapted from the Matplotlib documentation example for labelling bar
    charts. The annotation is added through ``plt.annotate``, i.e. to the
    axes that are current when this function is called.

    Args:
        rects: Iterable of bar patches, as returned by ``plt.bar``.
    """
    for bar in rects:
        bar_height = bar.get_height()
        # Anchor at the horizontal centre of the bar's top edge.
        top_centre = (bar.get_x() + bar.get_width() / 2, bar_height)
        plt.annotate(
            f'{bar_height}',
            xy=top_centre,
            xytext=(0, 3),  # shift the text 3 points upward
            textcoords="offset points",
            ha='center',
            va='bottom',
        )
def _draw_grouped_bars(frame, name_column, total_column, daily_column, title, xlabel):
    """Draw a total/increase grouped bar chart of *frame* on the current axes."""
    plt.title(title)
    x = np.arange(len(frame[name_column]))
    width = 0.35  # width of one bar; each group holds two bars side by side
    total_bars = plt.bar(x - width / 2, frame[total_column], width, label='total')
    daily_bars = plt.bar(x + width / 2, frame[daily_column], width, label='increase')
    plt.ylabel('人数')
    plt.xticks(x, frame[name_column], rotation=45)
    plt.xlabel(xlabel)
    # The bars carry labels; a legend is required for them to be displayed.
    plt.legend()
    # Print the numeric value above every bar.
    autolabel(total_bars)
    autolabel(daily_bars)


def show_image(data_pack):
    """Display the analysed data as a 2x2 grid of charts.

    Panels: (1) line chart of China's daily new confirmed cases,
    (2) Hebei per-city totals and daily increase, (3) the same for
    Heilongjiang, (4) the top countries by total confirmed cases.

    The figure size is taken from the module-level names ``w`` and ``h``,
    which are not defined in this function. NOTE(review): they are divided
    by 100 to obtain inches, so they are presumably the screen size in
    pixels — confirm where they are set.

    Args:
        data_pack: Dictionary of parallel lists as produced by
            ``cleaned_data``; it is passed to ``process_data`` and its
            ``'daily_data'`` / ``'daily_increase'`` entries are plotted
            directly.
    """
    data_pack_hebei, data_pack_heilongjiang, data_pack_countries = process_data(data_pack)
    plt.figure(figsize=(w / 100, h / 100))  # size the window from w and h

    # Trend of daily new cases in China.
    plt.subplot(2, 2, 1)
    plt.plot(data_pack["daily_data"], data_pack['daily_increase'])
    plt.xticks(rotation=30, size='x-small')
    plt.title('中国新冠疫情推移图')

    # Hebei: total and newly confirmed cases per city.
    plt.subplot(2, 2, 2)
    _draw_grouped_bars(
        data_pack_hebei,
        'name_of_hebei_province',
        'total_of_hebei_province',
        'daily_of_hebei_province',
        title='河北新冠疫情推移图',
        xlabel='城市',
    )

    # Heilongjiang: total and newly confirmed cases per city.
    plt.subplot(2, 2, 3)
    _draw_grouped_bars(
        data_pack_heilongjiang,
        'name_of_heilongjiang_province',
        'total_of_heilongjiang_province',
        'daily_of_heilongjiang_province',
        title='黑龙江新冠疫情推移图',
        xlabel='城市',
    )

    # World: total and newly confirmed cases for the top countries.
    plt.subplot(2, 2, 4)
    _draw_grouped_bars(
        data_pack_countries,
        'name_of_countries',
        'total_of_countries',
        'daily_of_countries',
        title='世界疫情情况',
        xlabel='国家',
    )

    plt.show()