本文档仅供参考和个人娱乐分享,请勿用于其他场合或其他用途
工作环境:windows/anaconda3 + python 3.5
python IDE:pycharm
数据来源:https://news.qq.com/zt2020/page/feiyan.htm#/
分析工具:Fiddler
实际url: https://view.inews.qq.com/g2/getOnsInfo?name=disease_other&callback=jQuery3410801068472621489_1583674003060&_=1583674003061
程序结构:
主程序:
from num_process import downloader
from pic_process import pic_process
def main_fx():
    """Download the data, plot the daily increase and print summary statistics.

    The statistics printed are the largest and smallest daily increase of
    confirmed cases (with their dates) and the mean and median of that series,
    as returned by ``downloader.num_feature``.
    """
    target = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_other&callback=jQuery3410801068472621489_1583674003060&_=1583674003061'
    downloader_fx = downloader(target)
    new_confirm_np, new_dead_np = downloader_fx.get_contents()

    pic_process_fx = pic_process(downloader_fx.date_num_list, new_confirm_np, new_dead_np)
    pic_process_fx.pic_plot()

    max_num, max_day, min_day, min_num, mean_num, median_num = downloader_fx.num_feature()
    print('最大值是%d,日期是%s' % (max_num, max_day) + '\n')
    print('最小值是%d,日期是%s' % (min_num, min_day) + '\n')
    # mean_num is the arithmetic mean and median_num the median; the labels
    # must say so. Both may be fractional, so they are not truncated with %d.
    print('平均数是%.1f,中位数是%.1f' % (mean_num, median_num))


if __name__ == '__main__':
    main_fx()
数据获取处理:
import requests
import json
import numpy as np
import operator
class downloader():
    """Fetch a JSONP epidemic feed and derive day-over-day increases.

    Attributes:
        target: URL of the JSONP endpoint to request.
        date_num_list: date strings (``'month.day'``) of the retained days.
        confirm_list: ``confirm`` value of each retained day.
        dead_list: ``dead`` value of each retained day.

    The three lists are filled by ``get_contents`` and are index-aligned.
    """

    def __init__(self, target):
        self.target = target
        self.date_num_list = []
        self.confirm_list = []
        self.dead_list = []

    @staticmethod
    def _daily_increase(values):
        """Return the differences between consecutive entries of *values*."""
        return np.diff(np.array(values))

    def get_download_url(self):
        """Request ``self.target`` and return the response body as text."""
        # Without a timeout an unresponsive server would block forever.
        req = requests.get(url=self.target, timeout=10)
        return req.text

    def get_contents(self):
        """Download and parse the feed, then return the daily increases.

        Only the entries dated in February, plus January 31st, are kept.

        Returns:
            A tuple ``(new_confirm_np, new_dead_np)`` of numpy arrays holding
            the differences between consecutive retained days. Each array is
            one element shorter than ``date_num_list``; element ``i`` belongs
            to ``date_num_list[i + 1]``.

        Raises:
            ValueError: if the response is not wrapped as ``callback(...)``
                or the wrapped payload is not valid JSON.
        """
        html = self.get_download_url()
        # Strip the JSONP wrapper by keeping what lies between the first '('
        # and the last ')'. This tolerates parentheses inside the payload and
        # a trailing ';' or newline after the closing parenthesis.
        payload = html[html.index('(') + 1:html.rindex(')')]
        data = json.loads(payload)
        # The "data" field is itself a JSON document encoded as a string.
        china_day_list = json.loads(data["data"])["chinaDayList"]

        # Collect into fresh lists so that calling this method again replaces
        # the previous result instead of appending a second copy to it.
        dates, confirms, deads = [], [], []
        for day_record in china_day_list:
            parts = day_record['date'].split('.')
            month = int(parts[0])
            day = int(parts[1])
            if month == 2 or (month == 1 and day == 31):
                dates.append(day_record['date'])
                confirms.append(day_record['confirm'])
                deads.append(day_record['dead'])

        self.date_num_list = dates
        self.confirm_list = confirms
        self.dead_list = deads
        return self._daily_increase(confirms), self._daily_increase(deads)

    def num_feature(self):
        """Summarise the daily increase of confirmed cases.

        Data already loaded by ``get_contents`` is reused; the feed is only
        downloaded here when nothing has been loaded yet.

        Returns:
            A tuple ``(max_num, max_day, min_day, min_num, mean_num,
            median_num)``: the largest increase and its date, the date and
            value of the smallest increase, and the mean and median of the
            series. On ties the earliest day is reported.

        Raises:
            ValueError: if fewer than two days were retained, so that no
                increase can be computed.
        """
        if not self.confirm_list:
            self.get_contents()
        new_confirm_np = self._daily_increase(self.confirm_list)
        if new_confirm_np.size == 0:
            raise ValueError('at least two days of data are needed to compute daily increases')

        # Increase i is measured on date_num_list[i + 1], hence the offset.
        max_index = int(np.argmax(new_confirm_np))
        min_index = int(np.argmin(new_confirm_np))
        max_num = new_confirm_np[max_index]
        min_num = new_confirm_np[min_index]
        max_day = self.date_num_list[max_index + 1]
        min_day = self.date_num_list[min_index + 1]
        mean_num = np.mean(new_confirm_np)
        median_num = np.median(new_confirm_np)
        return max_num, max_day, min_day, min_num, mean_num, median_num
绘图
import matplotlib.pyplot as plt
import seaborn as sns
import matplotlib
import numpy as np
class pic_process():
    """Draw a line chart of the daily increase of confirmed cases.

    Attributes:
        date_num_list: date labels; the first entry is skipped when plotting,
            so it is expected to hold one more entry than ``new_confirm_np``.
        new_confirm_np: daily increases of confirmed cases (the plotted series).
        new_dead_np: daily increases of deaths; stored but not plotted.
    """

    def __init__(self, date_num_list, new_confirm_np, new_dead_np):
        self.date_num_list = date_num_list
        self.new_confirm_np = new_confirm_np
        self.new_dead_np = new_dead_np

    def pic_plot(self, font_path=r'C:/Windows/Fonts/STKAITI.TTF'):
        """Plot ``new_confirm_np`` against the dates and show the figure.

        Args:
            font_path: path of a font file able to render the Chinese title,
                axis labels and legend. The default points at a font in the
                Windows fonts directory; pass another path on other systems.
        """
        sns.set(style="darkgrid")
        # A font with CJK glyphs is needed for the Chinese text below.
        myfont = matplotlib.font_manager.FontProperties(fname=font_path)
        plt.figure(figsize=(10, 5))
        # One tick per plotted value, labelled with its date and rotated so
        # the labels do not overlap.
        plt.xticks(np.arange(len(self.new_confirm_np)), self.date_num_list[1:], rotation=90)
        plt.title(u'2月份每日新增人数', fontproperties=myfont, fontsize=10)
        plt.xlabel(u'日期', fontproperties=myfont, fontsize=10)
        plt.ylabel(u'人数', fontproperties=myfont, fontsize=10)
        plt.plot(self.date_num_list[1:], self.new_confirm_np, color='darkslategray', linewidth=0.9, label='全国每日新增确认人数')
        plt.legend(loc='best', prop=myfont)
        plt.show()
结果示意