# 爬取网站:丁香园
# 数据来源:中国国家卫生健康委员会
import requests
import re
import pandas as pd
# Send the request and extract the per-province statistics embedded in the page.
# Imported here because the file's top-level imports do not include json.
import json

url = 'https://3g.dxy.cn/newh5/view/pneumonia?scene=2&clicktime=1579582238&enterid=1579582238&from=timeline&isappinstalled=0'
# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url, timeout=10)
# Stop on an HTTP error instead of trying to parse an error page.
res.raise_for_status()
res.encoding = 'utf-8'
# The lazy group must be followed by a terminator: with nothing after it,
# `([\s\S]*?)` always matches the empty string. The capture is anchored on the
# '}catch(e){}' marker that the original code stripped from the captured text.
pat0 = re.compile(r'window\.getAreaStat = ([\s\S]*?)\}catch\(e\)\{\}')
data_list = pat0.findall(res.text)
if not data_list:
    raise ValueError('window.getAreaStat payload not found in the page')
# NOTE(security): the original ran eval() on this downloaded text, which would
# execute whatever the server sent. The payload is presumably plain JSON
# (confirm against a live page), so it is parsed with json.loads instead.
data = json.loads(data_list[0])
data
# Placeholders kept from the original notebook cell; nothing below reads them.
provinceShortNames = ''
currentConfirmedCounts = []
confirmedCounts = []
curedCounts = []
deadCounts = []

# Map each province's short name to its four counters, in the same order as
# the DataFrame row labels used below.
my_data = {
    province['provinceShortName']: [
        province['currentConfirmedCount'],
        province['confirmedCount'],
        province['curedCount'],
        province['deadCount'],
    ]
    for province in data
}
print(my_data)
# One column per province; as the last expression of a notebook cell this
# renders the table.
pd.DataFrame(my_data, index=['现存确诊', '累计确诊', '累计治愈', '累计死亡'])
# Ask for a province until one present in my_data is entered, then print its
# four counters using ANSI colour escape sequences.
select = input('请输入查询省份:')
while True:
    # Only a missing key means "unknown province". The original bare `except:`
    # also caught unrelated errors (and KeyboardInterrupt) and reported them
    # as bad input, so the try body is limited to the lookup itself.
    try:
        counts = my_data[select]
    except KeyError:
        print('没有该省份,请检查您的输入!')
        select = input('请输入查询省份:')
        continue
    print("\n\033[1;31;48m现存确诊:%d\033[0m" % counts[0])
    print("\033[1;33;48m累计确诊:%d\033[0m" % counts[1])
    print("\033[1;32;48m累计治愈:%d\033[0m" % counts[2])
    print("\033[1;30;48m累计死亡:%d\033[0m" % counts[3])
    break
# 效果见第一张gif图。
# Placeholders kept from the original notebook cell; nothing below reads them.
cityName = ''
currentConfirmedCounts = []
confirmedCounts = []
curedCounts = []
deadCounts = []

# Flatten every province's 'cities' list into one mapping of city name to its
# four counters.
# NOTE(review): if two provinces report the same cityName, the later entry
# overwrites the earlier one - confirm whether that can happen in this feed.
my_city = {
    city['cityName']: [
        city['currentConfirmedCount'],
        city['confirmedCount'],
        city['curedCount'],
        city['deadCount'],
    ]
    for province in data
    for city in province['cities']
}
# print(my_city)
# One column per city; as the last expression of a notebook cell this renders
# the table.
pd.DataFrame(my_city, index=['现存确诊', '累计确诊', '累计治愈', '累计死亡'])
# Ask for a city until one present in my_city is entered, then print its four
# counters using ANSI colour escape sequences.
# The prompt and error message originally said "省份" (province), copied from
# the province query; this lookup is by city, so they now say "城市".
select = input('请输入查询城市:')
while True:
    # Only a missing key means "unknown city". The original bare `except:`
    # also caught unrelated errors (and KeyboardInterrupt) and reported them
    # as bad input, so the try body is limited to the lookup itself.
    try:
        counts = my_city[select]
    except KeyError:
        print('没有该城市,请检查您的输入!')
        select = input('请输入查询城市:')
        continue
    print("\n\033[1;31;48m现存确诊:%d\033[0m" % counts[0])
    print("\033[1;33;48m累计确诊:%d\033[0m" % counts[1])
    print("\033[1;32;48m累计治愈:%d\033[0m" % counts[2])
    print("\033[1;30;48m累计死亡:%d\033[0m" % counts[3])
    break
# 效果见第二张gif图。
# Make Jupyter Notebook display the value of every top-level expression in a
# cell; without this setting only the last expression's result is shown.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'
from IPython.display import Image
import requests
# Chart images to download, one per metric.
url_1 = 'https://img1.dxycdn.com/2020/0310/514/3401205765200198356-135.png'  # confirmed
url_2 = 'https://img1.dxycdn.com/2020/0310/254/3401205861837225508-135.png'  # cured
url_3 = 'https://img1.dxycdn.com/2020/0310/007/3401205836067162687-135.png'  # suspected
url_4 = 'https://img1.dxycdn.com/2020/0310/736/3401205876869612045-135.png'  # deaths

# (source URL, local file name) pairs. The file names are kept exactly as in
# the original script, even though the URLs end in .png.
_chart_files = [
    (url_1, '现存确诊.jpg'),
    (url_2, '治愈病例.jpg'),
    (url_3, '现存疑似.jpg'),
    (url_4, '死亡病例.jpg'),
]

# Download everything first, then write, so a failed request does not leave a
# partial set of files on disk.
_chart_payloads = []
for _chart_url, _chart_path in _chart_files:
    # A timeout keeps the script from hanging forever on a stalled connection.
    _chart_response = requests.get(_chart_url, timeout=10)
    # Stop on an HTTP error rather than saving an error page as an image.
    _chart_response.raise_for_status()
    _chart_payloads.append((_chart_path, _chart_response.content))

for _chart_path, _chart_bytes in _chart_payloads:
    with open(_chart_path, 'wb') as f:
        f.write(_chart_bytes)

# Kept as four top-level expressions: with ast_node_interactivity='all' the
# notebook renders each of them; inside a loop they would not be displayed.
Image(filename = "现存确诊.jpg", width=400, height=160)
Image(filename = "治愈病例.jpg", width=400, height=160)
Image(filename = "现存疑似.jpg", width=400, height=160)
Image(filename = "死亡病例.jpg", width=400, height=160)
# 笔记:
# 1. 将整个视频转换成GIF,使用命令:
#    ffmpeg -i small.mp4 small.gif
# 2. 输入下面代码,才能让Jupyter notebook 打印多个结果,
#    否则只会打印最后一个结果
# Display the value of every top-level expression in a notebook cell, not only
# the last one (this repeats the setting already made earlier in this file).
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity='all'
from IPython.display import Image