This crawl targets the API behind Tencent's COVID-19 tracker page: https://news.qq.com/zt2020/page/feiyan.htm#/global
The data is saved to a CSV file and then processed in Excel.
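Before running the full scrape, it can help to request a single province and inspect the shape of the JSON response. The short probe below is a minimal sketch that assumes the same endpoint, query parameter, and response keys used by the script that follows (the province '湖北' is just an example value).

import requests
import urllib.parse

# Minimal probe: fetch one province and look at the first daily record.
url = ('https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?province='
       + urllib.parse.quote('湖北'))
headers = {'User-Agent': 'Mozilla/5.0'}
res = requests.get(url, headers=headers)
records = res.json()['data']
print(len(records))        # number of daily records returned for this province
print(records[0].keys())   # expect keys such as date, country, province, confirm, heal, dead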
import requests
import csv
import urllib.parse  # for URL-encoding the Chinese province names

# Open the CSV file once up front so the same writer can be reused inside the loops below.
# utf-8-sig is used so that Excel detects the encoding of the Chinese text correctly.
with open('epidemic.csv', 'a', newline='', encoding='utf-8-sig') as file:
    csv_file = csv.writer(file)
    csv_file.writerow(['日期', '国家', '省份', '确诊', '治愈', '死亡'])  # header row: date, country, province, confirmed, cured, deaths

    # The API expects province names in Chinese, so they are kept as-is here.
    provinces = ['辽宁', '吉林', '黑龙江', '河北', '山西', '陕西', '甘肃', '青海', '山东', '安徽', '江苏', '浙江', '河南', '湖北', '湖南', '江西', '台湾', '福建', '云南', '海南', '四川', '贵州', '广东', '内蒙古', '新疆', '广西', '西藏', '宁夏', '北京', '上海', '天津', '重庆', '香港', '澳门']

    for name in provinces:
        new_name = urllib.parse.quote(name)  # percent-encode the province name (UTF-8) for use in the URL
        url = 'https://api.inews.qq.com/newsqa/v1/query/pubished/daily/list?province=' + new_name
        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.88 Safari/537.36'}
        res = requests.get(url=url, headers=headers)
        res_json = res.json()
        data_ = res_json['data']  # list of per-day records for this province
        for i in data_:
            date = i['date']
            country = i['country']
            province = i['province']
            confirm = i['confirm']
            heal = i['heal']
            dead = i['dead']
            csv_file.writerow([date, country, province, confirm, heal, dead])
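Since the CSV is opened in Excel afterwards, a quick sanity check that the file was written as expected can also be done in Python. This is a minimal sketch assuming the epidemic.csv file produced by the script above.

import csv

# Read the output file back and report how many rows were written.
with open('epidemic.csv', newline='', encoding='utf-8-sig') as f:
    rows = list(csv.reader(f))
print('total rows (including header):', len(rows))
print('first data row:', rows[1] if len(rows) > 1 else 'no data')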