# Based on the article "Python爬取全国各地区疫情风险等级" (pudn.com); the original code has been optimized here. See the original web page for details.
import hashlib
import os
import requests
import time
import sys
import json
import csv
# Show the subtotal of communities listed for one risk level.
def show_level_count(x_list):
    """Print and return the number of communities in one risk-level list.

    Args:
        x_list: Sequence of area records; each record is indexed with the
            key ``"communitys"`` and the length of that value is counted.

    Returns:
        The sum of ``len(record["communitys"])`` over all records
        (0 for an empty sequence).

    Raises:
        KeyError: If a record has no ``"communitys"`` entry.
    """
    total = sum(len(area["communitys"]) for area in x_list)
    print(total)
    return total
# Convert the fetched risk-area JSON text into a CSV file.
# NOTE: a second definition of writer_to_csv appears later in this file and
# replaces this one when the module is loaded.
def writer_to_csv(risk_txt):
    """Parse the risk-area JSON text and write one CSV row per community.

    The output file is ``./risk_data_2022/risk_data_<end_update_time>.csv``,
    where ``end_update_time`` is read from ``data.end_update_time`` in the
    payload. Each row is ``[risk level, province, city, county, community]``.

    Args:
        risk_txt: JSON text whose ``"data"`` object provides
            ``"end_update_time"``, ``"highlist"``, ``"middlelist"`` and
            ``"lowlist"``; every list entry is read for ``"province"``,
            ``"city"``, ``"county"`` and ``"communitys"``.

    Raises:
        json.JSONDecodeError: If ``risk_txt`` is not valid JSON.
        KeyError: If one of the keys listed above is missing.
        OSError: If the output directory or file cannot be created.
    """
    data = json.loads(risk_txt)["data"]
    so_far_time = data["end_update_time"]

    # Risk-level label written to the CSV -> list of area records.
    level_dict = {
        "高风险": data["highlist"],
        "中风险": data["middlelist"],
        "低风险": data["lowlist"],
    }

    out_dir = './risk_data_2022'
    # Create the target directory up front so a first run does not fail.
    os.makedirs(out_dir, exist_ok=True)
    csv_path = out_dir + '/risk_data_' + so_far_time + '.csv'

    # utf_8_sig prepends a BOM (presumably so spreadsheet tools detect UTF-8).
    # The with-block guarantees the file is closed even if a record is malformed.
    with open(csv_path, 'w', encoding='utf_8_sig', newline='') as f:
        csv_writer = csv.writer(f)
        for risk_level, areas in level_dict.items():
            for area in areas:
                province = area["province"]
                city = area["city"]
                county = area["county"]
                for community in area["communitys"]:
                    csv_writer.writerow(
                        [risk_level, province, city, county, community])
    print("写入risk_data.csv完成.")
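# get_risk_area_data (defined further below) fetches the JSON payload from the web interface
# (a JSON object whose "data" entry holds the high/middle/low risk lists).
# For background see "Python爬取全国各地区疫情风险等级" (https://www.pudn.com/news/631b15f60d6a7b3b66c9deca.html).
# On success it returns two values: response.text with every \u2022 character removed, and response.status_code.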
# Alternative way to convert the fetched risk-area JSON text into a CSV file.
def writer_to_csv(risk_txt):
    """Parse the risk-area JSON text and write one CSV row per community.

    The output file is ``./risk_data_2022/risk_data_<end_update_time>.csv``,
    where ``end_update_time`` is read from ``data.end_update_time`` in the
    payload. Each row is ``[risk level, province, city, county, community]``.
    The assembled level-to-records mapping is also printed to stdout.

    Args:
        risk_txt: JSON text whose ``"data"`` object provides
            ``"end_update_time"``, ``"highlist"``, ``"middlelist"`` and
            ``"lowlist"``; every list entry is read for ``"province"``,
            ``"city"``, ``"county"`` and ``"communitys"``.

    Raises:
        json.JSONDecodeError: If ``risk_txt`` is not valid JSON.
        KeyError: If one of the keys listed above is missing.
        OSError: If the output directory or file cannot be created.
    """
    # data_all is a dict: it is indexed by string keys below.
    data_all = json.loads(risk_txt)["data"]
    so_far_time = data_all["end_update_time"]

    # Risk-level label written to the CSV -> list of area records.
    level_dict = {
        "高风险": data_all['highlist'],
        "中风险": data_all['middlelist'],
        "低风险": data_all['lowlist'],
    }
    print(level_dict)

    out_dir = './risk_data_2022'
    # Create the target directory up front so a first run does not fail.
    os.makedirs(out_dir, exist_ok=True)
    csv_path = out_dir + '/risk_data_' + so_far_time + '.csv'

    # utf_8_sig prepends a BOM (presumably so spreadsheet tools detect UTF-8).
    # The with-block guarantees the file is closed even if a record is malformed.
    with open(csv_path, 'w', encoding='utf_8_sig', newline='') as f:
        csv_writer = csv.writer(f)
        for risk_level, areas in level_dict.items():
            for area in areas:
                prefix = [risk_level, area["province"], area["city"], area["county"]]
                csv_writer.writerows(
                    prefix + [community] for community in area["communitys"])
    print("写入risk_data.csv完成.")
def get_risk_area_data(timeout=10):
    """POST a signed request to the risk-area interface and return its body.

    Two SHA-256 signatures are derived from the current Unix timestamp and
    fixed strings: one is sent in the ``x-wif-signature`` header, the other
    in the ``signatureHeader`` field of the JSON request body.

    Args:
        timeout: Seconds passed to ``requests.post`` as its ``timeout``
            argument, so a stalled connection cannot block indefinitely.

    Returns:
        A ``(text, status_code)`` pair. When the status code is 200,
        ``text`` is the response text with every U+2022 character removed;
        for any other status code ``text`` is the empty string.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # Whole seconds since the Unix epoch, as a string; reused for both
    # signatures, the timestamp header and the request body.
    timestamp = str(int(time.time()))

    x_wif_nonce = 'QkjjtiLM2dCratiA'
    x_wif_paasid = 'smt-application'
    nonce_header = '123456789abcdefg'

    # NOTE(review): the fixed strings below look like shared secrets followed
    # by the matching nonce -- the concatenated values are unchanged.
    x_wif_signature_str = (
        timestamp + 'fTN2pfuisxTavbTuYVSsNJHetwq5bJvC' + x_wif_nonce + timestamp)
    x_wif_signature = hashlib.sha256(
        x_wif_signature_str.encode('utf-8')).hexdigest().upper()

    signature_header_str = (
        timestamp + '23y0ufFl5YxIyGrI8hWRUZmKkvtSjLQA' + nonce_header + timestamp)
    signature_header = hashlib.sha256(
        signature_header_str.encode('utf-8')).hexdigest().upper()

    url = 'http://bmfw.www.gov.cn/bjww/interface/interfaceJson'
    headers = {
        'Accept': "application/json, text/plain, */*",
        'Content-Type': "application/json;charset=utf-8",
        'x-wif-nonce': x_wif_nonce,
        'x-wif-paasid': x_wif_paasid,
        'x-wif-signature': x_wif_signature,
        'x-wif-timestamp': timestamp,
    }
    # Serialize with json.dumps instead of hand-escaped string concatenation;
    # compact separators keep the body free of extra whitespace.
    form_data = json.dumps({
        "key": "3C502C97ABDA40D0A60FBEE50FAAD1DA",
        "appId": "NcApplication",
        "paasHeader": "zdww",
        "timestampHeader": timestamp,
        "nonceHeader": nonce_header,
        "signatureHeader": signature_header,
    }, separators=(',', ':'))

    response = requests.post(
        url=url, data=form_data, headers=headers, timeout=timeout)
    if response.status_code != 200:
        return "", response.status_code
    return response.text.replace('\u2022', ''), response.status_code
if __name__ == '__main__':
    # Script entry point: fetch the data, save the raw JSON, then convert
    # it to CSV.
    data_dir = './risk_data_2022'
    json_path = data_dir + '/risk_data.json'

    risk_text, status_code = get_risk_area_data()
    if status_code == 200:
        # Make sure the output directory exists before the first write.
        os.makedirs(data_dir, exist_ok=True)
        with open(json_path, 'w', encoding='utf-8') as f:
            f.write(risk_text)
        print("写入risk_data.json完成.")
        # The text just written is passed on directly; re-reading the file
        # would yield the same content.
        writer_to_csv(risk_text)
    else:
        # Without a fresh payload there is nothing to convert.
        print('获取数据失败,HTTP状态码: ' + str(status_code))
    print('全部程序完成,请勿频繁使用!')
    # "pause" is a Windows console command; skip it on other platforms.
    if os.name == 'nt':
        os.system('pause')