python爬取药监局数据案例

import time

import requests
import json

if __name__ == "__main__":
    time_start = time.time()
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36dof (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36 Edg/96.0.1054.62'
    }
    url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
    id_list = []  # 存储id
    all_data_list = []  # 存储所有企业数据
    for page in range(1,3):
        page = str(page)
        time.sleep(1)
    # 参数的封装
        data = {
            'on': 'true',
            'page': page,
            'pageSize': '15',
            'productName': '',
            'conditionType': '1',
            'applyname': '',
            'applysn': ''
        }
        json_ids = requests.post(url=url, headers=headers, data=data).json()
        for dic in json_ids['list']:
            id_list.append(dic['ID'])

    post_url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById'
    for id in id_list:
        data = dict(id=id)
        detail_json = requests.post(url=post_url,headers=headers,data=data).json()
        all_data_list.append(detail_json)


    fp = open('./allData.json','a+',encoding='utf-8')
    json.dump(all_data_list,fp=fp,ensure_ascii=False)
    print('over!!!')

    time_end = time.time()
    print('time cost', time_end - time_start, 's')

你可能感兴趣的:(python,python,开发语言,后端)