上交所最新公告爬取年报

部分上交所年报在“定期报告”栏目和“发行上市公告”栏目中都取不到,但在“最新公告”栏目中能够拿到数据;
本程序用于爬取“最新公告”栏目的数据,并从中筛选出年度报告。

import json
import requests
import datetime


def noticeToAnnals(stock_code, START_DATE='2022-01-01', END_DATE=None):
    """Print annual-report entries from the SSE "latest announcements" query.

    Sends a single GET request (page 1, page size 25, ``BULLETIN_TYPE`` 0101)
    to the SSE ``commonQuery.do`` endpoint, unwraps the JSONP response and,
    for every item in ``result``, prints the fields of the first bulletin
    whose title contains '年度报告' (annual report) but not '摘要' (summary).

    Args:
        stock_code: Security code sent as the ``SECURITY_CODE`` parameter,
            e.g. ``'603993'``.
        START_DATE: Lower date bound sent as ``START_DATE``
            (``'YYYY-MM-DD'`` string).
        END_DATE: Upper date bound sent as ``END_DATE``. When omitted,
            today's date is computed at call time (the previous default was
            evaluated once, when the function was defined, and went stale
            in long-running processes).

    Returns:
        None. Matching entries are written to stdout.

    Raises:
        requests.exceptions.RequestException: If the HTTP request fails or
            exceeds the timeout.
        ValueError: If the response body is not valid JSON after unwrapping.
        KeyError: If the payload or an item lacks an expected key.
    """
    URL_QUERY_COMPANY = 'http://query.sse.com.cn/commonQuery.do'
    # The JSONP callback name is echoed back by the server as the wrapper.
    CALLBACK = 'jsonpCallback72875491'
    HEADER = {
        'Referer': 'http://www.sse.com.cn',
        'User-Agent': "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.121 Safari/537.36",
    }
    if END_DATE is None:
        END_DATE = datetime.datetime.now().strftime('%Y-%m-%d')
    param = {
        'jsonCallBack': CALLBACK,
        'isPagination': 'true',
        'pageHelp.pageSize': '25',
        'pageHelp.cacheSize': '1',
        'type': 'inParams',
        'sqlId': 'COMMON_PL_SSGSXX_ZXGG_L',
        'START_DATE': START_DATE,
        'END_DATE': END_DATE,
        'SECURITY_CODE': stock_code,
        'TITLE': '',
        'BULLETIN_TYPE': '0101',
        'pageHelp.pageNo': '1',
        'pageHelp.beginPage': '1',
        'pageHelp.endPage': '1',
        '_': '1651221153479'
    }

    def listIndex(test):
        """Return the index of the first annual-report (non-summary) title.

        Returns None when no title in ``test`` qualifies.
        """
        for position, title in enumerate(test):
            if '年度报告' in title and '摘要' not in title:
                return position
        return None

    def splitField(value):
        """Split a multi-valued field into its individual entries.

        NOTE(review): the separator in the original listing was lost (it
        showed up as a raw line break), so it was either a newline or an
        HTML ``<br>`` tag. Both are accepted here - TODO confirm against a
        live response.
        """
        text = str(value)
        for tag in ('<br />', '<br/>', '<br>'):
            text = text.replace(tag, '\n')
        return text.split('\n')

    # A timeout keeps the call from hanging forever on a stalled connection.
    response = requests.get(URL_QUERY_COMPANY, params=param, headers=HEADER, timeout=30)

    # Unwrap "callback(...)" explicitly rather than stripping every ')'.
    data = response.text.strip()
    prefix = CALLBACK + '('
    if data.startswith(prefix) and data.endswith(')'):
        data = data[len(prefix):-1]

    info = json.loads(data)['result']
    for i in info:
        TITLE = splitField(i['TITLE'])
        index = listIndex(TITLE)
        if index is None:
            # No annual report among this item's bulletins; indexing with
            # None would raise TypeError, so skip the item.
            continue
        # Assumes the fields below are parallel lists aligned with TITLE -
        # TODO confirm against a live response.
        SSEDATE = splitField(i['SSEDATE'])
        OLD_BULLETIN_TYPE = splitField(i['OLD_BULLETIN_TYPE'])
        BULLETIN_YEAR = splitField(i['BULLETIN_YEAR'])
        ORG_BULLETIN_TYPE = splitField(i['ORG_BULLETIN_TYPE'])
        URL = splitField(i['URL'])
        print(
            f'SSEDATE:{SSEDATE[index]}\nOLD_BULLETIN_TYPE:{OLD_BULLETIN_TYPE[index]}\nBULLETIN_YEAR:{BULLETIN_YEAR[index]}\nORG_BULLETIN_TYPE:{ORG_BULLETIN_TYPE[index]}'
            f'\nURL:{URL[index]}\nTITLE:{TITLE[index]}')


if __name__ == '__main__':
    noticeToAnnals('603993')

你可能感兴趣的:(上交所最新公告爬取年报)