爬虫案例——股票信息

爬虫案例——股票信息_第1张图片

import requests
import math
import json
import re
# pandas用来导出数据到csv/excel
import pandas as pd

# Number of rows requested per page (the original note gives 20~999 as the range).
page_diff_count = 500
# Page number used for the initial request.
page_num = 1
# Endpoint that every request is sent to.
url = 'http://55.push2.eastmoney.com/api/qt/clist/get'
# Query-string parameters; 'pn' and 'pz' are the string forms of the two
# settings above, everything else is a fixed value.
params = dict(
    cb='jQuery1124037243639092465686_1673924065851',
    pn=f'{page_num}',
    pz=f'{page_diff_count}',
    po='1',
    np='1',
    ut='bd1d9ddb04089700cf9c27f6f7426281',
    fltt='2',
    invt='2',
    fid='f3',
    fs='m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048',
    fields='f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,f24,f25,f22,f11,f62,f128,f136,f115,f152',
    _='1673924065852',
)
# JSON keys copied into each output row, in output-column order.
_DIFF_FIELDS = (
    'f12', 'f14', 'f2', 'f3', 'f4', 'f5',
    'f6', 'f7', 'f8', 'f9', 'f115', 'f10',
    'f15', 'f16', 'f17', 'f18', 'f20', 'f21', 'f23',
)


def _strip_jsonp(text):
    """Return the text between the first '(' and the last ')' of a JSONP reply."""
    start = text.find('(')
    end = text.rfind(')')
    if start == -1 or end <= start:
        raise ValueError('response is not a JSONP call: %r' % text[:80])
    return text[start + 1:end]


def get_diff(page_num):
    """Fetch one page of data and return it as a list of rows.

    Args:
        page_num: Page number to request. Its string form is stored in the
            module-level ``params`` dict under ``'pn'`` before the request
            is sent.

    Returns:
        A list with one 19-element list per entry of ``data.diff`` in the
        response, holding the values of the keys in ``_DIFF_FIELDS`` in that
        order. An empty list when the response has no ``data`` or ``diff``
        payload.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the body is not a JSONP call or does not wrap valid JSON.
        KeyError: If an entry lacks one of the expected keys.
    """
    params['pn'] = str(page_num)
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get(url=url, params=params, timeout=10)
    response.raise_for_status()
    # Slice on the outermost parentheses rather than a non-greedy regex, so a
    # ')' inside the JSON payload cannot truncate it.
    res_dict = json.loads(_strip_jsonp(response.text))
    # 'data' may be missing or null; treat that as a page with no rows.
    diff = (res_dict.get('data') or {}).get('diff') or []
    return [[d[field] for field in _DIFF_FIELDS] for d in diff]

if __name__ == "__main__":
    # Probe request with the initial params to learn the total row count.
    # A timeout keeps a stalled connection from hanging the script forever.
    res_text = requests.get(url=url, params=params, timeout=10).text
    # Extract the JSON payload from the JSONP wrapper: everything between the
    # first '(' and the last ')'. Unlike a non-greedy regex, this is not cut
    # short by a ')' inside the payload.
    res_dict = json.loads(res_text[res_text.index('(') + 1:res_text.rindex(')')])
    # Total number of rows reported in the response.
    diff_total = int(res_dict['data']['total'])
    # Number of pages needed to cover every row.
    page_total = math.ceil(diff_total / page_diff_count)
    data = []
    for page_num in range(1, page_total + 1):
        data += get_diff(page_num)
    # Column headers, in the same order as the values of each row.
    colu = ['代码', '名称', '最新价', '涨跌幅', '涨跌额', '成交量',
            '成交额', '振幅', '换手率(%)', '市盈率(动态)', '市盈率TTM', '量比',
            '最高', '最低', '今开', '昨收', '总市值', '流通市值', '市净率']
    df = pd.DataFrame(data=data, index=None, columns=colu)
    # Shift the default 0-based index so the exported index starts at 1.
    df.index += 1
    # Alternative: export to csv
    # df.to_csv('test.csv', encoding='utf_8_sig')
    # Export to excel. The 'encoding' keyword is not passed: pandas ignored it
    # for to_excel and removed it in 2.0, where passing it raises TypeError.
    df.to_excel('test.xlsx')

你可能感兴趣的:(python爬虫,爬虫,python,pandas)