import requests
import math
import json
import re
import pandas as pd
# Number of entries requested per page; the script also divides the reported
# total by this value to work out how many pages to fetch.
page_diff_count = 500

# Page number placed in the initial query below.
page_num = 1

# Endpoint that serves the paged listing.
url = "http://55.push2.eastmoney.com/api/qt/clist/get"

# Query-string arguments sent with every request to `url`.
# Apart from `cb`, `pn`, `pz` and `fields`, the values are opaque to this
# script (presumably copied from a captured browser request -- TODO confirm).
params = {
    # Callback name; the response body arrives wrapped as `<cb>(...)`.
    "cb": "jQuery1124037243639092465686_1673924065851",
    # Page to request and entries per page, both sent as strings.
    "pn": str(page_num),
    "pz": str(page_diff_count),
    "po": "1",
    "np": "1",
    "ut": "bd1d9ddb04089700cf9c27f6f7426281",
    "fltt": "2",
    "invt": "2",
    "fid": "f3",
    "fs": "m:0+t:6,m:0+t:80,m:1+t:2,m:1+t:23,m:0+t:81+s:2048",
    # Field ids to include in each returned entry (one comma-separated string,
    # split across lines here only for readability).
    "fields": (
        "f1,f2,f3,f4,f5,f6,f7,f8,f9,f10,"
        "f12,f13,f14,f15,f16,f17,f18,f20,f21,f23,"
        "f24,f25,f22,f11,f62,f128,f136,f115,f152"
    ),
    "_": "1673924065852",
}
def get_diff(page_num):
    """Fetch one page of the listing and return its entries as rows.

    Sends a GET request to the module-level ``url`` using a copy of the
    module-level ``params`` whose ``pn`` is set to ``page_num``, strips the
    JSONP callback wrapper from the response body and decodes the JSON inside.

    Args:
        page_num: Page to request; sent as the ``pn`` query parameter.

    Returns:
        A list of rows, one per entry of ``data.diff`` in the response. Each
        row is a list holding the entry's values for the keys f12, f14, f2,
        f3, f4, f5, f6, f7, f8, f9, f115, f10, f15, f16, f17, f18, f20, f21
        and f23, in that order. An empty list is returned when ``data`` is
        null.

    Raises:
        requests.RequestException: On a network failure or timeout.
        ValueError: If the body is not wrapped in the expected callback, or
            the wrapped text is not valid JSON.
        KeyError: If an entry lacks one of the keys listed above.
    """
    # Order of the values in each output row.
    field_keys = (
        'f12', 'f14', 'f2', 'f3', 'f4', 'f5',
        'f6', 'f7', 'f8', 'f9', 'f115', 'f10',
        'f15', 'f16', 'f17', 'f18', 'f20', 'f21', 'f23',
    )

    # Work on a copy so the shared module-level dict is not modified by a call.
    query = {**params, 'pn': str(page_num)}

    # Without a timeout a stalled connection would block forever.
    res_text = requests.get(url=url, params=query, timeout=30).text

    # Unwrap `<callback>( ... )`. The group is greedy so it runs to the LAST
    # ')' in the body; a non-greedy group would cut the JSON short at the
    # first ')' that happens to appear inside a string value.
    wrapper = re.escape(query['cb']) + r'\((.*)\)'
    match = re.search(wrapper, res_text, re.DOTALL)
    if match is None:
        raise ValueError(
            f'response is not wrapped in the expected callback: {res_text[:80]!r}'
        )
    res_dict = json.loads(match.group(1))

    payload = res_dict['data']
    if payload is None:
        # NOTE(review): the endpoint appears to answer with a null `data` for
        # a page past the end of the listing -- confirm. Treat it as "no rows".
        return []

    return [[entry[key] for key in field_keys] for entry in payload['diff']]
if __name__ == "__main__":
    # Script entry point: download every page of the listing and write all
    # rows to test.xlsx in the current directory.

    # Probe request, used only to read the total number of entries.
    # The timeout keeps a stalled connection from blocking forever.
    res_text = requests.get(url=url, params=params, timeout=30).text

    # Unwrap `<callback>( ... )`. Raw-string pattern with a greedy group, so
    # the capture runs to the LAST ')' rather than stopping at the first ')'
    # that may occur inside the JSON text.
    match = re.search(re.escape(params['cb']) + r'\((.*)\)', res_text, re.DOTALL)
    if match is None:
        raise ValueError(
            f'response is not wrapped in the expected callback: {res_text[:80]!r}'
        )
    res_dict = json.loads(match.group(1))

    diff_total = int(res_dict['data']['total'])
    page_total = math.ceil(diff_total / page_diff_count)

    # Pages are requested from 1 up to and including page_total.
    data = []
    for page in range(1, page_total + 1):
        data.extend(get_diff(page))

    # Column headers, one per value in each row returned by get_diff.
    colu = ['代码', '名称', '最新价', '涨跌幅', '涨跌额', '成交量',
            '成交额', '振幅', '换手率(%)', '市盈率(动态)', '市盈率TTM', '量比',
            '最高', '最低', '今开', '昨收', '总市值', '流通市值', '市净率']
    df = pd.DataFrame(data=data, columns=colu)
    df.index += 1  # number the rows from 1 instead of 0

    # No `encoding` argument: to_excel never used it, and pandas 2.0 removed
    # the parameter, so passing it raises TypeError there.
    df.to_excel('test.xlsx')