天天基金爬虫

import requests
from requests.exceptions import ConnectionError
from pyquery import PyQuery as pq
import operator
from functools import reduce
import re
import json
import pandas as pd
import numpy as np

# Request headers; getHTMLText passes the module-level ``headers`` to
# requests.get.
# NOTE(review): 'Host' names club.jd.com while the rest of the script talks to
# eastmoney.com, and ``headers`` is reassigned further down the script --
# confirm whether this first definition is still needed.
headers = dict([
    ('Host', 'club.jd.com'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.120 Safari/537.36'),
])


def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is sent with the module-level ``headers`` and a 30-second
    timeout. The body is decoded with the encoding that requests detects
    from the content (``apparent_encoding``).

    Args:
        url: Address of the page to download.

    Returns:
        The decoded page text, or an empty string when the request fails
        (connection error, timeout, invalid URL, or HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
    except requests.exceptions.RequestException:
        # Only request/HTTP failures mean "page unavailable". A bare except
        # would also hide KeyboardInterrupt and genuine programming errors.
        print("页面提取错误")
        return ""
    else:
        r.encoding = r.apparent_encoding
        return r.text
    
    
# Fund code whose history is requested; also embedded in the Referer header.
fundCode = '519697'
# NOTE(review): not referenced anywhere else in the visible script.
pageIndex = 2
# Endpoint the download loop sends its GET requests to.
url = 'http://api.fund.eastmoney.com/f10/lsjz'

# NOTE(review): hard-coded browser session cookie; it presumably expires and
# would need refreshing -- confirm whether the API requires it at all.
cookie = 'HAList=a-sh-603899-%u6668%u5149%u6587%u5177; em_hq_fls=js; qgqp_b_id=261272d980d240a9bd3df919a41ac2d4; EMFUND1=null; EMFUND2=null; EMFUND3=null; EMFUND4=null; EMFUND5=null; EMFUND6=null; EMFUND7=null; EMFUND8=null; EMFUND0=null; st_si=17382221904758; st_asi=delete; EMFUND9=07-31 13:00:08@#$%u666F%u987A%u957F%u57CE%u65B0%u5174%u6210%u957F%u6DF7%u5408@%23%24260108; st_pvi=53058681075087; st_sp=2019-10-30%2013%3A38%3A10; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Fs; st_sn=11; st_psi=20200804105742826-0-2701686462'

# Headers sent with every history request; the Referer points at the fund's
# own page on fundf10.eastmoney.com.
headers = dict([
    ('Cookie', cookie),
    ('Host', 'api.fund.eastmoney.com'),
    ('User-Agent', 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_2) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'),
    ('Referer', 'http://fundf10.eastmoney.com/jjjz_%s.html' % (fundCode,)),
])

def _parse_jsonp(jsonp_text):
    """Return the JSON object wrapped inside a ``callback(...)`` response.

    Raises:
        ValueError: if ``jsonp_text`` contains no parenthesised payload or
            the payload is not valid JSON.
    """
    # Raw-string pattern (no invalid "\(" escape) and a greedy match from the
    # first "(" to the last ")", so a ")" inside the JSON text cannot
    # truncate the payload.
    wrapped = re.search(r'\((.*)\)', jsonp_text, re.S)
    if wrapped is None:
        raise ValueError('response is not a JSONP payload: %r' % jsonp_text[:200])
    return json.loads(wrapped.group(1))


# Download the history page by page and collect one DataFrame per page.
# NOTE(review): the page count (139) is hard-coded; the response's TotalCount
# is read but not used to bound the loop -- confirm whether it should be.
dfs = []
for i in range(139):
    params = {
        'callback': 'jQuery18307633215694564663_1548321266367',
        'fundCode': fundCode,
        'pageIndex': i,
        'pageSize': 20,
    }
    # A timeout keeps one stalled connection from hanging the whole run;
    # raise_for_status surfaces HTTP errors instead of a confusing parse error.
    r = requests.get(url=url, headers=headers, params=params, timeout=30)
    r.raise_for_status()

    payload = _parse_jsonp(r.text)  # parse the response body once
    LSJZList = payload['Data']['LSJZList']  # rows of this page
    TotalCount = payload['TotalCount']
    if not LSJZList:
        # Nothing on this page; skip it so no empty frame is concatenated.
        continue
    LSJZ = pd.DataFrame(LSJZList)
    LSJZ['fundCode'] = fundCode  # tag every row with the fund it belongs to
    dfs.append(LSJZ)
# ignore_index gives the combined frame unique row labels instead of
# repeating each page's 0..n-1 index.
df = pd.concat(dfs, ignore_index=True)

# Tally, per day of the week, the rows whose JZZZL value is not positive
# (JZZZL is presumably the daily growth rate -- confirm against the API).
df['FSRQ'] = pd.to_datetime(df['FSRQ'])
# Series.dt.weekday_name was removed in pandas 1.0; day_name() yields the
# same English day names.
df['daynameofweek'] = df['FSRQ'].dt.day_name()
# Rows with an empty JZZZL are dropped rather than treated as 0. The copy
# makes the filtered frame independent, so the column assignments below do
# not write into a slice of the original frame.
df = df[df['JZZZL'] != ''].copy()
df['JZZZL'] = df['JZZZL'].astype('float64')
# isdown is 0 when JZZZL > 0 and -1 when JZZZL <= 0 (np.select's default of 0
# applies to NaN), so each group sum is minus the count of non-positive rows.
df['isdown'] = np.select(
    [df['JZZZL'] > 0, df['JZZZL'] <= 0],
    [0, -1],
)
# Ascending sort puts the weekday with the most non-positive rows first.
dd = df.groupby('daynameofweek')['isdown'].sum().sort_values()
print(dd)

你可能感兴趣的:(笔记,python基础)