Python爬虫案例三:获取天天基金网的数据并使用pandas保存成excel数据

1、基础知识简介:
excel两种文件后缀:  xlsx   xls
3种可用于保存Excel文件的库:
    1.openpyxl  --> 只能针对xlsx  
    2.pandas
    3.xlutils  ---> 只能针对xls
2、pandas保存成excel数据的具体流程:
# 1. Import pandas under its conventional alias.
import pandas as pd
# 2. Prepare the data as a dict: each key/value pair becomes one column.
dic = {
    '姓名': ['张三', '李四', '王五', '赵六'],
    '年龄': [18, 19, 20, 21],
    '性别': ['男', '女', '男', '女']
}
# 3. Build a DataFrame (pandas' tabular data structure) from the dict.
df = pd.DataFrame(dic)
# 4. Save the data to an Excel file; index=False omits the row-index column.
df.to_excel('第一个.xlsx', index=False)
测试链接:https://www.1234567.com.cn/

源代码为: 

import pandas as pd
import requests
class FirstSpider(object):
    """Download a fund's date / unit-net-value history and save it to Excel.

    The spider pages through the ``lsjz`` endpoint, collects the ``FSRQ``
    and ``DWJZ`` fields of every record into ``self.dic`` and finally writes
    them to an ``.xlsx`` file with pandas.

    Args:
        fund_code: Fund code sent as the ``fundCode`` query parameter.
        page_size: Number of records requested per page.
        max_pages: Maximum number of pages fetched by :meth:`main`.
        output_path: Path of the Excel file written by :meth:`save_data`.
        timeout: Per-request timeout in seconds, so that a stalled
            connection cannot block the crawl forever.
    """

    def __init__(self, fund_code='000001', page_size=20, max_pages=20,
                 output_path='天天基金网.xlsx', timeout=10):
        self.start_url = 'http://api.fund.eastmoney.com/f10/lsjz'
        self.fund_code = fund_code
        self.page_size = page_size
        self.max_pages = max_pages
        self.output_path = output_path
        self.timeout = timeout
        self.headers = {
            'Accept': '*/*',
            'Accept-Encoding': 'gzip, deflate',
            'Accept-Language': 'zh-CN,zh;q=0.9',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Host': 'api.fund.eastmoney.com',
            # Fixed: the value used to be the malformed 'no - cache'.
            'Pragma': 'no-cache',
            'Referer': 'http://fundf10.eastmoney.com/',
            # NOTE(review): this is a browser session cookie captured by the
            # author; it is presumably not required by the API -- confirm
            # and consider removing it.
            'Cookie': 'EMFUND1=null; EMFUND2=null; EMFUND3=null; qgqp_b_id=ddfd70f91cca153271f0738c2d1e033e; HAList=ty-116-08112-%u57FA%u77F3%u91D1%u878D; EMFUND0=null; EMFUND4=06-02%2016%3A57%3A52@%23%24%u4E1C%u5434%u53CC%u4E09%u89D2%u80A1%u7968A@%23%24005209; EMFUND5=06-02%2017%3A00%3A40@%23%24%u5609%u5B9E%u8D44%u6E90%u7CBE%u9009%u80A1%u7968C@%23%24005661; EMFUND6=06-02%2017%3A01%3A13@%23%24%u4FE1%u6FB3%u65B0%u80FD%u6E90%u4EA7%u4E1A%u80A1%u7968@%23%24001410; EMFUND7=06-02%2017%3A22%3A43@%23%24%u6C38%u8D62%u4F4E%u78B3%u73AF%u4FDD%u667A%u9009%u6DF7%u5408%u53D1%u8D77A@%23%24016386; EMFUND8=06-02%2017%3A25%3A03@%23%24%u534E%u590F%u6210%u957F%u6DF7%u5408@%23%24000001; EMFUND9=06-02 17:26:18@#$%u4E2D%u6B27%u533B%u7597%u5065%u5EB7%u6DF7%u5408A@%23%24003095; st_si=51497540182544; st_asi=delete; st_pvi=28856337848570; st_sp=2024-06-02%2016%3A57%3A53; st_inirUrl=https%3A%2F%2Fwww.baidu.com%2Flink; st_sn=2; st_psi=20240623165351758-112200305283-1605677910',
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36'
        }
        # Column-oriented storage: one list per output column
        # ('时间' = date, '单位净值' = unit net value).
        self.dic = {'时间': [], '单位净值': []}

    def request_start_url(self, page):
        """Fetch one page of records and feed it to :meth:`parse_response`.

        Args:
            page: 1-based page index sent as ``pageIndex``.

        Returns:
            The number of records parsed from this page.

        Raises:
            requests.RequestException: On connection errors, timeouts or a
                non-2xx HTTP status.
        """
        params = {
            'fundCode': self.fund_code,
            'pageIndex': str(page),
            'pageSize': str(self.page_size),
            'startDate': '',
            'endDate': '',
            # Value copied from a captured browser request; presumably a
            # cache-busting timestamp -- TODO confirm.
            '_': '1719132857884',
        }
        response = requests.get(
            self.start_url,
            headers=self.headers,
            params=params,
            timeout=self.timeout,
        )
        # Fail loudly on HTTP errors instead of trying to decode an error page.
        response.raise_for_status()
        return self.parse_response(response.json())

    def parse_response(self, response):
        """Append the date and unit net value of each record to ``self.dic``.

        Args:
            response: Decoded JSON payload; records are read from
                ``response['Data']['LSJZList']``.

        Returns:
            The number of records appended. ``0`` is returned when the
            payload has no ``Data`` / ``LSJZList`` (missing or null), rather
            than raising.
        """
        data = (response or {}).get('Data') or {}
        records = data.get('LSJZList') or []
        for record in records:
            sj = record['FSRQ']
            jz = record['DWJZ']
            self.dic['时间'].append(sj)
            self.dic['单位净值'].append(jz)
            print(sj, jz)
        return len(records)

    def save_data(self):
        """Write the collected columns to ``self.output_path`` as Excel."""
        df = pd.DataFrame(self.dic)
        df.to_excel(self.output_path, index=False)

    def main(self):
        """Crawl up to ``max_pages`` pages, then save the result.

        Crawling stops early as soon as a page yields no records, so no
        further requests are sent once the data has run out.
        """
        for page in range(1, self.max_pages + 1):
            if not self.request_start_url(page):
                break
        self.save_data()


# Script entry point: run the full crawl-and-save pipeline.
if __name__ == "__main__":
    spider = FirstSpider()
    spider.main()

实现效果:

Python爬虫案例三:获取天天基金网的数据并使用pandas保存成excel数据_第1张图片

 

 

你可能感兴趣的:(python,爬虫,pandas)