爬虫 外汇牌价

需求:爬取中国银行(中行)每天 00:00:05 发布的部分币种外汇牌价。

import pandas as pd
from lxml import etree
import requests
import datetime

# HTTP headers sent with every request: a form-encoded body plus
# browser-like User-Agent / Accept values.
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.90 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3"
    ),
}

# Today's local date, formatted as YYYY-MM-DD for the search form.
date = datetime.date.today().strftime('%Y-%m-%d')
# Target publication time (00:00:05 on that date) as a datetime object.
goal_time = datetime.datetime.strptime(date + ' 00:00:05', '%Y-%m-%d %H:%M:%S')

# Search endpoint and the page number requested for every currency.
url = 'http://srh.bankofchina.com/search/whpj/search.jsp'
page = '100'

# (currency name, site code) pairs; kept side by side so a name can never be
# matched with the wrong code.
_CURRENCY_CODES = [
    ('澳大利亚元', '1325'),
    ('加拿大元', '1324'),
    ('瑞士法郎', '1317'),
    ('丹麦克朗', '1321'),
    ('欧元', '1326'),
    ('英镑', '1314'),
    ('港币', '1315'),
    ('新加坡元', '1375'),
    ('日元', '1323'),
    ('韩国元', '1331'),
    ('澳门元', '1327'),
    ('泰国铢', '1329'),
    ('挪威克朗', '1322'),
    ('新西兰元', '1330'),
    ('菲律宾比索', '1328'),
    ('美元', '1316'),
    ('瑞典克朗', '1320'),
]

# Lookup table of the currencies to query: display name and site code.
df = pd.DataFrame({
    "币种": [currency for currency, _ in _CURRENCY_CODES],
    "code": [site_code for _, site_code in _CURRENCY_CODES],
})

# Empty frame that will hold the scraped quotes.
df2 = pd.DataFrame()


def spider(date, code, page, timeout=10):
    """Fetch one currency's quote page and return the second-to-last table row.

    Args:
        date: Query date string; sent as both the start (``erectDate``) and
            end (``nothing``) date of the search form.
        code: Currency code sent as ``pjname``.
        page: Page number sent as ``page``.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A one-element list holding
        ``[code, xhmrj, xcmrj, xhmcj, xcmcj, zhzsj, last_d]``, i.e. the code
        followed by the text of the row's last six cells, all as ``str``.

    Raises:
        requests.RequestException: On network failure, timeout or an HTTP
            error status.
        IndexError: If the expected table row or one of its cells is missing
            from the returned page.
    """
    # Pass the form as a mapping so requests does the URL-encoding. The
    # end-date field is "nothing"; the hand-built string previously read
    # "¬hing=", which is "&nothing=" after "&not" was decoded as an HTML
    # entity, so the field was never actually sent.
    form = {'erectDate': date, 'nothing': date, 'pjname': code, 'page': page}
    response = requests.post(url=url, data=form, headers=headers, timeout=timeout)
    response.raise_for_status()
    res = etree.HTML(response.text)

    # Second-to-last <tr> of the results table inside the main content div.
    row_xpath = "//body//div[@class='BOC_main publish']/table//tr[last()-1]"
    # Cells are addressed from the right-hand end of the row, in output order:
    #   last()-5  spot exchange buying rate
    #   last()-4  cash buying rate
    #   last()-3  spot exchange selling rate
    #   last()-2  cash selling rate
    #   last()-1  Bank of China conversion rate
    #   last()    publication time
    cell_positions = ('last()-5', 'last()-4', 'last()-3', 'last()-2', 'last()-1', 'last()')

    values = []
    for position in cell_positions:
        cell_xpath = row_xpath + '/td[' + position + ']//text()'
        texts = res.xpath(cell_xpath) if res is not None else []
        if not texts:
            raise IndexError(
                'no text found for td[%s] (date=%s, code=%s, page=%s)'
                % (position, date, code, page)
            )
        values.append(str(texts[0]))

    xhmrj, xcmrj, xhmcj, xcmcj, zhzsj, last_d = values
    sr = [[code, xhmrj, xcmrj, xhmcj, xcmcj, zhzsj, last_d]]
    return sr


# Query every configured currency and collect one quote row per code.
# Rows are gathered in a plain list and turned into a DataFrame once:
# DataFrame.append was removed in pandas 2.0, and appending inside a loop
# copied the whole frame on every iteration.
quote_rows = []
for code in df['code']:
    sr = spider(date, code, page)
    quote_rows.extend(sr)

# Naming the columns at construction also works when no rows were collected.
df2 = pd.DataFrame(
    quote_rows,
    columns=['code', '现汇买入价', '现钞买入价', '现汇卖出价', '现钞卖出价', '中行折算价', '发布时间'],
)

# Attach the currency names to the scraped quotes via the shared code column.
res_df = pd.merge(df, df2, on='code')
# print(res_df)

# Write the result to "<date>外汇牌价.xlsx" in the working directory.
name = date + '外汇牌价.xlsx'
res_df.to_excel(name)

你可能感兴趣的:(爬虫 外汇牌价)