# Requirement: scrape the Bank of China foreign-exchange quotes for selected currencies as of 00:00:05 each day.
import pandas as pd
from lxml import etree
import requests
import datetime
# HTTP headers sent with every search request (browser-like form POST).
headers = {
    "Content-Type": "application/x-www-form-urlencoded",
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_3) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/75.0.3770.90 Safari/537.36"
    ),
    "Accept": (
        "text/html,application/xhtml+xml,application/xml;q=0.9,"
        "image/webp,image/apng,*/*;q=0.8,"
        "application/signed-exchange;v=b3"
    ),
}

# Today's date (local clock) as YYYY-MM-DD, and today's 00:00:05 as a naive
# datetime. NOTE: goal_time is not referenced by the rest of the visible code.
date = datetime.datetime.now().strftime('%Y-%m-%d')
goal_time = datetime.datetime.strptime(date + ' 00:00:05', '%Y-%m-%d %H:%M:%S')

# Search endpoint and the result page number passed to spider().
url = 'http://srh.bankofchina.com/search/whpj/search.jsp'
page = '100'

# (currency name, search code) pairs; the code is what spider() sends as
# the ``pjname`` form field.
_CURRENCY_CODES = [
    ('澳大利亚元', '1325'),
    ('加拿大元', '1324'),
    ('瑞士法郎', '1317'),
    ('丹麦克朗', '1321'),
    ('欧元', '1326'),
    ('英镑', '1314'),
    ('港币', '1315'),
    ('新加坡元', '1375'),
    ('日元', '1323'),
    ('韩国元', '1331'),
    ('澳门元', '1327'),
    ('泰国铢', '1329'),
    ('挪威克朗', '1322'),
    ('新西兰元', '1330'),
    ('菲律宾比索', '1328'),
    ('美元', '1316'),
    ('瑞典克朗', '1320'),
]

# Lookup table of currencies to query: columns "币种" (name) and "code".
df = pd.DataFrame({
    "币种": [currency for currency, _ in _CURRENCY_CODES],
    "code": [currency_code for _, currency_code in _CURRENCY_CODES],
})

# Accumulator for the scraped quote rows; starts out empty.
df2 = pd.DataFrame()
def spider(date, code, page):
    """Fetch one currency's quote row from the exchange-rate search page.

    Posts a search form to the module-level ``url`` using the module-level
    ``headers`` and reads the cells of the second-to-last ``<tr>`` of the
    result table inside ``div.BOC_main.publish``.

    Args:
        date: Query date as a ``YYYY-MM-DD`` string. It is sent as both the
            ``erectDate`` and ``nothing`` form fields.
        code: Currency code sent as the ``pjname`` form field.
        page: Result page number, as a string.

    Returns:
        A one-element list holding the row
        ``[code, xhmrj, xcmrj, xhmcj, xcmcj, zhzsj, last_d]`` where every
        value is a string: spot-exchange buying rate, cash buying rate,
        spot-exchange selling rate, cash selling rate, BOC conversion rate
        and the publish time, in that order after ``code``.

    Raises:
        IndexError: If an expected table cell is absent from the response.
        requests.exceptions.RequestException: If the HTTP request fails or
            times out.
    """
    # Passing a dict lets requests form-encode the fields and guarantees each
    # one is separated by "&" (the end-date field is literally named
    # "nothing").
    form = {
        'erectDate': date,
        'nothing': date,
        'pjname': code,
        'page': page,
    }
    # Without a timeout a stalled server would block the script indefinitely.
    response = requests.post(url=url, data=form, headers=headers, timeout=30).text
    res = etree.HTML(response)

    # Every value comes from the same row; only the <td> position varies.
    cell_xpath = (
        "//body//div[@class='BOC_main publish']/table"
        "//tr[last()-1]/td[%s]//text()"
    )

    def cell(position):
        """Return the first text node of the <td> at XPath *position*."""
        texts = res.xpath(cell_xpath % position)
        if not texts:
            raise IndexError(
                'no quote cell td[%s] found for code %s on %s'
                % (position, code, date)
            )
        return str(texts[0])

    last_d = cell('last()')     # publish time
    xhmrj = cell('last()-5')    # spot-exchange buying rate
    xcmrj = cell('last()-4')    # cash buying rate
    xhmcj = cell('last()-3')    # spot-exchange selling rate
    xcmcj = cell('last()-2')    # cash selling rate
    zhzsj = cell('last()-1')    # BOC conversion rate

    sr = [[code, xhmrj, xcmrj, xhmcj, xcmcj, zhzsj, last_d]]
    return sr
# Scrape one quote row per currency code. Rows are gathered in a plain list
# and turned into a DataFrame once, because DataFrame.append is deprecated
# (removed in pandas 2.0) and re-copies the whole frame on every call.
rows = []
for code in df['code']:
    # spider() returns a list containing a single row.
    rows.extend(spider(date, code, page))

df2 = pd.DataFrame(
    rows,
    columns=['code', '现汇买入价', '现钞买入价', '现汇卖出价', '现钞卖出价', '中行折算价', '发布时间'],
)

# Attach the currency names to the scraped quotes via the shared "code" column.
res_df = pd.merge(df, df2, on='code')

# Write today's quotes to "<date>外汇牌价.xlsx" in the working directory.
name = date + '外汇牌价.xlsx'
res_df.to_excel(name)