发现下面这种绝对路径的 XPath 写法解析不出数据(很可能是因为路径里的 tbody 是浏览器渲染时自动补上的,而 lxml 解析的原始 HTML 中未必有这个标签,需对照网页源码确认):
from lxml import etree
import requests
# Demonstration of the absolute-path XPath that returned no rows.
url = "http://xxxx"
# Bound the request so an unresponsive server cannot hang the script forever.
res = requests.post(url, timeout=10)
tree = etree.HTML(res.text)
# NOTE(review): absolute paths copied from browser dev tools often contain a
# <tbody> element that the browser inserts while rendering; the raw HTML that
# lxml parses may not contain it, in which case this query matches nothing.
# Confirm against the page source.
trs = tree.xpath('/html/body/table[2]/tbody/tr')
# The first matched row is skipped (presumably a header row -- confirm against
# the page). Slicing an empty list is safe, so no emptiness check is needed.
for tr in trs[1:]:
    first_cell = tr.xpath('./td[1]/text()')
    second_cell = tr.xpath('./td[2]/text()')
    if not first_cell or not second_cell:
        # Rows lacking text in either column would otherwise raise IndexError.
        continue
    print(first_cell[0], second_cell[0])
下面这种相对路径的写法可以解析出数据:
from lxml import etree
import requests
# Demonstration of the relative XPath that selects the rows by table class.
url = "http://xxxx"
# Bound the request so an unresponsive server cannot hang the script forever.
res = requests.post(url, timeout=10)
tree = etree.HTML(res.text)
# Match every <tr> at any depth below the table whose class is "tablelist",
# so the query does not depend on the exact chain of ancestor elements.
trs = tree.xpath('//table[@class="tablelist"]//tr')
# The first matched row is skipped (presumably a header row -- confirm against
# the page). Slicing an empty list is safe, so no emptiness check is needed.
for tr in trs[1:]:
    first_cell = tr.xpath('./td[1]/text()')
    second_cell = tr.xpath('./td[2]/text()')
    if not first_cell or not second_cell:
        # Rows lacking text in either column would otherwise raise IndexError.
        continue
    print(first_cell[0], second_cell[0])
Scrapy 自带 XPath 支持,不需要再通过 etree.HTML 解析。另外,Scrapy 的 xpath() 返回的是选择器列表(SelectorList),需要再调用 .extract() 才能得到字符串列表,这一点和 etree 有所不同。
def parse(self, response):
    """Read the first two cell texts of each data row of the ``tablelist`` table.

    Prints a dict built from ``response.meta`` and then walks the table rows,
    skipping the first matched row.

    Args:
        response: Object exposing ``meta`` (a mapping containing ``index_id``,
            ``index_name`` and ``statistic_date``) and ``xpath()``; presumably
            a Scrapy response -- confirm against the caller.

    Raises:
        KeyError: If one of the three expected keys is missing from
            ``response.meta``.
    """
    meta = response.meta
    data = {
        "index_id": meta['index_id'],
        "index_name": meta['index_name'],
        "statistic_date": meta['statistic_date'],
    }
    print(data)
    trs = response.xpath('//table[@class="tablelist"]//tr')
    # Skip the first matched row (presumably the header row -- confirm).
    for tr in trs[1:]:
        # xpath() results must be extracted to obtain a plain list of strings.
        first_cell = tr.xpath('./td[1]/text()').extract()
        second_cell = tr.xpath('./td[2]/text()').extract()
        if not first_cell or not second_cell:
            # Rows lacking text in either column would otherwise raise IndexError.
            continue
        time_horizon = first_cell[0]
        yield_return = second_cell[0]
        # NOTE(review): time_horizon and yield_return are not used in the
        # visible code; presumably the full spider combines them with `data`
        # and yields an item here -- confirm against the complete source.
# Text of the page in question; it can be used to try out the parsing.
# NOTE(review): only the cell contents are present here -- the HTML tags
# appear to be missing, so the XPath queries cannot match this text as-is.
# Confirm against the original page source.
html_text="""
中债企业债收益率曲线(AA)(到期)
标准期限
收益率(%)
0.0y
2.515
0.08y
2.5348
0.25y
2.4493
0.5y
2.4984
0.75y
2.5554
1.0y
2.671
"""