XPath
XPath常用规则
# Sample HTML shared by the snippets below: a <div> wrapping a <ul> of five
# <li><a> items. The last <li> is left unclosed on purpose; the HTML parser
# repairs it, and the newline + space before </ul> becomes the only text node
# that is a direct child of an <li>.
# NOTE(review): the markup was missing from this file and has been rebuilt
# from the outputs recorded below. The class/href values of the middle three
# items are placeholders -- confirm against the original sample.
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
 </ul>
</div>
'''
# Example: select every element in the document.
from lxml import etree
# Parse the HTML string into an element tree rooted at <html>.
selector = etree.HTML(text)
# '//' searches the whole document and '*' matches any element name.
result = selector.xpath('//*')
print(result)
输出
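[<Element html at 0x...>, <Element body at 0x...>, <Element div at 0x...>, <Element ul at 0x...>, <Element li at 0x...>, <Element a at 0x...>, <Element li at 0x...>, <Element a at 0x...>, <Element li at 0x...>, <Element a at 0x...>, <Element li at 0x...>, <Element a at 0x...>, <Element li at 0x...>, <Element a at 0x...>]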
# Example: select direct children.
from lxml import etree
selector = etree.HTML(text)
# '/' steps to direct children only: every <a> whose parent is an <li>.
result = selector.xpath('//li/a')
print(result)
输出
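[<Element a at 0x...>, <Element a at 0x...>, <Element a at 0x...>, <Element a at 0x...>, <Element a at 0x...>]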
# Example: select the parent node.
from lxml import etree
selector = etree.HTML(text)
# '..' steps from each matched <li> up to its parent; XPath node-sets hold
# each node once, so a parent shared by several <li> is returned a single time.
result = selector.xpath('//li/..')
print(result)
输出
[<Element ul at 0x...>]
# Example: filter by attribute value.
from lxml import etree
selector = etree.HTML(text)
# The [@class="item-0"] predicate keeps only <li> elements whose class
# attribute is exactly "item-0".
result = selector.xpath('//li[@class="item-0"]')
print(result)
输出
[<Element li at 0x...>, <Element li at 0x...>]
# Example: extract text with text().
from lxml import etree
selector = etree.HTML(text)
# text() here only sees text nodes that are direct children of the <li>;
# text nested inside the child <a> is not included.
result1 = selector.xpath('//li[@class="item-0"]/text()')
# Step into the child <a> first to reach the link text itself.
result2 = selector.xpath('//li[@class="item-0"]/a/text()')
print(result1)
print(result2)
输出
['\n ']
['first item', 'fifth item']
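注：//li[@class="item-0"]/text() 只得到 ['\n ']，因为 "/" 只选取直接子节点。li 的直接子节点是 a 元素，链接文字位于 a 节点内部，不属于 li 的直接文本；得到的 '\n ' 只是 li 内部、a 之外的空白文本。要获取链接文字，应先选中 a 节点再取文本（//li[@class="item-0"]/a/text()），或者用 "//" 选取所有子孙节点的文本（//li[@class="item-0"]//text()）。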
# Example: read an attribute value.
from lxml import etree
selector = etree.HTML(text)
# A trailing '@href' step returns the attribute values as strings rather
# than element objects.
result = selector.xpath('//li[@class="item-0"]/a/@href')
print(result)
输出
['link1.html', 'link5.html']
# Example: matching an attribute that holds several space-separated values.
from lxml import etree
# NOTE(review): the <li> markup was missing from this file and has been
# rebuilt from the queries and recorded output; the exact class/href values
# are placeholders -- confirm against the original sample.
text1 = '''
<li class="li li-first"><a href="link.html">first item</a></li>
'''
selector = etree.HTML(text1)
# '=' compares against the whole attribute string, so class="li li-first"
# does not equal "li" and nothing is selected.
result1 = selector.xpath('//li[@class="li"]/a/text()')
# contains() is a substring test on the attribute value, so it matches.
result2 = selector.xpath('//li[contains(@class,"li")]/a/text()')
print(result1)
print(result2)
输出
[]
['first item']
# Example: combining several attribute conditions in one predicate.
from lxml import etree
# NOTE(review): the <li> markup was missing from this file and has been
# rebuilt from the query and recorded output; the exact class/href values
# are placeholders -- confirm against the original sample.
text2 = '''
<li class="li li-first" name="item"><a href="link.html">first item</a></li>
'''
selector = etree.HTML(text2)
# 'and' requires both conditions: class contains "li" AND name equals "item".
result = selector.xpath('//li[contains(@class,"li") and @name="item"]/a/text()')
print(result)
输出
['first item']
# Example: selecting nodes by position (XPath positions start at 1, not 0).
from lxml import etree
# NOTE(review): the markup was missing from this file and has been rebuilt
# from the outputs recorded below. The class/href values of the middle three
# items are placeholders -- confirm against the original sample.
text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
 </ul>
</div>
'''
selector = etree.HTML(text)
# [1] picks the first <li> among its siblings.
result1 = selector.xpath('//li[1]/a/text()')
print(result1)
# last() evaluates to the position of the final <li>.
result2 = selector.xpath('//li[last()]/a/text()')
print(result2)
# position()<3 keeps the <li> at positions 1 and 2.
result3 = selector.xpath('//li[position()<3]/a/text()')
print(result3)
# last()-2 is the third from the end (position 3 of 5 here).
result4 = selector.xpath('//li[last()-2]/a/text()')
print(result4)
输出
['first item']
['fifth item']
['first item', 'second item']
['third item']
# Example: XPath axes, written as axis::node-test.
from lxml import etree
# NOTE(review): the markup was missing from this file and has been rebuilt
# from the outputs recorded below (the first item wraps its text in a <span>
# so that descendant::span matches). The class/href values of the middle
# three items are placeholders -- confirm against the original sample.
text3 = '''
<div>
<ul>
<li class="item-0"><a href="link1.html"><span>first item</span></a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
 </ul>
</div>
'''
selector = etree.HTML(text3)
# ancestor::* -- every ancestor element of the first <li>.
result1 = selector.xpath('//li[1]/ancestor::*')
print(result1)
# ancestor::div -- only the ancestors named <div>.
result2 = selector.xpath('//li[1]/ancestor::div')
print(result2)
# attribute::* -- the values of all attributes on the first <li>.
result3 = selector.xpath('//li[1]/attribute::*')
print(result3)
# child::a[...] -- <a> elements, anywhere in the document, with this href.
result4 = selector.xpath('//child::a[@href="link1.html"]')
print(result4)
# descendant::span -- <span> elements at any depth below the first <li>.
result5 = selector.xpath('//li[1]/descendant::span')
print(result5)
# following::*[2] -- the second element after the first <li> ends, in
# document order.
result6 = selector.xpath('//li[1]/following::*[2]')
print(result6)
# following-sibling::* -- later siblings of the first <li> (same parent).
result7 = selector.xpath('//li[1]/following-sibling::*')
print(result7)
输出
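[<Element html at 0x...>, <Element body at 0x...>, <Element div at 0x...>, <Element ul at 0x...>]
[<Element div at 0x...>]
['item-0']
[<Element a at 0x...>]
[<Element span at 0x...>]
[<Element a at 0x...>]
[<Element li at 0x...>, <Element li at 0x...>, <Element li at 0x...>, <Element li at 0x...>]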