Xpath定位元素(3)


from lxml import etree
# Sample document used by every query below.  The markup inside this literal
# was missing from the listing (only blank lines remained), so it is
# reconstructed from the recorded outputs: five <li> elements with classes
# item-0 / item-1 / item-inactive / item-1 / item-0, each wrapping an <a>
# whose texts run from "first item" to "fifth item".  The last <li> is left
# unclosed on purpose: that is what produces the trailing '\n     ' text node
# in the recorded //li//text() output.
# NOTE(review): link2.html and link3.html never appear in the recorded
# outputs and are inferred from the link1/link4/link5 pattern - confirm.
text = '''
<div>
    <ul>
         <li class="item-0"><a href="link1.html">first item</a></li>
         <li class="item-1"><a href="link2.html">second item</a></li>
         <li class="item-inactive"><a href="link3.html">third item</a></li>
         <li class="item-1"><a href="link4.html">fourth item</a></li>
         <li class="item-0"><a href="link5.html">fifth item</a>
     </ul>
 </div>
'''
# Parse with lxml's HTML parser; etree.fromstring() is the other parsing
# entry point (used further down on the serialised tree).
html = etree.HTML(text)

# Exercise etree.tostring(): serialise the parsed tree, parse the serialised
# form again, then query the re-parsed copy.

# Named `serialized` rather than `str` so the builtin `str` type is not
# shadowed for the rest of the script.
serialized = etree.tostring(html)
str_to_xml = etree.fromstring(serialized)
str_to_xml.xpath('//li/a/text()')
['first item', 'second item', 'third item', 'fourth item', 'fifth item']

# Parsing quirk to remember: XPath positions start at 1, not 0, so li[1]
# addresses the first <li> under its parent.
first_li_text_query = '//li[1]/a/text()'
str_to_xml.xpath(first_li_text_query)
['first item']

# ****************** Parsing ****************

# 1. Select every <li> element, then show the match list, the number of
#    matches, the container type and the type of a single match.
result = html.xpath('//li')
print(result, len(result), type(result), sep='\n')
first_li = result[0]
print(type(first_li))
[<Element li at 0x1adf676d340>, <Element li at 0x1adf676d640>, <Element li at 0x1adf676d7c0>, <Element li at 0x1adf676dd00>, <Element li at 0x1adf676d0c0>]
5
<class 'list'>
<class 'lxml.etree._Element'>

# 2. Collect the value of the class attribute of every <li>.
# (The statement must start at column 0; a stray leading space here is an
# IndentationError at module level.)
html.xpath('//li/@class')
['item-0', 'item-1', 'item-inactive', 'item-1', 'item-0']

# Gather every text node located anywhere beneath an <li>; the "//" after
# li descends through all levels, so text inside nested tags is included.
li_descendant_text_query = '//li//text()'
result = html.xpath(li_descendant_text_query)
print(result)
['first item', 'second item', 'third item', 'fourth item', 'fifth item', '\n     ']


# Alternative: read the attribute relative to each element with ./@attr.

result = html.xpath('//li')
for li_node in result:
    # "." anchors the query at the current <li>; print its class attribute.
    class_values = li_node.xpath("./@class")
    print(class_values)
['item-0']
['item-1']
['item-inactive']
['item-1']
['item-0']

# 3. Locate an element by its href attribute, then print its text content
#    and the attribute value.
result = html.xpath('//li/a[@href="link1.html"]')
matched_link = result[0]  # work with the first match
print(matched_link.text)         # text content
print(matched_link.get("href"))  # attribute value

# 4. Text of every <a> element.
anchor_text_query = '//a/text()'
result = html.xpath(anchor_text_query)
print(result)

# ************** Different routes, same destination ****************

# Three spellings that are meant to arrive at the same value: the text of
# the first <a> found under an <li>.
child_links = html.xpath('//li/a')          # <a> as a direct child of <li>
child_links[0].text
descendant_links = html.xpath('//li//a')    # <a> at any depth below <li>
descendant_links[0].text
child_link_texts = html.xpath('//li/a/text()')  # select the text node itself
child_link_texts[0]

# 5. Look elements up by their class attribute.
# "*" matches an element node of any tag name.
inactive_any_tag_query = '//*[@class="item-inactive"]//text()'
result = html.xpath(inactive_any_tag_query)
print(result)
['third item']

# Same class lookup, this time restricted to <li> elements.
inactive_li_query = '//li[@class="item-inactive"]//text()'
result = html.xpath(inactive_li_query)
print(result)
['third item']


# 6. href attribute of the <a> under the last <li>; the last() predicate
#    picks the final <li>.
last_li_href_query = '//li[last()]/a/@href'
result = html.xpath(last_li_href_query)
print(result)
['link5.html']

# 7. href attribute of the <a> under the second-to-last <li>
#    (last() minus one).
penultimate_li_href_query = '//li[last()-1]/a/@href'
result = html.xpath(penultimate_li_href_query)
print(result)

# 8. position() compares against a 1-based index; this selects the href of
#    every <li> from the fourth one onwards.
from_fourth_li_query = '//li[position()>=4]/a/@href'
html.xpath(from_fourth_li_query)
['link4.html', 'link5.html']















































你可能感兴趣的:(python,前端,javascript)