"""
Author: 余婷
Time: 2022/8/18 09:22
Good Good Study, Day Day Up!
"""
from lxml import etree
"""
1)树:整个html(xml)代码结构就是一个树结构
2)节点:树结构中的每一个元素(标签)就是一个节点
3)根节点(根元素): html或者xml最外面的那个标签(元素)
4)节点内容:标签内容
5)节点属性:标签属性
"""
"""
xml是通过标签(元素)的标签内容和标签属性来保存数据的。
示例:保存一个超市信息
1)json数据
{
"name": "永辉超市",
"address": "肖家河大厦",
"staffs":[
{"name":"小明", "id": "s001", "position": "收营员", "salary": 4000},
{"name":"小花", "id": "s002", "position": "促销员", "salary": 3500},
{"name":"张三", "id": "s003", "position": "保洁", "salary": 3000},
{"name":"李四", "id": "s004", "position": "收营员", "salary": 4000},
{"name":"王五", "id": "s005", "position": "售货员", "salary": 3800}
],
"goodsList":[
{"name": "泡面", "price": 3.5, "count": 120, "discount":0.9},
{"name": "火腿肠", "price": 1.5, "count": 332, "discount":1},
{"name": "矿泉水", "price": 2, "count": 549, "discount":1},
{"name": "面包", "price": 5.5, "count": 29, "discount":0.85}
]
}
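2)xml数据:
<supermarket name="永辉超市" address="肖家河大厦">
    <staffs>
        <staff id="s001">
            <name>小明</name>
            <position>收营员</position>
            <salary>4000</salary>
        </staff>
        <staff id="s002">
            <name>小花</name>
            <position>促销员</position>
            <salary>3500</salary>
        </staff>
        <staff id="s003">
            <name>张三</name>
            <position>保洁</position>
            <salary>3000</salary>
        </staff>
        <staff id="s004">
            <name>李四</name>
            <position>收营员</position>
            <salary>4000</salary>
        </staff>
        <staff id="s005">
            <name>王五</name>
            <position>售货员</position>
            <salary>3800</salary>
        </staff>
    </staffs>
    <goodsList>
        <goods discount="0.9">
            <name>泡面</name>
            <price>3.5</price>
            <count>120</count>
        </goods>
        <goods discount="1">
            <name>火腿肠</name>
            <price>1.5</price>
            <count>332</count>
        </goods>
        <goods discount="1">
            <name>矿泉水</name>
            <price>2</price>
            <count>549</count>
        </goods>
        <goods discount="0.85">
            <name>面包</name>
            <price>5.5</price>
            <count>29</count>
        </goods>
    </goodsList>
</supermarket>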
"""
# Read the sample document and parse it into the root element used by every
# XPath query below. The `with` block closes the file handle even when
# read() or the parser raises, which the manual open/close pair did not.
# NOTE(review): lxml rejects str input that carries an XML encoding
# declaration (<?xml ... encoding="..."?>); if data.xml ever gains one,
# open the file in 'rb' mode and pass bytes instead — confirm against the file.
with open('files/data.xml', encoding='utf-8') as f:
    root = etree.XML(f.read())
"""
路径的写法:
1. 绝对路径: 用"/"开头的路径 - /标签在树结构中的路径 (路径必须从根节点开始写)
2. 相对路径: 路径开头用"."表示当前节点(xpath前面是谁,'.'就代表谁), ".."表示当前节点的上层节点
3. 全路径: 用"//"开头的路径 - 在整个树中获取标签
注意:绝对路径和全路径的写法以及查找方式,与用哪个节点调用xpath无关
"""
# Path-style demos: every query is evaluated in order and its result printed.

# Queries issued from the root element.
for query in (
    '/supermarket/staffs/staff/name/text()',  # absolute path, written from the root tag down
    './staffs/staff/name/text()',             # relative path, '.' is `root` here
):
    result = root.xpath(query)
    print(result)

# First <staff> element; used as the context node for the relative queries below.
staff1 = root.xpath('./staffs/staff')[0]

# (context node, expression) pairs.
for node, query in (
    (staff1, './name/text()'),         # '.' is staff1
    (staff1, '../staff/name/text()'),  # '..' is staff1's parent
    (root, '//name/text()'),           # '//' searches the whole tree
    (staff1, '//goods/name/text()'),   # '//' from a non-root context node
    (root, '//position/text()'),
    (root, '/supermarket/@name'),      # '@name' selects an attribute value
    (root, '//staff/@id'),
):
    result = node.xpath(query)
    print(result)
"""
[N] - 第N个
[last()] - 最后一个
[last()-N] - 倒数第N+1个, 例如: [last()-1] - 倒数第2个、[last()-2] - 倒数第3个
[position()>N]、[position()=N]、[position()<=N]
"""
# Positional predicates: pick <staff> elements by their position, then print
# the text of their <name> child. XPath positions are 1-based.
for query in (
    '//staff[1]/name/text()',             # first
    '//staff[last()]/name/text()',        # last
    '//staff[last()-1]/name/text()',      # second to last
    '//staff[position()<3]/name/text()',  # positions 1 and 2
):
    result = root.xpath(query)
    print(result)
"""
[@属性名=属性值] - 指定属性是指定值的标签
[@属性名] - 拥有指定属性的标签
"""
# Attribute predicates: filter elements by an attribute's value or by its
# mere presence, then print the text of their <name> child.
for query in (
    '//staff[@class="c1"]/name/text()',  # class attribute equals "c1"
    '//staff[@id="s003"]/name/text()',   # id attribute equals "s003"
    '//goods[@discount]/name/text()',    # has a discount attribute, any value
):
    result = root.xpath(query)
    print(result)
"""
[子标签名>数据]
[子标签名<数据]
[子标签名>=数据]
[子标签名<=数据]
[子标签名=数据]
"""
# Child-content predicate, wildcard and union demos; each result is printed
# in the order listed.
for query in (
    '//goods[price=2]/name/text()',     # <goods> whose <price> child equals 2
    '//staff[1]/*/text()',              # '*' matches any child element
    '//*[@class="c1"]/name/text()',     # any element whose class is "c1"
    '//goods[@*]/name/text()',          # <goods> with at least one attribute
    '/supermarket/@*',                  # all attribute values of <supermarket>
    '//goods/name/text()|//staff/position/text()',  # '|' unions two paths
):
    result = root.xpath(query)
    print(result)