lxml.etree模式使用(一)

from lxml import etree
from copy import deepcopy


def prettyprint(element, **kwargs):
    """Print a "/" separator line followed by *element* as indented XML.

    Args:
        element: The lxml element to serialize.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``etree.tostring`` (for example ``encoding`` or ``with_tail``).
    """
    print("/")
    xml = etree.tostring(element, pretty_print=True, **kwargs)
    # tostring() returns bytes by default but text when called with
    # encoding="unicode" (or encoding=str). Only bytes need decoding, and
    # they must be decoded with the encoding they were serialized in.
    if isinstance(xml, bytes):
        xml = xml.decode(kwargs.get("encoding") or "utf-8")
    print(xml, end='')


# 1. Create the root element
root = etree.Element("root")

# 2. Add child elements: attach ready-made elements in bulk, or let
#    SubElement create and attach a child in a single call
root.extend([etree.Element("child1"), etree.Element("child2")])
child3 = etree.SubElement(root, "child3")
child4 = etree.SubElement(root, "child4")
# 3. Formatted output (disabled examples)
# print(etree.tostring(root))
# print(prettyprint(root))

# 4. An element behaves like a list of its children
# Iterating over the children is fine, but do not assign one child to
# another position (e.g. root[0] = root[-1]): that moves the element
# instead of copying it, so the tree itself gets modified.
# Copy an element: attach an independent deep copy of child4 under child3
child3.append(deepcopy(child4))

for item in root:
    print(item.tag)

# The children as a plain list
print([*root])
# Position of a child within its parent
print(f"root.index(root[3]) = {root.index(root[3]):d}")
# Insert a new child at the front
root.insert(0, etree.Element("child0"))

# Slicing returns a list of child elements
print("root[:1] is \t", root[:1])
print(root[:1][0].tag)
print("root[-1:] is \t", root[-1:])
print(root[-1:][0].tag)

# 5. Check whether the element has any children (len() counts them)
print("there are some sub-elements" if len(root) else "nothing")

# 6. etree.iselement(element) tells whether an object is an Element
if etree.iselement(root):
    print("root is element")

# 7. getparent() returns the parent of an element
etree.SubElement(root[0], "child01")

if root[0][0].getparent() is root[0]:
    print(f"{root[0][0].tag}'s parent is {root[0][0].getparent().tag}")

# 8. element.getprevious() / element.getnext() return the neighbouring siblings

if root[1].getprevious() is root[0]:
    print("root[0] is root[1].getprevious()")

if root[1].getnext() is root[2]:
    print("root[2] is root[1].getnext()")


# 9. Element attributes behave like a dictionary

# Set attributes: as a keyword argument at creation time, or later via set()
child5 = etree.SubElement(root, "a", href="https://www.baidu.com")
child5.set("class", "ccdd")
# Read attributes one per line; get() yields None for the missing "id"
print(child5.get("class"), child5.get("href"), child5.get("id"), sep="\n")
# Dictionary-style views of the attributes
print(child5.items())
print(child5.keys())
print(child5.values())
# element.attrib is the dict-like attribute mapping itself
print(child5.attrib)


# 10. Element content: element.text / element.tail

html = etree.Element("html")
header = etree.SubElement(html, "header")
header.text = "this is title"
body = etree.SubElement(html, "body")
a = etree.SubElement(body, "a", href="https://csdn.net")
a.text = "CSDN"
br = etree.SubElement(a, "br")
br.tail = "end"
# Compare the outputs: serialized with the tail text, without it
# (with_tail=False), and the whole document using the "text" method
print(etree.tostring(br))
print(etree.tostring(br, with_tail=False))
print(etree.tostring(html, method="text"))

# 11. element.xpath("") // etree.XPath("")
# Run the query once, then show the result, its type, its size and the
# tag of the first match
matches = root.xpath("//*[@class='ccdd']")
print(matches)
print(type(matches))
print(len(matches))
print(matches[0].tag)

# 12. Iteration

# iter() with no arguments: print the tag and text of every element it
# yields for the tree under root (text is None when nothing was assigned)
for element in root.iter():
    print(f"{element.tag}--{element.text}")

# Separator between the two listings
print("/")

# iter() with tag names: restrict the iteration to "child1"/"child2" elements
for element in root.iter("child1","child2"):
    print(f"{element.tag}--{element.text}")



# Finally print the whole tree, pretty-printed
prettyprint(root)

你可能感兴趣的:(python爬虫,python,前端,javascript)