使用Python的lxml模块解析XML

xml内容如下:



    
E. F. Codd : A cloud data storage system for supporting both OLTP and OLAP. IBM Research Report, San Jose, California RJ909 August 1971
E. F. Codd <i>Entropy</i> Best Paper Award 2013. IBM Research Report, San Jose, California RJ909 August 1971 ibmTR/rj909.pdf db/labs/ibm/RJ909.html

 

解析程序如下:

from lxml import etree

def getxml_content():
    """Parse ``xml.txt`` and print the attributes and children of each record.

    For every direct child of the root element this prints all of its
    attributes, then its ``mdate`` attribute, then the tag name and text of
    each of its own children. Finally the root element itself is printed.
    Nothing is returned.
    """
    root = etree.parse("xml.txt").getroot()  # parse the file and take the root node

    for record in root:  # direct children of the root
        # Approach 1: walk the attribute mapping without knowing the names
        # in advance.
        for name, value in record.attrib.items():
            print(name, ":", value)

        # Approach 2: look up one attribute by its known name; .get() reads
        # an attribute of the tag.
        print("mdate:", record.get("mdate"))

        # .tag is the element (tag) name, .text is the content enclosed
        # between the opening and closing tags.
        for child in record:
            print(child.tag, ':', child.text)

    print(root)

第二个示例(四层嵌套结构)的xml内容如下:


def xml_parse_two(VISITSQNO,LOCALID,DOCNAME,FILENAME,XML_TWO):
    """Flatten the fourth-level elements of an XML document into row tuples.

    The document is walked root -> second level -> third level -> fourth
    level. For each fourth-level element that has at least one child, one
    tuple is produced containing, in order:

    * ``VISITSQNO``, ``LOCALID``, ``DOCNAME``, ``FILENAME`` as passed in,
    * the value of every attribute of that element whose name contains
      ``controlName`` or ``displayName`` (in attribute order),
    * the ``.text`` of the element's first child.

    Fourth-level elements without children are skipped.

    Args:
        VISITSQNO, LOCALID, DOCNAME, FILENAME: Values copied unchanged to the
            front of every row.
        XML_TWO: The XML document, either as ``str`` (encoded to UTF-8 here)
            or as bytes-like data (passed through).

    Returns:
        A list of tuples, one per qualifying fourth-level element; empty if
        there are none.
    """
    # Always hand the parser bytes: the original author notes that passing
    # text can raise an error, and lxml documents that it rejects a str
    # carrying an XML encoding declaration.
    if isinstance(XML_TWO, str):
        raw = XML_TWO.encode('utf-8')
    else:
        raw = bytes(XML_TWO)

    root = etree.XML(raw)  # etree.XML already returns the root element

    rows = []
    for second in root:
        for third in second:
            for fourth in third:
                if len(fourth) == 0:
                    # No child to take the content from, so no row is emitted.
                    continue
                row = [VISITSQNO, LOCALID, DOCNAME, FILENAME]
                row.extend(
                    fourth.get(name)
                    for name in fourth.attrib.keys()
                    if 'controlName' in name or 'displayName' in name
                )
                row.append(fourth[0].text)  # text of the first child element
                rows.append(tuple(row))
    return rows

你可能感兴趣的:(python学习)