python中lxml的应用

首先下载lxml, http://www.lfd.uci.edu/~gohlke/pythonlibs/ , 然后添加引用
from lxml import _elementpath as DONTUSE
from lxml import etree


具体示例:
1.添加命名空间
# Map the "xsi" prefix to the XML Schema instance namespace on the root element.
nsmap = dict(xsi="http://www.w3.org/2001/XMLSchema-instance")
g_statisticsRoot = etree.Element("DcmStatistics", nsmap=nsmap)

2.添加xml schema引用
# Add the XSD reference. The attribute name is given in the
# "{namespace-uri}local-name" form.
g_statisticsRoot.set(
    "{http://www.w3.org/2001/XMLSchema-instance}noNamespaceSchemaLocation",
    "DcmStatistics.xsd",
)

3.添加注释
# Add a comment; addprevious() places it in front of the root node.
comment = etree.Comment("create by jiangong.li")
g_statisticsRoot.addprevious(comment)

4.尝试多种编码来解析xml
def decodingXml(xmlFile, encodings=("utf-8", "gb18030")):
    """Parse ``xmlFile``, trying each candidate encoding in turn.

    Args:
        xmlFile: Source handed straight to ``etree.parse``.
        encodings: Encodings to try, in order. Defaults to UTF-8 followed
            by GB18030, matching the original behaviour.

    Returns:
        The tree produced by the first attempt that succeeds, or ``None``
        (after printing an error message) when every attempt fails.
    """
    for encoding in encodings:
        try:
            parser = etree.XMLParser(
                remove_blank_text=True,
                encoding=encoding,
                remove_comments=False,
            )
            return etree.parse(xmlFile, parser)
        except Exception:
            # Deliberately best-effort: any failure with this encoding simply
            # moves on to the next candidate.
            continue

    print("\nPAR XML ERROR, decoding error.")
    return None

5.遍历xml下的所有节点(包括起始节点自身及其各级子孙节点,不止直属第一级子节点),使用 iter()
     for element in root.iter():
          element . tail = None

6.遍历xml下的第一级子节点. iterchildren()
     for e in srcParentNode.iterchildren():
        if e is srcParentNode:
            continue

        name = ""
        #statistics node
        if   e.tag == "element" :
            name = "Element"
        elif e.tag == "sequence" :
            name = "Sequence"
        elif e.tag == "item" :
            name = "Item"
        else :
            print( "\nUnsupported element type: %s\n" %(e.tag))
            name = e.tag
            # Only parse element/sequence/item
             continue      

7.添加子节点到尾部. append()
def getXmlElement(nodeName, parentNode):
    """Return the first ``nodeName`` child of ``parentNode``, creating it if absent.

    Args:
        nodeName: Name appended to ``'./'`` to form the XPath lookup.
        parentNode: Node that is searched and, if needed, appended to.

    Returns:
        The first node matched by the XPath lookup, or a newly created
        element that has been appended to ``parentNode``.

    Raises:
        Exception: If ``parentNode`` is ``None``.
    """
    # Identity check: "== None" would defer to any custom __eq__ on the node.
    if parentNode is None:
        raise Exception("parent node is None")

    nodes = parentNode.xpath('./' + nodeName)
    if nodes:
        return nodes[0]

    # Nothing matched: create the child and attach it at the end.
    node = etree.Element(nodeName)
    parentNode.append(node)
    return node

8.格式化成str输出
# Serialize with UTF-8 encoding, an XML declaration, pretty-printing and comments kept.
etree.tostring(
    g_statisticsRoot,
    encoding="UTF-8",
    xml_declaration=True,
    pretty_print=True,
    with_comments=True,
)

9.保存成xml文件
        statisticsResult =  open(g_xmlName, "bw+")
        statisticsResult.write(etree.tostring(g_statisticsRoot, encoding= "UTF-8" , xml_declaration= True , pretty_print= True , with_comments= True ))
        statisticsResult.flush()
        statisticsResult.close()



你可能感兴趣的:(xml,xsd,python,lxml,自我心的)